[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"$fNKNgw-806kyp92Pt6zvHxGUAT095YlM6b-aFz14Nxq8":3},{"id":4,"source":5,"question":6,"options":7,"answer":12,"related":13,"type":47,"origin":104,"createTime":37},108640839,"v1","在Q-Learning中,所谓的Q函数是指()",[8,9,10,11],"状态动作函数","状态值函数","动作值函数","策略函数",[],[14,27,38,48,54,58,68,78,84,94],{"id":15,"source":5,"question":16,"options":17,"answer":22,"related":23,"type":24,"origin":25,"createTime":26},108640830,"用于监督分类的算法有()",[18,19,20,21],"支持向量机","决策树","神经网络","线性回归",[],[],1,null,"2023-11-26T09:08:34+08:00",{"id":28,"source":5,"question":29,"options":30,"answer":35,"related":36,"type":24,"origin":25,"createTime":37},108640834,"在强化学习中,主体和环境之间交互的要素有()",[31,32,33,34],"状态","动作","回报","强化",[],[],"2023-11-26T09:08:35+08:00",{"id":39,"source":5,"question":40,"options":41,"answer":45,"related":46,"type":47,"origin":25,"createTime":37},108640835,"典型的\"鸡尾酒会\"问题中,提取出不同人说话的声音是属于()",[42,43,44,21],"监督学习","非监督学习","强化学习",[],[],0,{"id":49,"source":5,"question":50,"options":51,"answer":52,"related":53,"type":47,"origin":25,"createTime":37},108640838,"()有跟环境进行交互,从反馈当中进行不断的学习的过程",[42,43,44,21],[],[],{"id":4,"source":5,"question":6,"options":55,"answer":56,"related":57,"type":47,"origin":25,"createTime":37},[8,9,10,11],[],[],{"id":59,"source":5,"question":60,"options":61,"answer":66,"related":67,"type":47,"origin":25,"createTime":37},108640840,"Q函数Q(s,a)是指在一个给定状态s下,采取某一个动作a之后,后续的各个状态所能得到的回报的()",[62,63,64,65],"期望值","最大值","最小值","总和",[],[],{"id":69,"source":5,"question":70,"options":71,"answer":76,"related":77,"type":47,"origin":25,"createTime":37},108640844,"在强化学习的过程中,学习率α越大,表示采用新的尝试得到的结果比例越(),保持旧的结果的比例越()",[72,73,74,75],"大;小","大;大","小;小","小;大",[],[],{"id":79,"source":5,"question":80,"options":81,"answer":82,"related":83,"type":47,"origin":25,"createTime":37},108640848,"在ε-greedy策略当中,ε的值越大,表示采用随机的一个动作的概率越(),采用当前Q函数值最大的动作的概率越()",[72,73,74,75],[],[],{"id":85,"source":5,"question":86,"options":87,"answer":92,"related":93,"type":47,"origin":25,"createTime":37},108640850,"在强化学习过程中,()表示随机地采取某个动作,以便于尝试各种结果;()表示采取当前认为最优的动作,以便于进一步优化评估当前认为最优的动作的值",[88,89,90,91],"探索;开发","开发;探索","探索;输出","开发;输出",[],[],{"id":95,"source":5,"question":96,"options":97,"answer":102,"related":103,"type":47,"origin":25,"createTime":37},108640852,"强化学习中,()主要探索未知的动作会产生的效果,有利于更新Q值,获得更好的策略",[98,99,100,101],"探索","开发","输入","输出",[],[],{"courseName":105,"courseImg":106,"workName":107,"workId":108,"count":109,"courseId":110},"人工智能与信息社会","https:\u002F\u002Ftihai-oss-cloud.itihey.com\u002Fimg\u002F151b1e04e0edd24518d87450e76e122a.jpg","第六章","19801cb3128b458f82e7baf17ff847b4",12,"306d47969b59e6f36470c235ae597dac"]