[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"$fDFGCivQMbqr7IQ1LaB4K5v_dgnqOCCCLMIVnzT_iAiE":3},{"answer":4,"createTime":5,"id":6,"options":7,"origin":12,"question":19,"related":20,"source":30,"type":31},[],"2023-05-10 18:21:13",4262776,[8,9,10,11],"探索","开发","输入","输出",{"count":13,"courseId":14,"courseImg":15,"courseName":16,"workId":17,"workName":18},12,"306d47969b59e6f36470c235ae597dac","https:\u002F\u002Ftihai-oss-cloud.itihey.com\u002Fimg\u002F151b1e04e0edd24518d87450e76e122a.jpg","人工智能与信息社会","1f42f489a0a84d0a9134eb008362491b","第六章","强化学习中,()主要探索未知的动作会产生的效果,有利于更新Q值,获得更好的策略",[21,32,37,46,55,64,69,78,81,90],{"answer":22,"createTime":5,"id":23,"options":24,"question":29,"source":30,"type":31},[],4262769,[25,26,27,28],"监督学习","非监督学习","强化学习","线性回归","典型的&quot;鸡尾酒会&quot;问题中,提取出不同人说话的声音是属于()","v1",0,{"answer":33,"createTime":5,"id":34,"options":35,"question":36,"source":30,"type":31},[],4262770,[25,26,27,28],"()有跟环境进行交互,从反馈当中进行不断的学习的过程",{"answer":38,"createTime":5,"id":39,"options":40,"question":45,"source":30,"type":31},[],4262771,[41,42,43,44],"状态动作函数","状态值函数","动作值函数","策略函数","在Q-Learning中,所谓的Q函数是指()",{"answer":47,"createTime":5,"id":48,"options":49,"question":54,"source":30,"type":31},[],4262772,[50,51,52,53],"期望值","最大值","最小值","总和","Q函数Q(s,a)是指在一个给定状态s下,采取某一个动作a之后,后续的各个状态所能得到的回报的()",{"answer":56,"createTime":5,"id":57,"options":58,"question":63,"source":30,"type":31},[],4262773,[59,60,61,62],"大;小","大;大","小;小","小;大","在强化学习的过程中,学习率&alpha;越大,表示采用新的尝试得到的结果比例越(),保持旧的结果的比例越()",{"answer":65,"createTime":5,"id":66,"options":67,"question":68,"source":30,"type":31},[],4262774,[59,60,61,62],"在&epsilon;-greedy策略当中,&epsilon;的值越大,表示采用随机的一个动作的概率越(),采用当前Q函数值最大的动作的概率越()",{"answer":70,"createTime":5,"id":71,"options":72,"question":77,"source":30,"type":31},[],4262775,[73,74,75,76],"探索;开发","开发;探索","探索;输出","开发;输出","在强化学习过程中,()表示随机地采取某个动作,以便于尝试各种结果;()表示采取当前认为最优的动作,以便于进一步优化评估当前认为最优的动作的值",{"answer":79,"createTime":5,"id":6,"options":80,"question":19,"source":30,"type":31},[],[8,9,10,11],{"answer":82,"createTime":5,"id":83,"options":84,"question":89,"source":30,"type":31},[],4262777,[85,86,87,88],"外部影响","主体内因","历史状态","当前状态","马尔可夫性质强调在每一个动作状态序列中,下一个状态与()有关",{"answer":91,"createTime":5,"id":92,"options":93,"question":98,"source":30,"type":31},[],4262778,[94,95,96,97],"客观性","主体性","超前性","滞后性","强化学习的回报值一个重要特点是具有()"]