[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"$fqBc9Ln5J7zctEchovKPaopZWW2Wd8AcRjev3txt-yJ0":3},{"answer":4,"createTime":5,"id":6,"options":7,"origin":8,"question":15,"related":16,"source":27,"type":68},[],"2025-12-05 10:06:47",251333412,[],{"count":9,"courseId":10,"courseImg":11,"courseName":12,"workId":13,"workName":14},11,"53e1d2ef4961cca8eea3e23969ad2cb9","https:\u002F\u002Ftihai-oss-cloud.itihey.com\u002Fimg\u002F03a579384a6dc297c89809b582fcc767.png","默认课程","aa3456c90ccd4bb8a715503a5ad72ed9","5.4.1","马尔可夫决策过程(MDP)由状态集合 S、动作集合 A、状态转移概率 P、奖励函数 R 和____&gamma;组成",[17,29,38,47,56,65,69,77,82,87],{"answer":18,"createTime":19,"id":20,"options":21,"question":26,"source":27,"type":28},[],"2025-12-05 10:03:20",251328302,[22,23,24,25],"新状态和奖励","仅新状态","仅奖励","随机信息","强化学习中,智能体与环境交互时,环境根据智能体采取的动作返回( )","v1",0,{"answer":30,"createTime":19,"id":31,"options":32,"question":37,"source":27,"type":28},[],251328303,[33,34,35,36],"基于策略的强化学习算法","基于价值的强化学习算法","无模型的强化学习算法","模型基的强化学习算法","Q - learning 算法属于( )",{"answer":39,"createTime":19,"id":40,"options":41,"question":46,"source":27,"type":28},[],251328304,[42,43,44,45],"经验回放","目标网络","&epsilon; - greedy 策略","策略梯度","以下用于解决强化学习中探索与利用平衡问题的方法是( )",{"answer":48,"createTime":19,"id":49,"options":50,"question":55,"source":27,"type":28},[],251328305,[51,52,53,54],"减少计算量","提高样本利用率,打破样本间的相关性","加速模型收敛","降低模型复杂度","深度 Q 网络(DQN)中,引入经验回放机制的主要目的是( )",{"answer":57,"createTime":19,"id":58,"options":59,"question":64,"source":27,"type":28},[],251328307,[60,61,62,63],"调整学习率","控制探索与利用的平衡","衡量未来奖励的重要程度","决定策略更新的频率","在强化学习中,折扣因子 &gamma;(0 &le; &gamma; &le; 1)的作用是( )",{"answer":66,"createTime":5,"id":6,"options":67,"question":15,"source":27,"type":68},[],[],2,{"answer":70,"createTime":5,"id":71,"options":72,"question":75,"source":27,"type":76},[],251333413,[73,74],"正确","错误","强化学习中,智能体不需要知道环境的具体模型也能学习到最优策略",3,{"answer":78,"createTime":5,"id":79,"options":80,"question":81,"source":27,"type":76},[],251333414,[73,74],"策略梯度算法只能应用于离散动作空间的强化学习问题",{"answer":83,"createTime":5,"id":84,"options":85,"question":86,"source":27,"type":76},[],251333415,[73,74],"在强化学习中,奖励函数的设计对学习效果影响不大",{"answer":88,"createTime":5,"id":89,"options":90,"question":91,"source":27,"type":76},[],251333416,[73,74],"深度强化学习结合了深度学习强大的特征提取能力和强化学习的决策能力"]