[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"$f7YL0OeZZ9dII62d1bG6-6wRMv2uHHd1-YHlg2vcVTSk":3},{"answer":4,"createTime":5,"id":6,"options":7,"origin":12,"question":19,"related":20,"source":24,"type":25},[],"2024-09-29 23:37:44",160552670,[8,9,10,11],"策略","环境模型","即时奖励","价值函数",{"count":13,"courseId":14,"courseImg":15,"courseName":16,"workId":17,"workName":18},2,"70cd3c4e5c77be90d7c263c1365140f1","https:\u002F\u002Ftihai-oss-cloud.itihey.com\u002Fimg\u002F07f2a8a44b36416b98f72a4beebdfd60.png","计算与人工智能概论","c2d15957cc484ca4b41bb501b6142565","强化学习——概念","强化学习的基本要素中,()是环境因智能体的动作为其产生的反馈",[21,26],{"answer":22,"createTime":5,"id":6,"options":23,"question":19,"source":24,"type":25},[],[8,9,10,11],"v1",0,{"answer":27,"createTime":5,"id":28,"options":29,"question":30,"source":24,"type":25},[],160552671,[8,9,10,11],"从(状态,动作)到状态的映射,决定不同状态之间的转移概率"]