[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"$fV3JKqLooxCENz-iVmQyqsF1LljscUiMbfMKcJ7mZ4gY":3},{"answer":4,"createTime":5,"id":6,"options":7,"origin":10,"question":14,"related":15,"source":19,"type":20},[],"2025-09-15 22:05:13",1069717796,[8,9],"对","错",{"courseId":11,"workId":12,"workName":13},"1000128447","61849690","第三章单元测试","强化学习中的agent通过不断与环境交互来获取反馈,最终获得一个最优策略.( )",[16,21,31,40,50],{"answer":17,"createTime":5,"id":6,"options":18,"question":14,"source":19,"type":20},[],[8,9],"v2",3,{"answer":22,"createTime":5,"id":23,"options":24,"question":29,"source":19,"type":30},[],1069717853,[25,26,27,28],"通过环境反馈学习","需要大量的标注数据","强化学习 agent 通过与环境的互动获得奖励","主要依赖训练集的标签数据","以下哪些是强化学习的特征?( )",1,{"answer":32,"createTime":5,"id":33,"options":34,"question":39,"source":19,"type":30},[],1069717860,[35,36,37,38],"线性回归","K-means聚类","支持向量机(SVM)","决策树","以下哪些算法属于监督学习?( )",{"answer":41,"createTime":5,"id":42,"options":43,"question":48,"source":19,"type":49},[],1069717866,[44,45,46,47],"未标注数据","标注数据","模型权重","随机数据","在监督学习中,训练数据通常包含以下哪一项?( )",0,{"answer":51,"createTime":5,"id":52,"options":53,"question":56,"source":19,"type":49},[],1069718076,[54,55,36,38],"K-近邻算法","支持向量机","以下哪种算法属于非监督学习?( )"]