[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"$fujIPbkW5MjlVzxpC8tp3Z-yfwwQFbZRLJ5JQ-c7-A-Q":3},{"answer":4,"createTime":5,"id":6,"options":7,"origin":12,"question":16,"related":17,"source":25,"type":30},[],"2025-04-28 17:37:13",1069533190,[8,9,10,11],"通过环境反馈学习","需要大量的标注数据","强化学习 agent 通过与环境的互动获得奖励","主要依赖训练集的标签数据",{"courseId":13,"workId":14,"workName":15},"1000128447","61792230","第三章单元测试","以下哪些是强化学习的特征?( )",[18,27,31,40,50],{"answer":19,"createTime":5,"id":20,"options":21,"question":24,"source":25,"type":26},[],1069533188,[22,23],"对","错","强化学习中的agent通过不断与环境交互来获取反馈,最终获得一个最优策略.( )","v2",3,{"answer":28,"createTime":5,"id":6,"options":29,"question":16,"source":25,"type":30},[],[8,9,10,11],1,{"answer":32,"createTime":5,"id":33,"options":34,"question":39,"source":25,"type":30},[],1069533192,[35,36,37,38],"线性回归","K-means聚类","支持向量机(SVM)","决策树","以下哪些算法属于监督学习?( )",{"answer":41,"createTime":5,"id":42,"options":43,"question":48,"source":25,"type":49},[],1069533194,[44,45,46,47],"未标注数据","标注数据","模型权重","随机数据","在监督学习中,训练数据通常包含以下哪一项?( )",0,{"answer":51,"createTime":5,"id":52,"options":53,"question":56,"source":25,"type":49},[],1069533208,[54,55,36,38],"K-近邻算法","支持向量机","以下哪种算法属于非监督学习?( )"]