[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"$fxLVrjXURIqSZIJuEs1-JLVqylUALiTaqp4Aky9FGOR8":3},{"answer":4,"createTime":5,"id":6,"options":7,"origin":12,"question":19,"related":20,"source":30,"type":31},[],"2026-04-14 10:49:43",340255416,[8,9,10,11],"手动累加梯度并跳过optimizer.step()","每次loss.backward()后立即调用optimizer.step()","累积损失值后再调用一次backward()","多次loss.backward()后调用一次optimizer.step()",{"count":13,"courseId":14,"courseImg":15,"courseName":16,"workId":17,"workName":18},11,"53e1d2ef4961cca8eea3e23969ad2cb9","https:\u002F\u002Ftihai-oss-cloud.itihey.com\u002Fimg\u002F03a579384a6dc297c89809b582fcc767.png","默认课程","work_51694495","第3章习题","在PyTorch中实现梯度累积的正确代码逻辑是",[21,32,41,50,59,68,77,86,95,104],{"answer":22,"createTime":5,"id":23,"options":24,"question":29,"source":30,"type":31},[],340255407,[25,26,27,28],"手写数字分类(0-9)","时间序列预测","图像回归任务","二分类情感分析","输入784节点,输出10节点的网络最可能用于","v1",0,{"answer":33,"createTime":5,"id":34,"options":35,"question":40,"source":30,"type":31},[],340255408,[36,37,38,39],"DataLoader","Dataset","TensorDataset","BatchSampler","PyTorch中实现数据分批的类是",{"answer":42,"createTime":5,"id":43,"options":44,"question":49,"source":30,"type":31},[],340255409,[45,46,47,48],"784&times;512 + 512&times;10","784 + 512 + 10","&times;512 + (512+1)&times;10","784&times;512&times;10","某全连接网络输入层784节点,隐藏层512节点,输出层10节点,总参数量为",{"answer":51,"createTime":5,"id":52,"options":53,"question":58,"source":30,"type":31},[],340255410,[54,55,56,57],"减少测试集误差","统一数据分布,加速收敛","增加数据多样性","降低模型复杂度","对输入数据归一化的核心目的是",{"answer":60,"createTime":5,"id":61,"options":62,"question":67,"source":30,"type":31},[],340255411,[63,64,65,66],"每个神经元与相邻层的所有神经元连接","同一层的神经元之间互相连接","每个神经元仅与下一层的部分神经元连接","网络层数必须大于3层","以下关于全连接神经网络(FCNN)的描述,正确的是",{"answer":69,"createTime":5,"id":70,"options":71,"question":76,"source":30,"type":31},[],340255412,[72,73,74,75],"更新参数&rarr;前向计算损失&rarr;反向传播梯度","反向传播梯度&rarr;前向计算损失&rarr;更新参数","随机初始化&rarr;更新参数&rarr;反向传播","前向计算损失&rarr;反向传播梯度&rarr;更新参数","反向传播的正确流程是",{"answer":78,"createTime":5,"id":79,"options":80,"question":85,"source":30,"type":31},[],340255413,[81,82,83,84],"BCELoss","MSELoss","BCEWithLogitsLoss","CrossEntropyLoss","当网络输出层未使用激活函数(如直接输出logits),且任务是三分类问题时,应选择哪个损失函数",{"answer":87,"createTime":5,"id":88,"options":89,"question":94,"source":30,"type":31},[],340255414,[90,91,92,93],"动态调整学习率","减少显存占用","提高测试精度","加速反向传播","梯度累加的主要作用是",{"answer":96,"createTime":5,"id":97,"options":98,"question":103,"source":30,"type":31},[],340255415,[99,100,101,102],"缓解后期过拟合","防止训练初期震荡","加速前向传播","提高模型容量","学习率衰减的主要目的是",{"answer":105,"createTime":5,"id":6,"options":106,"question":19,"source":30,"type":31},[],[8,9,10,11]]