引言 上篇文章我们学了张量操作和自动微分——相当于拿到了砖头和水泥。这篇我们来盖第一栋房子:用 PyTorch 的 nn.Module 构建全连接神经网络,完成手写数字识别。
写完这篇文章的代码,你就走通了深度学习的完整流程:数据加载 → 模型定义 → 训练 → 评估。
前置知识
尤其是 PyTorch(一)中的 requires_grad、backward()、梯度更新——这些是本文的基础。
一、项目结构总览
┌──────────────┐     ┌──────────────┐     ┌──────────────┐
│  DataLoader  │────▶│    Model     │────▶│   Trainer    │
│  数据加载器  │     │   模型定义   │     │   训练循环   │
├──────────────┤     ├──────────────┤     ├──────────────┤
│ - MNIST 数据 │     │ - 网络层定义 │     │ - 前向传播   │
│ - 批量加载   │     │ - 前向传播   │     │ - 计算损失   │
│ - 数据预处理 │     │ - 参数管理   │     │ - 反向传播   │
└──────────────┘     └──────────────┘     └──────────────┘
                                                 │
                                           ┌─────▼──────┐
                                           │    Eval    │
                                           │  模型评估  │
                                           └────────────┘
二、数据准备 我们使用 MNIST 数据集——28×28 像素的手写数字图片(0-9)。
# 2.1 加载与预处理 (Section 2.1: loading and preprocessing)
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# Hyperparameters shared by the rest of the tutorial.
BATCH_SIZE = 64
EPOCHS = 5
LEARNING_RATE = 0.001

# ToTensor turns each image into a float tensor; Normalize then standardises
# it. 0.1307 / 0.3081 are the commonly quoted mean / std of the MNIST
# training set.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# download=True fetches the files into ./data on first use.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Only the training data is shuffled; the test set keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"训练集: {len(train_dataset)} 张图片")
print(f"测试集: {len(test_dataset)} 张图片")

# Peek at a single batch to confirm the tensor shapes.
images, labels = next(iter(train_loader))
print(f"一批图像的形状: {images.shape}")
print(f"一批标签的形状: {labels.shape}")
输出:
训练集: 60000 张图片
测试集: 10000 张图片
一批图像的形状: torch.Size([64, 1, 28, 28])
一批标签的形状: torch.Size([64])
# 2.2 可视化几个样本 (Section 2.2: visualising a few samples)
# Show the first six images of the batch in a 2x3 grid, titled by label.
fig, axes = plt.subplots(2, 3, figsize=(8, 5))
for i, ax in enumerate(axes.flat):
    # squeeze() drops the size-1 channel axis so imshow gets a 2-D image.
    ax.imshow(images[i].squeeze(), cmap='gray')
    ax.set_title(f'标签: {labels[i].item()}')
    ax.axis('off')
plt.tight_layout()
plt.show()
三、定义网络:nn.Module PyTorch 中所有神经网络都继承自 nn.Module:
class FullyConnectedNet(nn.Module):
    """Three-layer fully connected network for MNIST classification.

    Architecture: 784 -> 128 -> 64 -> 10, with ReLU and dropout (p=0.2)
    after each of the two hidden layers. The output is raw logits; no
    softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 128)  # flattened image -> hidden 1
        self.fc2 = nn.Linear(128, 64)       # hidden 1 -> hidden 2
        self.fc3 = nn.Linear(64, 10)        # hidden 2 -> one logit per digit
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Input batch of shape [batch_size, 1, 28, 28].

        Returns:
            Logits of shape [batch_size, 10].
        """
        # flatten() also accepts non-contiguous input, where view() raises.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        # No activation on the last layer: the caller gets raw logits.
        x = self.fc3(x)
        return x


model = FullyConnectedNet()
print(model)
输出:
FullyConnectedNet(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)
为什么输出层不做 Softmax? nn.CrossEntropyLoss 内部包含了 LogSoftmax + NLLLoss,所以输出层直接输出原始 logits 即可。
如果你需要概率值(比如做推理展示),用 F.softmax(model(x), dim=1)。
# 四、训练循环 (Section 4: training loop)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
print(f"训练设备: {device}")

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

LOG_INTERVAL = 200  # print progress every LOG_INTERVAL batches

# Per-epoch history of mean loss and accuracy.
train_losses = []
train_accs = []

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0    # loss summed over the current logging window
    epoch_loss_sum = 0.0  # loss summed over the whole epoch
    correct = 0
    total = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss_sum += batch_loss

        # The predicted class is the index of the largest logit.
        _, predicted = output.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()

        if (batch_idx + 1) % LOG_INTERVAL == 0:
            avg_loss = running_loss / LOG_INTERVAL
            acc = 100. * correct / total
            print(f'Epoch {epoch+1}/{EPOCHS} | Batch {batch_idx+1}/{len(train_loader)} | '
                  f'Loss: {avg_loss:.4f} | Acc: {acc:.2f}%')
            # Reset only the window accumulator; the epoch total must keep
            # growing, otherwise the epoch loss would be under-reported.
            running_loss = 0.0

    epoch_loss = epoch_loss_sum / len(train_loader)
    epoch_acc = 100. * correct / total
    train_losses.append(epoch_loss)
    train_accs.append(epoch_acc)
    print(f'═══ Epoch {epoch+1} 完成 | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.2f}% ═══')
训练过程中你会看到类似这样的输出:
Epoch 1/5 | Batch 200/938 | Loss: 0.3825 | Acc: 87.50%
Epoch 1/5 | Batch 400/938 | Loss: 0.2562 | Acc: 91.25%
...
═══ Epoch 1 完成 | Loss: 0.3012 | Acc: 91.33% ═══
═══ Epoch 5 完成 | Loss: 0.0815 | Acc: 97.52% ═══
# 训练模式 vs 评估模式 (training mode vs. evaluation mode)
model.train()  # training mode: Dropout randomly zeroes activations
model.eval()   # evaluation mode: Dropout is switched off
忘记切换模式是新手最常见的 bug 之一:model.train() 会启用 Dropout,model.eval() 会关闭 Dropout;如果评估时仍处于训练模式,预测结果会带有随机性,准确率也会偏低。
# 五、模型评估 (Section 5: model evaluation)
model.eval()  # disable dropout for evaluation
correct = 0
total = 0

# No gradients are needed for evaluation.
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        # The predicted class is the index of the largest logit.
        _, predicted = output.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()

test_acc = 100. * correct / total
print(f'测试集准确率: {test_acc:.2f}%')
输出:
# 查看错误分类的样本 (inspecting misclassified samples)
model.eval()

# Collect every test image with its prediction and true label on the CPU.
all_images = []
all_preds = []
all_labels = []

with torch.no_grad():
    for data, target in test_loader:
        data = data.to(device)
        output = model(data)
        _, preds = output.max(1)
        all_images.append(data.cpu())
        all_preds.append(preds.cpu())
        all_labels.append(target)

images = torch.cat(all_images)
preds = torch.cat(all_preds)
labels = torch.cat(all_labels)

# Boolean mask selecting the samples the model got wrong.
wrong_mask = preds != labels
wrong_images = images[wrong_mask]
wrong_preds = preds[wrong_mask]
wrong_labels = labels[wrong_mask]

num_wrong = wrong_mask.sum().item()
accuracy = 100 - num_wrong / len(labels) * 100
print(f"总共错 {num_wrong} 张(准确率 {accuracy:.2f}%)")

# Plot up to nine misclassified digits; unused axes are simply hidden.
fig, axes = plt.subplots(3, 3, figsize=(9, 9))
for i, ax in enumerate(axes.flat):
    if i < len(wrong_images):
        ax.imshow(wrong_images[i].squeeze(), cmap='gray')
        ax.set_title(f'预测: {wrong_preds[i].item()} | 真实: {wrong_labels[i].item()}',
                     color='red')
    ax.axis('off')
plt.tight_layout()
plt.show()
# 六、保存与加载模型 (Section 6: saving and loading the model)
# Save only the parameters (state_dict), not the whole pickled module.
torch.save(model.state_dict(), 'mnist_fc.pth')

# Loading: rebuild the architecture first, then restore the parameters.
model = FullyConnectedNet()
# map_location lets a checkpoint saved from a GPU model load on a CPU-only
# machine. NOTE: torch.load unpickles the file, so only load checkpoints you
# trust (recent PyTorch versions also accept weights_only=True).
state_dict = torch.load('mnist_fc.pth', map_location=device)
model.load_state_dict(state_dict)
# Keep the reloaded model on the same device as the data it will receive.
model = model.to(device)
model.eval()
print("模型加载完成 ✅")
七、完整代码 把所有代码合并成一个文件 train_mnist.py:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


class FullyConnectedNet(nn.Module):
    """Three-layer fully connected network (784 -> 128 -> 64 -> 10)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Return raw logits of shape [batch_size, 10] for a batch of images."""
        # flatten() also accepts non-contiguous input, where view() raises.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x


def main():
    """Train the network on MNIST, report test accuracy and save the weights."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"设备: {device}")

    # --- data ---
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    train_loader = DataLoader(
        datasets.MNIST('./data', train=True, download=True, transform=transform),
        batch_size=64,
        shuffle=True,
    )
    test_loader = DataLoader(
        datasets.MNIST('./data', train=False, download=True, transform=transform),
        batch_size=64,
        shuffle=False,
    )

    # --- model, loss, optimiser ---
    model = FullyConnectedNet().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # --- training ---
    for epoch in range(5):
        model.train()
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1} 完成')

    # --- evaluation ---
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            _, preds = model(data).max(1)
            total += target.size(0)
            correct += preds.eq(target).sum().item()
    print(f'测试准确率: {100. * correct / total:.2f}%')

    torch.save(model.state_dict(), 'mnist_fc.pth')


if __name__ == '__main__':
    main()
八、常见问题 Q:为什么我训练时 loss 不下降? A:(1)检查学习率是不是太大/太小(0.001 是安全值);(2)检查数据归一化;(3)检查是否忘记 optimizer.zero_grad()。
Q:全连接网络和卷积网络有什么区别? A:全连接把每个像素独立看待,忽略了图片的空间结构 (相邻像素的关系)。CNN 用卷积核保留空间信息,效果更好——下一篇会讲。
Q:Dropout 是什么原理? A:训练时随机让一部分神经元输出为 0(本文中 20%),迫使网络不依赖某个特定神经元,提高泛化能力。PyTorch 在训练时会把保留下来的输出放大 1/(1-p) 倍,所以评估时(model.eval())Dropout 直接关闭即可,无需额外缩放。
总结 本文你完成了:
| 环节 | 技术 | 掌握 |
| --- | --- | --- |
| 数据准备 | Dataset、DataLoader、Transform | ✅ |
| 模型定义 | nn.Module、nn.Linear、forward() | ✅ 核心 |
| 训练循环 | zero_grad() → forward → loss → backward() → step() | ✅ 核心 |
| 评估 | model.eval()、torch.no_grad() | ✅ |
| 保存/加载 | state_dict | ✅ |
| 完整流程 | 端到端训练一个识别手写数字的网络 | ✅ 里程碑 |
你已经从“了解深度学习”进入了能实战深度学习的阶段。
下一步推荐: