安装 PyTorch
# 访问 https://pytorch.org/get-started/locally/ 来获取适合你系统的安装命令
# 例如,安装 CPU 版本的 PyTorch:
#   pip install torch torchvision torchaudio
导入 PyTorch
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
理解张量(Tensors)
# Create a random tensor of shape (5, 3). requires_grad=True tells autograd
# to record operations on x so that gradients with respect to it can be
# computed by a later backward() call.
x = torch.randn(5, 3, requires_grad=True)
print(x)
自动微分(Autograd)
# backward() only works when the result depends on a tensor that tracks
# gradients. Enable tracking on x in place in case it was created without
# requires_grad=True (this is a no-op when tracking is already enabled).
x.requires_grad_(True)

y = x ** 2    # element-wise square of x
z = y.mean()  # scalar mean of y

# Backpropagate: compute the gradient of z with respect to x and store it
# in x.grad.
z.backward()
print(x.grad)  # gradient of z w.r.t. x, i.e. 2 * x / x.numel()
定义模型
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Fully connected classifier: 784 inputs -> 128 -> 64 -> 10 outputs."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)  # 784 input features, 128 hidden units
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)  # 10 output classes

    def forward(self, x):
        """Return the raw class scores (logits) for ``x``.

        Args:
            x: Either already-flattened features whose last dimension is
                784, or a batch with extra trailing dimensions (for example
                images shaped (N, 1, 28, 28)) whose per-sample size is 784.

        Returns:
            A tensor whose last dimension holds the 10 class scores.
        """
        # nn.Linear acts on the last dimension only, so an image batch must
        # be collapsed to (N, 784) first. Inputs that are already 1-D or 2-D
        # are passed through untouched.
        if x.dim() > 2:
            x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: the caller receives raw scores.
        return self.fc3(x)


net = Net()
数据加载和预处理
# Preprocessing pipeline: convert each image to a tensor, then normalize it
# with mean 0.5 and standard deviation 0.5 (single channel).
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
transform = transforms.Compose(preprocessing_steps)

# MNIST training split, stored under ./data and downloaded on request.
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

# Serve the training set in shuffled mini-batches of 64 samples.
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
训练模型
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)


def _run_epoch(model, loader, loss_fn, opt):
    """Train ``model`` for one pass over ``loader``; return the summed batch loss."""
    total = 0.0
    for batch, targets in loader:
        opt.zero_grad()                             # clear gradients from the previous step
        batch_loss = loss_fn(model(batch), targets)  # forward pass + loss
        batch_loss.backward()                       # backpropagation
        opt.step()                                  # update the weights
        total += batch_loss.item()
    return total


# Train for 10 epochs, reporting the mean batch loss after each one.
for epoch in range(10):
    running_loss = _run_epoch(net, trainloader, criterion, optimizer)
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(trainloader)}')
评估模型
# Held-out MNIST test split, preprocessed with the same transform as the
# training data. Shuffling is off because batch order does not affect the
# accuracy figure.
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
validloader = DataLoader(testset, batch_size=64, shuffle=False)

# Evaluation mode. It changes nothing for plain Linear layers, but keeps this
# snippet correct if dropout or batch-norm layers are added to the model.
net.eval()

correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    for images, labels in validloader:
        outputs = net(images)
        # The index of the highest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the {total} test images: {100 * correct / total}%')
保存和加载模型
PATH = './mnist_net.pth'
# Save only the learned parameters (the state_dict), not the module object.
torch.save(net.state_dict(), PATH)

# Load the model
from torch.nn import DataParallel  # not used in this snippet

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved on a GPU machine can still be loaded on a CPU-only one.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load(PATH, map_location=device))
model.eval()  # switch the restored model to evaluation (inference) mode
探索高级特性
自定义层和函数:创建自定义层和前向传播函数。
多 GPU 训练:使用 torch.nn.DataParallel 或 torch.nn.parallel.DistributedDataParallel。
混合精度训练:使用 torch.cuda.amp 来加速训练并减少内存使用。
微调和迁移学习:利用预训练模型进行特定任务的训练。