123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365 |
- import matplotlib.pyplot as plt
- import torch
- import torch.nn as nn
- import torch.optim as optim
- from torchvision import datasets, transforms
- from torch.utils.data import DataLoader
- import numpy as np
- import tkinter as tk
- from PIL import Image, ImageDraw
- from tkinter import messagebox
- import cv2
- from torchsummary import summary
- from torchviz import make_dot
- import netron
# Run mode: 0 = train a new model, 1 = load a previously saved model.
workmode = 1

# Preprocessing pipeline.  The training branch is kept separate so that the
# (currently disabled) augmentation steps can be switched on for training
# without affecting the load/inference path.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST mean / std - confirm.
if workmode == 0:
    _pipeline = [
        # transforms.RandomRotation(10),  # random rotation by up to 10 degrees
        # transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # random shift
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
else:
    _pipeline = [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
transform = transforms.Compose(_pipeline)

# Training and test splits; only the training split requests a download.
train_dataset = datasets.MNIST('data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('data', train=False, transform=transform)

# Only the training data is shuffled, so test predictions keep dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# One slot per test sample; filled with the predicted class by test().
test_loader_predicted = np.zeros(len(test_dataset))
def showfig(images, labels, totalnum, num):
    """Plot ``num`` randomly chosen samples in one row, titled with their labels.

    The figure is only built here; the caller decides when to display it
    (for example with ``plt.show()``).

    Args:
        images: Indexable collection of image tensors; every item must
            support ``.squeeze().numpy()``.
        labels: Indexable collection of labels aligned with ``images``.
        totalnum: Number of available samples; indices are drawn from
            ``range(totalnum)``.
        num: Number of samples to display.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when ``num`` is
            larger than ``totalnum``.
    """
    # squeeze=False keeps ``axes`` two-dimensional, so a single-image request
    # (num == 1) is indexed the same way as a multi-image one.
    _, axes = plt.subplots(1, num, figsize=(15, 3), squeeze=False)
    # Draw every index once, up front and without replacement, so the same
    # sample cannot appear twice.  The upper bound of np.random.choice is
    # exclusive, so valid indices are 0 .. totalnum - 1.
    chosen = np.random.choice(totalnum, num, replace=False)
    for ax, idx in zip(axes[0], chosen):
        # Drop singleton dimensions and convert to numpy for imshow.
        ax.imshow(images[idx].squeeze().numpy(), cmap='gray')
        ax.set_title(f'idx-{idx}-Label: {labels[idx]}')
        ax.axis('off')
- # # # 显示图形
- # plt.show()
- # while 1:
- # pass
# Preview four random training samples; the figure is displayed later when
# plt.show() is called.
showfig(
    images=train_dataset.data,
    labels=train_dataset.targets,
    totalnum=60000,
    num=4,
)
# Prefer CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
- # # 定义神经网络模型
- # class Net(nn.Module):
- # def __init__(self):
- # super(Net, self).__init__()
- # self.fc1 = nn.Linear(784, 128)
- # self.fc2 = nn.Linear(128, 64)
- # self.fc3 = nn.Linear(64, 10)
- #
- # def forward(self, x):
- # x = x.view(-1, 784)
- # x = torch.relu(self.fc1(x))
- # x = torch.relu(self.fc2(x))
- # x = self.fc3(x)
- # return x
- # model = Net()
- # criterion = nn.CrossEntropyLoss()
- # optimizer = optim.SGD(model.parameters(), lr=0.01)
- # # 定义修改后的 AlexNet 模型,适应 MNIST 数据集
- # class AlexNet(nn.Module):
- # def __init__(self, num_classes=10):
- # super(AlexNet, self).__init__()
- # self.features = nn.Sequential(
- # nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1), # 修改卷积核大小和通道数
- # nn.ReLU(inplace=True),
- # nn.MaxPool2d(kernel_size=2, stride=2), # 修改池化核大小和步长
- # nn.Conv2d(32, 64, kernel_size=3, padding=1),
- # nn.ReLU(inplace=True),
- # nn.MaxPool2d(kernel_size=2, stride=2),
- # nn.Conv2d(64, 128, kernel_size=3, padding=1),
- # nn.ReLU(inplace=True),
- # nn.Conv2d(128, 128, kernel_size=3, padding=1),
- # nn.ReLU(inplace=True),
- # nn.Conv2d(128, 128, kernel_size=3, padding=1),
- # nn.ReLU(inplace=True),
- # nn.MaxPool2d(kernel_size=2, stride=2),
- # )
- # self.classifier = nn.Sequential(
- # nn.Dropout(),
- # nn.Linear(128 * 3 * 3, 128), # 修改全连接层输入维度
- # nn.ReLU(inplace=True),
- # nn.Dropout(),
- # nn.Linear(128, 128),
- # nn.ReLU(inplace=True),
- # nn.Linear(128, num_classes),
- # )
- #
- # def forward(self, x):
- # x = self.features(x)
- # x = x.view(x.size(0), 128 * 3 * 3) # 修改展平后的维度
- # x = self.classifier(x)
- # return x
- #
- #
- # # 初始化模型、损失函数和优化器
- # model = AlexNet().to(device)
- # LeNet-5模型定义
class LeNet5(nn.Module):
    """LeNet-5 style CNN producing 10 class logits per sample.

    Expects input of shape ``(N, 1, 28, 28)``: two unpadded 5x5 convolutions,
    each followed by 2x2 max pooling, reduce 28x28 to 4x4, which is the size
    ``fc1`` is built for.  The output has shape ``(N, 10)`` and is not passed
    through softmax.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # Spatial size: 28 -conv5-> 24 -pool2-> 12 -conv5-> 8 -pool2-> 4.
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the logits for a batch ``x``."""
        # Pooling has no parameters, so the functional form is used instead of
        # building a fresh nn.MaxPool2d module on every call.  No attribute is
        # added to the class, so previously pickled instances keep working.
        x = nn.functional.max_pool2d(torch.relu(self.conv1(x)), 2)
        x = nn.functional.max_pool2d(torch.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)
- # 实例化模型
# Network, loss function and optimiser (SGD with momentum).
model = LeNet5()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), momentum=0.9, lr=0.001)
- # # 打印模型结构
- # summary(model, input_size=(1, 28, 28))
- # # 创建一个随机输入张量
- # x = torch.randn(1, 1, 28, 28).to(device)
- # # 前向传播
- # y = model(x).to(device)
- # # 使用 torchviz 生成计算图
- # dot = make_dot(y, params=dict(model.named_parameters()))
- # # 保存计算图为图像文件(这里保存为 PNG 格式)
- # dot.render('alexnet_model', format='png', cleanup=True, view=True)
- # 训练过程
def train(model, train_loader, optimizer, criterion, epoch):
    """Run one training epoch over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable of ``(data, target)`` batches that also
            supports ``len()`` and exposes a ``dataset`` attribute (both are
            used for the progress message).
        optimizer: Optimiser stepping ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        epoch: Epoch number, used only in the progress message.

    A progress line is printed every 100 batches.
    """
    model.train()
    # Move every batch to wherever the model's parameters live, so the same
    # loop works for a CPU model and for one that was moved to another device.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None
    progress = 'Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
    for batch_idx, (data, target) in enumerate(train_loader):
        if model_device is not None:
            data, target = data.to(model_device), target.to(model_device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(progress.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
- # 验证过程
def test(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print the accuracy.

    The predicted class of every sample is also written into the
    module-level ``test_loader_predicted`` array, in the order in which the
    loader yields the samples.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        test_loader: Iterable of ``(data, target)`` batches.

    Returns:
        The accuracy in percent (0.0 when the loader yields no samples).
    """
    model.eval()
    # Evaluate on whatever device the model's parameters live on.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            if model_device is not None:
                data, target = data.to(model_device), target.to(model_device)
            output = model(data)
            _, predicted = torch.max(output, 1)
            # Store predictions at the running sample offset instead of
            # assuming a fixed batch size, so any loader batch size (and a
            # short final batch) lands in the right slots.
            batch_len = target.size(0)
            test_loader_predicted[total:total + batch_len] = predicted.cpu().numpy()
            total += batch_len
            correct += (predicted == target).sum().item()
    # Guard against an empty loader, which would otherwise divide by zero.
    accuracy = 100 * correct / total if total else 0.0
    print('Test Accuracy: {:.2f}%'.format(accuracy))
    return accuracy
- # # 训练和验证模型
if workmode == 0:
    # Training mode: evaluate after every epoch, then persist the whole
    # model object (not just its state dict).
    for epoch in range(10):
        train(model, train_loader, optimizer, criterion, epoch)
        test(model, test_loader)
    torch.save(model, 'model.pth')
    print('save model as model.pth')
else:
    # Load mode: restore the saved model and evaluate it once.
    print('load model.pth')
    try:
        # NOTE: weights_only=False unpickles arbitrary objects, so only load
        # a model.pth that comes from a trusted source.
        model = torch.load('model.pth', weights_only=False)
        model.eval()
    except Exception as e:
        # On failure the freshly initialised model is kept and still evaluated.
        print(f"加载模型时出现错误: {e}")
    test(model, test_loader)
# netron.start('model.pth')  # render the network structure diagram
# Show four random test images titled with the predictions recorded by test().
showfig(test_loader.dataset.data, test_loader_predicted, 10000, 4)
plt.show()
def save_drawing():
    """Rasterise the drawn strokes, classify the digit and save the images.

    Uses the module-level ``drawing_points``, ``canvas``, ``transform`` and
    ``model``.  The strokes are replayed onto an off-screen image, scaled to
    28x28 and passed through the model; the result is printed and shown in a
    message box.  Afterwards the recorded points and the canvas are cleared
    and both the full-size and the 28x28 image are written to disk.
    """
    global drawing_points
    # Off-screen black image with the same size as the on-screen canvas.
    size = (canvas.winfo_width(), canvas.winfo_height())
    image = Image.new("RGB", size, "black")
    pen = ImageDraw.Draw(image)
    # Join consecutive recorded points, skipping every pair that touches a
    # (None, None) entry, which marks the boundary between two strokes.
    for (x1, y1), (x2, y2) in zip(drawing_points, drawing_points[1:]):
        if x1 is None or x2 is None or y1 is None or y2 is None:
            continue
        pen.line((x1, y1, x2, y2), fill="white", width=20)
    # Greyscale, then shrink to the 28x28 resolution the network was built for.
    image = image.convert('L')
    image1 = image.resize((28, 28))
    # The image is single-channel at this point; add the batch axis the
    # model expects.
    batch = transform(image1).unsqueeze(0)
    with torch.no_grad():
        output = torch.softmax(model(batch), 1)
    v, predicted = torch.max(output, 1)
    digit = predicted.numpy()[0]
    print(f'预测数字={digit},概率:{(v*100).numpy()[0]:.2f}%')
    messagebox.showinfo('识别结果', f'predicted={digit}')
    # Reset state for the next drawing.
    drawing_points = []
    canvas.delete("all")
    # Keep both resolutions on disk for inspection.
    image.save("drawing.png")
    image1.save("drawing28x28.png")
- # print("绘画已保存为 drawing.png")
# Previous pointer position of the stroke in progress; both are None while
# no stroke is being drawn.
last_x, last_y = None, None


def on_mouse_move(event):
    """Handle a drag event: draw the new segment and record the point.

    Appends to the module-level ``drawing_points`` list.  Every stroke is
    stored as a ``(None, None)`` separator followed by all of its points, so
    consumers that join consecutive non-None points reproduce exactly the
    segments drawn on the canvas.

    Args:
        event: Tk event carrying the pointer position in ``x`` and ``y``.
    """
    global last_x, last_y
    if last_x is None or last_y is None:
        # First event of a new stroke: mark the boundary so this stroke is
        # not connected to the previous one when the points are replayed.
        drawing_points.append((None, None))
    else:
        canvas.create_line(last_x, last_y, event.x, event.y,
                           fill="white", width=20, smooth=True, splinesteps=10)
    # Record the current position.  Recording the previous one instead would
    # drop the final point of every stroke, and with it the last segment.
    drawing_points.append((event.x, event.y))
    last_x, last_y = event.x, event.y
def on_mouse_release(event):
    """End the current stroke by forgetting the last pointer position.

    Args:
        event: Tk event object (unused).
    """
    global last_x, last_y
    last_x = last_y = None
# Main window: a 560x560 black drawing surface with a button underneath
# that triggers recognition of the drawing.
root = tk.Tk()
canvas = tk.Canvas(root, width=560, height=560, bg="black")
canvas.pack()
button = tk.Button(root, text="识别", command=save_drawing)
button.pack()

# Points recorded while drawing; read and reset by save_drawing().
drawing_points = []

# Draw while the left button is dragged; end the stroke on release.
canvas.bind("<B1-Motion>", on_mouse_move)
canvas.bind("<ButtonRelease-1>", on_mouse_release)

# Hand control to Tk's event loop.
root.mainloop()
|