banner
Fight4354

Fight4354

AI,Chem,Science,Study,Share,Hobby,LLM,Life,Sport

Softmax Regression Code

You need to understand the implementation details of this code.
These building blocks are common across neural networks, and the material that follows extends them.

Total Training Function

##### %matplotlib inline
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
from IPython import display

def get_dataloader_workers():
    """Return the number of worker processes each DataLoader reads data with."""
    num_workers = 4
    return num_workers

def load_data_fashion_mnist(batch_size, resize=None):
    """Build the Fashion-MNIST training and test data loaders.

    The dataset is downloaded into ``01_data/01_DataSet_FashionMNIST`` when
    it is not already present there.

    Args:
        batch_size: Number of samples per mini-batch, used for both loaders.
        resize: Optional size forwarded to ``transforms.Resize``; when falsy
            the images are only converted to tensors.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader
        shuffles its samples.
    """
    # A requested resize has to run before the conversion to a tensor.
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    root = "01_data/01_DataSet_FashionMNIST"
    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, transform=pipeline, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, transform=pipeline, download=True)

    train_loader = data.DataLoader(
        mnist_train, batch_size, shuffle=True,
        num_workers=get_dataloader_workers())
    test_loader = data.DataLoader(
        mnist_test, batch_size, shuffle=False,
        num_workers=get_dataloader_workers())
    return (train_loader, test_loader)


# Mini-batch size shared by the training and test loaders.
batch_size = 256
# load_data_fashion_mnist returns the training loader first, then the test loader.
train_iter, test_iter = load_data_fashion_mnist(batch_size)

# Model dimensions: net() flattens every sample to num_inputs features
# (presumably 28 x 28 pixels) and produces num_outputs scores per sample.
num_inputs = 784
num_outputs = 10
# Weights drawn from a normal distribution with mean 0 and std 0.01; bias starts at zero.
# Both track gradients so they can be updated during training.
w = torch.normal(0,0.01,size=(num_inputs,num_outputs),requires_grad=True)
b = torch.zeros(num_outputs,requires_grad=True)

def softmax(X):
    """Apply a numerically stable softmax to each row of a 2-D tensor.

    Args:
        X: Tensor of shape (rows, columns); every row is normalized on its own.

    Returns:
        Tensor with the same shape as ``X`` whose rows are non-negative and,
        for finite inputs, sum to 1.
    """
    # Softmax is invariant to adding a constant to a row, so shifting by the
    # row maximum leaves the result unchanged while keeping exp() from
    # overflowing to inf (which would turn the division into nan).
    # The shift is detached so it acts as a plain constant for autograd.
    row_max = X.max(dim=1, keepdim=True).values.detach()
    X_exp = torch.exp(X - row_max)  # Element-wise exponentiation
    partition = X_exp.sum(1, keepdim=True)  # Per-row normalizer, shape (rows, 1)
    return X_exp / partition  # Broadcasting divides each row by its own sum

# Softmax regression model: one affine layer followed by softmax
def net(X):
    """Return softmax scores for a batch of inputs.

    Each sample in ``X`` is flattened to ``w.shape[0]`` features; the ``-1``
    lets ``reshape`` infer the batch dimension from the remaining size.
    """
    num_features = w.shape[0]
    flat = X.reshape((-1, num_features))
    logits = torch.matmul(flat, w) + b
    return softmax(logits)

def cross_entropy(y_hat, y):
    """Return the per-sample negative log of the value ``y_hat`` holds at index ``y``.

    Args:
        y_hat: 2-D tensor with one row of scores per sample.
        y: Column index to pick in each row (the label index of that sample).

    Returns:
        1-D tensor with one loss value per row of ``y_hat``.
    """
    rows = range(len(y_hat))
    # Pair row i with column y[i] to pick one entry out of every row.
    picked = y_hat[rows, y]
    return torch.neg(torch.log(picked))

def accuracy(y_hat, y):
    """Count how many predictions match the labels.

    Args:
        y_hat: Either predicted indices, or a 2-D tensor with more than one
            column, in which case the arg-max of each row is the prediction.
        y: Tensor of labels to compare against.

    Returns:
        The number of matching predictions as a Python float.
    """
    has_class_scores = y_hat.dim() > 1 and y_hat.shape[1] > 1
    predicted = y_hat.argmax(dim=1) if has_class_scores else y_hat
    # Cast to the label dtype before comparing, then cast the boolean mask
    # back to that dtype so it can be summed.
    matches = predicted.to(y.dtype) == y
    num_correct = matches.to(y.dtype).sum()
    return float(num_correct)

# Can evaluate the accuracy of any model net
def evaluate_accuracy(net, data_iter):
    """Calculate the model's accuracy on the specified dataset.

    Args:
        net: Callable mapping a batch ``X`` to predictions; a
            ``torch.nn.Module`` is switched to evaluation mode first.
        data_iter: Iterable yielding ``(X, y)`` batches.

    Returns:
        Number of correct predictions divided by the number of labels seen.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()  # Set the model to evaluation mode
    metric = Accumulator(2)  # [correct predictions, total predictions]
    # Evaluation never calls backward(), so skip building autograd graphs.
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

# Running totals for several quantities at once (e.g. correct predictions and total predictions)
class Accumulator:
    """Keep ``n`` running float sums that are updated together."""

    def __init__(self, n):
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each positional argument to the sum stored at the same position."""
        updated = []
        # zip pairs the i-th stored sum with the i-th argument.
        for total, increment in zip(self.data, args):
            updated.append(total + float(increment))
        self.data = updated

    def reset(self):
        """Set every stored sum back to zero."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        return self.data[idx]

# Training function
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train ``net`` for one pass over ``train_iter``.

    Args:
        net: Callable mapping a batch ``X`` to predictions; a
            ``torch.nn.Module`` is switched to training mode first.
        train_iter: Iterable yielding ``(X, y)`` batches.
        loss: Callable ``loss(y_hat, y)`` returning either a per-sample loss
            tensor or an already averaged scalar tensor.
        updater: Either a ``torch.optim.Optimizer`` or a callable that takes
            the batch size and applies one parameter update.

    Returns:
        ``(average loss per sample, fraction of correct predictions)``.
    """
    if isinstance(net, torch.nn.Module):
        net.train()  # Enable training mode
    metric = Accumulator(3)  # [total loss, correct predictions, samples]
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.mean().backward()  # The optimizer path steps on the mean loss
            updater.step()
            # mean * batch size equals the batch total for a per-sample loss
            # vector and for an already averaged scalar; a bare float(l)
            # would raise on a vector with more than one element.
            batch_loss = float(l.mean()) * len(y)
        else:
            l.sum().backward()
            updater(X.shape[0])
            batch_loss = float(l.sum())
        metric.add(batch_loss, accuracy(y_hat, y), y.numel())
    # Totals divided by the number of samples seen in this epoch.
    return metric[0] / metric[2], metric[1] / metric[2]


    
class Animator:
    """Accumulate points for several data series and redraw them after every update."""

    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        legend = [] if legend is None else legend
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            # Wrap the single axes object so self.axes[0] works below.
            self.axes = [self.axes,]

        def _configure():
            # Re-applies labels, limits, scales and legend to the first axes.
            return d2l.set_axes(self.axes[0], xlabel, ylabel, xlim, ylim,
                                xscale, yscale, legend)

        self.config_axes = _configure
        self.X = None
        self.Y = None
        self.fmts = fmts

    def add(self, x, y):
        """Append one point per series and redraw the figure.

        Args:
            x: One x value shared by every series, or a sequence with one x
                value per series.
            y: One y value, or a sequence with one y value per series.
        """
        if not hasattr(y, "__len__"):
            y = [y]
        num_series = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * num_series
        # Create the per-series point lists on first use.
        if not self.X:
            self.X = [[] for _ in range(num_series)]
        if not self.Y:
            self.Y = [[] for _ in range(num_series)]
        for idx, (x_val, y_val) in enumerate(zip(x, y)):
            if x_val is None or y_val is None:
                continue
            self.X[idx].append(x_val)
            self.Y[idx].append(y_val)
        # Clear the axes and redraw every series from scratch.
        axis = self.axes[0]
        axis.cla()
        for xs, ys, fmt in zip(self.X, self.Y, self.fmts):
            axis.plot(xs, ys, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)

# Total training function
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train ``net`` for ``num_epochs`` epochs and plot the metrics of each epoch.

    Args:
        net: Model passed on to ``train_epoch_ch3`` and ``evaluate_accuracy``.
        train_iter: Iterable yielding training ``(X, y)`` batches.
        test_iter: Iterable yielding test ``(X, y)`` batches.
        loss: Loss callable passed on to ``train_epoch_ch3``.
        num_epochs: Number of passes over ``train_iter``.
        updater: Optimizer or update callable passed on to ``train_epoch_ch3``.
    """
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        # (average training loss, training accuracy) for this epoch
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        # Accuracy on the test set after this epoch's updates
        test_acc = evaluate_accuracy(net, test_iter)
        # Plot three series: train loss, train accuracy, test accuracy.
        animator.add(epoch + 1, train_metrics + (test_acc,))
    
# Mini-batch stochastic gradient descent step used to minimize the model's loss
lr = 0.1
def updater(batch_size):
    """Run one ``d2l.sgd`` step on the model parameters ``w`` and ``b``."""
    params = [w, b]
    return d2l.sgd(params, lr, batch_size)

# Train the softmax regression model with the cross-entropy loss and the SGD updater.
num_epochs = 100
train_ch3(net,train_iter,test_iter,cross_entropy,num_epochs,updater)

Predict Data

def predict_ch3(net, test_iter, n=12):
    """Show up to ``n`` images from the first test batch with their labels.

    Each image is titled with its true label on the first line and the
    model's predicted label on the second.

    Args:
        net: Callable mapping a batch ``X`` to per-class scores.
        test_iter: Iterable yielding ``(X, y)`` batches; only the first batch
            is used.
        n: Maximum number of images to show.
    """
    # Only the first batch is needed.
    X, y = next(iter(test_iter))
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    # Clamp to the batch size so the reshape below cannot fail when the batch
    # holds fewer than n samples.
    count = min(n, len(X))
    d2l.show_images(X[0:count].reshape((count, 28, 28)), 1, count,
                    titles=titles[0:count])
    
# Display predictions of the trained model for the first test batch (default n=12 images).
predict_ch3(net,test_iter)
Loading...
Ownership of this post data is guaranteed by blockchain and smart contracts to the creator alone.