Pytorch使用多GPU

在caffe中训练的时候如果使用多GPU,则直接在运行程序的时候指定GPU的index即可;但是在Pytorch中则需要在声明模型之后,用DataParallel对声明的模型进行包装(初始化),如:

cnn = DataParallel(AlexNet())


之后直接运行Pytorch程序,则默认使用所有的GPU。为了说明上述初始化的作用,我用了一组畸变图像的数据集,写了一个ResNet的模块,训练了50个epoch,对比一下实验耗时的差别,代码如下:

  1 # -*- coding: utf-8 -*-
  2 # Implementation of https://arxiv.org/pdf/1512.03385.pdf/
  3 # See section 4.2 for model architecture on CIFAR-10.
  4 # Some part of the code was referenced below.
  5 # https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
  6 
  7 import os
  8 from PIL import Image
  9 import time
 10 
 11 import torch
 12 import torch.nn as nn
 13 import torchvision.datasets as dsets
 14 import torchvision.transforms as transforms
 15 from torch.autograd import Variable
 16 import torch.utils.data as data
 17 from torch.nn import DataParallel
 18 
 19 
# Extra keyword arguments forwarded to both DataLoader instances below.
kwargs = {'num_workers': 1, 'pin_memory': True}
# Custom dataset definitions follow: each sample is an image plus its label.
 22 
 23 
 24 def default_loader(path):
 25     return Image.open(path).convert('RGB')
 26 
 27 
 28 class myImageFloder(data.Dataset):  # Class inheritance
 29     def __init__(self, root, label, transform=None, target_transform=None, loader=default_loader):
 30         fh = open(label)
 31         c = 0
 32         imgs = []
 33         class_names = []
 34         for line in fh.readlines():
 35             if c == 0:
 36                 class_names = [n.strip() for n in line.rstrip().split('    ')]
 37             else:
 38                 cls = line.split()  # cls is a list
 39                 fn = cls.pop(0)
 40                 if os.path.isfile(os.path.join(root, fn)):
 41                     imgs.append((fn, tuple([float(v) for v in cls])))  # imgs is the list,and the content is the tuple
 42                     # we can use the append way to append the element for list
 43             c = c + 1
 44         self.root = root
 45         self.imgs = imgs
 46         self.classes = class_names
 47         self.transform = transform
 48         self.target_transform = target_transform
 49         self.loader = loader
 50 
 51     def __getitem__(self, index):
 52         fn, label = self.imgs[index]  # eventhough the imgs is just a list, it can return the elements of is
 53         # in a proper way
 54         img = self.loader(os.path.join(self.root, fn))
 55         if self.transform is not None:
 56             img = self.transform(img)
 57         return img, torch.Tensor(label)
 58 
 59     def __len__(self):
 60         return len(self.imgs)
 61 
 62     def getName(self):
 63         return self.classes
 64 
# The only preprocessing step is ToTensor; nothing else is composed here.
mytransform = transforms.Compose([transforms.ToTensor()])
# Absolute, machine-specific locations of the image folders and label files.
train_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Training"
test_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Testing"
train_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_train.txt"
test_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_test.txt"

# Training batches: 64 samples each, reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    myImageFloder(root=train_data_root, label=train_label, transform=mytransform),
    batch_size=64, shuffle=True, **kwargs)

# NOTE(review): the test loader is shuffled as well; presumably not needed
# for evaluation -- confirm before relying on sample order.
test_loader = torch.utils.data.DataLoader(
    myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
    batch_size=64, shuffle=True, **kwargs)
 78 
 79 
 80 # 3x3 Convolution
 81 def conv3x3(in_channels, out_channels, stride=1):
 82     return nn.Conv2d(in_channels, out_channels, kernel_size=3,
 83                      stride=stride, padding=1, bias=False)
 84 
 85 
 86 # Residual Block
 87 class ResidualBlock(nn.Module):
 88     def __init__(self, in_channels, out_channels, stride=1, downsample=None):
 89         super(ResidualBlock, self).__init__()
 90         self.conv1 = conv3x3(in_channels, out_channels, stride)  # kernel size is default 3
 91         self.bn1 = nn.BatchNorm2d(out_channels)
 92         self.relu = nn.ReLU(inplace=True)
 93         self.conv2 = conv3x3(out_channels, out_channels)
 94         self.bn2 = nn.BatchNorm2d(out_channels)
 95         self.downsample = downsample
 96 
 97     def forward(self, x):
 98         residual = x
 99         out = self.conv1(x)
100         out = self.bn1(out)
101         out = self.relu(out)
102         out = self.conv2(out)
103         out = self.bn2(out)
104         if self.downsample:
105             residual = self.downsample(x)
106         out += residual
107         out = self.relu(out)
108         return out
109 
110 
# ResNet Module
class ResNet(nn.Module):
    """Five-stage residual network ending in a single linear layer.

    A 3x3 stem (3 -> 16 channels) is followed by stages of 16, 32, 64, 128
    and 256 channels; every stage after the first uses stride 2. The first
    two stages contain ``layers[0]`` blocks and the last three contain
    ``layers[1]`` blocks. The head is an 8x8 average pool followed by a
    linear layer that expects ``256 * 2 * 2`` input features.

    :param block: callable ``block(in_channels, out_channels[, stride,
        downsample])`` returning one residual unit.
    :param layers: sequence of block counts; indices 0 and 1 are read.
    :param num_classes: size of the output vector.
    """

    def __init__(self, block, layers, num_classes=1):
        super(ResNet, self).__init__()
        self.in_channels = 16
        # Stem
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # Residual stages (created in order so registration order is stable)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[0], 2)
        self.layer3 = self.make_layer(block, 64, layers[1], 2)
        self.layer4 = self.make_layer(block, 128, layers[1], 2)
        self.layer5 = self.make_layer(block, 256, layers[1], 2)
        # Head
        self.avg_pool = nn.AvgPool2d(kernel_size=8, stride=8)
        self.fc = nn.Linear(256 * 2 * 2, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Assemble one stage of ``blocks`` residual units.

        The first unit takes ``stride`` and, when the stride is not 1 or the
        channel count changes, a conv3x3 + batch-norm shortcut. The remaining
        units keep the channel count. ``self.in_channels`` is updated to
        ``out_channels`` as a side effect.
        """
        needs_projection = stride != 1 or self.in_channels != out_channels
        shortcut = None
        if needs_projection:
            shortcut = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        stage = [block(self.in_channels, out_channels, stride, shortcut)]
        self.in_channels = out_channels
        # The first unit is already in place, so add blocks - 1 more.
        stage.extend(block(out_channels, out_channels) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Run stem, the five stages, pooling and the linear head on ``x``."""
        feat = self.relu(self.bn(self.conv(x)))
        for stage in (self.layer1, self.layer2, self.layer3,
                      self.layer4, self.layer5):
            feat = stage(feat)
        pooled = self.avg_pool(feat)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)
153 
154 
# Wrap the model in DataParallel and move it to the GPU(s).
resnet = DataParallel(ResNet(ResidualBlock, [3, 3, 3]))
resnet.cuda()

# Loss and Optimizer
criterion = nn.MSELoss()
lr = 0.001
optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)

# Training
num_epochs = 50
# Number of batches per epoch, used only for the progress message.
iters_per_epoch = len(train_loader)
# Wall-clock timer. time.clock() reported CPU time on Unix and was removed
# in Python 3.8, so it is not suitable for comparing elapsed training time.
start = time.time()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            # Report the real totals instead of the hard-coded 80 / 500.
            # NOTE(review): loss.data[0] matches the Variable-era PyTorch API
            # used in this script; newer releases need loss.item() -- confirm
            # against the installed version.
            print ("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f" % (epoch + 1, num_epochs, i + 1, iters_per_epoch, loss.data[0]))

    # Decaying Learning Rate: divide by 3 every 20 epochs.
    # Re-creating Adam also discards its accumulated moment estimates.
    if (epoch + 1) % 20 == 0:
        lr /= 3
        optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)
elapsed = time.time() - start
print("time used:",elapsed)
#         # Test
# correct = 0
# total = 0
# for images, labels in test_loader:
#     images = Variable(images.cuda())
#     outputs = resnet(images)
#     _, predicted = torch.max(outputs.data, 1)
#     total += labels.size(0)
#     correct += (predicted.cpu() == labels).sum()
#
# print('Accuracy of the model on the test images: %d %%' % (100 * correct / total))

# Save the Model
torch.save(resnet.state_dict(), 'resnet.pkl')

作为对比实验,我们同时把ResNet的声明方式修改为

 1 resnet = ResNet(ResidualBlock, [3, 3, 3])

其余不变,再运行程序的时候不指定GPU,直接python resnet.py,在声明DataParallel时,运行耗时结果如下:

('time used:', 17124.861335999998),watch -n 1 nvidia-smi确实显示占用两块GPU

在不声明DataParallel时,实验运行结果耗时如下:

('time used:', 30318.149681000003),watch -n 1 nvidia-smi确实显示占用一块GPU

可以看出,在声明DataParallel时时间压缩了近一半,所以声明DataParallel是使用多GPU运行Pytorch的一种方法。

官方的doc也给出了多GPU使用的例子以及部分数据在GPU与部分数据在CPU上运行的例子

以下是两组实验结果的输出:

DataParallel初始化

  1 Epoch [1/80], Iter [100/500] Loss: 916.5578
  2 
  3 Epoch [1/80], Iter [200/500] Loss: 172.2591
  4 
  5 Epoch [1/80], Iter [300/500] Loss: 179.8360
  6 
  7 Epoch [1/80], Iter [400/500] Loss: 259.6867
  8 
  9 Epoch [1/80], Iter [500/500] Loss: 244.0616
 10 
 11 Epoch [1/80], Iter [600/500] Loss: 74.7015
 12 
 13 Epoch [1/80], Iter [700/500] Loss: 63.1657
 14 
 15 Epoch [1/80], Iter [800/500] Loss: 90.3517
 16 
 17 Epoch [1/80], Iter [900/500] Loss: 70.4562
 18 
 19 Epoch [2/80], Iter [100/500] Loss: 52.3249
 20 
 21 Epoch [2/80], Iter [200/500] Loss: 129.1855
 22 
 23 Epoch [2/80], Iter [300/500] Loss: 110.0157
 24 
 25 Epoch [2/80], Iter [400/500] Loss: 64.9313
 26 
 27 Epoch [2/80], Iter [500/500] Loss: 87.8385
 28 
 29 Epoch [2/80], Iter [600/500] Loss: 118.5828
 30 
 31 Epoch [2/80], Iter [700/500] Loss: 123.9575
 32 
 33 Epoch [2/80], Iter [800/500] Loss: 79.1908
 34 
 35 Epoch [2/80], Iter [900/500] Loss: 61.8099
 36 
 37 Epoch [3/80], Iter [100/500] Loss: 50.4294
 38 
 39 Epoch [3/80], Iter [200/500] Loss: 106.8135
 40 
 41 Epoch [3/80], Iter [300/500] Loss: 83.2198
 42 
 43 Epoch [3/80], Iter [400/500] Loss: 60.7116
 44 
 45 Epoch [3/80], Iter [500/500] Loss: 101.9553
 46 
 47 Epoch [3/80], Iter [600/500] Loss: 64.6967
 48 
 49 Epoch [3/80], Iter [700/500] Loss: 66.2446
 50 
 51 Epoch [3/80], Iter [800/500] Loss: 81.1825
 52 
 53 Epoch [3/80], Iter [900/500] Loss: 53.9905
 54 
 55 Epoch [4/80], Iter [100/500] Loss: 76.2977
 56 
 57 Epoch [4/80], Iter [200/500] Loss: 18.4255
 58 
 59 Epoch [4/80], Iter [300/500] Loss: 57.6188
 60 
 61 Epoch [4/80], Iter [400/500] Loss: 45.6235
 62 
 63 Epoch [4/80], Iter [500/500] Loss: 82.9265
 64 
 65 Epoch [4/80], Iter [600/500] Loss: 119.6085
 66 
 67 Epoch [4/80], Iter [700/500] Loss: 53.1355
 68 
 69 Epoch [4/80], Iter [800/500] Loss: 29.5248
 70 
 71 Epoch [4/80], Iter [900/500] Loss: 57.0401
 72 
 73 Epoch [5/80], Iter [100/500] Loss: 47.2671
 74 
 75 Epoch [5/80], Iter [200/500] Loss: 31.6928
 76 
 77 Epoch [5/80], Iter [300/500] Loss: 38.0040
 78 
 79 Epoch [5/80], Iter [400/500] Loss: 24.5184
 80 
 81 Epoch [5/80], Iter [500/500] Loss: 33.8515
 82 
 83 Epoch [5/80], Iter [600/500] Loss: 43.6560
 84 
 85 Epoch [5/80], Iter [700/500] Loss: 68.2500
 86 
 87 Epoch [5/80], Iter [800/500] Loss: 30.8259
 88 
 89 Epoch [5/80], Iter [900/500] Loss: 43.9696
 90 
 91 Epoch [6/80], Iter [100/500] Loss: 22.4120
 92 
 93 Epoch [6/80], Iter [200/500] Loss: 45.5722
 94 
 95 Epoch [6/80], Iter [300/500] Loss: 26.8331
 96 
 97 Epoch [6/80], Iter [400/500] Loss: 58.1139
 98 
 99 Epoch [6/80], Iter [500/500] Loss: 12.8767
100 
101 Epoch [6/80], Iter [600/500] Loss: 26.6725
102 
103 Epoch [6/80], Iter [700/500] Loss: 31.9800
104 
105 Epoch [6/80], Iter [800/500] Loss: 91.2332
106 
107 Epoch [6/80], Iter [900/500] Loss: 44.1361
108 
109 Epoch [7/80], Iter [100/500] Loss: 13.1401
110 
111 Epoch [7/80], Iter [200/500] Loss: 20.9435
112 
113 Epoch [7/80], Iter [300/500] Loss: 28.0944
114 
115 Epoch [7/80], Iter [400/500] Loss: 24.0240
116 
117 Epoch [7/80], Iter [500/500] Loss: 43.3279
118 
119 Epoch [7/80], Iter [600/500] Loss: 23.3077
120 
121 Epoch [7/80], Iter [700/500] Loss: 32.9658
122 
123 Epoch [7/80], Iter [800/500] Loss: 27.2044
124 
125 Epoch [7/80], Iter [900/500] Loss: 25.5850
126 
127 Epoch [8/80], Iter [100/500] Loss: 39.7642
128 
129 Epoch [8/80], Iter [200/500] Loss: 17.7421
130 
131 Epoch [8/80], Iter [300/500] Loss: 29.8965
132 
133 Epoch [8/80], Iter [400/500] Loss: 20.6153
134 
135 Epoch [8/80], Iter [500/500] Loss: 43.0224
136 
137 Epoch [8/80], Iter [600/500] Loss: 58.1552
138 
139 Epoch [8/80], Iter [700/500] Loss: 19.1967
140 
141 Epoch [8/80], Iter [800/500] Loss: 34.9122
142 
143 Epoch [8/80], Iter [900/500] Loss: 15.0651
144 
145 Epoch [9/80], Iter [100/500] Loss: 18.5950
146 
147 Epoch [9/80], Iter [200/500] Loss: 36.1891
148 
149 Epoch [9/80], Iter [300/500] Loss: 22.4936
150 
151 Epoch [9/80], Iter [400/500] Loss: 14.8044
152 
153 Epoch [9/80], Iter [500/500] Loss: 16.6958
154 
155 Epoch [9/80], Iter [600/500] Loss: 24.8461
156 
157 Epoch [9/80], Iter [700/500] Loss: 13.7112
158 
159 Epoch [9/80], Iter [800/500] Loss: 21.2906
160 
161 Epoch [9/80], Iter [900/500] Loss: 31.6950
162 
163 Epoch [10/80], Iter [100/500] Loss: 20.7707
164 
165 Epoch [10/80], Iter [200/500] Loss: 15.6260
166 
167 Epoch [10/80], Iter [300/500] Loss: 28.5737
168 
169 Epoch [10/80], Iter [400/500] Loss: 36.6791
170 
171 Epoch [10/80], Iter [500/500] Loss: 38.9839
172 
173 Epoch [10/80], Iter [600/500] Loss: 14.4459
174 
175 Epoch [10/80], Iter [700/500] Loss: 10.0907
176 
177 Epoch [10/80], Iter [800/500] Loss: 17.9035
178 
179 Epoch [10/80], Iter [900/500] Loss: 24.5759
180 
181 Epoch [11/80], Iter [100/500] Loss: 19.8531
182 
183 Epoch [11/80], Iter [200/500] Loss: 15.7126
184 
185 Epoch [11/80], Iter [300/500] Loss: 18.0198
186 
187 Epoch [11/80], Iter [400/500] Loss: 19.3038
188 
189 Epoch [11/80], Iter [500/500] Loss: 27.4435
190 
191 Epoch [11/80], Iter [600/500] Loss: 18.1086
192 
193 Epoch [11/80], Iter [700/500] Loss: 10.8124
194 
195 Epoch [11/80], Iter [800/500] Loss: 31.2389
196 
197 Epoch [11/80], Iter [900/500] Loss: 14.4881
198 
199 Epoch [12/80], Iter [100/500] Loss: 10.6320
200 
201 Epoch [12/80], Iter [200/500] Loss: 26.8394
202 
203 Epoch [12/80], Iter [300/500] Loss: 16.0246
204 
205 Epoch [12/80], Iter [400/500] Loss: 16.3263
206 
207 Epoch [12/80], Iter [500/500] Loss: 24.5880
208 
209 Epoch [12/80], Iter [600/500] Loss: 15.7498
210 
211 Epoch [12/80], Iter [700/500] Loss: 11.4933
212 
213 Epoch [12/80], Iter [800/500] Loss: 9.7252
214 
215 Epoch [12/80], Iter [900/500] Loss: 31.6774
216 
217 Epoch [13/80], Iter [100/500] Loss: 21.1929
218 
219 Epoch [13/80], Iter [200/500] Loss: 17.0953
220 
221 Epoch [13/80], Iter [300/500] Loss: 21.1883
222 
223 Epoch [13/80], Iter [400/500] Loss: 15.9005
224 
225 Epoch [13/80], Iter [500/500] Loss: 14.7924
226 
227 Epoch [13/80], Iter [600/500] Loss: 12.4324
228 
229 Epoch [13/80], Iter [700/500] Loss: 12.0840
230 
231 Epoch [13/80], Iter [800/500] Loss: 30.9664
232 
233 Epoch [13/80], Iter [900/500] Loss: 14.9601
234 
235 Epoch [14/80], Iter [100/500] Loss: 6.5126
236 
237 Epoch [14/80], Iter [200/500] Loss: 11.3227
238 
239 Epoch [14/80], Iter [300/500] Loss: 12.9980
240 
241 Epoch [14/80], Iter [400/500] Loss: 13.8523
242 
243 Epoch [14/80], Iter [500/500] Loss: 10.6771
244 
245 Epoch [14/80], Iter [600/500] Loss: 7.3953
246 
247 Epoch [14/80], Iter [700/500] Loss: 14.6829
248 
249 Epoch [14/80], Iter [800/500] Loss: 15.6956
250 
251 Epoch [14/80], Iter [900/500] Loss: 21.8876
252 
253 Epoch [15/80], Iter [100/500] Loss: 5.1943
254 
255 Epoch [15/80], Iter [200/500] Loss: 13.0731
256 
257 Epoch [15/80], Iter [300/500] Loss: 6.8931
258 
259 Epoch [15/80], Iter [400/500] Loss: 15.3212
260 
261 Epoch [15/80], Iter [500/500] Loss: 8.1775
262 
263 Epoch [15/80], Iter [600/500] Loss: 11.5664
264 
265 Epoch [15/80], Iter [700/500] Loss: 5.5951
266 
267 Epoch [15/80], Iter [800/500] Loss: 10.9075
268 
269 Epoch [15/80], Iter [900/500] Loss: 14.8503
270 
271 Epoch [16/80], Iter [100/500] Loss: 19.5184
272 
273 Epoch [16/80], Iter [200/500] Loss: 10.3570
274 
275 Epoch [16/80], Iter [300/500] Loss: 10.0997
276 
277 Epoch [16/80], Iter [400/500] Loss: 9.7350
278 
279 Epoch [16/80], Iter [500/500] Loss: 11.3000
280 
281 Epoch [16/80], Iter [600/500] Loss: 21.6213
282 
283 Epoch [16/80], Iter [700/500] Loss: 9.7907
284 
285 Epoch [16/80], Iter [800/500] Loss: 10.0128
286 
287 Epoch [16/80], Iter [900/500] Loss: 10.7869
288 
289 Epoch [17/80], Iter [100/500] Loss: 9.2015
290 
291 Epoch [17/80], Iter [200/500] Loss: 7.3021
292 
293 Epoch [17/80], Iter [300/500] Loss: 5.9662
294 
295 Epoch [17/80], Iter [400/500] Loss: 17.5215
296 
297 Epoch [17/80], Iter [500/500] Loss: 7.3349
298 
299 Epoch [17/80], Iter [600/500] Loss: 8.5626
300 
301 Epoch [17/80], Iter [700/500] Loss: 12.7575
302 
303 Epoch [17/80], Iter [800/500] Loss: 10.7792
304 
305 Epoch [17/80], Iter [900/500] Loss: 7.0889
306 
307 Epoch [18/80], Iter [100/500] Loss: 10.5613
308 
309 Epoch [18/80], Iter [200/500] Loss: 3.0777
310 
311 Epoch [18/80], Iter [300/500] Loss: 6.3598
312 
313 Epoch [18/80], Iter [400/500] Loss: 7.9515
314 
315 Epoch [18/80], Iter [500/500] Loss: 10.8023
316 
317 Epoch [18/80], Iter [600/500] Loss: 7.3443
318 
319 Epoch [18/80], Iter [700/500] Loss: 8.0862
320 
321 Epoch [18/80], Iter [800/500] Loss: 15.2795
322 
323 Epoch [18/80], Iter [900/500] Loss: 10.2788
324 
325 Epoch [19/80], Iter [100/500] Loss: 5.0786
326 
327 Epoch [19/80], Iter [200/500] Loss: 8.8248
328 
329 Epoch [19/80], Iter [300/500] Loss: 4.9262
330 
331 Epoch [19/80], Iter [400/500] Loss: 7.8992
332 
333 Epoch [19/80], Iter [500/500] Loss: 13.1279
334 
335 Epoch [19/80], Iter [600/500] Loss: 8.2703
336 
337 Epoch [19/80], Iter [700/500] Loss: 4.1547
338 
339 Epoch [19/80], Iter [800/500] Loss: 9.0542
340 
341 Epoch [19/80], Iter [900/500] Loss: 6.7904
342 
343 Epoch [20/80], Iter [100/500] Loss: 8.6150
344 
345 Epoch [20/80], Iter [200/500] Loss: 3.7212
346 
347 Epoch [20/80], Iter [300/500] Loss: 6.2832
348 
349 Epoch [20/80], Iter [400/500] Loss: 10.1591
350 
351 Epoch [20/80], Iter [500/500] Loss: 9.7668
352 
353 Epoch [20/80], Iter [600/500] Loss: 4.7498
354 
355 Epoch [20/80], Iter [700/500] Loss: 4.8831
356 
357 Epoch [20/80], Iter [800/500] Loss: 7.7877
358 
359 Epoch [20/80], Iter [900/500] Loss: 8.5114
360 
361 Epoch [21/80], Iter [100/500] Loss: 2.1853
362 
363 Epoch [21/80], Iter [200/500] Loss: 5.8741
364 
365 Epoch [21/80], Iter [300/500] Loss: 5.3676
366 
367 Epoch [21/80], Iter [400/500] Loss: 3.1155
368 
369 Epoch [21/80], Iter [500/500] Loss: 4.2433
370 
371 Epoch [21/80], Iter [600/500] Loss: 1.9783
372 
373 Epoch [21/80], Iter [700/500] Loss: 2.7622
374 
375 Epoch [21/80], Iter [800/500] Loss: 2.0112
376 
377 Epoch [21/80], Iter [900/500] Loss: 2.2692
378 
379 Epoch [22/80], Iter [100/500] Loss: 2.1882
380 
381 Epoch [22/80], Iter [200/500] Loss: 4.2540
382 
383 Epoch [22/80], Iter [300/500] Loss: 4.0126
384 
385 Epoch [22/80], Iter [400/500] Loss: 2.2220
386 
387 Epoch [22/80], Iter [500/500] Loss: 2.4755
388 
389 Epoch [22/80], Iter [600/500] Loss: 3.0793
390 
391 Epoch [22/80], Iter [700/500] Loss: 1.9128
392 
393 Epoch [22/80], Iter [800/500] Loss: 4.8721
394 
395 Epoch [22/80], Iter [900/500] Loss: 2.1349
396 
397 Epoch [23/80], Iter [100/500] Loss: 1.8705
398 
399 Epoch [23/80], Iter [200/500] Loss: 2.4326
400 
401 Epoch [23/80], Iter [300/500] Loss: 1.5636
402 
403 Epoch [23/80], Iter [400/500] Loss: 2.0465
404 
405 Epoch [23/80], Iter [500/500] Loss: 1.5183
406 
407 Epoch [23/80], Iter [600/500] Loss: 2.2711
408 
409 Epoch [23/80], Iter [700/500] Loss: 2.8997
410 
411 Epoch [23/80], Iter [800/500] Loss: 2.6150
412 
413 Epoch [23/80], Iter [900/500] Loss: 2.8083
414 
415 Epoch [24/80], Iter [100/500] Loss: 2.7177
416 
417 Epoch [24/80], Iter [200/500] Loss: 3.2044
418 
419 Epoch [24/80], Iter [300/500] Loss: 3.8137
420 
421 Epoch [24/80], Iter [400/500] Loss: 1.9400
422 
423 Epoch [24/80], Iter [500/500] Loss: 2.3550
424 
425 Epoch [24/80], Iter [600/500] Loss: 1.6304
426 
427 Epoch [24/80], Iter [700/500] Loss: 1.1287
428 
429 Epoch [24/80], Iter [800/500] Loss: 2.1436
430 
431 Epoch [24/80], Iter [900/500] Loss: 1.3761
432 
433 Epoch [25/80], Iter [100/500] Loss: 1.9115
434 
435 Epoch [25/80], Iter [200/500] Loss: 0.9423
436 
437 Epoch [25/80], Iter [300/500] Loss: 1.1732
438 
439 Epoch [25/80], Iter [400/500] Loss: 1.8946
440 
441 Epoch [25/80], Iter [500/500] Loss: 1.4359
442 
443 Epoch [25/80], Iter [600/500] Loss: 2.7499
444 
445 Epoch [25/80], Iter [700/500] Loss: 3.2734
446 
447 Epoch [25/80], Iter [800/500] Loss: 1.5863
448 
449 Epoch [25/80], Iter [900/500] Loss: 2.8276
450 
451 Epoch [26/80], Iter [100/500] Loss: 3.3783
452 
453 Epoch [26/80], Iter [200/500] Loss: 1.6336
454 
455 Epoch [26/80], Iter [300/500] Loss: 1.8298
456 
457 Epoch [26/80], Iter [400/500] Loss: 1.1775
458 
459 Epoch [26/80], Iter [500/500] Loss: 2.5811
460 
461 Epoch [26/80], Iter [600/500] Loss: 1.2587
462 
463 Epoch [26/80], Iter [700/500] Loss: 2.3547
464 
465 Epoch [26/80], Iter [800/500] Loss: 3.2238
466 
467 Epoch [26/80], Iter [900/500] Loss: 1.8571
468 
469 Epoch [27/80], Iter [100/500] Loss: 1.9582
470 
471 Epoch [27/80], Iter [200/500] Loss: 0.8752
472 
473 Epoch [27/80], Iter [300/500] Loss: 1.5140
474 
475 Epoch [27/80], Iter [400/500] Loss: 1.4624
476 
477 Epoch [27/80], Iter [500/500] Loss: 3.6735
478 
479 Epoch [27/80], Iter [600/500] Loss: 2.5618
480 
481 Epoch [27/80], Iter [700/500] Loss: 1.3707
482 
483 Epoch [27/80], Iter [800/500] Loss: 1.2286
484 
485 Epoch [27/80], Iter [900/500] Loss: 2.4623
486 
487 Epoch [28/80], Iter [100/500] Loss: 0.8966
488 
489 Epoch [28/80], Iter [200/500] Loss: 1.4363
490 
491 Epoch [28/80], Iter [300/500] Loss: 1.3229
492 
493 Epoch [28/80], Iter [400/500] Loss: 1.4402
494 
495 Epoch [28/80], Iter [500/500] Loss: 1.4920
496 
497 Epoch [28/80], Iter [600/500] Loss: 1.9604
498 
499 Epoch [28/80], Iter [700/500] Loss: 3.1165
500 
501 Epoch [28/80], Iter [800/500] Loss: 1.0391
502 
503 Epoch [28/80], Iter [900/500] Loss: 2.5201
504 
505 Epoch [29/80], Iter [100/500] Loss: 1.8787
506 
507 Epoch [29/80], Iter [200/500] Loss: 0.9840
508 
509 Epoch [29/80], Iter [300/500] Loss: 1.4460
510 
511 Epoch [29/80], Iter [400/500] Loss: 2.2886
512 
513 Epoch [29/80], Iter [500/500] Loss: 1.4231
514 
515 Epoch [29/80], Iter [600/500] Loss: 1.4980
516 
517 Epoch [29/80], Iter [700/500] Loss: 2.3995
518 
519 Epoch [29/80], Iter [800/500] Loss: 1.7662
520 
521 Epoch [29/80], Iter [900/500] Loss: 2.3659
522 
523 Epoch [30/80], Iter [100/500] Loss: 1.9505
524 
525 Epoch [30/80], Iter [200/500] Loss: 1.1663
526 
527 Epoch [30/80], Iter [300/500] Loss: 0.9471
528 
529 Epoch [30/80], Iter [400/500] Loss: 0.9364
530 
531 Epoch [30/80], Iter [500/500] Loss: 1.0124
532 
533 Epoch [30/80], Iter [600/500] Loss: 1.2437
534 
535 Epoch [30/80], Iter [700/500] Loss: 0.8796
536 
537 Epoch [30/80], Iter [800/500] Loss: 1.2183
538 
539 Epoch [30/80], Iter [900/500] Loss: 2.3959
540 
541 Epoch [31/80], Iter [100/500] Loss: 1.4337
542 
543 Epoch [31/80], Iter [200/500] Loss: 1.1861
544 
545 Epoch [31/80], Iter [300/500] Loss: 1.2915
546 
547 Epoch [31/80], Iter [400/500] Loss: 1.0188
548 
549 Epoch [31/80], Iter [500/500] Loss: 2.2067
550 
551 Epoch [31/80], Iter [600/500] Loss: 2.6476
552 
553 Epoch [31/80], Iter [700/500] Loss: 1.1402
554 
555 Epoch [31/80], Iter [800/500] Loss: 1.4248
556 
557 Epoch [31/80], Iter [900/500] Loss: 1.0669
558 
559 Epoch [32/80], Iter [100/500] Loss: 1.5955
560 
561 Epoch [32/80], Iter [200/500] Loss: 1.7216
562 
563 Epoch [32/80], Iter [300/500] Loss: 1.2304
564 
565 Epoch [32/80], Iter [400/500] Loss: 1.7058
566 
567 Epoch [32/80], Iter [500/500] Loss: 1.2115
568 
569 Epoch [32/80], Iter [600/500] Loss: 1.6176
570 
571 Epoch [32/80], Iter [700/500] Loss: 1.3043
572 
573 Epoch [32/80], Iter [800/500] Loss: 1.9501
574 
575 Epoch [32/80], Iter [900/500] Loss: 1.9035
576 
577 Epoch [33/80], Iter [100/500] Loss: 1.9505
578 
579 Epoch [33/80], Iter [200/500] Loss: 1.5603
580 
581 Epoch [33/80], Iter [300/500] Loss: 1.5528
582 
583 Epoch [33/80], Iter [400/500] Loss: 1.4192
584 
585 Epoch [33/80], Iter [500/500] Loss: 1.2211
586 
587 Epoch [33/80], Iter [600/500] Loss: 1.3927
588 
589 Epoch [33/80], Iter [700/500] Loss: 2.3885
590 
591 Epoch [33/80], Iter [800/500] Loss: 1.0948
592 
593 Epoch [33/80], Iter [900/500] Loss: 1.6951
594 
595 Epoch [34/80], Iter [100/500] Loss: 0.9534
596 
597 Epoch [34/80], Iter [200/500] Loss: 0.7364
598 
599 Epoch [34/80], Iter [300/500] Loss: 1.2372
600 
601 Epoch [34/80], Iter [400/500] Loss: 1.6718
602 
603 Epoch [34/80], Iter [500/500] Loss: 0.7804
604 
605 Epoch [34/80], Iter [600/500] Loss: 2.1848
606 
607 Epoch [34/80], Iter [700/500] Loss: 0.6333
608 
609 Epoch [34/80], Iter [800/500] Loss: 1.6399
610 
611 Epoch [34/80], Iter [900/500] Loss: 0.9555
612 
613 Epoch [35/80], Iter [100/500] Loss: 1.5851
614 
615 Epoch [35/80], Iter [200/500] Loss: 3.7824
616 
617 Epoch [35/80], Iter [300/500] Loss: 2.5642
618 
619 Epoch [35/80], Iter [400/500] Loss: 0.8965
620 
621 Epoch [35/80], Iter [500/500] Loss: 1.9092
622 
623 Epoch [35/80], Iter [600/500] Loss: 1.3729
624 
625 Epoch [35/80], Iter [700/500] Loss: 2.2079
626 
627 Epoch [35/80], Iter [800/500] Loss: 0.9051
628 
629 Epoch [35/80], Iter [900/500] Loss: 1.1845
630 
631 Epoch [36/80], Iter [100/500] Loss: 0.8240
632 
633 Epoch [36/80], Iter [200/500] Loss: 1.1929
634 
635 Epoch [36/80], Iter [300/500] Loss: 1.7051
636 
637 Epoch [36/80], Iter [400/500] Loss: 0.7341
638 
639 Epoch [36/80], Iter [500/500] Loss: 0.8078
640 
641 Epoch [36/80], Iter [600/500] Loss: 0.7525
642 
643 Epoch [36/80], Iter [700/500] Loss: 1.5739
644 
645 Epoch [36/80], Iter [800/500] Loss: 1.3938
646 
647 Epoch [36/80], Iter [900/500] Loss: 0.7145
648 
649 Epoch [37/80], Iter [100/500] Loss: 0.9577
650 
651 Epoch [37/80], Iter [200/500] Loss: 0.9464
652 
653 Epoch [37/80], Iter [300/500] Loss: 1.0931
654 
655 Epoch [37/80], Iter [400/500] Loss: 1.0390
656 
657 Epoch [37/80], Iter [500/500] Loss: 1.3472
658 
659 Epoch [37/80], Iter [600/500] Loss: 0.6312
660 
661 Epoch [37/80], Iter [700/500] Loss: 0.6754
662 
663 Epoch [37/80], Iter [800/500] Loss: 0.5888
664 
665 Epoch [37/80], Iter [900/500] Loss: 3.1377
666 
667 Epoch [38/80], Iter [100/500] Loss: 0.8339
668 
669 Epoch [38/80], Iter [200/500] Loss: 0.9345
670 
671 Epoch [38/80], Iter [300/500] Loss: 0.6615
672 
673 Epoch [38/80], Iter [400/500] Loss: 1.6327
674 
675 Epoch [38/80], Iter [500/500] Loss: 0.4701
676 
677 Epoch [38/80], Iter [600/500] Loss: 1.1513
678 
679 Epoch [38/80], Iter [700/500] Loss: 0.9013
680 
681 Epoch [38/80], Iter [800/500] Loss: 2.7680
682 
683 Epoch [38/80], Iter [900/500] Loss: 1.2733
684 
685 Epoch [39/80], Iter [100/500] Loss: 3.0368
686 
687 Epoch [39/80], Iter [200/500] Loss: 1.5569
688 
689 Epoch [39/80], Iter [300/500] Loss: 0.5049
690 
691 Epoch [39/80], Iter [400/500] Loss: 0.4075
692 
693 Epoch [39/80], Iter [500/500] Loss: 0.9771
694 
695 Epoch [39/80], Iter [600/500] Loss: 0.9003
696 
697 Epoch [39/80], Iter [700/500] Loss: 1.6323
698 
699 Epoch [39/80], Iter [800/500] Loss: 0.4881
700 
701 Epoch [39/80], Iter [900/500] Loss: 2.1344
702 
703 Epoch [40/80], Iter [100/500] Loss: 1.2439
704 
705 Epoch [40/80], Iter [200/500] Loss: 1.3419
706 
707 Epoch [40/80], Iter [300/500] Loss: 0.9575
708 
709 Epoch [40/80], Iter [400/500] Loss: 1.4438
710 
711 Epoch [40/80], Iter [500/500] Loss: 0.8559
712 
713 Epoch [40/80], Iter [600/500] Loss: 1.0400
714 
715 Epoch [40/80], Iter [700/500] Loss: 0.9063
716 
717 Epoch [40/80], Iter [800/500] Loss: 1.0714
718 
719 Epoch [40/80], Iter [900/500] Loss: 0.5098
720 
721 Epoch [41/80], Iter [100/500] Loss: 0.5906
722 
723 Epoch [41/80], Iter [200/500] Loss: 0.6610
724 
725 Epoch [41/80], Iter [300/500] Loss: 0.4230
726 
727 Epoch [41/80], Iter [400/500] Loss: 0.6014
728 
729 Epoch [41/80], Iter [500/500] Loss: 0.3004
730 
731 Epoch [41/80], Iter [600/500] Loss: 0.5606
732 
733 Epoch [41/80], Iter [700/500] Loss: 0.4994
734 
735 Epoch [41/80], Iter [800/500] Loss: 0.8664
736 
737 Epoch [41/80], Iter [900/500] Loss: 0.5302
738 
739 Epoch [42/80], Iter [100/500] Loss: 0.2961
740 
741 Epoch [42/80], Iter [200/500] Loss: 0.2826
742 
743 Epoch [42/80], Iter [300/500] Loss: 0.3575
744 
745 Epoch [42/80], Iter [400/500] Loss: 0.3224
746 
747 Epoch [42/80], Iter [500/500] Loss: 0.6851
748 
749 Epoch [42/80], Iter [600/500] Loss: 0.2997
750 
751 Epoch [42/80], Iter [700/500] Loss: 0.3907
752 
753 Epoch [42/80], Iter [800/500] Loss: 0.4437
754 
755 Epoch [42/80], Iter [900/500] Loss: 0.4847
756 
757 Epoch [43/80], Iter [100/500] Loss: 0.5418
758 
759 Epoch [43/80], Iter [200/500] Loss: 0.4099
760 
761 Epoch [43/80], Iter [300/500] Loss: 0.3339
762 
763 Epoch [43/80], Iter [400/500] Loss: 0.5546
764 
765 Epoch [43/80], Iter [500/500] Loss: 0.5867
766 
767 Epoch [43/80], Iter [600/500] Loss: 0.3540
768 
769 Epoch [43/80], Iter [700/500] Loss: 0.4656
770 
771 Epoch [43/80], Iter [800/500] Loss: 0.2922
772 
773 Epoch [43/80], Iter [900/500] Loss: 0.3042
774 
775 Epoch [44/80], Iter [100/500] Loss: 0.6309
776 
777 Epoch [44/80], Iter [200/500] Loss: 0.2412
778 
779 Epoch [44/80], Iter [300/500] Loss: 0.5505
780 
781 Epoch [44/80], Iter [400/500] Loss: 0.4133
782 
783 Epoch [44/80], Iter [500/500] Loss: 0.4317
784 
785 Epoch [44/80], Iter [600/500] Loss: 0.4152
786 
787 Epoch [44/80], Iter [700/500] Loss: 0.6375
788 
789 Epoch [44/80], Iter [800/500] Loss: 0.3283
790 
791 Epoch [44/80], Iter [900/500] Loss: 0.4399
792 
793 Epoch [45/80], Iter [100/500] Loss: 0.2777
794 
795 Epoch [45/80], Iter [200/500] Loss: 0.3131
796 
797 Epoch [45/80], Iter [300/500] Loss: 0.2451
798 
799 Epoch [45/80], Iter [400/500] Loss: 0.5350
800 
801 Epoch [45/80], Iter [500/500] Loss: 0.2501
802 
803 Epoch [45/80], Iter [600/500] Loss: 0.2076
804 
805 Epoch [45/80], Iter [700/500] Loss: 0.2317
806 
807 Epoch [45/80], Iter [800/500] Loss: 0.8772
808 
809 Epoch [45/80], Iter [900/500] Loss: 0.4162
810 
811 Epoch [46/80], Iter [100/500] Loss: 0.3190
812 
813 Epoch [46/80], Iter [200/500] Loss: 0.2458
814 
815 Epoch [46/80], Iter [300/500] Loss: 0.2976
816 
817 Epoch [46/80], Iter [400/500] Loss: 0.3712
818 
819 Epoch [46/80], Iter [500/500] Loss: 0.4305
820 
821 Epoch [46/80], Iter [600/500] Loss: 0.5143
822 
823 Epoch [46/80], Iter [700/500] Loss: 0.2622
824 
825 Epoch [46/80], Iter [800/500] Loss: 0.5331
826 
827 Epoch [46/80], Iter [900/500] Loss: 0.3598
828 
829 Epoch [47/80], Iter [100/500] Loss: 0.2180
830 
831 Epoch [47/80], Iter [200/500] Loss: 0.2275
832 
833 Epoch [47/80], Iter [300/500] Loss: 0.5302
834 
835 Epoch [47/80], Iter [400/500] Loss: 0.3535
836 
837 Epoch [47/80], Iter [500/500] Loss: 0.5790
838 
839 Epoch [47/80], Iter [600/500] Loss: 0.3741
840 
841 Epoch [47/80], Iter [700/500] Loss: 0.5120
842 
843 Epoch [47/80], Iter [800/500] Loss: 0.6204
844 
845 Epoch [47/80], Iter [900/500] Loss: 0.4902
846 
847 Epoch [48/80], Iter [100/500] Loss: 0.2668
848 
849 Epoch [48/80], Iter [200/500] Loss: 0.5693
850 
851 Epoch [48/80], Iter [300/500] Loss: 0.3328
852 
853 Epoch [48/80], Iter [400/500] Loss: 0.2399
854 
855 Epoch [48/80], Iter [500/500] Loss: 0.3160
856 
857 Epoch [48/80], Iter [600/500] Loss: 0.2944
858 
859 Epoch [48/80], Iter [700/500] Loss: 0.2742
860 
861 Epoch [48/80], Iter [800/500] Loss: 0.5297
862 
863 Epoch [48/80], Iter [900/500] Loss: 0.3755
864 
865 Epoch [49/80], Iter [100/500] Loss: 0.2658
866 
867 Epoch [49/80], Iter [200/500] Loss: 0.2223
868 
869 Epoch [49/80], Iter [300/500] Loss: 0.4348
870 
871 Epoch [49/80], Iter [400/500] Loss: 0.2313
872 
873 Epoch [49/80], Iter [500/500] Loss: 0.2838
874 
875 Epoch [49/80], Iter [600/500] Loss: 0.3415
876 
877 Epoch [49/80], Iter [700/500] Loss: 0.3633
878 
879 Epoch [49/80], Iter [800/500] Loss: 0.3768
880 
881 Epoch [49/80], Iter [900/500] Loss: 0.5177
882 
883 Epoch [50/80], Iter [100/500] Loss: 0.3538
884 
885 Epoch [50/80], Iter [200/500] Loss: 0.2759
886 
887 Epoch [50/80], Iter [300/500] Loss: 0.2255
888 
889 Epoch [50/80], Iter [400/500] Loss: 0.3148
890 
891 Epoch [50/80], Iter [500/500] Loss: 0.4502
892 
893 Epoch [50/80], Iter [600/500] Loss: 0.3382
894 
895 Epoch [50/80], Iter [700/500] Loss: 0.8207
896 
897 Epoch [50/80], Iter [800/500] Loss: 0.3541
898 
899 Epoch [50/80], Iter [900/500] Loss: 0.4090
900 
901 ('time used:', 17124.861335999998)
View Code

未被DataParallel初始化

  1 Epoch [1/80], Iter [100/500] Loss: 635.6779
  2 
  3 Epoch [1/80], Iter [200/500] Loss: 247.5514
  4 
  5 Epoch [1/80], Iter [300/500] Loss: 231.7609
  6 
  7 Epoch [1/80], Iter [400/500] Loss: 198.7304
  8 
  9 Epoch [1/80], Iter [500/500] Loss: 207.1028
 10 
 11 Epoch [1/80], Iter [600/500] Loss: 114.7708
 12 
 13 Epoch [1/80], Iter [700/500] Loss: 126.9886
 14 
 15 Epoch [1/80], Iter [800/500] Loss: 160.8622
 16 
 17 Epoch [1/80], Iter [900/500] Loss: 153.8121
 18 
 19 Epoch [2/80], Iter [100/500] Loss: 106.6578
 20 
 21 Epoch [2/80], Iter [200/500] Loss: 91.5044
 22 
 23 Epoch [2/80], Iter [300/500] Loss: 111.4231
 24 
 25 Epoch [2/80], Iter [400/500] Loss: 50.7004
 26 
 27 Epoch [2/80], Iter [500/500] Loss: 58.9242
 28 
 29 Epoch [2/80], Iter [600/500] Loss: 55.2035
 30 
 31 Epoch [2/80], Iter [700/500] Loss: 26.7637
 32 
 33 Epoch [2/80], Iter [800/500] Loss: 52.5472
 34 
 35 Epoch [2/80], Iter [900/500] Loss: 51.7907
 36 
 37 Epoch [3/80], Iter [100/500] Loss: 35.7970
 38 
 39 Epoch [3/80], Iter [200/500] Loss: 59.1204
 40 
 41 Epoch [3/80], Iter [300/500] Loss: 70.5727
 42 
 43 Epoch [3/80], Iter [400/500] Loss: 50.1149
 44 
 45 Epoch [3/80], Iter [500/500] Loss: 26.3628
 46 
 47 Epoch [3/80], Iter [600/500] Loss: 67.3355
 48 
 49 Epoch [3/80], Iter [700/500] Loss: 56.8271
 50 
 51 Epoch [3/80], Iter [800/500] Loss: 46.5803
 52 
 53 Epoch [3/80], Iter [900/500] Loss: 34.9568
 54 
 55 Epoch [4/80], Iter [100/500] Loss: 67.0837
 56 
 57 Epoch [4/80], Iter [200/500] Loss: 36.8596
 58 
 59 Epoch [4/80], Iter [300/500] Loss: 37.6830
 60 
 61 Epoch [4/80], Iter [400/500] Loss: 52.1378
 62 
 63 Epoch [4/80], Iter [500/500] Loss: 104.5909
 64 
 65 Epoch [4/80], Iter [600/500] Loss: 71.3509
 66 
 67 Epoch [4/80], Iter [700/500] Loss: 28.4496
 68 
 69 Epoch [4/80], Iter [800/500] Loss: 56.1399
 70 
 71 Epoch [4/80], Iter [900/500] Loss: 58.7510
 72 
 73 Epoch [5/80], Iter [100/500] Loss: 42.5710
 74 
 75 Epoch [5/80], Iter [200/500] Loss: 25.5430
 76 
 77 Epoch [5/80], Iter [300/500] Loss: 25.9271
 78 
 79 Epoch [5/80], Iter [400/500] Loss: 75.8942
 80 
 81 Epoch [5/80], Iter [500/500] Loss: 70.6782
 82 
 83 Epoch [5/80], Iter [600/500] Loss: 10.7801
 84 
 85 Epoch [5/80], Iter [700/500] Loss: 29.9416
 86 
 87 Epoch [5/80], Iter [800/500] Loss: 47.0781
 88 
 89 Epoch [5/80], Iter [900/500] Loss: 45.4692
 90 
 91 Epoch [6/80], Iter [100/500] Loss: 51.3811
 92 
 93 Epoch [6/80], Iter [200/500] Loss: 30.6207
 94 
 95 Epoch [6/80], Iter [300/500] Loss: 35.4928
 96 
 97 Epoch [6/80], Iter [400/500] Loss: 37.9467
 98 
 99 Epoch [6/80], Iter [500/500] Loss: 36.7505
100 
101 Epoch [6/80], Iter [600/500] Loss: 64.3528
102 
103 Epoch [6/80], Iter [700/500] Loss: 73.6308
104 
105 Epoch [6/80], Iter [800/500] Loss: 33.1290
106 
107 Epoch [6/80], Iter [900/500] Loss: 34.2442
108 
109 Epoch [7/80], Iter [100/500] Loss: 34.9157
110 
111 Epoch [7/80], Iter [200/500] Loss: 26.8041
112 
113 Epoch [7/80], Iter [300/500] Loss: 43.5796
114 
115 Epoch [7/80], Iter [400/500] Loss: 31.5104
116 
117 Epoch [7/80], Iter [500/500] Loss: 41.2132
118 
119 Epoch [7/80], Iter [600/500] Loss: 23.1634
120 
121 Epoch [7/80], Iter [700/500] Loss: 26.7399
122 
123 Epoch [7/80], Iter [800/500] Loss: 60.4979
124 
125 Epoch [7/80], Iter [900/500] Loss: 32.8528
126 
127 Epoch [8/80], Iter [100/500] Loss: 36.6079
128 
129 Epoch [8/80], Iter [200/500] Loss: 49.1552
130 
131 Epoch [8/80], Iter [300/500] Loss: 21.2926
132 
133 Epoch [8/80], Iter [400/500] Loss: 33.5335
134 
135 Epoch [8/80], Iter [500/500] Loss: 50.1770
136 
137 Epoch [8/80], Iter [600/500] Loss: 21.9908
138 
139 Epoch [8/80], Iter [700/500] Loss: 40.2040
140 
141 Epoch [8/80], Iter [800/500] Loss: 22.5460
142 
143 Epoch [8/80], Iter [900/500] Loss: 43.9564
144 
145 Epoch [9/80], Iter [100/500] Loss: 19.8116
146 
147 Epoch [9/80], Iter [200/500] Loss: 8.5169
148 
149 Epoch [9/80], Iter [300/500] Loss: 37.0475
150 
151 Epoch [9/80], Iter [400/500] Loss: 74.2606
152 
153 Epoch [9/80], Iter [500/500] Loss: 16.3256
154 
155 Epoch [9/80], Iter [600/500] Loss: 26.0609
156 
157 Epoch [9/80], Iter [700/500] Loss: 24.3721
158 
159 Epoch [9/80], Iter [800/500] Loss: 37.5132
160 
161 Epoch [9/80], Iter [900/500] Loss: 27.4818
162 
163 Epoch [10/80], Iter [100/500] Loss: 11.7654
164 
165 Epoch [10/80], Iter [200/500] Loss: 9.3536
166 
167 Epoch [10/80], Iter [300/500] Loss: 11.6718
168 
169 Epoch [10/80], Iter [400/500] Loss: 24.4423
170 
171 Epoch [10/80], Iter [500/500] Loss: 25.6966
172 
173 Epoch [10/80], Iter [600/500] Loss: 35.2358
174 
175 Epoch [10/80], Iter [700/500] Loss: 17.2685
176 
177 Epoch [10/80], Iter [800/500] Loss: 22.3965
178 
179 Epoch [10/80], Iter [900/500] Loss: 42.6901
180 
181 Epoch [11/80], Iter [100/500] Loss: 17.9832
182 
183 Epoch [11/80], Iter [200/500] Loss: 18.8705
184 
185 Epoch [11/80], Iter [300/500] Loss: 25.3700
186 
187 Epoch [11/80], Iter [400/500] Loss: 10.8511
188 
189 Epoch [11/80], Iter [500/500] Loss: 18.3028
190 
191 Epoch [11/80], Iter [600/500] Loss: 23.2316
192 
193 Epoch [11/80], Iter [700/500] Loss: 10.2498
194 
195 Epoch [11/80], Iter [800/500] Loss: 14.7609
196 
197 Epoch [11/80], Iter [900/500] Loss: 20.1801
198 
199 Epoch [12/80], Iter [100/500] Loss: 23.8675
200 
201 Epoch [12/80], Iter [200/500] Loss: 15.7924
202 
203 Epoch [12/80], Iter [300/500] Loss: 13.7092
204 
205 Epoch [12/80], Iter [400/500] Loss: 12.0196
206 
207 Epoch [12/80], Iter [500/500] Loss: 7.2408
208 
209 Epoch [12/80], Iter [600/500] Loss: 10.7912
210 
211 Epoch [12/80], Iter [700/500] Loss: 11.9665
212 
213 Epoch [12/80], Iter [800/500] Loss: 13.7599
214 
215 Epoch [12/80], Iter [900/500] Loss: 18.3869
216 
217 Epoch [13/80], Iter [100/500] Loss: 11.1715
218 
219 Epoch [13/80], Iter [200/500] Loss: 17.6397
220 
221 Epoch [13/80], Iter [300/500] Loss: 9.3256
222 
223 Epoch [13/80], Iter [400/500] Loss: 12.7995
224 
225 Epoch [13/80], Iter [500/500] Loss: 7.8598
226 
227 Epoch [13/80], Iter [600/500] Loss: 10.7001
228 
229 Epoch [13/80], Iter [700/500] Loss: 26.3672
230 
231 Epoch [13/80], Iter [800/500] Loss: 15.4815
232 
233 Epoch [13/80], Iter [900/500] Loss: 14.0478
234 
235 Epoch [14/80], Iter [100/500] Loss: 16.0473
236 
237 Epoch [14/80], Iter [200/500] Loss: 4.7192
238 
239 Epoch [14/80], Iter [300/500] Loss: 10.7586
240 
241 Epoch [14/80], Iter [400/500] Loss: 13.6734
242 
243 Epoch [14/80], Iter [500/500] Loss: 9.3228
244 
245 Epoch [14/80], Iter [600/500] Loss: 5.5830
246 
247 Epoch [14/80], Iter [700/500] Loss: 7.5252
248 
249 Epoch [14/80], Iter [800/500] Loss: 7.6239
250 
251 Epoch [14/80], Iter [900/500] Loss: 7.1024
252 
253 Epoch [15/80], Iter [100/500] Loss: 17.5188
254 
255 Epoch [15/80], Iter [200/500] Loss: 11.8842
256 
257 Epoch [15/80], Iter [300/500] Loss: 9.0330
258 
259 Epoch [15/80], Iter [400/500] Loss: 11.7120
260 
261 Epoch [15/80], Iter [500/500] Loss: 17.0862
262 
263 Epoch [15/80], Iter [600/500] Loss: 11.4103
264 
265 Epoch [15/80], Iter [700/500] Loss: 12.2746
266 
267 Epoch [15/80], Iter [800/500] Loss: 13.6224
268 
269 Epoch [15/80], Iter [900/500] Loss: 12.7686
270 
271 Epoch [16/80], Iter [100/500] Loss: 5.5978
272 
273 Epoch [16/80], Iter [200/500] Loss: 12.2122
274 
275 Epoch [16/80], Iter [300/500] Loss: 5.1189
276 
277 Epoch [16/80], Iter [400/500] Loss: 14.1793
278 
279 Epoch [16/80], Iter [500/500] Loss: 10.3744
280 
281 Epoch [16/80], Iter [600/500] Loss: 5.2099
282 
283 Epoch [16/80], Iter [700/500] Loss: 6.7522
284 
285 Epoch [16/80], Iter [800/500] Loss: 13.2532
286 
287 Epoch [16/80], Iter [900/500] Loss: 6.7040
288 
289 Epoch [17/80], Iter [100/500] Loss: 10.7390
290 
291 Epoch [17/80], Iter [200/500] Loss: 8.1525
292 
293 Epoch [17/80], Iter [300/500] Loss: 14.2229
294 
295 Epoch [17/80], Iter [400/500] Loss: 7.6302
296 
297 Epoch [17/80], Iter [500/500] Loss: 6.4554
298 
299 Epoch [17/80], Iter [600/500] Loss: 8.2380
300 
301 Epoch [17/80], Iter [700/500] Loss: 6.4445
302 
303 Epoch [17/80], Iter [800/500] Loss: 8.4644
304 
305 Epoch [17/80], Iter [900/500] Loss: 9.0200
306 
307 Epoch [18/80], Iter [100/500] Loss: 9.5088
308 
309 Epoch [18/80], Iter [200/500] Loss: 3.8648
310 
311 Epoch [18/80], Iter [300/500] Loss: 8.8408
312 
313 Epoch [18/80], Iter [400/500] Loss: 7.4195
314 
315 Epoch [18/80], Iter [500/500] Loss: 15.0480
316 
317 Epoch [18/80], Iter [600/500] Loss: 5.6232
318 
319 Epoch [18/80], Iter [700/500] Loss: 5.2233
320 
321 Epoch [18/80], Iter [800/500] Loss: 6.5702
322 
323 Epoch [18/80], Iter [900/500] Loss: 13.7427
324 
325 Epoch [19/80], Iter [100/500] Loss: 3.5658
326 
327 Epoch [19/80], Iter [200/500] Loss: 4.7062
328 
329 Epoch [19/80], Iter [300/500] Loss: 10.7831
330 
331 Epoch [19/80], Iter [400/500] Loss: 13.1375
332 
333 Epoch [19/80], Iter [500/500] Loss: 22.2764
334 
335 Epoch [19/80], Iter [600/500] Loss: 10.3463
336 
337 Epoch [19/80], Iter [700/500] Loss: 7.2373
338 
339 Epoch [19/80], Iter [800/500] Loss: 5.5266
340 
341 Epoch [19/80], Iter [900/500] Loss: 9.2434
342 
343 Epoch [20/80], Iter [100/500] Loss: 7.8164
344 
345 Epoch [20/80], Iter [200/500] Loss: 9.6628
346 
347 Epoch [20/80], Iter [300/500] Loss: 4.1032
348 
349 Epoch [20/80], Iter [400/500] Loss: 16.5922
350 
351 Epoch [20/80], Iter [500/500] Loss: 6.9907
352 
353 Epoch [20/80], Iter [600/500] Loss: 10.9906
354 
355 Epoch [20/80], Iter [700/500] Loss: 8.5092
356 
357 Epoch [20/80], Iter [800/500] Loss: 7.1332
358 
359 Epoch [20/80], Iter [900/500] Loss: 6.1639
360 
361 Epoch [21/80], Iter [100/500] Loss: 6.3100
362 
363 Epoch [21/80], Iter [200/500] Loss: 4.5190
364 
365 Epoch [21/80], Iter [300/500] Loss: 4.3493
366 
367 Epoch [21/80], Iter [400/500] Loss: 7.9860
368 
369 Epoch [21/80], Iter [500/500] Loss: 8.8312
370 
371 Epoch [21/80], Iter [600/500] Loss: 10.7502
372 
373 Epoch [21/80], Iter [700/500] Loss: 3.2116
374 
375 Epoch [21/80], Iter [800/500] Loss: 4.0126
376 
377 Epoch [21/80], Iter [900/500] Loss: 5.3675
378 
379 Epoch [22/80], Iter [100/500] Loss: 1.4893
380 
381 Epoch [22/80], Iter [200/500] Loss: 1.6984
382 
383 Epoch [22/80], Iter [300/500] Loss: 2.6195
384 
385 Epoch [22/80], Iter [400/500] Loss: 2.1465
386 
387 Epoch [22/80], Iter [500/500] Loss: 2.9847
388 
389 Epoch [22/80], Iter [600/500] Loss: 4.9699
390 
391 Epoch [22/80], Iter [700/500] Loss: 1.6728
392 
393 Epoch [22/80], Iter [800/500] Loss: 1.3381
394 
395 Epoch [22/80], Iter [900/500] Loss: 2.0680
396 
397 Epoch [23/80], Iter [100/500] Loss: 1.9145
398 
399 Epoch [23/80], Iter [200/500] Loss: 0.9280
400 
401 Epoch [23/80], Iter [300/500] Loss: 2.9585
402 
403 Epoch [23/80], Iter [400/500] Loss: 1.0787
404 
405 Epoch [23/80], Iter [500/500] Loss: 3.1779
406 
407 Epoch [23/80], Iter [600/500] Loss: 2.4411
408 
409 Epoch [23/80], Iter [700/500] Loss: 2.0049
410 
411 Epoch [23/80], Iter [800/500] Loss: 2.2844
412 
413 Epoch [23/80], Iter [900/500] Loss: 2.2328
414 
415 Epoch [24/80], Iter [100/500] Loss: 1.5221
416 
417 Epoch [24/80], Iter [200/500] Loss: 2.0100
418 
419 Epoch [24/80], Iter [300/500] Loss: 1.8868
420 
421 Epoch [24/80], Iter [400/500] Loss: 1.4898
422 
423 Epoch [24/80], Iter [500/500] Loss: 1.1626
424 
425 Epoch [24/80], Iter [600/500] Loss: 1.2527
426 
427 Epoch [24/80], Iter [700/500] Loss: 1.3430
428 
429 Epoch [24/80], Iter [800/500] Loss: 1.3355
430 
431 Epoch [24/80], Iter [900/500] Loss: 1.8292
432 
433 Epoch [25/80], Iter [100/500] Loss: 2.2471
434 
435 Epoch [25/80], Iter [200/500] Loss: 2.8727
436 
437 Epoch [25/80], Iter [300/500] Loss: 1.3531
438 
439 Epoch [25/80], Iter [400/500] Loss: 1.1110
440 
441 Epoch [25/80], Iter [500/500] Loss: 2.7648
442 
443 Epoch [25/80], Iter [600/500] Loss: 1.8364
444 
445 Epoch [25/80], Iter [700/500] Loss: 1.4299
446 
447 Epoch [25/80], Iter [800/500] Loss: 1.5985
448 
449 Epoch [25/80], Iter [900/500] Loss: 2.5364
450 
451 Epoch [26/80], Iter [100/500] Loss: 2.6469
452 
453 Epoch [26/80], Iter [200/500] Loss: 3.1215
454 
455 Epoch [26/80], Iter [300/500] Loss: 1.4029
456 
457 Epoch [26/80], Iter [400/500] Loss: 1.2688
458 
459 Epoch [26/80], Iter [500/500] Loss: 2.4794
460 
461 Epoch [26/80], Iter [600/500] Loss: 1.1937
462 
463 Epoch [26/80], Iter [700/500] Loss: 1.0709
464 
465 Epoch [26/80], Iter [800/500] Loss: 1.4961
466 
467 Epoch [26/80], Iter [900/500] Loss: 1.4560
468 
469 Epoch [27/80], Iter [100/500] Loss: 2.0633
470 
471 Epoch [27/80], Iter [200/500] Loss: 2.6687
472 
473 Epoch [27/80], Iter [300/500] Loss: 5.2073
474 
475 Epoch [27/80], Iter [400/500] Loss: 2.2762
476 
477 Epoch [27/80], Iter [500/500] Loss: 1.6105
478 
479 Epoch [27/80], Iter [600/500] Loss: 1.6631
480 
481 Epoch [27/80], Iter [700/500] Loss: 1.0523
482 
483 Epoch [27/80], Iter [800/500] Loss: 2.8945
484 
485 Epoch [27/80], Iter [900/500] Loss: 1.5388
486 
487 Epoch [28/80], Iter [100/500] Loss: 1.6230
488 
489 Epoch [28/80], Iter [200/500] Loss: 1.8003
490 
491 Epoch [28/80], Iter [300/500] Loss: 1.4840
492 
493 Epoch [28/80], Iter [400/500] Loss: 0.9465
494 
495 Epoch [28/80], Iter [500/500] Loss: 1.6054
496 
497 Epoch [28/80], Iter [600/500] Loss: 3.3669
498 
499 Epoch [28/80], Iter [700/500] Loss: 1.4555
500 
501 Epoch [28/80], Iter [800/500] Loss: 2.2903
502 
503 Epoch [28/80], Iter [900/500] Loss: 1.2850
504 
505 Epoch [29/80], Iter [100/500] Loss: 1.7152
506 
507 Epoch [29/80], Iter [200/500] Loss: 1.2824
508 
509 Epoch [29/80], Iter [300/500] Loss: 1.5778
510 
511 Epoch [29/80], Iter [400/500] Loss: 3.1152
512 
513 Epoch [29/80], Iter [500/500] Loss: 1.2492
514 
515 Epoch [29/80], Iter [600/500] Loss: 0.9721
516 
517 Epoch [29/80], Iter [700/500] Loss: 1.4465
518 
519 Epoch [29/80], Iter [800/500] Loss: 0.9678
520 
521 Epoch [29/80], Iter [900/500] Loss: 1.5000
522 
523 Epoch [30/80], Iter [100/500] Loss: 1.5524
524 
525 Epoch [30/80], Iter [200/500] Loss: 1.5233
526 
527 Epoch [30/80], Iter [300/500] Loss: 1.4226
528 
529 Epoch [30/80], Iter [400/500] Loss: 0.9432
530 
531 Epoch [30/80], Iter [500/500] Loss: 1.4623
532 
533 Epoch [30/80], Iter [600/500] Loss: 1.3845
534 
535 Epoch [30/80], Iter [700/500] Loss: 1.3301
536 
537 Epoch [30/80], Iter [800/500] Loss: 1.0105
538 
539 Epoch [30/80], Iter [900/500] Loss: 1.8372
540 
541 Epoch [31/80], Iter [100/500] Loss: 1.3019
542 
543 Epoch [31/80], Iter [200/500] Loss: 1.1216
544 
545 Epoch [31/80], Iter [300/500] Loss: 0.8553
546 
547 Epoch [31/80], Iter [400/500] Loss: 1.6882
548 
549 Epoch [31/80], Iter [500/500] Loss: 1.7691
550 
551 Epoch [31/80], Iter [600/500] Loss: 1.7412
552 
553 Epoch [31/80], Iter [700/500] Loss: 2.2204
554 
555 Epoch [31/80], Iter [800/500] Loss: 0.6559
556 
557 Epoch [31/80], Iter [900/500] Loss: 1.4613
558 
559 Epoch [32/80], Iter [100/500] Loss: 1.1408
560 
561 Epoch [32/80], Iter [200/500] Loss: 3.6378
562 
563 Epoch [32/80], Iter [300/500] Loss: 1.5543
564 
565 Epoch [32/80], Iter [400/500] Loss: 2.1538
566 
567 Epoch [32/80], Iter [500/500] Loss: 1.1102
568 
569 Epoch [32/80], Iter [600/500] Loss: 1.3187
570 
571 Epoch [32/80], Iter [700/500] Loss: 0.7230
572 
573 Epoch [32/80], Iter [800/500] Loss: 1.6149
574 
575 Epoch [32/80], Iter [900/500] Loss: 1.0926
576 
577 Epoch [33/80], Iter [100/500] Loss: 1.9460
578 
579 Epoch [33/80], Iter [200/500] Loss: 0.9948
580 
581 Epoch [33/80], Iter [300/500] Loss: 1.4460
582 
583 Epoch [33/80], Iter [400/500] Loss: 1.5855
584 
585 Epoch [33/80], Iter [500/500] Loss: 1.5834
586 
587 Epoch [33/80], Iter [600/500] Loss: 0.8896
588 
589 Epoch [33/80], Iter [700/500] Loss: 1.1927
590 
591 Epoch [33/80], Iter [800/500] Loss: 1.5707
592 
593 Epoch [33/80], Iter [900/500] Loss: 0.7817
594 
595 Epoch [34/80], Iter [100/500] Loss: 0.9155
596 
597 Epoch [34/80], Iter [200/500] Loss: 0.7930
598 
599 Epoch [34/80], Iter [300/500] Loss: 1.2760
600 
601 Epoch [34/80], Iter [400/500] Loss: 0.7170
602 
603 Epoch [34/80], Iter [500/500] Loss: 1.9962
604 
605 Epoch [34/80], Iter [600/500] Loss: 1.2418
606 
607 Epoch [34/80], Iter [700/500] Loss: 1.4847
608 
609 Epoch [34/80], Iter [800/500] Loss: 0.8495
610 
611 Epoch [34/80], Iter [900/500] Loss: 1.3709
612 
613 Epoch [35/80], Iter [100/500] Loss: 1.8495
614 
615 Epoch [35/80], Iter [200/500] Loss: 0.9494
616 
617 Epoch [35/80], Iter [300/500] Loss: 0.6224
618 
619 Epoch [35/80], Iter [400/500] Loss: 0.5101
620 
621 Epoch [35/80], Iter [500/500] Loss: 0.9373
622 
623 Epoch [35/80], Iter [600/500] Loss: 1.5811
624 
625 Epoch [35/80], Iter [700/500] Loss: 1.5295
626 
627 Epoch [35/80], Iter [800/500] Loss: 0.7787
628 
629 Epoch [35/80], Iter [900/500] Loss: 1.0337
630 
631 Epoch [36/80], Iter [100/500] Loss: 0.6236
632 
633 Epoch [36/80], Iter [200/500] Loss: 1.8516
634 
635 Epoch [36/80], Iter [300/500] Loss: 1.5021
636 
637 Epoch [36/80], Iter [400/500] Loss: 1.0459
638 
639 Epoch [36/80], Iter [500/500] Loss: 1.4737
640 
641 Epoch [36/80], Iter [600/500] Loss: 0.7842
642 
643 Epoch [36/80], Iter [700/500] Loss: 1.6798
644 
645 Epoch [36/80], Iter [800/500] Loss: 1.7413
646 
647 Epoch [36/80], Iter [900/500] Loss: 0.6222
648 
649 Epoch [37/80], Iter [100/500] Loss: 0.5713
650 
651 Epoch [37/80], Iter [200/500] Loss: 1.3030
652 
653 Epoch [37/80], Iter [300/500] Loss: 1.6937
654 
655 Epoch [37/80], Iter [400/500] Loss: 0.8656
656 
657 Epoch [37/80], Iter [500/500] Loss: 1.3340
658 
659 Epoch [37/80], Iter [600/500] Loss: 0.6310
660 
661 Epoch [37/80], Iter [700/500] Loss: 1.1445
662 
663 Epoch [37/80], Iter [800/500] Loss: 0.6099
664 
665 Epoch [37/80], Iter [900/500] Loss: 1.3679
666 
667 Epoch [38/80], Iter [100/500] Loss: 0.9127
668 
669 Epoch [38/80], Iter [200/500] Loss: 1.9450
670 
671 Epoch [38/80], Iter [300/500] Loss: 1.2240
672 
673 Epoch [38/80], Iter [400/500] Loss: 1.4049
674 
675 Epoch [38/80], Iter [500/500] Loss: 0.9247
676 
677 Epoch [38/80], Iter [600/500] Loss: 1.5308
678 
679 Epoch [38/80], Iter [700/500] Loss: 1.9777
680 
681 Epoch [38/80], Iter [800/500] Loss: 1.2109
682 
683 Epoch [38/80], Iter [900/500] Loss: 0.8337
684 
685 Epoch [39/80], Iter [100/500] Loss: 0.7904
686 
687 Epoch [39/80], Iter [200/500] Loss: 0.8451
688 
689 Epoch [39/80], Iter [300/500] Loss: 1.6993
690 
691 Epoch [39/80], Iter [400/500] Loss: 1.2196
692 
693 Epoch [39/80], Iter [500/500] Loss: 1.0665
694 
695 Epoch [39/80], Iter [600/500] Loss: 0.7412
696 
697 Epoch [39/80], Iter [700/500] Loss: 0.6486
698 
699 Epoch [39/80], Iter [800/500] Loss: 1.5608
700 
701 Epoch [39/80], Iter [900/500] Loss: 1.9978
702 
703 Epoch [40/80], Iter [100/500] Loss: 1.7101
704 
705 Epoch [40/80], Iter [200/500] Loss: 1.4484
706 
707 Epoch [40/80], Iter [300/500] Loss: 1.5894
708 
709 Epoch [40/80], Iter [400/500] Loss: 1.3371
710 
711 Epoch [40/80], Iter [500/500] Loss: 0.9766
712 
713 Epoch [40/80], Iter [600/500] Loss: 1.9935
714 
715 Epoch [40/80], Iter [700/500] Loss: 2.0719
716 
717 Epoch [40/80], Iter [800/500] Loss: 0.9455
718 
719 Epoch [40/80], Iter [900/500] Loss: 0.8072
720 
721 Epoch [41/80], Iter [100/500] Loss: 1.3899
722 
723 Epoch [41/80], Iter [200/500] Loss: 0.9863
724 
725 Epoch [41/80], Iter [300/500] Loss: 1.3738
726 
727 Epoch [41/80], Iter [400/500] Loss: 0.6883
728 
729 Epoch [41/80], Iter [500/500] Loss: 0.8442
730 
731 Epoch [41/80], Iter [600/500] Loss: 2.0286
732 
733 Epoch [41/80], Iter [700/500] Loss: 1.1960
734 
735 Epoch [41/80], Iter [800/500] Loss: 1.2499
736 
737 Epoch [41/80], Iter [900/500] Loss: 0.6043
738 
739 Epoch [42/80], Iter [100/500] Loss: 0.3437
740 
741 Epoch [42/80], Iter [200/500] Loss: 0.6596
742 
743 Epoch [42/80], Iter [300/500] Loss: 0.4450
744 
745 Epoch [42/80], Iter [400/500] Loss: 0.7189
746 
747 Epoch [42/80], Iter [500/500] Loss: 0.5022
748 
749 Epoch [42/80], Iter [600/500] Loss: 0.4597
750 
751 Epoch [42/80], Iter [700/500] Loss: 0.7743
752 
753 Epoch [42/80], Iter [800/500] Loss: 0.3344
754 
755 Epoch [42/80], Iter [900/500] Loss: 0.7295
756 
757 Epoch [43/80], Iter [100/500] Loss: 0.5074
758 
759 Epoch [43/80], Iter [200/500] Loss: 0.3128
760 
761 Epoch [43/80], Iter [300/500] Loss: 0.2800
762 
763 Epoch [43/80], Iter [400/500] Loss: 0.3059
764 
765 Epoch [43/80], Iter [500/500] Loss: 0.3486
766 
767 Epoch [43/80], Iter [600/500] Loss: 0.7222
768 
769 Epoch [43/80], Iter [700/500] Loss: 0.7349
770 
771 Epoch [43/80], Iter [800/500] Loss: 0.8455
772 
773 Epoch [43/80], Iter [900/500] Loss: 0.7261
774 
775 Epoch [44/80], Iter [100/500] Loss: 0.5404
776 
777 Epoch [44/80], Iter [200/500] Loss: 0.5428
778 
779 Epoch [44/80], Iter [300/500] Loss: 0.5385
780 
781 Epoch [44/80], Iter [400/500] Loss: 0.4106
782 
783 Epoch [44/80], Iter [500/500] Loss: 0.5296
784 
785 Epoch [44/80], Iter [600/500] Loss: 0.6045
786 
787 Epoch [44/80], Iter [700/500] Loss: 0.3837
788 
789 Epoch [44/80], Iter [800/500] Loss: 0.7552
790 
791 Epoch [44/80], Iter [900/500] Loss: 0.4996
792 
793 Epoch [45/80], Iter [100/500] Loss: 0.3381
794 
795 Epoch [45/80], Iter [200/500] Loss: 0.3910
796 
797 Epoch [45/80], Iter [300/500] Loss: 0.3790
798 
799 Epoch [45/80], Iter [400/500] Loss: 0.2718
800 
801 Epoch [45/80], Iter [500/500] Loss: 0.3572
802 
803 Epoch [45/80], Iter [600/500] Loss: 0.2913
804 
805 Epoch [45/80], Iter [700/500] Loss: 0.5244
806 
807 Epoch [45/80], Iter [800/500] Loss: 0.3647
808 
809 Epoch [45/80], Iter [900/500] Loss: 0.3161
810 
811 Epoch [46/80], Iter [100/500] Loss: 0.4728
812 
813 Epoch [46/80], Iter [200/500] Loss: 0.4386
814 
815 Epoch [46/80], Iter [300/500] Loss: 0.2861
816 
817 Epoch [46/80], Iter [400/500] Loss: 0.2460
818 
819 Epoch [46/80], Iter [500/500] Loss: 0.3490
820 
821 Epoch [46/80], Iter [600/500] Loss: 0.5804
822 
823 Epoch [46/80], Iter [700/500] Loss: 0.4951
824 
825 Epoch [46/80], Iter [800/500] Loss: 0.4600
826 
827 Epoch [46/80], Iter [900/500] Loss: 0.5658
828 
829 Epoch [47/80], Iter [100/500] Loss: 0.2479
830 
831 Epoch [47/80], Iter [200/500] Loss: 0.2688
832 
833 Epoch [47/80], Iter [300/500] Loss: 0.3082
834 
835 Epoch [47/80], Iter [400/500] Loss: 0.3929
836 
837 Epoch [47/80], Iter [500/500] Loss: 0.3126
838 
839 Epoch [47/80], Iter [600/500] Loss: 0.5041
840 
841 Epoch [47/80], Iter [700/500] Loss: 0.5848
842 
843 Epoch [47/80], Iter [800/500] Loss: 0.4968
844 
845 Epoch [47/80], Iter [900/500] Loss: 0.3496
846 
847 Epoch [48/80], Iter [100/500] Loss: 0.2753
848 
849 Epoch [48/80], Iter [200/500] Loss: 0.3885
850 
851 Epoch [48/80], Iter [300/500] Loss: 0.3743
852 
853 Epoch [48/80], Iter [400/500] Loss: 0.2425
854 
855 Epoch [48/80], Iter [500/500] Loss: 0.2472
856 
857 Epoch [48/80], Iter [600/500] Loss: 0.3003
858 
859 Epoch [48/80], Iter [700/500] Loss: 0.4936
860 
861 Epoch [48/80], Iter [800/500] Loss: 0.3169
862 
863 Epoch [48/80], Iter [900/500] Loss: 0.2543
864 
865 Epoch [49/80], Iter [100/500] Loss: 0.4262
866 
867 Epoch [49/80], Iter [200/500] Loss: 0.3396
868 
869 Epoch [49/80], Iter [300/500] Loss: 0.4670
870 
871 Epoch [49/80], Iter [400/500] Loss: 0.2543
872 
873 Epoch [49/80], Iter [500/500] Loss: 0.3146
874 
875 Epoch [49/80], Iter [600/500] Loss: 1.3187
876 
877 Epoch [49/80], Iter [700/500] Loss: 0.2993
878 
879 Epoch [49/80], Iter [800/500] Loss: 0.3053
880 
881 Epoch [49/80], Iter [900/500] Loss: 0.3343
882 
883 Epoch [50/80], Iter [100/500] Loss: 0.2081
884 
885 Epoch [50/80], Iter [200/500] Loss: 0.5631
886 
887 Epoch [50/80], Iter [300/500] Loss: 0.4358
888 
889 Epoch [50/80], Iter [400/500] Loss: 0.4028
890 
891 Epoch [50/80], Iter [500/500] Loss: 0.2510
892 
893 Epoch [50/80], Iter [600/500] Loss: 0.5876
894 
895 Epoch [50/80], Iter [700/500] Loss: 0.3692
896 
897 Epoch [50/80], Iter [800/500] Loss: 0.4500
898 
899 Epoch [50/80], Iter [900/500] Loss: 0.1850
900 
901 ('time used:', 30318.149681000003)
View Code

 


 

posted on 2017-07-31 10:46  YongjieShi  阅读(12114)  评论(0)  编辑  收藏  举报

导航