# NOTE(review): this is the interior of a padding helper (`determine_padding`
# in the original ML-From-Scratch source). The `def` line and the tail of the
# 'same' branch were lost when the article was extracted; kept verbatim,
# including the flattened indentation.
# 'valid' means no zero padding on either spatial axis.
if output_shape == "valid":
return (0, 0), (0, 0)
# Pad so that the output shape is the same as input shape (given that stride=1)
elif output_shape == "same":
filter_height, filter_width = filter_shape

# Derived from:
# output_height = (height + pad_h - filter_height) / stride + 1
# In this case output_height = height and stride = 1. This gives the
# expression for the padding below.

• math.floor(x)表示返回小于或等于x的最大整数。
• math.ceil(x)表示返回大于或等于x的最小整数。

# Turn a batch of images (batch, channels, height, width) into the column
# matrix used by im2col-based convolution: one column per output position,
# each holding the flattened receptive field.
# NOTE(review): several lines were lost in extraction -- `pad_h`/`pad_w`
# (presumably from determine_padding) and the index arrays `k`, `i`, `j`
# (presumably from get_im2col_indices) are used below but never assigned
# here, and the body indentation was flattened. Kept verbatim.
def image_to_column(images, filter_shape, stride, output_shape='same'):
filter_height, filter_width = filter_shape
# Zero-pad only the two spatial axes; batch and channel axes get (0, 0).
images_padded = np.pad(images, ((0, 0), (0, 0), pad_h, pad_w), mode='constant')# Calculate the indices where the dot products are to be applied between weights
# and the image

# Get content from image at those indices
cols = images_padded[:, k, i, j]
channels = images.shape[1]
# Reshape content into column shape
cols = cols.transpose(1, 2, 0).reshape(filter_height * filter_width * channels, -1)
return cols

def get_im2col_indices(images_shape, filter_shape, padding, stride=1):
    """Build the (k, i, j) fancy-index arrays used by im2col.

    Parameters:
    -----------
    images_shape: tuple
        (batch_size, channels, height, width) of the unpadded input.
    filter_shape: tuple
        (filter_height, filter_width).
    padding: tuple
        ((pad_top, pad_bottom), (pad_left, pad_right)) zero padding that will
        be applied to the spatial axes before indexing.
    stride: int
        Stride of the convolution window.

    Returns:
    --------
    (k, i, j) index arrays such that padded_images[:, k, i, j] yields, for
    every output position, its flattened receptive field
    (channels * filter_height * filter_width values per column).

    NOTE(review): the original article lost this function's ``def`` line and
    the ``pad_h, pad_w = padding`` unpack, and fused the last two statements;
    the signature is reconstructed from the call site below.
    """
    # First figure out what the size of the output should be
    batch_size, channels, height, width = images_shape
    filter_height, filter_width = filter_shape
    pad_h, pad_w = padding
    out_height = int((height + np.sum(pad_h) - filter_height) / stride + 1)
    out_width = int((width + np.sum(pad_w) - filter_width) / stride + 1)

    # Row offset of each cell within one filter window, repeated per channel.
    i0 = np.repeat(np.arange(filter_height), filter_width)
    i0 = np.tile(i0, channels)
    # Row of the top-left corner of every output window.
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    # Column offset of each cell within one filter window, per channel.
    j0 = np.tile(np.arange(filter_width), filter_height * channels)
    # Column of the top-left corner of every output window.
    j1 = stride * np.tile(np.arange(out_width), out_height)
    # Broadcast window offsets against window positions; i and j both have
    # shape (channels * filter_height * filter_width, out_height * out_width).
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)
    # Channel index of every row of the column matrix.
    k = np.repeat(np.arange(channels), filter_height * filter_width).reshape(-1, 1)
    return (k, i, j)

get_im2col_indices((1,3,32,32), (3,3), ((1,1),(1,1)), stride=1)

• i0：np.repeat(np.arange(3),3)：[0 ,0,0,1,1,1,2,2,2]
• i0：np.tile([0,0,0,1,1,1,2,2,2],3)：[0,0,0,1,1,1,2,2,2,0,0,0,1,1,1,2,2,2,0,0,0,1,1,1,2,2,2]，大小为：(27,)
• i1：1*np.repeat(np.arange(32),32)：[0,0,0......,31,31,31]，大小为：(1024,)
• j0：np.tile(np.arange(3),3*3)：[0,1,2,0,1,2,......]，大小为：(27,)
• j1：1*np.tile(np.arange(32),32)：[0,1,2,3,......,0,1,2,......,29,30,31]，大小为(1024,)
• i：i0.reshape(-1,1)+i1.reshape(1,-1)：大小(27,1024)
• j：j0.reshape(-1,1)+j1.reshape(1,-1)：大小(27,1024)
• k：np.repeat(np.arange(3),3*3).reshape(-1,1)：大小(27,1)

• numpy.arange(start, stop, step, dtype = None)：举例numpy.arange(3)，输出[0,1,2]
• numpy.repeat(array,repeats,axis=None)：举例numpy.repeat([0,1,2],3)，输出：[0,0,0,1,1,1,2,2,2]
• numpy.tile(array,reps)：举例numpy.tile([0,1,2],3)，输出：[0,1,2,0,1,2,0,1,2]
• 具体的更复杂的用法还是得去查相关资料。这里只列举出与本代码相关的。

# (Repeated excerpt from image_to_column, quoted again for the explanation
# that follows; `k`, `i`, `j` are the index arrays from get_im2col_indices.)
cols = images_padded[:, k, i, j]
channels = images.shape[1]
# Reshape content into column shape
cols = cols.transpose(1, 2, 0).reshape(filter_height * filter_width * channels, -1)

channels的大小是3

class Layer(object):
    """Abstract base class for network layers.

    Concrete layers override forward_pass / backward_pass / output_shape
    (and parameters, when they hold trainable weights).

    NOTE(review): the original article lost the ``def backward_pass`` line,
    leaving its docstring and ``raise`` orphaned; restored here. Indentation
    was also flattened by the extraction and is restored.
    """

    def set_input_shape(self, shape):
        """ Sets the shape that the layer expects of the input in the forward
        pass method """
        self.input_shape = shape

    def layer_name(self):
        """ The name of the layer. Used in model summary. """
        return self.__class__.__name__

    def parameters(self):
        """ The number of trainable parameters used by the layer """
        return 0

    def forward_pass(self, X, training):
        """ Propagates the signal forward in the network """
        raise NotImplementedError()

    def backward_pass(self, accum_grad):
        """ Propagates the accumulated gradient backwards in the network.
        If the layer has trainable weights then these weights are also tuned
        in this method. Returns the gradient with respect to the output of
        the previous layer. """
        raise NotImplementedError()

    def output_shape(self):
        """ The shape of the output produced by forward_pass """
        raise NotImplementedError()

class Conv2D(Layer):
"""A 2D Convolution Layer.
Parameters:
-----------
n_filters: int
The number of filters that will convolve over the input matrix. The number of channels
of the output shape.
filter_shape: tuple
A tuple (filter_height, filter_width).
input_shape: tuple
The shape of the expected input of the layer. (batch_size, channels, height, width)
Only needs to be specified for first layer in the network.
padding: string
Either 'same' or 'valid'. 'same' results in padding being added so that the output height and width
matches the input height and width. For 'valid' no padding is added.
stride: int
The stride length of the filters during the convolution over the input.
"""
def __init__(self, n_filters, filter_shape, input_shape=None, padding='same', stride=1):
self.n_filters = n_filters
self.filter_shape = filter_shape
# NOTE(review): the `padding` argument is accepted but never stored, yet
# forward_pass reads `self.padding` -- a `self.padding = padding` line was
# evidently lost when the article was extracted. Confirm against the
# original ML-From-Scratch source.
self.stride = stride
self.input_shape = input_shape
self.trainable = True

def initialize(self, optimizer):
# Initialize the weights
filter_height, filter_width = self.filter_shape
# input_shape here is (channels, height, width) -- no batch axis.
channels = self.input_shape[0]
# Uniform init scaled by 1/sqrt(fan of one filter window).
limit = 1 / math.sqrt(np.prod(self.filter_shape))
self.W  = np.random.uniform(-limit, limit, size=(self.n_filters, channels, filter_height, filter_width))
self.w0 = np.zeros((self.n_filters, 1))
# Weight optimizers
# Each parameter tensor gets its own (shallow) copy of the optimizer.
self.W_opt  = copy.copy(optimizer)
self.w0_opt = copy.copy(optimizer)

def parameters(self):
# Weights plus one bias per filter.
return np.prod(self.W.shape) + np.prod(self.w0.shape)

def forward_pass(self, X, training=True):
batch_size, channels, height, width = X.shape
self.layer_input = X
# Turn image shape into column shape
# (enables dot product between input and weights)
self.X_col = image_to_column(X, self.filter_shape, stride=self.stride, output_shape=self.padding)
# Turn weights into column shape
self.W_col = self.W.reshape((self.n_filters, -1))
# Calculate output
output = self.W_col.dot(self.X_col) + self.w0
# Reshape into (n_filters, out_height, out_width, batch_size)
output = output.reshape(self.output_shape() + (batch_size, ))
# Redistribute axises so that batch size comes first
return output.transpose(3,0,1,2)

# NOTE(review): the following lines are the remains of backward_pass -- its
# `def` line and most of its body (gradient computation, weight updates and
# the column_to_image call these keyword arguments belonged to) were lost
# in extraction. Kept verbatim; restore from the original source.
# Reshape accumulated gradient into column shape

if self.trainable:
# Take dot product between column shaped accum. gradient and column shape
# layer input to determine the gradient at the layer with respect to layer weights
# The gradient with respect to bias terms is the sum similarly to in Dense layer

# Update the layers weights

# Recalculate the gradient which will be propogated back to prev. layer
# Reshape from column shape to image shape
self.layer_input.shape,
self.filter_shape,
stride=self.stride,

def output_shape(self):
channels, height, width = self.input_shape
# NOTE(review): `pad_h` and `pad_w` are undefined in this scope -- the line
# computing them (from the padding helper) was lost in extraction.
output_height = (height + np.sum(pad_h) - self.filter_shape[0]) / self.stride + 1
output_width = (width + np.sum(pad_w) - self.filter_shape[1]) / self.stride + 1
return self.n_filters, int(output_height), int(output_width)

self.X_col的大小就是(27,1024)，self.W_col的大小是(16,27)，那么output = self.W_col.dot(self.X_col) + self.w0的大小就是(16,1024)

# Demo: 'same' convolution, 16 filters of 3x3, stride 1, on a random
# (1, 3, 32, 32) image -- the article reports output shape (1, 16, 32, 32).
image = np.random.randint(0,255,size=(1,3,32,32)).astype(np.uint8)
# Conv2D expects input_shape without the batch axis: (channels, height, width).
input_shape=image.squeeze().shape
conv2d = Conv2D(16, (3,3), input_shape=input_shape, padding='same', stride=1)
# Passing None works here because the optimizer is only copy.copy'd and
# never used during the forward pass.
conv2d.initialize(None)
output=conv2d.forward_pass(image,training=True)
print(output.shape)

# 16 * 3 * 3 * 3 weights + 16 biases = 448 trainable parameters.
print(conv2d.parameters())

# Demo: 'valid' convolution (no padding), stride 1 -- the article reports
# output shape (1, 16, 30, 30) since 32 - 3 + 1 = 30; parameter count
# is unchanged at 448.
image = np.random.randint(0,255,size=(1,3,32,32)).astype(np.uint8)
input_shape=image.squeeze().shape
conv2d = Conv2D(16, (3,3), input_shape=input_shape, padding='valid', stride=1)
conv2d.initialize(None)
output=conv2d.forward_pass(image,training=True)
print(output.shape)
print(conv2d.parameters())

cols的大小：(27,900)

(1,16,30,30)

448

# Demo: 'valid' convolution with stride 2 -- the article reports output
# shape (1, 16, 15, 15) since int((32 - 3) / 2 + 1) = 15; the parameter
# count stays 448 (stride does not affect the weights).
image = np.random.randint(0,255,size=(1,3,32,32)).astype(np.uint8)
input_shape=image.squeeze().shape
conv2d = Conv2D(16, (3,3), input_shape=input_shape, padding='valid', stride=2)
conv2d.initialize(None)
output=conv2d.forward_pass(image,training=True)
print(output.shape)
print(conv2d.parameters())

cols的大小：(27,225)

(1,16,15,15)

448

get_im2col_indices()函数中的变换操作是清楚了，至于为什么这么变换的原因还需要好好去琢磨。至于反向传播和优化optimizer等研究好了之后再更新了。

posted @ 2020-04-15 17:10  西西嘛呦  阅读(6257)  评论(0编辑  收藏  举报