Deep Learning & Neural Style Transfer(VGG) ——By何子辰

这周做了一个 Deep Learning 在 Neural Style Transfer 上应用的 Assignment。参考的算法论文如下：

Gatys et al. (2015) (https://arxiv.org/abs/1508.06576).

先上效果图：

① 美丽的中国石油大学（北京）+ 毕加索风格图像：（所有图像都预处理成400x300的图片）

②再放点其他的：

※※过程如下：

Create an Interactive Session
Load the content image
Load the style image
Randomly initialize the image to be generated
Load the pretrained VGG19 model
Build the TensorFlow graph (sub-steps listed below), then initialize the TensorFlow graph and run it for a large number of iterations, updating the generated image at every step
- Run the content image through the VGG19 model and compute the content cost
- Run the style image through the VGG19 model and compute the style cost
- Compute the total cost
- Define the optimizer and the learning rate

VGG网络结构代码：

  1 ### Part of this code is due to the MatConvNet team and is used to load the parameters of the pretrained VGG19 model in the notebook ###
  2 
  3 import os
  4 import sys
  5 import scipy.io
  6 import scipy.misc
  7 import matplotlib.pyplot as plt
  8 from matplotlib.pyplot import imshow
  9 from PIL import Image
 10 from nst_utils import *
 11 
 12 import numpy as np
 13 import tensorflow as tf
 14 
 15 class CONFIG:
 16     IMAGE_WIDTH = 400
 17     IMAGE_HEIGHT = 300
 18     COLOR_CHANNELS = 3
 19     NOISE_RATIO = 0.6
 20     MEANS = np.array([123.68, 116.779, 103.939]).reshape((1,1,1,3)) 
 21     VGG_MODEL = 'pretrained-model/imagenet-vgg-verydeep-19.mat' # Pick the VGG 19-layer model by from the paper "Very Deep Convolutional Networks for Large-Scale Image Recognition".
 22     STYLE_IMAGE = 'images/stone_style.jpg' # Style image to use.
 23     CONTENT_IMAGE = 'images/content300.jpg' # Content image to use.
 24     OUTPUT_DIR = 'output/'
 25     
 26 def load_vgg_model(path):
 27     """
 28     Returns a model for the purpose of 'painting' the picture.
 29     Takes only the convolution layer weights and wrap using the TensorFlow
 30     Conv2d, Relu and AveragePooling layer. VGG actually uses maxpool but
 31     the paper indicates that using AveragePooling yields better results.
 32     The last few fully connected layers are not used.
 33     Here is the detailed configuration of the VGG model:
 34         0 is conv1_1 (3, 3, 3, 64)
 35         1 is relu
 36         2 is conv1_2 (3, 3, 64, 64)
 37         3 is relu    
 38         4 is maxpool
 39         5 is conv2_1 (3, 3, 64, 128)
 40         6 is relu
 41         7 is conv2_2 (3, 3, 128, 128)
 42         8 is relu
 43         9 is maxpool
 44         10 is conv3_1 (3, 3, 128, 256)
 45         11 is relu
 46         12 is conv3_2 (3, 3, 256, 256)
 47         13 is relu
 48         14 is conv3_3 (3, 3, 256, 256)
 49         15 is relu
 50         16 is conv3_4 (3, 3, 256, 256)
 51         17 is relu
 52         18 is maxpool
 53         19 is conv4_1 (3, 3, 256, 512)
 54         20 is relu
 55         21 is conv4_2 (3, 3, 512, 512)
 56         22 is relu
 57         23 is conv4_3 (3, 3, 512, 512)
 58         24 is relu
 59         25 is conv4_4 (3, 3, 512, 512)
 60         26 is relu
 61         27 is maxpool
 62         28 is conv5_1 (3, 3, 512, 512)
 63         29 is relu
 64         30 is conv5_2 (3, 3, 512, 512)
 65         31 is relu
 66         32 is conv5_3 (3, 3, 512, 512)
 67         33 is relu
 68         34 is conv5_4 (3, 3, 512, 512)
 69         35 is relu
 70         36 is maxpool
 71         37 is fullyconnected (7, 7, 512, 4096)
 72         38 is relu
 73         39 is fullyconnected (1, 1, 4096, 4096)
 74         40 is relu
 75         41 is fullyconnected (1, 1, 4096, 1000)
 76         42 is softmax
 77     """
 78     
 79     vgg = scipy.io.loadmat(path)
 80 
 81     vgg_layers = vgg['layers']
 82     
 83     def _weights(layer, expected_layer_name):
 84         """
 85         Return the weights and bias from the VGG model for a given layer.
 86         """
 87         wb = vgg_layers[0][layer][0][0][2]
 88         W = wb[0][0]
 89         b = wb[0][1]
 90         layer_name = vgg_layers[0][layer][0][0][0][0]
 91         assert layer_name == expected_layer_name
 92         return W, b
 93 
 94         return W, b
 95 
 96     def _relu(conv2d_layer):
 97         """
 98         Return the RELU function wrapped over a TensorFlow layer. Expects a
 99         Conv2d layer input.
100         """
101         return tf.nn.relu(conv2d_layer)
102 
103     def _conv2d(prev_layer, layer, layer_name):
104         """
105         Return the Conv2D layer using the weights, biases from the VGG
106         model at 'layer'.
107         """
108         W, b = _weights(layer, layer_name)
109         W = tf.constant(W)
110         b = tf.constant(np.reshape(b, (b.size)))
111         return tf.nn.conv2d(prev_layer, filter=W, strides=[1, 1, 1, 1], padding='SAME') + b
112 
113     def _conv2d_relu(prev_layer, layer, layer_name):
114         """
115         Return the Conv2D + RELU layer using the weights, biases from the VGG
116         model at 'layer'.
117         """
118         return _relu(_conv2d(prev_layer, layer, layer_name))
119 
120     def _avgpool(prev_layer):
121         """
122         Return the AveragePooling layer.
123         """
124         return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
125 
126     # Constructs the graph model.
127     graph = {}
128     graph['input']   = tf.Variable(np.zeros((1, CONFIG.IMAGE_HEIGHT, CONFIG.IMAGE_WIDTH, CONFIG.COLOR_CHANNELS)), dtype = 'float32')
129     graph['conv1_1']  = _conv2d_relu(graph['input'], 0, 'conv1_1')
130     graph['conv1_2']  = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
131     graph['avgpool1'] = _avgpool(graph['conv1_2'])
132     graph['conv2_1']  = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
133     graph['conv2_2']  = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
134     graph['avgpool2'] = _avgpool(graph['conv2_2'])
135     graph['conv3_1']  = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
136     graph['conv3_2']  = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
137     graph['conv3_3']  = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
138     graph['conv3_4']  = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
139     graph['avgpool3'] = _avgpool(graph['conv3_4'])
140     graph['conv4_1']  = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
141     graph['conv4_2']  = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
142     graph['conv4_3']  = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
143     graph['conv4_4']  = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
144     graph['avgpool4'] = _avgpool(graph['conv4_4'])
145     graph['conv5_1']  = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
146     graph['conv5_2']  = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
147     graph['conv5_3']  = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
148     graph['conv5_4']  = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
149     graph['avgpool5'] = _avgpool(graph['conv5_4'])
150     
151     return graph
152 
def generate_noise_image(content_image, noise_ratio = CONFIG.NOISE_RATIO):
    """
    Blend uniform random noise into content_image.

    Arguments:
    content_image -- array that broadcasts against the noise shape
                     (1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS)
    noise_ratio -- weight of the noise; the content image is weighted by
                   (1 - noise_ratio)

    Returns:
    noise * noise_ratio + content_image * (1 - noise_ratio)
    """
    noise_shape = (1, CONFIG.IMAGE_HEIGHT, CONFIG.IMAGE_WIDTH, CONFIG.COLOR_CHANNELS)

    # Uniform noise in [-20, 20), stored as float32.
    noise = np.random.uniform(low=-20, high=20, size=noise_shape).astype('float32')

    content_weight = 1 - noise_ratio
    return noise * noise_ratio + content_image * content_weight
165 
166 
def reshape_and_normalize_image(image):
    """
    Prepare an input image (content or style) for the VGG model.

    A leading axis of length 1 is added in front of the image's own axes and
    CONFIG.MEANS is subtracted from the result.
    """
    # Add the leading batch axis expected by the network input.
    batched = image[np.newaxis, ...]

    # Subtract the mean to match the expected input of the network.
    return batched - CONFIG.MEANS
179 
180 
def save_image(path, image):
    """
    Write a generated image to disk.

    Arguments:
    path -- destination file path
    image -- batched, mean-subtracted image; only image[0] is written
             (assumes shape (1, height, width, 3) -- TODO confirm)
    """
    # Un-normalize the image so that it looks good: add back the means that
    # reshape_and_normalize_image subtracted.
    image = image + CONFIG.MEANS

    # Drop the batch axis, clip to the displayable range and convert to 8-bit.
    pixels = np.clip(image[0], 0, 255).astype('uint8')

    # scipy.misc.imsave was deprecated in SciPy 1.0 and removed in 1.2. It
    # wrote uint8 arrays through Pillow, so use Pillow (already imported by
    # this file as `Image`) directly.
    Image.fromarray(pixels).save(path)

接下来就是

# Deep Learning & Art: Neural Style Transfer
# This assignment:
# - Implement the neural style transfer algorithm
# - Generate novel artistic images using your algorithm

# - Most of the algorithms you've studied optimize a cost function to get a set of parameter values. In Neural Style Transfer, you'll optimize a cost function to get pixel values.

  1 # Deep Learning & Art: Neural Style Transfer
  2 # This assignment:
  3 #     - Implement the neural style transfer algorithm
  4 #    - Generate novel artistic images using your algorithm
  5 
  6 #    - Most of the algorithms you've studied optimize a cost function to get a set of parameter 
  7 #    - values. In Neural Style Transfer, you'll optimize a cost function to get pixel values.
  8 
  9 import os 
 10 import sys
 11 import scipy.io
 12 import scipy.misc
 13 import matplotlib.pyplot as plt 
 14 from matplotlib.pyplot import imshow
 15 from PIL import Image
 16 from nst_utils import *
 17 import numpy as np
 18 import tensorflow as tf
 19 
 20 # Essential params
 21 class CONFIG:
 22     IMAGE_WIDTH = 400
 23     IMAGE_HEIGHT = 300
 24     COLOR_CHANNELS = 3
 25     NOISE_RATIO = 0.6
 26     MEANS = np.array([123.68, 116.779, 103.939]).reshape((1,1,1,3)) 
 27     VGG_MODEL = 'pretrained-model/imagenet-vgg-verydeep-19.mat' # Pick the VGG 19-layer model by from the paper "Very Deep Convolutional Networks for Large-Scale Image Recognition".
 28     STYLE_IMAGE = 'HZC_test_image/2.jpg' # Style image to use.
 29     CONTENT_IMAGE = 'HZC_test_image/bjs.jpg' # Content image to use.
 30     OUTPUT_DIR = 'output/'
 31 
 32 #    STYLE weights
 33 #     When complete the assignment, come back and experiment with different weights to see 
 34 #    how it changes the generated image G. 
 35 #    default value:
 36 #     权重不同，最终生成图像风格也不同
 37 STYLE_LAYERS = [
 38     ('conv1_1', 0.2),
 39     ('conv2_1', 0.2),
 40     ('conv3_1', 0.4),
 41     ('conv4_1', 0.4),
 42     ('conv5_1', 0.4)
 43    ]
 44 
 45 #    - use a previously trained convolutional network, and build on top of that.
 46 #    - model: vgg-19; a 19-layer version of VGG network.
 47 #    -         this model has already been trained in the very large ImageNet database.
 48 
 49 #    step 1: run the following model to load parameters from VGG model. 
 50 #    Use load_vgg_model function in nst_utils.py
 51 model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")    
 52 # print(model)
 53 
 54 #    CONTENT image
 55 content_image = scipy.misc.imread("images/louvre.jpg")
 56 imshow(content_image)
 57 
 58 #    STYLE image
 59 style_image = scipy.misc.imread("images/monet_800600.jpg")
 60 imshow(style_image)
 61 
 62 #Tool that was necessary
 63 
 64 #    Reshape and normalize the input image (content or style)
 65 def reshape_and_normalize_image(image):
 66 
 67     #    Reshape image to mach expected input of VGG16
 68     # image = np.reshape(image,(300,400,3))
 69     image = np.reshape(image,((1,)+image.shape))
 70 
 71     #    Substract the mean to match the expected input of VGG16
 72     image = image - CONFIG.MEANS
 73     return image
 74 
 75 #    Generate a noisy image bt adding random noise to the content_image
 76 def generate_noise_image(content_image,noise_ratio = CONFIG.NOISE_RATIO):
 77     
 78     # Generate a random noise_image
 79     noise_image = np.random.uniform(-20,20, (1, CONFIG.IMAGE_HEIGHT, CONFIG.IMAGE_WIDTH, CONFIG.COLOR_CHANNELS)).astype('float32')
 80 
 81     # Set the input_image to be a weighted average of the content image and  a noise image
 82     input_image = noise_image*noise_ratio + content_image*(1 - noise_ratio)
 83 
 84     return input_image
 85 
 86 #Save image 
 87 def save_image(path, image):
 88     
 89     # Un-normalize the image so that it looks good
 90     image = image + CONFIG.MEANS
 91     
 92     # Clip and Save the image
 93     image = np.clip(image[0], 0, 255).astype('uint8')
 94     scipy.misc.imsave(path, image)
 95 
 96 #    Compute the Content Cost use Tensorflow
 97 def compute_content_cost(a_C,a_G):
 98     """
 99     Compute the Content Cost
100 
101     Arguments:
102     a_C >>> tensor of dimension(1,n_h,n_w,n_c) hidden layer activations
103     a_G >>> tensor of dimension(1,n_h,n_w,n_c) hidden layer activations
104 
105     Returns:
106     J_content >>> scalar that you compute using equation that you needed
107 
108     """
109     #    Retrieve params 
110     m,n_H,n_W,n_C = a_G.get_shape().as_list()
111 
112     #  Reshape a_C and a_G
113     a_C_unrolled = tf.reshape(a_C,[n_H*n_W,n_C])
114     a_G_unrolled = tf.reshape(a_G,[n_H*n_W,n_C])
115 
116     # Compute the cost with tensorflow 
117     params = 1/(4*n_H*n_W*n_C)
118     J_content = params*(tf.reduce_sum(tf.square(tf.subtract(a_C_unrolled,a_G_unrolled))))
119 
120     return J_content
121 
122 
123 # Gram_matrix (Style matrix)
def gram_matrix(A):
    """
    Compute the Gram (style) matrix of A.

    Argument:
    A -- matrix of shape (n_C, n_H*n_W)

    Returns:
    GA -- A multiplied by its transpose, of shape (n_C, n_C)
    """
    A_transposed = tf.transpose(A)
    return tf.matmul(A, A_transposed)
135 
136 # Style cost 
137 # We only use a single layer l
def compute_layer_style_cost(a_S,a_G):
    """
    Compute the style cost for a single layer.

    Arguments:
    a_S -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations
           representing the style of the style image
    a_G -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations
           representing the style of the generated image

    Returns:
    J_style_layer -- tensor representing a scalar value: the squared
                     difference between the two Gram matrices, scaled by
                     1 / (4 * n_C**2 * (n_H * n_W)**2)
    """
    # Retrieve dimensions from a_G; the batch size is not needed.
    _, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Bring the activations to shape (n_C, n_H*n_W), one row per channel.
    # Channels are the LAST axis of the input, so reshaping straight to
    # [n_C, n_H*n_W] would interleave values of different channels in every
    # row. Unroll the spatial positions first (each row then holds the n_C
    # channel values of one position) and transpose afterwards.
    a_S = tf.transpose(tf.reshape(a_S, [n_H*n_W, n_C]))
    a_G = tf.transpose(tf.reshape(a_G, [n_H*n_W, n_C]))

    # Compute the Gram matrices of the style and generated activations.
    GS = gram_matrix(a_S)
    GG = gram_matrix(a_G)

    # Compute the loss. The float literal keeps this a true division.
    params = 1.0/(4*(n_C**2)*((n_H*n_W)**2))
    J_style_layer = params*(tf.reduce_sum(tf.square(tf.subtract(GS,GG))))

    return J_style_layer
163 
164 # Combine the style costs for different layers as follows:
def compute_style_cost(model,STYLE_LAYERS):
    """
    Compute the overall style cost from several chosen layers.

    Relies on the module-level session `sess`: each selected layer is
    evaluated with whatever image is currently assigned to the model input,
    and that result is used as the style activation.

    Arguments:
    model -- our tensorflow model (dict of layer name -> tensor)
    STYLE_LAYERS -- a python list of (layer name, coefficient) pairs: the
                    layers to extract style from and the weight of each

    Returns:
    J_style -- tensor representing a scalar value (0 if STYLE_LAYERS is empty)
    """
    def weighted_layer_cost(layer_name, coeff):
        # Output tensor of the selected layer.
        layer_output = model[layer_name]

        # a_S: activations evaluated now, for the currently assigned input.
        a_S = sess.run(layer_output)

        # a_G: the same layer left symbolic, so it is re-evaluated for
        # whatever image the model input holds when the cost is run.
        a_G = layer_output

        return coeff * compute_layer_style_cost(a_S, a_G)

    return sum(weighted_layer_cost(name, weight) for name, weight in STYLE_LAYERS)
196 
197 # Define the total cost to optimize 
def total_cost(J_content, J_style, alpha=10, beta=40):
    """
    Combine the content cost and the style cost into the total cost.

    Arguments:
    J_content -- content cost
    J_style -- style cost
    alpha -- hyperparameter weighting the importance of the content cost
    beta -- hyperparameter weighting the importance of the style cost

    Returns:
    J -- alpha * J_content + beta * J_style
    """
    content_term = alpha*J_content
    style_term = beta*J_style
    return content_term + style_term
210 
211 # Solving the optimization problem
212 # STEP1: Create an interactive session:
# Start from an empty default graph, then create the interactive session
# that the rest of the script (and compute_style_cost) uses as `sess`.
tf.reset_default_graph()

sess = tf.InteractiveSession()

# STEP 2: load the content and style images.
# scipy.misc.imread was removed in SciPy 1.2; it read files through Pillow,
# so open the files with Pillow (already imported as `Image`) directly.
content_image = np.array(Image.open("HZC_test_image/2.jpg"))
content_image = reshape_and_normalize_image(content_image)
print(content_image.shape)

style_image = np.array(Image.open("HZC_test_image/bjs.jpg"))
style_image = reshape_and_normalize_image(style_image)
print(style_image.shape)

# STEP 3: initialize the image to be generated as noise blended with the
# content image (shape 1x300x400x3).
generated_image = generate_noise_image(content_image)
imshow(generated_image[0])

# STEP 4: load the VGG-19 model into the fresh graph.
model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")

# STEP 5: build the TensorFlow graph.

# Content cost: assign the content image to the model input ...
sess.run(model['input'].assign(content_image))
# ... select the output tensor of layer conv4_2 ...
out = model['conv4_2']
# ... evaluate it now to get the fixed content activations a_C, and keep the
# tensor itself as a_G so it follows whatever the input holds later.
a_C = sess.run(out)
a_G = out
J_content = compute_content_cost(a_C, a_G)

# Style cost: the style image must be the model input while
# compute_style_cost evaluates the style activations.
sess.run(model['input'].assign(style_image))
J_style = compute_style_cost(model, STYLE_LAYERS)

# Total cost.
J = total_cost(J_content, J_style, alpha=10, beta=40)

# Optimizer (Adam, learning rate 2.0) and the training step minimizing J.
optimizer = tf.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(J)
258 
259 # STEP6: Initialize the TensorFlow graph and run it for a large number of iterations, 
260 # updating the generated image at every step.
def model_nn(sess,input_image,num_iterations=200):
    """
    Run the optimization and return the generated image.

    Uses the module-level `model`, `train_step`, `J`, `J_content` and
    `J_style` built earlier in the script.

    Arguments:
    sess -- TensorFlow session used to run the graph
    input_image -- initial value assigned to the model input
    num_iterations -- number of training steps to run

    Returns:
    generated_image -- value of the model input after the last step (the
                       assigned input itself when num_iterations is 0)
    """
    # Initialize the global variables.
    sess.run(tf.global_variables_initializer())

    # Feed the noisy input image to the model.
    sess.run(model['input'].assign(input_image))

    # Read the starting image back so that there is something to save and
    # return when num_iterations is 0; previously `generated_image` was
    # unbound in that case and the final save raised UnboundLocalError.
    generated_image = sess.run(model['input'])

    for i in range(num_iterations):

        # Run the session on the train_step to minimize the total cost.
        sess.run(train_step)

        # The generated image is the current value of model['input'].
        generated_image = sess.run(model['input'])

        # Report the costs and save a snapshot every 20 iterations.
        if i%20 == 0:
            Jt,Jc,Js = sess.run([J, J_content, J_style])
            print("iterations"+str(i)+":")
            print("total cost ="+str(Jt))
            print("content cost = "+str(Jc))
            print("style_cost = "+str(Js))

            save_image("output/generated_image_cup.jpg", generated_image)

    # Save the last generated image.
    save_image('output/generated_image_cup.jpg',generated_image)

    return generated_image
291 
# Run the optimization for 200 iterations, starting from the noisy image.
model_nn(sess=sess, input_image=generated_image, num_iterations=200)
293 
294 
295 
296 # plt.show() 
297 
298 # Test code for "compute_content_cost"
299 # tf.reset_default_graph()
300 
301 # with tf.Session() as test1:
302 #     tf.set_random_seed(1)
303 #     a_C = tf.random_normal([1,4,4,3],mean=1,stddev=4)
304 #     a_G = tf.random_normal([1,4,4,3],mean=1,stddev=4)
305 #     J_content = compute_content_cost(a_C, a_G)
306 
307 #     print("J_content="+str(J_content.eval()))
308 
309 # # Test code for "gram_matrix"
310 # tf.reset_default_graph()
311 
312 # with tf.Session() as test2:
313 #     tf.set_random_seed(1)
314 #     A = tf.random_normal([3,2*1], mean=1, stddev=4)
315 #     GA = gram_matrix(A)
316 
317 #     print("GA = " + str(GA.eval()))
318 
319 # Test code for "compute_layer_style_cost "
320 # tf.reset_default_graph()
321 
322 # with tf.Session() as test3:
323 #     tf.set_random_seed(1)
324 #     a_S = tf.random_normal([1,4,4,3],mean=1,stddev=4)
325 #     a_G = tf.random_normal([1,4,4,3],mean=1,stddev=4)
326 #     J_style_layer=compute_layer_style_cost(a_S, a_G)
327 #     print("J_style_layer = " + str(J_style_layer.eval()))
328 
329 # Test code for "total_cost"
330 # tf.reset_default_graph()
331 
332 # with tf.Session() as test:
333 #     np.random.seed(3)
334 #     J_content = np.random.randn()    
335 #     J_style = np.random.randn()
336 #     J = total_cost(J_content, J_style)
337 #     print("J = " + str(J))

posted @ 2018-12-22 12:18 Iriving 阅读(1122) 评论(0) 收藏举报

刷新页面返回顶部

Irving33Luna

Keep da dream alive..

Deep Learning & Neural Style Transfer(VGG) ——By何子辰

公告