A final solution implemented entirely with TensorFlow functions
TensorFlow is still quite unfriendly to beginners: without knowing the relevant functions you simply cannot implement this kind of operation efficiently, and even after finding them it takes time to get comfortable using them.
from __future__ import division
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

imgs = [[[[255, 0, 0], [0, 255, 0], [0, 0, 255]],
         [[255, 0, 0], [0, 255, 0], [0, 0, 255]],
         [[255, 0, 0], [0, 255, 0], [0, 0, 255]],
         [[255, 0, 0], [0, 255, 0], [0, 0, 255]]],
        [[[255, 0, 0], [0, 255, 0], [0, 0, 255]],
         [[255, 0, 0], [0, 255, 0], [0, 0, 255]],
         [[255, 0, 0], [0, 255, 0], [0, 0, 255]],
         [[255, 0, 0], [0, 255, 0], [0, 0, 255]]]]
imgs = tf.reshape(imgs, [2, 4, 3, 3])

coords = [[[[0.2, 0.2], [1.3, 0.2], [1.8, 2.2]],
           [[0.5, 2], [1.3, 2], [0.2, 1.6]],
           [[0.2, 0.2], [1.3, 0.2], [1.8, 2.2]],
           [[0.5, 2], [1.3, 2], [0.2, 1.6]]],
          [[[0.2, 0.2], [1.3, 0.2], [1.8, 2.2]],
           [[0.5, 2], [1.3, 2], [0.2, 1.6]],
           [[0.2, 0.2], [1.3, 0.2], [1.8, 2.2]],
           [[0.5, 2], [1.3, 2], [0.2, 1.6]]]]
coords = tf.reshape(coords, [2, 4, 3, 2])

cam_coords = [[[[1, 2, 3, 1], [2, 3, 4, 1], [3, 4, 5, 1]],
               [[4, 5, 6, 1], [7, 8, 9, 1], [10, 11, 12, 1]],
               [[1, 2, 3, 1], [2, 3, 4, 1], [3, 4, 5, 1]],
               [[4, 5, 6, 1], [7, 8, 9, 1], [10, 11, 12, 1]]],
              [[[1, 2, 3, 1], [2, 3, 4, 1], [3, 4, 5, 1]],
               [[4, 5, 6, 1], [7, 8, 9, 1], [10, 11, 12, 1]],
               [[2, 2, 3, 1], [3, 3, 4, 1], [4, 4, 5, 1]],
               [[5, 5, 6, 1], [8, 8, 9, 1], [11, 11, 12, 1]]]]
cam_coords = tf.reshape(cam_coords, [2, 4, 3, 4])
cam_coords = tf.transpose(cam_coords, perm=[0, 3, 1, 2])


# def bilinear_sampler(imgs, coords, cam_coords_T):
#     imgs = tf.tile(imgs, multiples=[1, 100, 50, 1])
#     coords = tf.tile(coords, multiples=[1, 100, 50, 1])
#     cam_coords_T = tf.tile(cam_coords_T, multiples=[1, 1, 100, 50])
def _repeat(x, n_repeats):
    # e.g. x = tf.cast(tf.range(4), 'float32') * 53248, n_repeats = 53248
    rep = tf.transpose(
        tf.expand_dims(tf.ones(shape=tf.stack([n_repeats, ])), 1), [1, 0])
    # rep is an all-ones matrix of shape [1, n_repeats]. tf.stack is similar to
    # tf.concat in that both join tensors, but tf.concat joins along an existing
    # axis and leaves the rank unchanged, whereas tf.stack joins along a new
    # axis, so the rank of the result increases by one.
    rep = tf.cast(rep, 'float32')
    x = tf.matmul(tf.reshape(x, (-1, 1)), rep)
    # reshape x into a column, e.g. [[0.], [53248.], [106496.], [159744.]],
    # and multiply by rep to get a matrix of shape (4, 53248)
    return tf.reshape(x, [-1])  # finally flatten, e.g. to shape (212992,)

with tf.name_scope('image_sampling'):
    coords_x, coords_y = tf.split(coords, [1, 1], axis=3)
    inp_size = imgs.get_shape()
    coord_size = coords.get_shape()
    out_size = coords.get_shape().as_list()
    out_size[3] = imgs.get_shape().as_list()[3]

    coords_x = tf.cast(coords_x, 'float32')
    coords_y = tf.cast(coords_y, 'float32')

    x0 = tf.floor(coords_x)
    x1 = x0 + 1
    y0 = tf.floor(coords_y)
    y1 = y0 + 1

    y_max = tf.cast(tf.shape(imgs)[1] - 1, 'float32')
    x_max = tf.cast(tf.shape(imgs)[2] - 1, 'float32')
    zero = tf.zeros([1], dtype='float32')

    x0_safe = tf.clip_by_value(x0, zero, x_max)
    y0_safe = tf.clip_by_value(y0, zero, y_max)
    x1_safe = tf.clip_by_value(x1, zero, x_max)
    y1_safe = tf.clip_by_value(y1, zero, y_max)

    ## bilinear interp weights, with points outside the grid having weight 0
    ## (the equality test yields 1 where equal, 0 otherwise)
    ## The variant below did not improve results: it forces the reconstructed
    ## pixel to 0 while the source image still has a value at that position,
    ## so the error there is simply the raw source pixel value.
    # wt_x0 = (x1 - coords_x) * tf.cast(tf.equal(x0, x0_safe), 'float32')
    # wt_x1 = (coords_x - x0) * tf.cast(tf.equal(x1, x1_safe), 'float32')
    # wt_y0 = (y1 - coords_y) * tf.cast(tf.equal(y0, y0_safe), 'float32')
    # wt_y1 = (coords_y - y0) * tf.cast(tf.equal(y1, y1_safe), 'float32')

    # If all four corner coordinates lie inside the image, the projected point
    # does not fall outside it; this mask has the same size as the neighbouring
    # frame the points are projected from.
    mask_p = tf.logical_and(
        tf.logical_and(x0 >= zero, x1 <= x_max),
        tf.logical_and(y0 >= zero, y1 <= y_max))
    mask_p = tf.to_float(mask_p)
    mask_p = tf.tile(mask_p, multiples=[1, 1, 1, 3])

    wt_x0 = x1_safe - coords_x
    wt_x1 = coords_x - x0_safe
    wt_y0 = y1_safe - coords_y
    wt_y1 = coords_y - y0_safe

    ## indices in the flat image to sample from
    dim2 = tf.cast(inp_size[2], 'float32')
    dim1 = tf.cast(inp_size[2] * inp_size[1], 'float32')
    base = tf.reshape(
        _repeat(
            tf.cast(tf.range(coord_size[0]), 'float32') * dim1,
            coord_size[1] * coord_size[2]),
        [out_size[0], out_size[1], out_size[2], 1])
    # e.g. tf.reshape(_repeat(tf.cast(tf.range(4), 'float32') * 128 * 416,
    #                         128 * 416), [4, 128, 416, 1])
    # base then has shape (4, 128, 416, 1) and holds the four per-batch
    # offsets 0., 53248., 106496. and 159744.
    base_y0 = base + y0_safe * dim2
    base_y1 = base + y1_safe * dim2  # each batch element gets its own base offset
    idx00 = tf.reshape(x0_safe + base_y0, [-1])  # adding the base gives the flat indices of the four neighbours
    idx01 = x0_safe + base_y1
    idx10 = x1_safe + base_y0
    idx11 = x1_safe + base_y1

    ## sample from imgs
    imgs_flat = tf.reshape(imgs, tf.stack([-1, inp_size[3]]))
    imgs_flat = tf.cast(imgs_flat, 'float32')
    # each output pixel is computed from the values of its four neighbours
    im00 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx00, 'int32')), out_size)
    im01 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx01, 'int32')), out_size)
    im10 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx10, 'int32')), out_size)
    im11 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx11, 'int32')), out_size)

    # the product of the horizontal and vertical distances gives each weight
    w00 = wt_x0 * wt_y0
    w01 = wt_x0 * wt_y1
    w10 = wt_x1 * wt_y0
    w11 = wt_x1 * wt_y1

    output = tf.add_n([
        w00 * im00, w01 * im01,
        w10 * im10, w11 * im11
    ])

    # Custom code below
    cam_coords = tf.transpose(cam_coords, perm=[0, 2, 3, 1])
    batch, height, width, channels = imgs.get_shape().as_list()
    cam_coords = cam_coords[:, :, :, 0:-1]
    cam_coords = tf.cast(cam_coords, 'float32')
    euclidean = tf.sqrt(tf.reduce_sum(tf.square(cam_coords), 3))
    euclidean = tf.reshape(euclidean, [batch, -1])
    xy00 = tf.concat([x0, y0], axis=3)

    for i in range(2):
        euclideani = euclidean[i, :]
        euclideani = tf.reshape(euclideani, [-1, 1])
        xy00_batchi = xy00[i, :, :, :]  # x and y stacked together, for batch element i
        xy00_batchi = tf.reshape(xy00_batchi, [-1, 2])
        xy00_batchi = tf.cast(xy00_batchi, tf.int32)

        unique_xy00_batchi, ids = tf.unique(xy00_batchi[:, 0] * width + xy00_batchi[:, 1])
        num_segments = tf.shape(unique_xy00_batchi)
        outputs = tf.unsorted_segment_min(euclideani, ids, num_segments[0])
        zuixiaojuli = tf.gather(outputs, ids)  # minimum distance within each pixel group
        mask0 = tf.where(tf.less_equal(euclideani, zuixiaojuli),
                         tf.ones_like(euclideani), tf.zeros_like(euclideani))

        mask0 = tf.reshape(mask0, [height, width])
        mask0 = tf.expand_dims(mask0, 0)
        if i == 0:
            mask0_stack = mask0
        else:
            mask0_stack = tf.concat([mask0_stack, mask0], axis=0)
    # mask0_stack = tf.tile(tf.expand_dims(mask0_stack, 3), multiples=[1, 1, 1, 3])
    # return output, mask_p, mask0_stack


# output, mask_p, mask0_stack = bilinear_sampler(imgs, coords, cam_coords)
# imgs = tf.cast(imgs, 'float32')
# imgs_mask = imgs * mask1_stack
with tf.Session() as sess:
    print(sess.run(xy00_batchi[:, 0]))
    print(xy00_batchi[:, 0])
    print(sess.run(euclideani))
    print(euclideani)
    print(mask0_stack)
    print(sess.run(mask0_stack))
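For intuition about the weights w00–w11, here is a minimal sketch (plain Python, separate from the listing above; the variable names mirror the listing but the sample point is my own toy value). Each corner weight is the product of the horizontal and vertical distances from the sample point to the opposite corner, and the four weights sum to 1.

x, y = 1.3, 0.2
x0, y0 = 1.0, 0.0          # floor of the coordinates
x1, y1 = x0 + 1, y0 + 1

w00 = (x1 - x) * (y1 - y)  # weight of the (x0, y0) corner
w01 = (x1 - x) * (y - y0)  # weight of the (x0, y1) corner
w10 = (x - x0) * (y1 - y)  # weight of the (x1, y0) corner
w11 = (x - x0) * (y - y0)  # weight of the (x1, y1) corner

print(w00, w01, w10, w11)      # ~0.56 ~0.14 ~0.24 ~0.06 (up to float rounding)
print(w00 + w01 + w10 + w11)   # ~1.0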

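The base/_repeat bookkeeping encodes the usual row-major flattening: after imgs_flat = tf.reshape(imgs, [-1, channels]), pixel (y, x) of batch element b sits at row b*H*W + y*W + x, which is exactly base + y*_safe*dim2 + x*_safe. A minimal sketch of that arithmetic (plain Python; flat_index is a hypothetical helper of mine, not part of the listing):

batch, H, W = 2, 4, 3

def flat_index(b, y, x):
    base = b * H * W          # per-batch offset that _repeat broadcasts
    return base + y * W + x   # base_y* + x*_safe in the listing

print(flat_index(0, 0, 0))    # 0  -> first pixel of the first image
print(flat_index(0, 2, 1))    # 7
print(flat_index(1, 3, 2))    # 23 -> last pixel of the second image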
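The custom occlusion test at the end groups every projected point that lands on the same integer pixel (x0, y0), keeps only the point with the smallest camera-space distance, and masks out the rest. Below is a minimal standalone sketch of that grouping, assuming TensorFlow 1.x as in the listing; the toy coordinates and distances are my own, only the tf.unique / tf.unsorted_segment_min pattern comes from the code above.

import tensorflow as tf

width = 3
xy = tf.constant([[0, 1], [0, 1], [2, 2]], dtype=tf.int32)   # (x0, y0) of each point
dist = tf.constant([[5.0], [2.0], [7.0]])                    # camera-space distance of each point

keys = xy[:, 0] * width + xy[:, 1]               # one integer key per target pixel
unique_keys, ids = tf.unique(keys)               # group points that hit the same pixel
min_dist = tf.unsorted_segment_min(dist, ids, tf.shape(unique_keys)[0])
nearest = tf.gather(min_dist, ids)               # minimum distance of each point's group
mask = tf.where(tf.less_equal(dist, nearest),
                tf.ones_like(dist), tf.zeros_like(dist))

with tf.Session() as sess:
    # [0. 1. 1.]: the farther of the two points hitting pixel (0, 1) is masked out
    print(sess.run(mask[:, 0]))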