论文 END-TO-END OPTIMIZED IMAGE COMPRESSION 源码解析与论文阅读——预处理+分析变换
0 前言
1 预处理
1.1 读取
def read_bmp(filename):
    """Read an image file and return it as a float32 tensor scaled to [0, 1]."""
    raw = tf.read_file(filename)  # raw file bytes as a string tensor
    decoded = tf.image.decode_image(raw, channels=0)  # decode; channels=0 keeps the file's own channel count
    # Convert integer pixel values to float32 and normalize from [0, 255] to [0, 1].
    return tf.cast(decoded, tf.float32) / 255
1.2 维度变换
# Example: assume the input matrix has shape [256, 256] at this point.
x = tf.expand_dims(x, 0)  # prepend a batch dimension -> [1, 256, 256]
x.set_shape([1, None, None, 1])  # NOTE(review): set_shape only asserts/refines a static shape; it cannot add a channel axis. If x is rank 3 here this would fail — adding the channel dim needs another expand_dims. TODO: confirm against the original code.
# From here on the tensor is treated as [1, 256, 256, 1].
x_shape = tf.shape(x)  # record the dynamic shape for later decompression; possibly optional
PS:必须扩展成4维矩阵:后续调用的 tf.nn.conv2d 要求输入为 [batch, height, width, channels] 的 4 维张量(见 2.3.3 节),因此 batch 维度和 channel 维度都不能省略,维度扩展正是为了适配这一接口。
2 分析变换
分析变换类定义:
class AnalysisTransform(tf.keras.layers.Layer):
    """The analysis transform.

    Three 5x5 SignalConv2D layers, each with stride-2 downsampling and a GDN
    activation, mapping an image tensor to its latent representation
    (spatial dims shrink by a factor of 8 overall).
    """

    def __init__(self, num_filters, *args, **kwargs):
        # Number of filters (output channels) used by every conv layer.
        self.num_filters = num_filters
        super(AnalysisTransform, self).__init__(*args, **kwargs)

    def build(self, input_shape):
        # All three layers are identical except for their names; the paper's
        # figure stops after the first two, but the third is the same.
        self._layers = [
            tfc.SignalConv2D(
                self.num_filters, (5, 5), name="layer_0", corr=True, strides_down=2,
                padding="same_zeros", use_bias=True,
                activation=tfc.GDN(name="gdn_0")),
            tfc.SignalConv2D(
                self.num_filters, (5, 5), name="layer_1", corr=True, strides_down=2,
                padding="same_zeros", use_bias=True,
                activation=tfc.GDN(name="gdn_1")),
            tfc.SignalConv2D(
                self.num_filters, (5, 5), name="layer_2", corr=True, strides_down=2,
                padding="same_zeros", use_bias=True,
                activation=tfc.GDN(name="gdn_2")),
        ]
        super(AnalysisTransform, self).build(input_shape)

    def call(self, tensor):
        # Apply the layers in sequence.
        for layer in self._layers:
            tensor = layer(tensor)
        # BUG FIX: the excerpt dropped this return, so call() yielded None.
        return tensor
以一层为例说明:
tfc.SignalConv2D(
    self.num_filters,  # number of filters = output channel count, e.g. 36; note strides_down=2 halves the spatial dims, so a [256, 256] input yields [128, 128, 36]
    (5, 5),  # kernel size
    name="layer_0",  # layer name
    corr=True,  # correlation (True) vs. convolution (False)
    strides_down=2,  # downsampling stride
    padding="same_zeros",  # zero padding
    use_bias=True,  # Boolean, whether an additive constant will be applied to each output channel.
    activation=tfc.GDN(name="gdn_0")  # GDN activation function
),
- 调用顺序
init()->build()->call()
2.1 初始化init()
一些变量的赋值
FPGA可能无法做面向对象编程?具体如何传参还请你们设计。
def __init__(self, filters, kernel_support,
             corr=False, strides_down=1, strides_up=1, padding="valid",
             extra_pad_end=True, channel_separable=False,
             data_format="channels_last",
             activation=None, use_bias=False, use_explicit=True,
             kernel_initializer=tf.initializers.variance_scaling(),
             bias_initializer=tf.initializers.zeros(),
             kernel_regularizer=None, bias_regularizer=None,
             kernel_parameterizer=parameterizers.RDFTParameterizer(),
             bias_parameterizer=None,
             **kwargs):  # everything above is a default value
    """Store the layer configuration; no variables are created here (see build())."""
    # Call the parent (Keras Layer) constructor.
    super(_SignalConv, self).__init__(**kwargs)
    self._filters = int(filters)  # number of output filters
    self._kernel_support = self._normalized_tuple(
        kernel_support, "kernel_support")  # = (5, 5) in this walkthrough
    self._corr = bool(corr)  # True -> correlation, False -> convolution
    self._strides_down = self._normalized_tuple(strides_down, "strides_down")
    self._strides_up = self._normalized_tuple(strides_up, "strides_up")
    self._padding = str(padding).lower()
    try:
        # Map the padding name to the tf.pad mode; "valid" means no padding.
        self._pad_mode = {
            "valid": None,
            "same_zeros": "CONSTANT",
            "same_reflect": "REFLECT",
        }[self.padding]
    except KeyError:
        raise ValueError("Unsupported padding mode: '{}'".format(padding))
    self._extra_pad_end = bool(extra_pad_end)
    self._channel_separable = bool(channel_separable)
    self._data_format = str(data_format)
    self._activation = activation
    self._use_bias = bool(use_bias)
    self._use_explicit = bool(use_explicit)
    self._kernel_initializer = kernel_initializer
    self._bias_initializer = bias_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._kernel_parameterizer = kernel_parameterizer
    self._bias_parameterizer = bias_parameterizer
    if self.data_format not in ("channels_first", "channels_last"):
        raise ValueError("Unknown data format: '{}'.".format(self.data_format))
    # Expected input rank = spatial rank + batch and channel dims.
    self.input_spec = tf.keras.layers.InputSpec(ndim=self._rank + 2)
2.2 build()
def build(self, input_shape):
    """Create the kernel (via its parameterizer) and the optional bias variable."""
    # Input shape, e.g. [1, 256, 256, 1] in this walkthrough.
    input_shape = tf.TensorShape(input_shape)
    channel_axis = {"channels_first": 1, "channels_last": -1}[self.data_format]
    input_channels = input_shape.as_list()[channel_axis]
    if input_channels is None:
        raise ValueError("The channel dimension of the inputs must be defined.")
    # Pin down the input spec: rank = spatial rank (2) + batch and channel (2).
    self.input_spec = tf.keras.layers.InputSpec(
        ndim=self._rank + 2, axes={channel_axis: input_channels})
    # e.g. self.input_spec = (ndim=4, axes={-1: 1})
    # Kernel shape = kernel_support + (in_channels, filters),
    # e.g. (5, 5) + (1, 36) = (5, 5, 1, 36): a 4-D kernel.
    kernel_shape = self.kernel_support + (input_channels, self.filters)
    if self.channel_separable:
        output_channels = self.filters * input_channels
    else:
        output_channels = self.filters  # e.g. 36 output channels
    # 2.2.1 Obtain the kernel through parameterizers.RDFTParameterizer().
    kernel_parameterizer = self.kernel_parameterizer
    if kernel_parameterizer is None:
        getter = self.add_weight
    else:
        # Wrap add_weight so the parameterizer controls how the variable is stored.
        getter = functools.partial(
            kernel_parameterizer, getter=self.add_weight)
    self._kernel = getter(
        name="kernel", shape=kernel_shape, dtype=self.dtype,
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer)
    # 2.2.2 Initialize the bias: here shape=(36,), dtype=float32, all zeros.
    if self.use_bias:
        bias_parameterizer = self.bias_parameterizer
        if bias_parameterizer is None:
            getter = self.add_weight
        else:
            getter = functools.partial(
                bias_parameterizer, getter=self.add_weight)
        self._bias = getter(
            name="bias", shape=(output_channels,), dtype=self.dtype,
            initializer=self.bias_initializer, regularizer=self.bias_regularizer)
    super(_SignalConv, self).build(input_shape)
- 调用了参数化器,给卷积核赋值,赋值后是一个shape=(5, 5, 1, 36), float32类型的矩阵。
- 因为不需要实现训练,这一步替代为读取模型参数、赋值
2.3 call()
2.3.1 初始化
inputs = tf.convert_to_tensor(inputs)  # make sure we are working with a Tensor
outputs = inputs
kernel = self.kernel  # kernel built in build(), e.g. shape (5, 5, 1, 36)
corr = self.corr  # True in this walkthrough: correlation rather than convolution
2.3.2 零填充
1)计算每个维度首、尾的填充数。
padding = padding_ops.same_padding_for_kernel(
self.kernel_support=(5,5), corr=True, self.strides_up=(1,1))
# padding = [(2, 2), (2, 2)]: each of the 2 spatial dims is padded by 2 pixels at both ends
调用的函数
def same_padding_for_kernel(shape, corr, strides_up=None):
    """Return per-dimension (begin, end) padding that yields 'same' output size.

    Args:
      shape: kernel support per dimension, e.g. (5, 5).
      corr: True for correlation, False for convolution (mirrored split).
      strides_up: upsampling factor per dimension; defaults to all ones.

    Returns:
      A list with one (pad_begin, pad_end) tuple per dimension of `shape`.
    """
    ndim = len(shape)
    if strides_up is None:
        strides_up = (1,) * ndim
    # Correlation centers the kernel with the larger half at the front;
    # convolution mirrors that split.
    if corr:
        halves = [(size // 2, (size - 1) // 2) for size in shape]
    else:
        halves = [((size - 1) // 2, size // 2) for size in shape]
    # Reduce the padding by the upsampling factor (ceiling division).
    return [((before - 1) // stride + 1, (after - 1) // stride + 1)
            for (before, after), stride in zip(halves, strides_up)]
2)“预填充”置零,即不进行预填充。
prepadding = self._rank * ((0, 0),)  # "pre-padding" set to all zeros, i.e. no pre-padding is applied
# prepadding = ((0, 0), (0, 0))
3)填充
(这一步实际是在卷积计算时操作,在这里先说明更清晰)
采用same padding,根据1)的计算结果填充0。
例如:padding = [(2, 2), (2, 2)] 2个维度,首尾各填充2个像素,即图像的4条边各拓展2个像素宽的0。
2.3.3 卷积计算
convolution/correlation 卷积/互相关
1)执行 互相关 下采样
outputs = self._correlate_down_explicit(outputs=输入的矩阵, kernel, padding)
↓
def _correlate_down_explicit(self, inputs, kernel, padding):
    # Computes correlation followed by downsampling, with arbitrary zero
    # padding.
    data_format = self._op_data_format
    strides = self._padded_tuple(self.strides_down, 1)  # e.g. (1, 2, 2, 1)
    padding = self._padded_tuple(padding, (0, 0))  # e.g. ((0, 0), (2, 2), (2, 2), (0, 0))
    do_cast = inputs.dtype.is_integer  # integer inputs are computed in float32
    if self._rank == 1 and not self.channel_separable:
        # 1-D case omitted in this excerpt
    elif self._rank == 2 and not self.channel_separable:
        # `tf.nn.conv2d` performs correlations followed by optional downsampling.
        if do_cast:  # integer path; not taken in this walkthrough
            inputs = tf.cast(inputs, tf.float32)
        # Correlation + stride-2 downsampling with explicit zero padding.
        # NOTE(review): the argument values below are the walkthrough's
        # annotations, not runnable Python.
        outputs = tf.nn.conv2d(
            inputs=4维矩阵, kernel=shape(5,5,1,36)矩阵,
            strides=strides=(1, 2, 2, 1), padding=padding=((0, 0), (2, 2), (2, 2), (0, 0)), data_format=data_format)
        if do_cast:  # integer path; not taken in this walkthrough
            outputs = tf.cast(tf.math.round(outputs), self.accum_dtype)
    else:
        self._raise_notimplemented()
    return outputs
输入:
kernel 形状 [filter_height, filter_width, in_channels, out_channels]=[5,5,1,36]
输入矩阵 [batch, in_height, in_width, in_channels]=[1,256,256,1]
tf.nn.conv2d执行了以下操作:
- 将滤波器(卷积核)展平为形状为[filter_height * filter_width * in_channels, output_channels]=[25,36]的二维矩阵.
- 从输入张量中提取图像patch,以形成形状为[batch, out_height, out_width, filter_height * filter_width * in_channels]=[1,128,128,25]的虚拟张量(步长为2,输出空间尺寸减半).
- 对于每个patch,右乘卷积核矩阵和图像patch矢量.
output[b, i, j, k] =
sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] *filter[di, dj, q, k]
上述操作本质上就是:
- 如果输入是
[1,256,256,c]的图,kernel是[5,5,c,36],
共36个[5,5,c]的卷积核,对于每个卷积核:
以2的步长滑动,对
[256,256,c]的图像做卷积,产生[128, 128]的特征图一个。c=1,对应元素直接相乘;c≠1,则是做c维向量的点积。
- 共36个,输出
(1,128,128,36)的特征图。
参考:
源码
tensorflow代码解析
tf.nn.conv2d是怎样实现卷积的?
Explaining Tensorflow Code for a Convolutional Neural Network
What does tf.nn.conv2d do in tensorflow?
2.3.4 加bias
outputs = tf.nn.bias_add(outputs, bias)  # broadcast-add one bias value per output channel
outputs:shape(1,128,128,36)(经过步长为2的下采样卷积后,空间尺寸已减半)
bias:shape(36,)
- tf.nn.bias_add 按通道广播:每个通道对应一个 bias 标量,将该通道的整张特征图的每个位置都加上同一个 bias。
2.3.5 激活函数 GDN(正向)
outputs = self.activation(outputs)
执行的公式如下,其中gamma、beta是可以训练的参数,需要读取模型赋值。
y[i] = x[i] / sqrt(beta[i] + sum_j(gamma[j, i] * x[j]^2))
对应代码:
norm_pool = tf.linalg.matmul(tf.math.square(inputs), self.gamma)  # sum_j gamma[j, i] * x[j]^2
norm_pool = tf.nn.bias_add(norm_pool, self.beta)  # ... + beta[i]
norm_pool = tf.math.rsqrt(norm_pool)  # 1 / sqrt(beta[i] + sum_j gamma[j, i] * x[j]^2)
# NOTE: the final multiplication y = x * norm_pool is not shown in this excerpt.

浙公网安备 33010602011771号