[Caffe Source Code Study] Chapter 3: The Source (6): caffe.proto
Caffe uses Protocol Buffers (protobuf) to define its network structure, parameters, and so on. This post walks through the core parts of caffe.proto.
Blob
First, the Blob-related protobuf messages:
message BlobShape {
// A blob's shape is defined as Num × Channel × Height × Width; Caffe encapsulates
// high-dimensional data via multi-dimensional nesting of containers indexed by these dimensions.
repeated int64 dim = 1 [packed = true];
}
// A data blob: {shape, data, diff}
message BlobProto {
optional BlobShape shape = 7;
repeated float data = 5 [packed = true];
repeated float diff = 6 [packed = true];
repeated double double_data = 8 [packed = true];
repeated double double_diff = 9 [packed = true];
// 4D shape -- deprecated; use "BlobShape shape" instead:
optional int32 num = 1 [default = 0]; // number of samples
optional int32 channels = 2 [default = 0];
optional int32 height = 3 [default = 0];
optional int32 width = 4 [default = 0];
}
// Holds multiple BlobProto instances with their corresponding indices, for easy reference.
message BlobProtoVector {
repeated BlobProto blobs = 1;
}
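For example, a BlobProto's shape field for a batch of 64 RGB images of size 28×28 would be written in prototxt text form as the sketch below (the dimension values are illustrative):
shape {
  dim: 64   # num
  dim: 3    # channels
  dim: 28   # height
  dim: 28   # width
}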
Datum
// One (image) sample: {channels, height, width, data (bytes or float), label}
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
optional int32 width = 3;
// the actual image data, in bytes
optional bytes data = 4;
optional int32 label = 5;
// Optionally, the datum could also hold float data.
repeated float float_data = 6;
// If true, data contains an encoded image that needs to be decoded
optional bool encoded = 7 [default = false];
}
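As a sketch, the text form of a Datum describing a 32×32 RGB training image with class label 5 might look like the following; the raw pixel bytes of the data field are elided, and with encoded = true the data field would instead hold an encoded (e.g. JPEG/PNG) image:
channels: 3
height: 32
width: 32
data: "..."     # channels*height*width raw bytes, or an encoded image
label: 5
encoded: false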
FillerParameter
// Filler (weight initializer) parameters: {type (constant | uniform | gaussian | ...), ...}
message FillerParameter {
// The filler type.
optional string type = 1 [default = 'constant'];
optional float value = 2 [default = 0]; // the value in constant filler
optional float min = 3 [default = 0]; // the min value in uniform filler
optional float max = 4 [default = 1]; // the max value in uniform filler
optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
optional float std = 6 [default = 1]; // the std value in Gaussian filler
// Expected number of non-zero outputs per input for the Gaussian filler;
// the default -1 means the Gaussian weights are not sparsified.
optional int32 sparse = 7 [default = -1];
// Normalize the filler variance by fan_in, fan_out, or their average.
// Applies to 'xavier' and 'msra' fillers (fan-in / fan-out).
enum VarianceNorm {
FAN_IN = 0;
FAN_OUT = 1;
AVERAGE = 2;
}
optional VarianceNorm variance_norm = 8 [default = FAN_IN];
}
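FillerParameter is normally used through the weight_filler / bias_filler fields of parameterized layers such as Convolution and InnerProduct. A minimal prototxt sketch (layer names and sizes are illustrative):
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  convolution_param {
    num_output: 20
    kernel_size: 5
    weight_filler {
      type: "xavier"       # variance normalized by fan_in (the FAN_IN default)
    }
    bias_filler {
      type: "constant"     # all biases initialized to value (0 by default)
      value: 0
    }
  }
}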
NetParameter
// Network parameters: {name, inputs, input blob shapes, force_backward, NetState, debug_info, layers}
message NetParameter {
optional string name = 1; // consider giving the network a name
// DEPRECATED. See InputParameter. The input blobs to the network.
repeated string input = 3;
// DEPRECATED. See InputParameter. The shape of the input blobs.
repeated BlobShape input_shape = 8;
// DEPRECATED -- the 4D input dimensions; use "input_shape" instead.
// With this legacy field, every input blob needs 4 values (Num × Channels × Height × Width),
// so input_dim must be repeated 4 times per input blob.
repeated int32 input_dim = 4;
// Whether the network will force every layer to carry out backward computation.
// If set to false, whether to carry out backward is determined automatically
// according to the net structure and learning rates.
optional bool force_backward = 5 [default = false];
// The current "state" of the network, including "phase", "level", and "stage";
// some layers may be included or excluded depending on this state.
optional NetState state = 6;
// Print debugging information while running Net::Forward/Backward/Update. Default false.
optional bool debug_info = 7 [default = false];
// The layers that make up the net. Each layer's connectivity and behavior is specified via a LayerParameter.
repeated LayerParameter layer = 100; // ID 100 so layers are printed last.
// DEPRECATED: use 'layer' instead.
repeated V1LayerParameter layers = 2;
}
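Putting this together, a minimal NetParameter written as prototxt (using the current repeated layer field and an Input layer rather than the deprecated input/input_shape fields) could look like the following sketch; the names and dimensions are illustrative:
name: "TinyNet"
layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param { shape { dim: 1 dim: 3 dim: 224 dim: 224 } }
}
layer {
  name: "fc1"
  type: "InnerProduct"
  bottom: "data"
  top: "fc1"
  inner_product_param { num_output: 10 }
}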
SolverParameter
// NOTE
// Update the next available ID when you add a new SolverParameter field.
// SolverParameter next available ID: 41 (last added: type)
// Solver parameters: {networks, ...}
message SolverParameter {
//////////////////////////////////////////////////////////////////////////////
// Specifying the train and test networks
//
// Exactly one train net must be specified using one of the following fields:
// train_net_param, train_net, net_param, net
// One or more test nets may be specified using any of the following fields:
// test_net_param, test_net, net_param, net
// If more than one test net field is specified (e.g., both net and
// test_net are specified), they will be evaluated in the field order given
// above: (1) test_net_param, (2) test_net, (3) net_param/net.
// A test_iter must be specified for each test_net.
// A test_level and/or a test_stage may also be specified for each test_net.
//////////////////////////////////////////////////////////////////////////////
// Specify the networks; any of the following forms may be used:
// Proto filename for the train net, possibly combined with one or more
// test nets.
optional string net = 24;
// Inline train net param, possibly combined with one or more test nets.
optional NetParameter net_param = 25;
optional string train_net = 1; // Proto filename for the train net.
repeated string test_net = 2; // Proto filenames for the test nets.
optional NetParameter train_net_param = 21; // Inline train net params.
repeated NetParameter test_net_param = 22; // Inline test net params.
// Specify the network states.
// The states for the train/test nets. Must be unspecified or
// specified once per net.
//
// By default, all states will have solver = true;
// train_state will have phase = TRAIN,
// and all test_state's will have phase = TEST.
// Other defaults are set according to the NetState defaults.
optional NetState train_state = 26;
repeated NetState test_state = 27;
// Number of test iterations per testing phase.
// Set it so that one testing phase covers the entire test set:
// a reasonable value = totalTestSamples / batchSize.
repeated int32 test_iter = 3;
// Number of training iterations between two testing phases.
// Set it so that the interval covers the entire training set:
// a reasonable value = totalTrainSamples / batchSize.
optional int32 test_interval = 4 [default = 0];
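// For example (illustrative numbers): with 10,000 test images and a test batch
// size of 100, test_iter = 100 covers the whole test set; with 60,000 training
// images and a training batch size of 100, test_interval = 600 runs one testing
// phase per training epoch.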
// Training for test_interval iterations followed by testing for test_iter iterations
// forms one round (roughly an epoch); with the settings above, each round covers
// all training and test samples.
// By default, the loss is not computed during testing:
optional bool test_compute_loss = 19 [default = false];
// If true, run an initial test pass before training, to make sure memory is sufficient and to print the initial loss.
optional bool test_initialization = 32 [default = true];
optional float base_lr = 5; // The base learning rate
// The number of iterations between displaying info. If set to 0, no info will be displayed.
optional int32 display = 6;
// Display the loss averaged over the last average_loss iterations.
optional int32 average_loss = 33 [default = 1];
// The maximum number of training iterations.
optional int32 max_iter = 7;
// accumulate gradients over `iter_size` x `batch_size` instances
// i.e., each parameter update accumulates gradients over iter_size batches
// of batch_size samples each.
optional int32 iter_size = 36 [default = 1];
// Learning rate decay policies (7 options):
// The learning rate decay policy. The currently implemented learning rate
// policies are as follows:
// - fixed: always return base_lr.
// - step: return base_lr * gamma ^ (floor(iter / step))
// - exp: return base_lr * gamma ^ iter
// - inv: return base_lr * (1 + gamma * iter) ^ (- power)
// - multistep: similar to step but allows non-uniform steps defined by
//   stepvalue
// - poly: the effective learning rate follows a polynomial decay, to be
// zero by the max_iter. return base_lr * (1 - iter/max_iter) ^ (power)
// - sigmoid: the effective learning rate follows a sigmoid decay
//   return base_lr * ( 1/(1 + exp(-gamma * (iter - stepsize))))
//
// where base_lr, max_iter, gamma, step, stepvalue and power are defined
// in the solver.prototxt file, and iter is the current iteration.
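// Worked example (illustrative values): with base_lr = 0.01, gamma = 0.1 and
// stepsize = 10000, the "step" policy gives lr = 0.01 * 0.1^floor(iter/10000),
// i.e. 0.01 for iterations 0-9999, 0.001 for 10000-19999, 0.0001 for 20000-29999.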
optional string lr_policy = 8; // the learning rate decay policy
optional float gamma = 9; // The parameter to compute the learning rate.
optional float power = 10; // The parameter to compute the learning rate.
optional float momentum = 11; // The momentum value.
optional float weight_decay = 12; // The weight decay coefficient.
// Regularization type applied via the weight decay: "L1" or "L2" norm (default "L2").
optional string regularization_type = 29 [default = "L2"];
//"step"策略下,学习率的步长值
optional int32 stepsize = 13;
//"multistep"策略下的步长值
repeated int32 stepvalue = 34;
// Set clip_gradients >= 0 to clip parameter gradients to that L2 norm
// whenever their actual L2 norm exceeds it (the default -1 disables clipping).
optional float clip_gradients = 35 [default = -1];
// Snapshot interval: number of training iterations between saving the model and solver state.
optional int32 snapshot = 14 [default = 0];
optional string snapshot_prefix = 15; // The prefix for the snapshot.
// Whether to snapshot the diffs as well; useful for debugging, but the final
// protocol buffer becomes much larger.
optional bool snapshot_diff = 16 [default = false];
// Snapshot storage format: {HDF5, BINARYPROTO (default)}
enum SnapshotFormat {
HDF5 = 0;
BINARYPROTO = 1;
}
optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
// The mode the solver will use: 0 for CPU and 1 for GPU. GPU is the default.
enum SolverMode {
CPU = 0;
GPU = 1;
}
// Solver mode: {GPU (with device_id), CPU}
optional SolverMode solver_mode = 17 [default = GPU];
optional int32 device_id = 18 [default = 0];
// Random seed. A positive value makes the Solver initialize Caffe with that seed,
// producing reproducible random numbers for repeatable experiments; the default -1
// uses the system clock as the seed.
optional int64 random_seed = 20 [default = -1];
// Solver type (default "SGD").
optional string type = 40 [default = "SGD"];
// numerical stability for RMSProp, AdaGrad, AdaDelta, and Adam
optional float delta = 31 [default = 1e-8];
// parameters for the Adam solver
optional float momentum2 = 39 [default = 0.999];
// RMSProp decay value
// MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
optional float rms_decay = 38;
// If true, print information about the state of the net; may help debug learning problems.
optional bool debug_info = 23 [default = false];
// If false, don't save a snapshot after training finishes.
optional bool snapshot_after_train = 28 [default = true];
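// To tie the fields above together, a minimal solver.prototxt sketch (all paths
// and values here are illustrative, not prescriptive):
//   net: "examples/mnist/lenet_train_test.prototxt"
//   test_iter: 100
//   test_interval: 500
//   base_lr: 0.01
//   lr_policy: "step"
//   gamma: 0.1
//   stepsize: 5000
//   momentum: 0.9
//   weight_decay: 0.0005
//   display: 100
//   max_iter: 10000
//   snapshot: 5000
//   snapshot_prefix: "examples/mnist/lenet"
//   solver_mode: GPU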