From c2dea5a877611f43ea8f7100297305297fa19ef9 Mon Sep 17 00:00:00 2001 From: ranqiu Date: Mon, 18 Sep 2017 17:14:38 +0800 Subject: [PATCH 1/8] Update the annotation of layers.py --- .../paddle/trainer_config_helpers/layers.py | 67 +++++++++++-------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 4b1d80d3db..d4d165e5d9 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -5457,9 +5457,9 @@ def nce_layer(input, param_attr=[attr1, attr2], weight=layer3, num_classes=3, neg_distribution=[0.1,0.3,0.6]) - :param name: layer name + :param name: The name of this layer. :type name: basestring - :param input: input layers. It could be a LayerOutput of list/tuple of LayerOutput. + :param input: The input layers. It could be a LayerOutput of list/tuple of LayerOutput. :type input: LayerOutput|list|tuple|collections.Sequence :param label: label layer :type label: LayerOutput @@ -5477,7 +5477,9 @@ def nce_layer(input, A uniform distribution will be used if not provided. If not None, its length must be equal to num_classes. :type neg_distribution: list|tuple|collections.Sequence|None - :param bias_attr: Bias parameter attribute. True if no bias. + :param bias_attr: The Bias Attribute. If no bias, then pass False or + something not type of ParameterAttribute. None will get a + default Bias. :type bias_attr: ParameterAttribute|None|False :param layer_attr: Extra Layer Attribute. :type layer_attr: ExtraLayerAttribute @@ -5593,7 +5595,7 @@ def rank_cost(left, :param weight: The weight affects the cost, namely the scale of cost. It is an optional argument. :type weight: LayerOutput - :param name: The name of this layers. It is not necessary. + :param name: The name of this layer. It is not necessary. :type name: None|basestring :param coeff: The coefficient affects the gradient in the backward. 
:type coeff: float @@ -5647,7 +5649,7 @@ def lambda_cost(input, :param score: The 2nd input. Score of each sample. :type input: LayerOutput :param NDCG_num: The size of NDCG (Normalized Discounted Cumulative Gain), - e.g., 5 for NDCG@5. It must be less than for equal to the + e.g., 5 for NDCG@5. It must be less than or equal to the minimum size of lists. :type NDCG_num: int :param max_sort_size: The size of partial sorting in calculating gradient. @@ -5658,7 +5660,7 @@ def lambda_cost(input, than the size of a list, the algorithm will sort the entire list of get gradient. :type max_sort_size: int - :param name: The name of this layers. It is not necessary. + :param name: The name of this layer. It is not necessary. :type name: None|basestring :param layer_attr: Extra Layer Attribute. :type layer_attr: ExtraLayerAttribute @@ -5702,7 +5704,7 @@ def cross_entropy(input, :type input: LayerOutput. :param label: The input label. :type input: LayerOutput. - :param name: The name of this layers. It is not necessary. + :param name: The name of this layer. It is not necessary. :type name: None|basestring. :param coeff: The cost is multiplied with coeff. The coefficient affects the gradient in the backward. @@ -5750,7 +5752,7 @@ def cross_entropy_with_selfnorm(input, :type input: LayerOutput. :param label: The input label. :type input: LayerOutput. - :param name: The name of this layers. It is not necessary. + :param name: The name of this layer. It is not necessary. :type name: None|basestring. :param coeff: The coefficient affects the gradient in the backward. :type coeff: float. @@ -5790,7 +5792,7 @@ def sum_cost(input, name=None, layer_attr=None): :param input: The first input layer. :type input: LayerOutput. - :param name: The name of this layers. It is not necessary. + :param name: The name of this layer. It is not necessary. :type name: None|basestring. :param layer_attr: Extra Layer Attribute. 
:type layer_attr: ExtraLayerAttribute @@ -5835,7 +5837,7 @@ def huber_regression_cost(input, :type input: LayerOutput. :param label: The input label. :type input: LayerOutput. - :param name: The name of this layers. It is not necessary. + :param name: The name of this layer. It is not necessary. :type name: None|basestring. :param delta: The difference between the observed and predicted values. :type delta: float. @@ -5885,7 +5887,7 @@ def huber_classification_cost(input, :type input: LayerOutput. :param label: The input label. :type input: LayerOutput. - :param name: The name of this layers. It is not necessary. + :param name: The name of this layer. It is not necessary. :type name: None|basestring. :param coeff: The coefficient affects the gradient in the backward. :type coeff: float. @@ -5928,7 +5930,7 @@ def multi_binary_label_cross_entropy(input, :type input: LayerOutput :param label: The input label. :type input: LayerOutput - :param name: The name of this layers. It is not necessary. + :param name: The name of this layer. It is not necessary. :type name: None|basestring :param coeff: The coefficient affects the gradient in the backward. :type coeff: float @@ -6033,9 +6035,9 @@ def cross_entropy_over_beam(input, name=None): ]) - :param input: input beams for this layer. + :param input: Input beams for this layer. :type input: BeamInput - :param name: input beams for this layer. + :param name: The name of this layer. :type name: basestring :return: LayerOutput object. :rtype: LayerOutput @@ -6175,12 +6177,21 @@ def multiplex_layer(input, name=None, layer_attr=None): @wrap_name_default("dropout") def dropout_layer(input, dropout_rate, name=None): """ - @TODO(yuyang18): Add comments. - :param name: - :param input: - :param dropout_rate: - :return: + The example usage is: + + .. code-block:: python + + dropout = dropout_layer(input=input_layer, dropout_rate=0.5) + + :param name: The name of this layer. + :type name: basestring + :param input: The input layer. 
+ :type input: LayerOutput + :param dropout_rate: The probability of dropout. + :type dropout_rate: float + :return: LayerOutput object. + :rtype: LayerOutput """ return addto_layer( name=name, @@ -6203,7 +6214,7 @@ def row_conv_layer(input, """ The row convolution is called lookahead convolution. It is firstly - introduced in paper of `Deep Speech 2: End-toEnd Speech Recognition + introduced in paper of `Deep Speech 2: End-to-End Speech Recognition in English and Mandarin `_ . The bidirectional RNN that learns representation for a sequence by @@ -6211,9 +6222,9 @@ def row_conv_layer(input, However, unlike unidirectional RNNs, bidirectional RNNs are challenging to deploy in an online and low-latency setting. The lookahead convolution incorporates information from future subsequences in a computationally - efficient manner to improve unidirectional recurrent neural networks. + efficient manner to improve unidirectional RNNs. - The connection of row convolution is different form the 1D sequence + The connection of row convolution is different from the 1D sequence convolution. Assumed that, the future context-length is k, that is to say, it can get the output at timestep t by using the the input feature from t-th timestep to (t+k+1)-th timestep. Assumed that the hidden dim of input @@ -6242,7 +6253,7 @@ def row_conv_layer(input, :param act: Activation Type. Default is linear activation. :type act: BaseActivation :param param_attr: The Parameter Attribute. If None, the parameter will be - initialized smartly. It's better set it by yourself. + initialized smartly. It's better to set it by yourself. :type param_attr: ParameterAttribute :param layer_attr: Extra Layer config. :type layer_attr: ExtraLayerAttribute|None @@ -6342,7 +6353,7 @@ def gated_unit_layer(input, The gated unit layer implements a simple gating mechanism over the input. The input :math:`X` is first projected into a new space :math:`X'`, and it is also used to produce a gate weight :math:`\sigma`. 
Element-wise - prodict between :match:`X'` and :math:`\sigma` is finally returned. + product between :math:`X'` and :math:`\sigma` is finally returned. Reference: Language Modeling with Gated Convolutional Networks @@ -6440,8 +6451,8 @@ def switch_order_layer(input, :type input: LayerOutput :param name: Name of this layer. :type name: basestring - :param reshape: reshape matrix by axises. - :type reshape: Dict + :param reshape_axis: Specify the axises of 'height'. Its value should be positive and less than 4. + :type reshape_axis: int :return: LayerOutput object. :rtype: LayerOutput """ @@ -6869,7 +6880,9 @@ def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None): :type input: LayerOutput. :param param_attr: The parameter attribute of scaling. :type param_attr: ParameterAttribute - :param bias_attr: The parameter attribute of shifting. + :param bias_attr: The Bias Attribute. If no bias, then pass False or + something not type of ParameterAttribute. None will get a + default Bias. :type bias_attr: ParameterAttribute :return: LayerOutput object. :rtype: LayerOutput From a0187f1c55e2522d5a8357027888a9870126a81f Mon Sep 17 00:00:00 2001 From: ranqiu Date: Mon, 18 Sep 2017 21:08:16 +0800 Subject: [PATCH 2/8] Update the annotation about bias_attr of layers.py --- .../paddle/trainer_config_helpers/layers.py | 168 +++++++++++------- 1 file changed, 102 insertions(+), 66 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 0983d2c3e6..a6fbff9ca4 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -781,11 +781,11 @@ class MixedLayerType(LayerOutput): :type size: int :param act: activation type. :type act: BaseActivation - :param bias_attr: The Bias Attribute. If no bias, then pass False or - something not type of ParameterAttribute. None will - get a default Bias.
- :type bias_attr: ParameterAttribute or None means has bias. Any other - type means no bias. + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param layer_attr: Extra Layer Attribute. :type layer_attr: ExtraLayerAttribute or None """ @@ -881,10 +881,11 @@ def mixed_layer(size=0, then this function will just return layer's name. :param act: Activation Type. :type act: BaseActivation - :param bias_attr: The Bias Attribute. If no bias, then pass False or - something not type of ParameterAttribute. None will get a - default Bias. - :type bias_attr: ParameterAttribute or None or bool + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param layer_attr: The extra layer config. Default is None. :type layer_attr: ExtraLayerAttribute :return: MixedLayerType object can add inputs or layer name. @@ -1025,10 +1026,11 @@ def fc_layer(input, :type act: BaseActivation :param param_attr: The Parameter Attribute|list. :type param_attr: ParameterAttribute - :param bias_attr: The Bias Attribute. If no bias, then pass False or - something not type of ParameterAttribute. None will get a - default Bias. - :type bias_attr: ParameterAttribute|None|Any + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param layer_attr: Extra Layer config. :type layer_attr: ExtraLayerAttribute|None :return: LayerOutput object. 
@@ -1373,8 +1375,11 @@ def pooling_layer(input, :type pooling_type: BasePoolingType|None :param stride: The step size between successive pooling regions. :type stride: Int - :param bias_attr: Bias parameter attribute. False if no bias. - :type bias_attr: ParameterAttribute|None|False + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param layer_attr: The Extra Attributes for layer, such as dropout. :type layer_attr: ExtraLayerAttribute|None :return: LayerOutput object. @@ -1471,10 +1476,11 @@ def lstmemory(input, :type gate_act: BaseActivation :param state_act: state activation type, TanhActivation by default. :type state_act: BaseActivation - - :param bias_attr: Bias attribute. None means default bias. False means no - bias. - :type bias_attr: ParameterAttribute|None|False + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param param_attr: Parameter Attribute. :type param_attr: ParameterAttribute|None|False :param layer_attr: Extra Layer attribute @@ -1596,9 +1602,11 @@ def grumemory(input, This activation affects the :math:`z_t` and :math:`r_t`. It is the :math:`\\sigma` in the above formula. :type gate_act: BaseActivation - :param bias_attr: Bias attribute. None means default bias. False means no - bias. - :type bias_attr: ParameterAttribute|None|False + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. 
+ :type bias_attr: ParameterAttribute|None|Bool|Any :param param_attr: Parameter Attribute. :type param_attr: ParameterAttribute|None|False :param layer_attr: Extra Layer attribute @@ -1794,9 +1802,11 @@ def expand_layer(input, :type expand_as: LayerOutput :param name: Layer name. :type name: basestring - :param bias_attr: Bias attribute. None means default bias. False means no - bias. - :type bias_attr: ParameterAttribute|None|False + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param expand_level: whether input layer is timestep(default) or sequence. :type expand_level: ExpandLevel :param layer_attr: extra layer attributes. @@ -1914,10 +1924,11 @@ def seq_reshape_layer(input, :type act: BaseActivation :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. - :param bias_attr: The Bias Attribute. If no bias, then pass False or - something not type of ParameterAttribute. None will get a - default Bias. - :type bias_attr: ParameterAttribute or None or bool + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :return: LayerOutput object. :rtype: LayerOutput """ @@ -2301,9 +2312,11 @@ def hsigmoid(input, :type num_classes: int|None :param name: layer name :type name: basestring - :param bias_attr: Bias attribute. None means default bias. - False means no bias. - :type bias_attr: ParameterAttribute|False + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. 
+ :type bias_attr: ParameterAttribute|None|Bool|Any :param param_attr: Parameter Attribute. None means default parameter. :type param_attr: ParameterAttribute|None :param layer_attr: Extra Layer Attribute. @@ -2442,9 +2455,11 @@ def img_conv_layer(input, :type dilation: int|tuple|list :param dilation_y: The y dimension of the dilation. :type dilation_y: int - :param bias_attr: Convolution bias attribute. None means default bias. - False means no bias. - :type bias_attr: ParameterAttribute|False + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param num_channels: number of input channels. If None will be set automatically from previous output. :type num_channels: int @@ -3016,7 +3031,7 @@ def batch_norm_layer(input, :type num_channels: int :param bias_attr: :math:`\\beta`, better be zero when initialize. So the initial_std=0, initial_mean=1 is best practice. - :type bias_attr: ParameterAttribute + :type bias_attr: ParameterAttribute|None|Bool|Any :param param_attr: :math:`\\gamma`, better be one when initialize. So the initial_std=0, initial_mean=1 is best practice. :type param_attr: ParameterAttribute @@ -3186,9 +3201,11 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): :type input: LayerOutput|list|tuple :param act: Activation Type, default is tanh. :type act: BaseActivation - :param bias_attr: Bias attribute. If False, means no bias. None is default - bias. - :type bias_attr: ParameterAttribute|bool + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param layer_attr: Extra Layer attribute. 
:type layer_attr: ExtraLayerAttribute :return: LayerOutput object. @@ -3340,10 +3357,11 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, :type act: BaseActivation :param layer_attr: Extra Layer Attribute. :type layer_attr: ExtraLayerAttribute - :param bias_attr: The Bias Attribute. If no bias, then pass False or - something not type of ParameterAttribute. None will get a - default Bias. - :type bias_attr: ParameterAttribute or None or bool + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :return: LayerOutput object. :rtype: LayerOutput """ @@ -3524,8 +3542,11 @@ def lstm_step_layer(input, :param state_act: State Activation Type. Default is sigmoid, and should be sigmoid only. :type state_act: BaseActivation - :param bias_attr: Bias Attribute. - :type bias_attr: ParameterAttribute + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param layer_attr: layer's extra attribute. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. @@ -3578,7 +3599,11 @@ def gru_step_layer(input, :param act: :param name: :param gate_act: - :param bias_attr: + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param param_attr: the parameter_attribute for transforming the output_mem from previous step. 
:param layer_attr: @@ -3635,7 +3660,11 @@ def gru_step_naive_layer(input, :param name: :param act: :param gate_act: - :param bias_attr: + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param param_attr: :param layer_attr: :return: @@ -3757,8 +3786,11 @@ def recurrent_layer(input, :type input: LayerOutput :param act: activation. :type act: BaseActivation - :param bias_attr: bias attribute. - :type bias_attr: ParameterAttribute + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param param_attr: parameter attribute. :type param_attr: ParameterAttribute :param name: name of the layer @@ -4747,10 +4779,11 @@ def tensor_layer(a, :type act: BaseActivation :param param_attr: The Parameter Attribute. :type param_attr: ParameterAttribute - :param bias_attr: The Bias Attribute. If no bias, then pass False or - something not type of ParameterAttribute. None will get a - default Bias. - :type bias_attr: ParameterAttribute|None|Any + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param layer_attr: Extra Layer config. :type layer_attr: ExtraLayerAttribute|None :return: LayerOutput object. @@ -4811,10 +4844,11 @@ def selective_fc_layer(input, :type act: BaseActivation :param param_attr: The Parameter Attribute. :type param_attr: ParameterAttribute - :param bias_attr: The Bias Attribute. 
If no bias, then pass False or - something not type of ParameterAttribute. None will get a - default Bias. - :type bias_attr: ParameterAttribute|None|Any + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param layer_attr: Extra Layer config. :type layer_attr: ExtraLayerAttribute|None :return: LayerOutput object. @@ -5478,10 +5512,11 @@ def nce_layer(input, A uniform distribution will be used if not provided. If not None, its length must be equal to num_classes. :type neg_distribution: list|tuple|collections.Sequence|None - :param bias_attr: The Bias Attribute. If no bias, then pass False or - something not type of ParameterAttribute. None will get a - default Bias. - :type bias_attr: ParameterAttribute|None|False + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :param layer_attr: Extra Layer Attribute. :type layer_attr: ExtraLayerAttribute :return: layer name. @@ -6763,7 +6798,7 @@ def img_conv3d_layer(input, :type padding: int|tuple|list :param bias_attr: Convolution bias attribute. None means default bias. False means no bias. - :type bias_attr: ParameterAttribute|False + :type bias_attr: ParameterAttribute|None|Bool|Any :param num_channels: number of input channels. If None will be set automatically from previous output. :type num_channels: int @@ -6881,10 +6916,11 @@ def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None): :type input: LayerOutput. :param param_attr: The parameter attribute of scaling. :type param_attr: ParameterAttribute - :param bias_attr: The Bias Attribute. 
If no bias, then pass False or - something not type of ParameterAttribute. None will get a - default Bias. - :type bias_attr: ParameterAttribute + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute|None|Bool|Any :return: LayerOutput object. :rtype: LayerOutput """ From 716e020e8084897bfff10c61e2583cf4bf8b1b42 Mon Sep 17 00:00:00 2001 From: ranqiu Date: Tue, 19 Sep 2017 17:36:33 +0800 Subject: [PATCH 3/8] Update faq about PaddlePaddle params --- doc/faq/index_cn.rst | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/doc/faq/index_cn.rst b/doc/faq/index_cn.rst index 138efb566e..119e037aa2 100644 --- a/doc/faq/index_cn.rst +++ b/doc/faq/index_cn.rst @@ -321,3 +321,53 @@ pip uninstall py_paddle paddle 然后安装paddle的python环境, 在build目录下执行 pip install python/dist/paddle*.whl && pip install ../paddle/dist/py_paddle*.whl + +16. 如何加载预训练embedding参数 +------------------------------ + +设置embedding的参数属性 :code:`is_static=True`,使embedding参数在训练过程中保持不变,在创建parameters后,使用 :code:`parameters.set()` 加载预训练参数。 + +.. code-block:: python + + def load_parameter(file_name, h, w): + with open(file_name, 'rb') as f: + f.read(16) # skip header. + return np.fromfile(f, dtype=np.float32).reshape(h, w) + + + emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True) + paddle.layer.embedding(size=word_dim, input=x, param_attr=emb_para) + + + parameters = paddle.parameters.create(my_cost) + parameters.set('emb', load_parameter(emb_param_file, 30000, 256)) + + +17. PaddlePaddle存储的参数格式是什么,如何和明文进行相互转化 +--------------------------------------------------------- + +PaddlePaddle保存的二进制参数文件内容由16字节头信息和网络参数两部分组成。头信息中,第1~4字节固定为0,第5~8字节为4(使用double精度时为8),第9~16字节记录共有多少个数值。 + +将PaddlePaddle保存的二进制参数还原回明文时,先跳过PaddlePaddle模型参数文件的头信息,再提取网络参数,示例如下: + +..
code-block:: python + + def read_parameter(fname, width): + s = open(fname).read() + # skip header + vec = np.fromstring(s[16:], dtype=np.float32) + # width is the size of the corresponding layer + np.savetxt(fname + ".csv", vec.reshape(width, -1), + fmt="%.6f", delimiter=",") + + +将明文参数转化为PaddlePaddle可加载的模型参数时,先根据参数规模写入头信息,再写入具体网络参数。以下为将随机生成的矩阵转化为PaddlePaddle可加载的模型参数示例: + +.. code-block:: python + + def gen_rand_param(param_file, width, height, need_trans): + np.random.seed() + header = struct.pack("iil", 0, 4, height * width) + param = np.float32(np.random.rand(height, width)) + with open(param_file, "w") as fparam: + fparam.write(header + param.tostring()) From fe2c5936d985a41528554445212e638a90c3120a Mon Sep 17 00:00:00 2001 From: ranqiu Date: Tue, 19 Sep 2017 19:15:22 +0800 Subject: [PATCH 4/8] Update annotation of layers.py --- python/paddle/trainer_config_helpers/layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 53aec16906..94769b40f2 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -6726,7 +6726,7 @@ def kmax_seq_score_layer(input, name=None, beam_size=1): :param input: The input layer. It stores scores over a sequence or a nested sequence and its size must be 1. :type input: LayerOutput. - :param beam_size: squence indices with top beam_size scores are returned. + :param beam_size: sequence indices with top beam_size scores are returned. :type beam_size: double :return: LayerOutput object. 
:rtype: LayerOutput From 62377fd1f3c09bcac8b49f970891a319c0055eeb Mon Sep 17 00:00:00 2001 From: ranqiu Date: Tue, 19 Sep 2017 20:17:05 +0800 Subject: [PATCH 5/8] Update annotations about layer name of layers.py --- .../paddle/trainer_config_helpers/layers.py | 128 +++++++++--------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 94769b40f2..c97e6c0a36 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -921,7 +921,7 @@ def data_layer(name, size, depth=None, height=None, width=None, data = data_layer(name="input", size=1000) - :param name: Name of this data layer. + :param name: The name of this layer. It is optional. :type name: basestring :param size: Size of this data layer. :type size: int @@ -961,7 +961,7 @@ def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None): """ Define a embedding Layer. - :param name: Name of this embedding layer. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layer for this embedding. NOTE: must be Index Data. :type input: LayerOutput @@ -1016,7 +1016,7 @@ def fc_layer(input, with mixed_layer(size=1024) as fc: fc += full_matrix_projection(input=layer) - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layer. Could be a list/tuple of input layer. :type input: LayerOutput|list|tuple @@ -1067,7 +1067,7 @@ def printer_layer(input, format=None, name=None): """ Print the output value of input layers. This layer is useful for debugging. - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layer. Could be a list/tuple of input layer. 
:type input: LayerOutput|list|tuple @@ -1105,7 +1105,7 @@ def priorbox_layer(input, """ Compute the priorbox and set the variance. This layer is necessary for ssd. - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layer. :type input: LayerOutput @@ -1154,7 +1154,7 @@ def multibox_loss_layer(input_loc, """ Compute the location loss and the confidence loss for ssd. - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param input_loc: The input predict locations. :type input_loc: LayerOutput | List of LayerOutput @@ -1229,7 +1229,7 @@ def detection_output_layer(input_loc, box location. The output's shape of this layer could be zero if there is no valid bounding box. - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param input_loc: The input predict locations. :type input_loc: LayerOutput | List of LayerOutput. @@ -1301,7 +1301,7 @@ def cross_channel_norm_layer(input, name=None, param_attr=None): a conv layer's output and scale the output by a group of trainable factors which dimensions equal to the channel's number. - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layer. :type input: LayerOutput @@ -1366,7 +1366,7 @@ def pooling_layer(input, :param agg_level: AggregateLevel.TO_NO_SEQUENCE or AggregateLevel.TO_SEQUENCE :type agg_level: AggregateLevel - :param name: layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: input layer name. :type input: LayerOutput @@ -1665,7 +1665,7 @@ def last_seq(input, seq = last_seq(input=layer) :param agg_level: Aggregated level - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: Input layer name. 
:type input: LayerOutput @@ -1721,7 +1721,7 @@ def first_seq(input, seq = first_seq(input=layer) :param agg_level: aggregation level - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: Input layer name. :type input: LayerOutput @@ -1800,7 +1800,7 @@ def expand_layer(input, :type input: LayerOutput :param expand_as: Expand as this layer's sequence info. :type expand_as: LayerOutput - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param bias_attr: The Bias Attribute. If the parameter is set to False or something not type of ParameterAttribute, @@ -1859,7 +1859,7 @@ def repeat_layer(input, :type input: LayerOutput :param num_repeats: Repeat the input so many times :type num_repeats: int - :param name: Layer name. + :param name: The name of this layer. It is optional. :param as_row_vector: True for treating input as row vector and repeating in the column direction. This is equivalent to apply concat_layer() with num_repeats same input. @@ -1918,7 +1918,7 @@ def seq_reshape_layer(input, :type input: LayerOutput :param reshape_size: the size of reshaped sequence. :type reshape_size: int - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param act: Activation type. :type act: BaseActivation @@ -1971,7 +1971,7 @@ def interpolation_layer(input, weight, name=None, layer_attr=None): :type input: list|tuple :param weight: Weight layer. :type weight: LayerOutput - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. @@ -2076,7 +2076,7 @@ def power_layer(input, weight, name=None, layer_attr=None): :type input: LayerOutput :param weight: Weight layer. :type weight: LayerOutput - :param name: Layer name. + :param name: The name of this layer. It is optional. 
:type name: basestring :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. @@ -2120,7 +2120,7 @@ def scaling_layer(input, weight, name=None, layer_attr=None): :type input: LayerOutput :param weight: Weight layer. :type weight: LayerOutput - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. @@ -2158,7 +2158,7 @@ def trans_layer(input, name=None, layer_attr=None): :param input: Input layer. :type input: LayerOutput - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. @@ -2198,7 +2198,7 @@ def rotate_layer(input, height, width, name=None, layer_attr=None): :type input: LayerOutput :param height: The height of the sample matrix :type height: int - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. @@ -2243,7 +2243,7 @@ def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None): cos = cos_sim(a=layer1, b=layer2, size=3) - :param name: layer name + :param name: The name of this layer. It is optional. :type name: basestring :param a: input layer a :type a: LayerOutput @@ -2310,7 +2310,7 @@ def hsigmoid(input, :type label: LayerOutput :param num_classes: number of classes. :type num_classes: int|None - :param name: layer name + :param name: The name of this layer. It is optional. :type name: basestring :param bias_attr: The Bias Attribute. If the parameter is set to False or something not type of ParameterAttribute, @@ -2424,7 +2424,7 @@ def img_conv_layer(input, bias_attr=False, act=ReluActivation()) - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: Layer Input. 
:type input: LayerOutput @@ -2850,7 +2850,7 @@ def spp_layer(input, num_channels=16, pool_type=MaxPooling()) - :param name: layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: layer's input. :type input: LayerOutput @@ -2944,7 +2944,7 @@ def img_cmrnorm_layer(input, norm = img_cmrnorm_layer(input=net, size=5) - :param name: layer name. + :param name: The name of this layer. It is optional. :type name: None|basestring :param input: layer's input. :type input: LayerOutput @@ -3007,7 +3007,7 @@ def batch_norm_layer(input, norm = batch_norm_layer(input=net, act=ReluActivation()) - :param name: layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: batch normalization input. Better be linear activation. Because there is an activation inside batch_normalization. @@ -3106,7 +3106,7 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None): :param input: Input layer. :type input: LayerOutput - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. @@ -3142,7 +3142,7 @@ def row_l2_norm_layer(input, name=None, layer_attr=None): :param input: Input layer. :type input: LayerOutput - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. @@ -3194,7 +3194,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): dropout here. Please refer to dropout_layer for details. - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: Input layers. It could be a LayerOutput or list/tuple of LayerOutput. 
@@ -3254,7 +3254,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): concat = concat_layer(input=[layer1, layer2]) - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: input layers or projections :type input: list|tuple|collections.Sequence @@ -3347,7 +3347,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, concat = seq_concat_layer(a=layer1, b=layer2) - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param a: input sequence layer :type a: LayerOutput @@ -3524,7 +3524,7 @@ def lstm_step_layer(input, output is :math:`o_t`, whose name is 'state' and can use :code:`get_output_layer` to extract this output. - :param name: Layer's name. + :param name: The name of this layer. It is optional. :type name: basestring :param size: Layer's size. NOTE: lstm layer's size, should be equal to :code:`input.size/4`, and should be equal to @@ -3597,7 +3597,7 @@ def gru_step_layer(input, :param output_mem: :param size: :param act: - :param name: + :param name: The name of this layer. It is optional. :param gate_act: :param bias_attr: The Bias Attribute. If the parameter is set to False or something not type of ParameterAttribute, @@ -3657,7 +3657,7 @@ def gru_step_naive_layer(input, :param input: :param output_mem: :param size: - :param name: + :param name: The name of this layer. It is optional. :param act: :param gate_act: :param bias_attr: The Bias Attribute. If the parameter is set to @@ -3720,7 +3720,7 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None): output besides the default one, please use get_output_layer first to get the output from input. - :param name: Layer's name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: get output layer's input. And this layer should contains multiple outputs. 
@@ -3793,7 +3793,7 @@ def recurrent_layer(input, :type bias_attr: ParameterAttribute|None|Bool|Any :param param_attr: parameter attribute. :type param_attr: ParameterAttribute - :param name: name of the layer + :param name: The name of this layer. It is optional. :type name: basestring :param layer_attr: Layer Attribute. :type layer_attr: ExtraLayerAttribute @@ -4032,7 +4032,7 @@ def maxid_layer(input, name=None, layer_attr=None): :param input: Input layer name. :type input: LayerOutput - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. @@ -4065,7 +4065,7 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None): out_prod = out_prod_layer(input1=vec1, input2=vec2) - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input1: The first input layer name. :type input: LayerOutput @@ -4106,7 +4106,7 @@ def eos_layer(input, eos_id, name=None, layer_attr=None): eos = eos_layer(input=layer, eos_id=id) - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: Input layer name. :type input: LayerOutput @@ -4297,7 +4297,7 @@ def square_error_cost(input, cost = \\sum_{i=1}^N(t_i-y_i)^2 - :param name: layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: Network prediction. :type input: LayerOutput @@ -4339,7 +4339,7 @@ def classification_cost(input, """ classification cost Layer. - :param name: layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: input layer name. network output. :type input: LayerOutput @@ -4643,7 +4643,7 @@ def pad_layer(input, :type pad_w: list|None :param layer_attr: Extra Layer Attribute. :type layer_attr: ExtraLayerAttribute - :param name: layer name. + :param name: The name of this layer. 
It is optional. :type name: basestring :return: LayerOutput object. :rtype: LayerOutput @@ -4711,7 +4711,7 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): conv_shift = conv_shift_layer(a=layer1, b=layer2) - :param name: layer name + :param name: The name of this layer. It is optional. :type name: basestring :param a: Input layer a. :type a: LayerOutput @@ -4767,7 +4767,7 @@ def tensor_layer(a, tensor = tensor_layer(a=layer1, b=layer2, size=1000) - :param name: layer name + :param name: The name of this layer. It is optional. :type name: basestring :param a: Input layer a. :type a: LayerOutput @@ -4830,7 +4830,7 @@ def selective_fc_layer(input, sel_fc = selective_fc_layer(input=input, size=128, act=TanhActivation()) - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layer. :type input: LayerOutput|list|tuple @@ -4904,7 +4904,7 @@ def sampling_id_layer(input, name=None, layer_attr=None): :param input: The input layer. :type input: LayerOutput - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param layer_attr: Extra Layer config. :type layer_attr: ExtraLayerAttribute|None @@ -4942,7 +4942,7 @@ def slope_intercept_layer(input, :param input: The input layer. :type input: LayerOutput - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param slope: the scale factor. :type slope: float. @@ -5006,7 +5006,7 @@ def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None): :type vectors: LayerOutput :param size: the dimension of this layer. :type size: int - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param layer_attr: Extra Layer config. 
:type layer_attr: ExtraLayerAttribute|None @@ -5089,7 +5089,7 @@ def block_expand_layer(input, :type padding_x: int :param padding_y: The padding size in vertical direction. :type padding_y: int - :param name: The name of this layer, which can not specify. + :param name: The name of this layer. It is optional. :type name: None|basestring. :param layer_attr: Extra Layer config. :type layer_attr: ExtraLayerAttribute|None @@ -5158,7 +5158,7 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): :type num_channels: int|None :param groups: The group number of input layer. :type groups: int - :param name: The name of this layer, which can not specify. + :param name: The name of this layer. It is optional. :type name: None|basestring. :param layer_attr: Extra Layer attribute. :type layer_attr: ExtraLayerAttribute @@ -5222,7 +5222,7 @@ def ctc_layer(input, :type label: LayerOutput :param size: category numbers + 1. :type size: int - :param name: The name of this layer + :param name: The name of this layer. It is optional. :type name: basestring|None :param norm_by_times: Whether to normalize by times. False by default. :type norm_by_times: bool @@ -5299,7 +5299,7 @@ def warp_ctc_layer(input, :type label: LayerOutput :param size: category numbers + 1. :type size: int - :param name: The name of this layer, which can not specify. + :param name: The name of this layer. It is optional. :type name: basestring|None :param blank: the 'blank' label used in ctc :type blank: int @@ -5492,7 +5492,7 @@ def nce_layer(input, param_attr=[attr1, attr2], weight=layer3, num_classes=3, neg_distribution=[0.1,0.3,0.6]) - :param name: The name of this layer. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layers. It could be a LayerOutput or list/tuple of LayerOutput. 
:type input: LayerOutput|list|tuple|collections.Sequence @@ -6182,7 +6182,7 @@ def multiplex_layer(input, name=None, layer_attr=None): :param input: Input layers. :type input: list of LayerOutput - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. @@ -6220,7 +6220,7 @@ def dropout_layer(input, dropout_rate, name=None): dropout = dropout_layer(input=input_layer, dropout_rate=0.5) - :param name: The name of this layer. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layer. :type input: LayerOutput @@ -6336,7 +6336,7 @@ def prelu_layer(input, prelu = prelu_layer(input=layers, partial_sum=1) - :param name: Name of this layer. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layer. :type input: LayerOutput @@ -6409,7 +6409,7 @@ def gated_unit_layer(input, :type size: int :param act: activation type of the projected input. :type act: BaseActivation - :param name: name of this layer. + :param name: The name of this layer. It is optional. :type name: basestring :param gate_attr: Attributes to tune the gate output, for example, error clipping threshold, dropout and so on. See ExtraLayerAttribute for @@ -6485,7 +6485,7 @@ def switch_order_layer(input, :param input: The input layer. :type input: LayerOutput - :param name: Name of this layer. + :param name: The name of this layer. It is optional. :type name: basestring :param reshape_axis: Specify the axes of 'height'. Its value should be positive and less than 4. :type reshape_axis: int @@ -6538,7 +6538,7 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): :type partial_sum: int :param shape: The shape to be cropped. Default is None. :type shape: Sequence | None - :param name: Name of this layer. + :param name: The name of this layer. It is optional. 
:type name: basestring :return: LayerOutput object. :rtype: LayerOutput @@ -6584,7 +6584,7 @@ def sub_nested_seq_layer(input, selected_indices, name=None): :type input: LayerOutput :param selected_indices: a set of sequence indices in the nested sequence. :type input: LayerOutput - :param name: name of this layer. + :param name: The name of this layer. It is optional. :type name: basestring :return: LayerOutput object. :rtype: LayerOutput @@ -6622,7 +6622,7 @@ def clip_layer(input, min, max, name=None): clip = clip_layer(input=input_layer, min=-10, max=10) - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layer. :type input: LayerOutput. @@ -6667,7 +6667,7 @@ def seq_slice_layer(input, starts, ends, name=None): seq_silce = seq_slice_layer(input=input_seq, starts=start_pos, ends=end_pos) - :param name: name of this layer. + :param name: The name of this layer. It is optional. :type name: basestring :param input: input for this layer, it should be a sequence. :type input: LayerOutput @@ -6721,7 +6721,7 @@ def kmax_seq_score_layer(input, name=None, beam_size=1): kmax_indices = kmax_seq_score_layer(input=input_layer, beam_size) - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layer. It stores scores over a sequence or a nested sequence and its size must be 1. @@ -6779,7 +6779,7 @@ def img_conv3d_layer(input, bias_attr=False, act=ReluActivation()) - :param name: Layer name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: Layer Input. :type input: LayerOutput @@ -6910,7 +6910,7 @@ def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None): scale_shift = scale_shift_layer(input=input_layer, bias_attr=False) - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layer. 
:type input: LayerOutput. From e635e3fd2be4313b2e2e1491f54f95468899ebe7 Mon Sep 17 00:00:00 2001 From: ranqiu Date: Wed, 20 Sep 2017 14:55:49 +0800 Subject: [PATCH 6/8] Update faq of the doc --- doc/faq/index_cn.rst | 49 ++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/doc/faq/index_cn.rst b/doc/faq/index_cn.rst index 119e037aa2..3a7dc4a93c 100644 --- a/doc/faq/index_cn.rst +++ b/doc/faq/index_cn.rst @@ -322,33 +322,12 @@ pip uninstall py_paddle paddle pip install python/dist/paddle*.whl && pip install ../paddle/dist/py_paddle*.whl -16. 如何加载预训练embedding参数 ------------------------------- - -设置embedding的参数属性 :code:`is_static=True`,使embedding参数在训练过程中保持不变,在创建parameters后,使用 :code:`parameters.set()` 加载预训练参数。 - -.. code-block:: python - - def load_parameter(file_name, h, w): - with open(file_name, 'rb') as f: - f.read(16) # skip header. - return np.fromfile(f, dtype=np.float32).reshape(h, w) - - - emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True) - paddle.layer.embedding(size=word_dim, input=x, param_attr=emb_para) - - - parameters = paddle.parameters.create(my_cost) - parameters.set('emb', load_parameter(emb_param_file, 30000, 256)) - - -17. PaddlePaddle存储的参数格式是什么,如何和明文进行相互转化 +16. PaddlePaddle存储的参数格式是什么,如何和明文进行相互转化 --------------------------------------------------------- -PaddlePaddle保存的二进制参数文件内容由16位头信息和网络参数两部分组成。头信息中,第一位固定为0,第二位为4,在使用double精度时,第二位为8,第三位记录共有多少个数值。 +PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数两部分组成。头信息中,1~4字节表示PaddlePaddle版本信息;5~8字节表示每个参数占用的字节数,当保存的网络参数为float类型时为4,double类型时为8;9~16字节表示保存的参数总个数。 -将PaddlePaddle保存的二进制参数还原回明文时,先跳过PaddlePaddle模型参数文件的头信息,再提取网络参数,示例如下: +将PaddlePaddle保存的模型参数还原回明文时,可以使用相应数据类型的 :code:`numpy.array` 加载具体网络参数,此时需要跳过PaddlePaddle模型参数文件的头信息。一般情况下,PaddlePaddle保存的模型参数数据类型为float,所以在使用 :code:`numpy.array` 时一般设置 :code:`dtype=float32` 。示例如下: .. 
code-block:: python @@ -361,7 +340,7 @@ PaddlePaddle保存的二进制参数文件内容由16位头信息和网络参数 fmt="%.6f", delimiter=",") -将明文参数转化为PaddlePaddle可加载的模型参数时,先根据参数规模写入头信息,再写入具体网络参数。以下为将随机生成的矩阵转化为PaddlePaddle可加载的模型参数示例: +将明文参数转化为PaddlePaddle可加载的模型参数时,先根据数据类型和参数规模写入头信息,再写入具体网络参数。以下为将随机生成的矩阵转化为PaddlePaddle可加载的模型参数示例: .. code-block:: python @@ -371,3 +350,23 @@ PaddlePaddle保存的二进制参数文件内容由16位头信息和网络参数 param = np.float32(np.random.rand(height, width)) with open(param_file, "w") as fparam: fparam.write(header + param.tostring()) + +17. 如何加载预训练embedding参数 +------------------------------ + +设置embedding的参数属性 :code:`is_static=True`,使embedding参数在训练过程中保持不变,从模型文件将预训练参数载入 :code:`numpy.array`,在创建parameters后,使用 :code:`parameters.set()` 加载预训练参数。PaddlePaddle保存的模型参数文件前16字节为头信息,用户将参数载入 :code:`numpy.array` 时须从第17字节开始。 + +.. code-block:: python + + def load_parameter(file_name, h, w): + with open(file_name, 'rb') as f: + f.read(16) # skip header. + return np.fromfile(f, dtype=np.float32).reshape(h, w) + + + emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True) + paddle.layer.embedding(size=word_dim, input=x, param_attr=emb_para) + + + parameters = paddle.parameters.create(my_cost) + parameters.set('emb', load_parameter(emb_param_file, 30000, 256)) From cabd643438418dda2b14315a8f30f5f7246e3796 Mon Sep 17 00:00:00 2001 From: ranqiu Date: Wed, 20 Sep 2017 15:58:03 +0800 Subject: [PATCH 7/8] Update faq of doc --- doc/faq/index_cn.rst | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/doc/faq/index_cn.rst b/doc/faq/index_cn.rst index 3a7dc4a93c..209dac6989 100644 --- a/doc/faq/index_cn.rst +++ b/doc/faq/index_cn.rst @@ -325,9 +325,9 @@ pip install python/dist/paddle*.whl && pip install ../paddle/dist/py_paddle*.whl 16. 
PaddlePaddle存储的参数格式是什么,如何和明文进行相互转化 --------------------------------------------------------- -PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数两部分组成。头信息中,1~4字节表示PaddlePaddle版本信息;5~8字节表示每个参数占用的字节数,当保存的网络参数为float类型时为4,double类型时为8;9~16字节表示保存的参数总个数。 +PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数两部分组成。头信息中,1~4字节表示PaddlePaddle版本信息,在多数情况下,可以直接填充0;5~8字节表示每个参数占用的字节数,当保存的网络参数为float类型时为4,double类型时为8;9~16字节表示保存的参数总个数。 -将PaddlePaddle保存的模型参数还原回明文时,可以使用相应数据类型的 :code:`numpy.array` 加载具体网络参数,此时需要跳过PaddlePaddle模型参数文件的头信息。一般情况下,PaddlePaddle保存的模型参数数据类型为float,所以在使用 :code:`numpy.array` 时一般设置 :code:`dtype=float32` 。示例如下: +将PaddlePaddle保存的模型参数还原回明文时,可以使用相应数据类型的 :code:`numpy.array` 加载具体网络参数,此时可以跳过PaddlePaddle模型参数文件的头信息。若在PaddlePaddle编译时,未指定按照double精度编译,默认情况下按照float精度计算,保存的参数也是float类型。这时在使用 :code:`numpy.array` 时,一般设置 :code:`dtype=float32` 。示例如下: .. code-block:: python @@ -340,7 +340,7 @@ PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数 fmt="%.6f", delimiter=",") -将明文参数转化为PaddlePaddle可加载的模型参数时,先根据数据类型和参数规模写入头信息,再写入具体网络参数。以下为将随机生成的矩阵转化为PaddlePaddle可加载的模型参数示例: +将明文参数转化为PaddlePaddle可加载的模型参数时,首先构造头信息,再写入网络参数。下面将随机生成的矩阵转化为可以被PaddlePaddle加载的模型参数。 .. code-block:: python @@ -351,10 +351,18 @@ PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数 with open(param_file, "w") as fparam: fparam.write(header + param.tostring()) -17. 如何加载预训练embedding参数 +17. 如何加载预训练参数 ------------------------------ -设置embedding的参数属性 :code:`is_static=True`,使embedding参数在训练过程中保持不变,从模型文件将预训练参数载入 :code:`numpy.array`,在创建parameters后,使用 :code:`parameters.set()` 加载预训练参数。PaddlePaddle保存的模型参数文件前16字节为头信息,用户将参数载入 :code:`numpy.array` 时须从第17字节开始。 +* 对加载预训练参数的层,设置其参数属性 :code:`is_static=True`,使该层的参数在训练过程中保持不变。以embedding层为例,代码如下: + +.. code-block:: python + + emb_para = paddle.attr.Param(name='emb', is_static=True) + paddle.layer.embedding(size=word_dim, input=x, param_attr=emb_para) + + +* 从模型文件将预训练参数载入 :code:`numpy.array`,在创建parameters后,使用 :code:`parameters.set()` 加载预训练参数。PaddlePaddle保存的模型参数文件前16字节为头信息,用户将参数载入 :code:`numpy.array` 时须从第17字节开始。以embedding层为例,代码如下: .. 
code-block:: python @@ -363,10 +371,5 @@ PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数 f.read(16) # skip header. return np.fromfile(f, dtype=np.float32).reshape(h, w) - - emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True) - paddle.layer.embedding(size=word_dim, input=x, param_attr=emb_para) - - parameters = paddle.parameters.create(my_cost) parameters.set('emb', load_parameter(emb_param_file, 30000, 256)) From c1e3550ecc41da9df58e83e744a8785325636ca4 Mon Sep 17 00:00:00 2001 From: ranqiu Date: Wed, 20 Sep 2017 16:29:10 +0800 Subject: [PATCH 8/8] Update faq of doc --- doc/faq/index_cn.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/faq/index_cn.rst b/doc/faq/index_cn.rst index 209dac6989..00192aa69b 100644 --- a/doc/faq/index_cn.rst +++ b/doc/faq/index_cn.rst @@ -325,7 +325,7 @@ pip install python/dist/paddle*.whl && pip install ../paddle/dist/py_paddle*.whl 16. PaddlePaddle存储的参数格式是什么,如何和明文进行相互转化 --------------------------------------------------------- -PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数两部分组成。头信息中,1~4字节表示PaddlePaddle版本信息,在多数情况下,可以直接填充0;5~8字节表示每个参数占用的字节数,当保存的网络参数为float类型时为4,double类型时为8;9~16字节表示保存的参数总个数。 +PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数两部分组成。头信息中,1~4字节表示PaddlePaddle版本信息,请直接填充0;5~8字节表示每个参数占用的字节数,当保存的网络参数为float类型时为4,double类型时为8;9~16字节表示保存的参数总个数。 将PaddlePaddle保存的模型参数还原回明文时,可以使用相应数据类型的 :code:`numpy.array` 加载具体网络参数,此时可以跳过PaddlePaddle模型参数文件的头信息。若在PaddlePaddle编译时,未指定按照double精度编译,默认情况下按照float精度计算,保存的参数也是float类型。这时在使用 :code:`numpy.array` 时,一般设置 :code:`dtype=float32` 。示例如下: @@ -340,7 +340,7 @@ PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数 fmt="%.6f", delimiter=",") -将明文参数转化为PaddlePaddle可加载的模型参数时,首先构造头信息,再写入网络参数。下面将随机生成的矩阵转化为可以被PaddlePaddle加载的模型参数。 +将明文参数转化为PaddlePaddle可加载的模型参数时,首先构造头信息,再写入网络参数。下面的代码将随机生成的矩阵转化为可以被PaddlePaddle加载的模型参数。 .. code-block:: python