Merge pull request #11314 from typhoonzero/fix_api_reference_docs

Fix api reference docs
wangkuiyi-patch-1
Yu Yang 7 years ago committed by GitHub
commit ce60bbf506
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -91,32 +91,31 @@ class ChunkEvalOpMaker : public framework::OpProtoAndCheckerMaker {
"(int64_t). The number of chunks both in Inference and Label on the " "(int64_t). The number of chunks both in Inference and Label on the "
"given mini-batch."); "given mini-batch.");
AddAttr<int>("num_chunk_types", AddAttr<int>("num_chunk_types",
"(int). The number of chunk type. See below for details."); "The number of chunk type. See the description for details.");
AddAttr<std::string>( AddAttr<std::string>("chunk_scheme",
"chunk_scheme", "The labeling scheme indicating "
"(string, default IOB). The labeling scheme indicating " "how to encode the chunks. Must be IOB, IOE, IOBES or "
"how to encode the chunks. Must be IOB, IOE, IOBES or plain. See below " "plain. See the description"
"for details.") "for details.")
.SetDefault("IOB"); .SetDefault("IOB");
AddAttr<std::vector<int>>("excluded_chunk_types", AddAttr<std::vector<int>>("excluded_chunk_types",
"(list<int>) A list including chunk type ids " "A list including chunk type ids "
"indicating chunk types that are not counted. " "indicating chunk types that are not counted. "
"See below for details.") "See the description for details.")
.SetDefault(std::vector<int>{}); .SetDefault(std::vector<int>{});
AddComment(R"DOC( AddComment(R"DOC(
For some basics of chunking, please refer to For some basics of chunking, please refer to
Chunking with Support Vector Machines <https://aclanthology.info/pdf/N/N01/N01-1025.pdf>. 'Chunking with Support Vector Machines <https://aclanthology.info/pdf/N/N01/N01-1025.pdf>'.
ChunkEvalOp computes the precision, recall, and F1-score of chunk detection,
CheckEvalOp computes the precision, recall, and F1-score of chunk detection,
and supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes. and supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes.
Here is a NER example of labeling for these tagging schemes: Here is a NER example of labeling for these tagging schemes:
Li Ming works at Agricultural Bank of China in Beijing. Li Ming works at Agricultural Bank of China in Beijing.
IO: I-PER I-PER O O I-ORG I-ORG I-ORG I-ORG O I-LOC IO I-PER I-PER O O I-ORG I-ORG I-ORG I-ORG O I-LOC
IOB: B-PER I-PER O O B-ORG I-ORG I-ORG I-ORG O B-LOC IOB B-PER I-PER O O B-ORG I-ORG I-ORG I-ORG O B-LOC
IOE: I-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O E-LOC IOE I-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O E-LOC
IOBES: B-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O S-LOC IOBES B-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O S-LOC
There are three chunk types(named entity types) including PER(person), ORG(organization) There are three chunk types(named entity types) including PER(person), ORG(organization)
and LOC(LOCATION), and we can see that the labels have the form <tag type>-<chunk type>. and LOC(LOCATION), and we can see that the labels have the form <tag type>-<chunk type>.
@ -124,31 +123,31 @@ and LOC(LOCATION), and we can see that the labels have the form <tag type>-<chun
Since the calculations actually use label ids rather than labels, extra attention Since the calculations actually use label ids rather than labels, extra attention
should be paid when mapping labels to ids to make ChunkEvalOp work. The key point should be paid when mapping labels to ids to make ChunkEvalOp work. The key point
is that the listed equations are satisfied by ids. is that the listed equations are satisfied by ids.
tag_type = label % num_tag_type tag_type = label % num_tag_type
chunk_type = label / num_tag_type chunk_type = label / num_tag_type
where `num_tag_type` is the num of tag types in the tagging scheme, `num_chunk_type` where `num_tag_type` is the num of tag types in the tagging scheme, `num_chunk_type`
is the num of chunk types, and `tag_type` gets its value from the following table. is the num of chunk types, and `tag_type` gets its value from the following table.
Scheme Begin Inside End Single Scheme Begin Inside End Single
plain 0 - - - plain 0 - - -
IOB 0 1 - - IOB 0 1 - -
IOE - 0 1 - IOE - 0 1 -
IOBES 0 1 2 3 IOBES 0 1 2 3
Still use NER as example, assuming the tagging scheme is IOB while chunk types are ORG, Still use NER as example, assuming the tagging scheme is IOB while chunk types are ORG,
PER and LOC. To satisfy the above equations, the label map can be like this: PER and LOC. To satisfy the above equations, the label map can be like this:
B-ORG 0 B-ORG 0
I-ORG 1 I-ORG 1
B-PER 2 B-PER 2
I-PER 3 I-PER 3
B-LOC 4 B-LOC 4
I-LOC 5 I-LOC 5
O 6 O 6
Its not hard to verify the equations noting that the num of chunk types It's not hard to verify the equations noting that the num of chunk types
is 3 and the num of tag types in IOB scheme is 2. For example, the label is 3 and the num of tag types in IOB scheme is 2. For example, the label
id of I-LOC is 5, the tag type id of I-LOC is 1, and the chunk type id of id of I-LOC is 5, the tag type id of I-LOC is 1, and the chunk type id of
I-LOC is 2, which is consistent with the results from the equations. I-LOC is 2, which is consistent with the results from the equations.

@ -156,7 +156,7 @@ Parameters(strides, paddings) are two elements. These two elements represent hei
and width, respectively. and width, respectively.
The input(X) size and output(Out) size may be different. The input(X) size and output(Out) size may be different.
Example: For an example:
Input: Input:
Input shape: $(N, C_{in}, H_{in}, W_{in})$ Input shape: $(N, C_{in}, H_{in}, W_{in})$
Filter shape: $(C_{in}, C_{out}, H_f, W_f)$ Filter shape: $(C_{in}, C_{out}, H_f, W_f)$

@ -76,9 +76,9 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker {
.AsIntermediate(); .AsIntermediate();
AddComment(R"DOC( AddComment(R"DOC(
Cosine Similarity Operator. **Cosine Similarity Operator**
$Out = X^T * Y / (\sqrt{X^T * X} * \sqrt{Y^T * Y})$ $Out = \frac{X^T * Y}{(\sqrt{X^T * X} * \sqrt{Y^T * Y})}$
The input X and Y must have the same shape, except that the 1st dimension The input X and Y must have the same shape, except that the 1st dimension
of input Y could be just 1 (different from input X), which will be of input Y could be just 1 (different from input X), which will be

@ -53,21 +53,18 @@ sequence of observed tags.
The output of this operator changes according to whether Input(Label) is given: The output of this operator changes according to whether Input(Label) is given:
1. Input(Label) is given: 1. Input(Label) is given:
This happens in training. This operator is used to co-work with the chunk_eval
This happens in training. This operator is used to co-work with the chunk_eval operator.
operator. When Input(Label) is given, the crf_decoding operator returns a row vector
with shape [N x 1] whose values are fixed to be 0, indicating an incorrect
When Input(Label) is given, the crf_decoding operator returns a row vector prediction, or 1 indicating a tag is correctly predicted. Such an output is the
with shape [N x 1] whose values are fixed to be 0, indicating an incorrect input to chunk_eval operator.
prediction, or 1 indicating a tag is correctly predicted. Such an output is the
input to chunk_eval operator.
2. Input(Label) is not given: 2. Input(Label) is not given:
This is the standard decoding process.
This is the standard decoding process.
The crf_decoding operator returns a row vector with shape [N x 1] whose values The crf_decoding operator returns a row vector with shape [N x 1] whose values
range from 0 to maximum tag number - 1. Each element indicates an index of a range from 0 to maximum tag number - 1, Each element indicates an index of a
predicted tag. predicted tag.
)DOC"); )DOC");
} }

@ -68,15 +68,16 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
"representing pairwise iou scores."); "representing pairwise iou scores.");
AddComment(R"DOC( AddComment(R"DOC(
IOU Similarity Operator. **IOU Similarity Operator**
Computes intersection-over-union (IOU) between two box lists. Computes intersection-over-union (IOU) between two box lists.
Box list 'X' should be a LoDTensor and 'Y' is a common Tensor, Box list 'X' should be a LoDTensor and 'Y' is a common Tensor,
boxes in 'Y' are shared by all instances of the batched inputs of X. boxes in 'Y' are shared by all instances of the batched inputs of X.
Given two boxes A and B, the calculation of IOU is as follows: Given two boxes A and B, the calculation of IOU is as follows:
$$ $$
IOU(A, B) = IOU(A, B) =
\frac{area(A\cap B)}{area(A)+area(B)-area(A\cap B)} \\frac{area(A\\cap B)}{area(A)+area(B)-area(A\\cap B)}
$$ $$
)DOC"); )DOC");

@ -84,6 +84,7 @@ CRF. Please refer to http://www.cs.columbia.edu/~mcollins/fb.pdf and
http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for details. http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for details.
Equation: Equation:
1. Denote Input(Emission) to this operator as $x$ here. 1. Denote Input(Emission) to this operator as $x$ here.
2. The first D values of Input(Transition) to this operator are for starting 2. The first D values of Input(Transition) to this operator are for starting
weights, denoted as $a$ here. weights, denoted as $a$ here.
@ -106,6 +107,7 @@ Finally, the linear chain CRF operator outputs the logarithm of the conditional
likelihood of each training sample in a mini-batch. likelihood of each training sample in a mini-batch.
NOTE: NOTE:
1. The feature function for a CRF is made up of the emission features and the 1. The feature function for a CRF is made up of the emission features and the
transition features. The emission feature weights are NOT computed in transition features. The emission feature weights are NOT computed in
this operator. They MUST be computed first before this operator is called. this operator. They MUST be computed first before this operator is called.

@ -184,34 +184,32 @@ Long-Short Term Memory (LSTM) Operator.
The default implementation is diagonal/peephole connection The default implementation is diagonal/peephole connection
(https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows: (https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows:
$$ $$ i_t = \\sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) $$
i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) \\
f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) \\ $$ f_t = \\sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) $$
\tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) \\ $$ \\tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) $$
o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) \\ $$ o_t = \\sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) $$
c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c_t} \\ $$ c_t = f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t} $$
h_t = o_t \odot act_h(c_t) $$ h_t = o_t \\odot act_h(c_t) $$
$$
where the W terms denote weight matrices (e.g. $W_{xi}$ is the matrix - W terms denote weight matrices (e.g. $W_{xi}$ is the matrix
of weights from the input gate to the input), $W_{ic}, W_{fc}, W_{oc}$ of weights from the input gate to the input), $W_{ic}, W_{fc}, W_{oc}$
are diagonal weight matrices for peephole connections. In our implementation, are diagonal weight matrices for peephole connections. In our implementation,
we use vectors to represent these diagonal weight matrices. The b terms we use vectors to represent these diagonal weight matrices.
denote bias vectors ($b_i$ is the input gate bias vector), $\sigma$ - The b terms denote bias vectors ($b_i$ is the input gate bias vector).
is the non-linear activation, such as logistic sigmoid function, and - $\sigma$ is the non-linear activation, such as logistic sigmoid function.
$i, f, o$ and $c$ are the input gate, forget gate, output gate, - $i, f, o$ and $c$ are the input gate, forget gate, output gate,
and cell activation vectors, respectively, all of which have the same size as and cell activation vectors, respectively, all of which have the same size as
the cell output activation vector $h$. the cell output activation vector $h$.
- The $\odot$ is the element-wise product of the vectors.
The $\odot$ is the element-wise product of the vectors. $act_g$ and $act_h$ - $act_g$ and $act_h$ are the cell input and cell output activation functions
are the cell input and cell output activation functions and `tanh` is usually and `tanh` is usually used for them.
used for them. $\tilde{c_t}$ is also called candidate hidden state, - $\tilde{c_t}$ is also called candidate hidden state,
which is computed based on the current input and the previous hidden state. which is computed based on the current input and the previous hidden state.
Set `use_peepholes` False to disable peephole connection. The formula Set `use_peepholes` False to disable peephole connection. The formula
is omitted here, please refer to the paper is omitted here, please refer to the paper

@ -139,7 +139,20 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
"The pooled output width.") "The pooled output width.")
.SetDefault(1); .SetDefault(1);
AddComment(R"DOC( AddComment(R"DOC(
ROIPool operator **ROIPool Operator**
Region of interest pooling (also known as RoI pooling)
is to perform max pooling on inputs of nonuniform sizes to obtain
fixed-size feature maps (e.g. 7*7).
The operator has three steps:
1. Dividing each region proposal into equal-sized sections with
the pooled_width and pooled_height
2. Finding the largest value in each section
3. Copying these max values to the output buffer
ROI Pooling for Faster-RCNN. The link below is a further introduction: ROI Pooling for Faster-RCNN. The link below is a further introduction:
https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn

@ -41,13 +41,13 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "(Tensor) Input tensor of scale operator."); AddInput("X", "(Tensor) Input tensor of scale operator.");
AddOutput("Out", "(Tensor) Output tensor of scale operator."); AddOutput("Out", "(Tensor) Output tensor of scale operator.");
AddComment(R"DOC( AddComment(R"DOC(
Scale operator **Scale operator**
Multiply the input tensor with a float scalar to scale the input tensor.
$$Out = scale*X$$ $$Out = scale*X$$
)DOC"); )DOC");
AddAttr<float>("scale", AddAttr<float>("scale", "The scaling factor of the scale operator.")
"(float, default 1.0)"
"The scaling factor of the scale operator.")
.SetDefault(1.0); .SetDefault(1.0);
} }
}; };

@ -109,10 +109,35 @@ class BlockGuardServ(BlockGuard):
class ListenAndServ(object): class ListenAndServ(object):
""" """
ListenAndServ class. **ListenAndServ Layer**
ListenAndServ is used to create an RPC server that binds to and listens
on a specific TCP port; this server will run the sub-block when
it receives variables from clients.
Args:
endpoint(string): IP:port string which the server will listen on.
inputs(list): a list of variables that the server will get from clients.
fan_in(int): how many clients are expected to report to this server, default: 1.
optimizer_mode(bool): whether to run the server as a parameter server, default: True.
Examples:
.. code-block:: python
ListenAndServ class is used to wrap listen_and_serv op to create a server with fluid.program_guard(main):
which can receive variables from clients and run a block. serv = layers.ListenAndServ(
"127.0.0.1:6170", ["X"], optimizer_mode=False)
with serv.do():
x = layers.data(
shape=[32, 32],
dtype='float32',
name="X",
append_batch_size=False)
fluid.initializer.Constant(value=1.0)(x, main.global_block())
layers.scale(x=x, scale=10.0, out=out_var)
exe = fluid.Executor(place)
exe.run(main)
""" """
def __init__(self, endpoint, inputs, fan_in=1, optimizer_mode=True): def __init__(self, endpoint, inputs, fan_in=1, optimizer_mode=True):

@ -49,6 +49,13 @@ _single_dollar_pattern_ = re.compile(r"\$([^\$]+)\$")
_two_bang_pattern_ = re.compile(r"!!([^!]+)!!") _two_bang_pattern_ = re.compile(r"!!([^!]+)!!")
def escape_math(text):
    """
    Rewrite single-dollar inline math in ``text`` as a ``:math:`` role.

    The text goes through three regex substitutions in order:

    1. ``_two_dollar_pattern_`` matches are rewritten as ``!!...!!``
       (presumably this pattern matches ``$$...$$`` display math; its
       definition is outside this view -- confirm).
    2. ``_single_dollar_pattern_`` matches (``$...$``) become
       ``:math:`...```.
    3. ``_two_bang_pattern_`` matches (``!!...!!``) are rewritten as
       ``$$...$$``.

    Args:
        text(str): the documentation text to convert.

    Returns:
        str: the converted text.
    """
    # Step 1 runs first so the single-dollar pass in step 2 cannot match
    # inside a span that step 1 has already rewritten with "!!" markers.
    shielded = _two_dollar_pattern_.sub(r"!!\1!!", text)
    inline_converted = _single_dollar_pattern_.sub(r':math:`\1`', shielded)
    return _two_bang_pattern_.sub(r'$$\1$$', inline_converted)
def _generate_doc_string_(op_proto): def _generate_doc_string_(op_proto):
""" """
Generate docstring by OpProto Generate docstring by OpProto
@ -60,12 +67,6 @@ def _generate_doc_string_(op_proto):
str: the document string str: the document string
""" """
def escape_math(text):
return _two_bang_pattern_.sub(
r'$$\1$$',
_single_dollar_pattern_.sub(
r':math:`\1`', _two_dollar_pattern_.sub(r"!!\1!!", text)))
if not isinstance(op_proto, framework_pb2.OpProto): if not isinstance(op_proto, framework_pb2.OpProto):
raise TypeError("OpProto should be `framework_pb2.OpProto`") raise TypeError("OpProto should be `framework_pb2.OpProto`")
@ -233,9 +234,6 @@ def autodoc(comment=""):
return __impl__ return __impl__
_inline_math_single_dollar = re.compile(r"\$([^\$]+)\$")
def templatedoc(op_type=None): def templatedoc(op_type=None):
""" """
Decorator of layer function. It will use the docstring from the layer Decorator of layer function. It will use the docstring from the layer
@ -253,9 +251,6 @@ def templatedoc(op_type=None):
def trim_ending_dot(msg): def trim_ending_dot(msg):
return msg.rstrip('.') return msg.rstrip('.')
def escape_inline_math(msg):
return _inline_math_single_dollar.sub(repl=r':math:`\1`', string=msg)
def __impl__(func): def __impl__(func):
if op_type is None: if op_type is None:
op_type_name = func.__name__ op_type_name = func.__name__
@ -269,7 +264,7 @@ def templatedoc(op_type=None):
for line in comment_lines: for line in comment_lines:
line = line.strip() line = line.strip()
if len(line) != 0: if len(line) != 0:
comment += escape_inline_math(line) comment += escape_math(line)
comment += " " comment += " "
elif len(comment) != 0: elif len(comment) != 0:
comment += "\n \n " comment += "\n \n "

File diff suppressed because it is too large Load Diff

@ -68,6 +68,7 @@ __all__ = [
'slice', 'slice',
'polygon_box_transform', 'polygon_box_transform',
'shape', 'shape',
'iou_similarity',
'maxout', 'maxout',
] + __activations__ ] + __activations__

Loading…
Cancel
Save