PaddleOCR/ppocr/modeling/necks/rnn.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import nn

from ppocr.modeling.heads.rec_ctc_head import get_para_bias_attr


class EncoderWithReshape(nn.Layer):
    """Flatten a 4-D feature map into a sequence of channel vectors.

    The two trailing axes of the input are merged into a single axis, which
    is then swapped with the channel axis: an input of shape ``(B, C, H, W)``
    is returned as ``(B, H * W, C)``. The layer holds no parameters, so
    ``out_channels`` is simply ``in_channels``.
    """

    def __init__(self, in_channels, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        super().__init__()
        self.out_channels = in_channels

    def forward(self, x):
        """Rearrange ``x`` from ``(B, C, H, W)`` to ``(B, H * W, C)``."""
        # Unpacking into four names requires a 4-D input.
        batch, channels, _height, _width = x.shape
        flattened = x.reshape((batch, channels, -1))
        # Channels last: (batch, channels, steps) -> (batch, steps, channels).
        return flattened.transpose([0, 2, 1])


class Im2Seq(nn.Layer):
    """Turn a height-1 feature map into a 2-D matrix of channel vectors.

    The input ``(B, C, 1, W)`` is made channels-last and then collapsed to
    two axes, giving ``(B * W, C)``. Note that the batch and width axes are
    merged into the leading axis of the result. The layer holds no
    parameters, so ``out_channels`` is simply ``in_channels``.
    """

    def __init__(self, in_channels, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        super().__init__()
        self.out_channels = in_channels

    def forward(self, x):
        """Return ``x`` reshaped to ``(-1, C)``; the input height must be 1."""
        _batch, channels, height, _width = x.shape
        assert height == 1
        # (B, C, H, W) -> (B, H, W, C) so each row of the result is one
        # channel vector.
        channels_last = x.transpose((0, 2, 3, 1))
        return channels_last.reshape((-1, channels))


class EncoderWithRNN(nn.Layer):
    """Two-layer bidirectional LSTM applied to a feature sequence.

    Args:
        in_channels: size of each input feature vector.
        hidden_size: hidden size of the LSTM, per direction.

    ``out_channels`` is ``hidden_size * 2`` because the layer is
    bidirectional.
    """

    def __init__(self, in_channels, hidden_size):
        super().__init__()
        # One hidden_size-wide output per direction.
        self.out_channels = hidden_size * 2
        self.lstm = nn.LSTM(
            in_channels,
            hidden_size,
            num_layers=2,
            direction='bidirectional')

    def forward(self, x):
        """Run the LSTM over ``x`` and return only its output sequence."""
        # The second element returned by the LSTM (its final states) is
        # not used.
        outputs, _ = self.lstm(x)
        return outputs


class EncoderWithFC(nn.Layer):
    """Project each feature vector to ``hidden_size`` with one linear layer.

    Args:
        in_channels: size of each input feature vector.
        hidden_size: size of each output feature vector; also exposed as
            ``out_channels``.
    """

    def __init__(self, in_channels, hidden_size):
        super().__init__()
        self.out_channels = hidden_size
        # The same name is used for the parameter attributes and the layer.
        layer_name = 'reduce_encoder_fea'
        fc_weight_attr, fc_bias_attr = get_para_bias_attr(
            l2_decay=1e-5, k=in_channels, name=layer_name)
        self.fc = nn.Linear(
            in_channels,
            hidden_size,
            weight_attr=fc_weight_attr,
            bias_attr=fc_bias_attr,
            name=layer_name)

    def forward(self, x):
        """Apply the linear projection to ``x``."""
        return self.fc(x)


class SequenceEncoder(nn.Layer):
    """Reshape a 4-D feature map into a sequence and optionally encode it.

    The input always goes through ``EncoderWithReshape`` first. Depending on
    ``encoder_type`` the resulting sequence is returned as is ('reshape') or
    passed through an additional encoder ('fc' or 'rnn').

    Args:
        in_channels: channel count of the incoming feature map.
        encoder_type: one of 'reshape', 'fc' or 'rnn'.
        hidden_size: hidden size handed to the 'fc' / 'rnn' encoder; unused
            for 'reshape'.
        **kwargs: accepted and ignored.

    Raises:
        AssertionError: if ``encoder_type`` is not a supported type.

    ``out_channels`` is the channel count of whatever ``forward`` returns:
    the reshape layer's for 'reshape', otherwise the extra encoder's.
    """

    def __init__(self, in_channels, encoder_type, hidden_size=48, **kwargs):
        super(SequenceEncoder, self).__init__()
        self.encoder_reshape = EncoderWithReshape(in_channels)
        self.out_channels = self.encoder_reshape.out_channels
        if encoder_type == 'reshape':
            # No extra encoder is built; ``self.encoder`` is not set.
            self.only_reshape = True
        else:
            support_encoder_dict = {
                'reshape': EncoderWithReshape,
                'fc': EncoderWithFC,
                'rnn': EncoderWithRNN
            }
            # Raised explicitly rather than via ``assert`` so the check is
            # not stripped under ``python -O`` (which would otherwise turn
            # an unsupported type into a bare KeyError below). The exception
            # type and message are the same as the assert produced.
            if encoder_type not in support_encoder_dict:
                raise AssertionError('{} must in {}'.format(
                    encoder_type, support_encoder_dict.keys()))

            self.encoder = support_encoder_dict[encoder_type](
                self.encoder_reshape.out_channels, hidden_size)
            self.out_channels = self.encoder.out_channels
            self.only_reshape = False

    def forward(self, x):
        """Reshape ``x`` to a sequence, then apply the extra encoder if any."""
        x = self.encoder_reshape(x)
        if not self.only_reshape:
            x = self.encoder(x)
        return x
dygraph first commit 4 years ago			`# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

			`from __future__ import absolute_import`
			`from __future__ import division`
			`from __future__ import print_function`

			`from paddle import nn`

			`from ppocr.modeling.heads.rec_ctc_head import get_para_bias_attr`


			`class EncoderWithReshape(nn.Layer):`
			`def __init__(self, in_channels, **kwargs):`
			`super().__init__()`
			`self.out_channels = in_channels`

			`def forward(self, x):`
			`B, C, H, W = x.shape`
			`x = x.reshape((B, C, -1))`
			`x = x.transpose([0, 2, 1]) # (NTC)(batch, width, channels)`
			`return x`


			`class Im2Seq(nn.Layer):`
			`def __init__(self, in_channels, **kwargs):`
			`super().__init__()`
			`self.out_channels = in_channels`

			`def forward(self, x):`
			`B, C, H, W = x.shape`
			`assert H == 1`
			`x = x.transpose((0, 2, 3, 1))`
			`x = x.reshape((-1, C))`
			`return x`


			`class EncoderWithRNN(nn.Layer):`
			`def __init__(self, in_channels, hidden_size):`
			`super(EncoderWithRNN, self).__init__()`
			`self.out_channels = hidden_size * 2`
			`# self.lstm1_fw = nn.LSTMCell(`
			`# in_channels,`
			`# hidden_size,`
			`# weight_ih_attr=ParamAttr(name='lstm_st1_fc1_w'),`
			`# bias_ih_attr=ParamAttr(name='lstm_st1_fc1_b'),`
			`# weight_hh_attr=ParamAttr(name='lstm_st1_out1_w'),`
			`# bias_hh_attr=ParamAttr(name='lstm_st1_out1_b'),`
			`# )`
			`# self.lstm1_bw = nn.LSTMCell(`
			`# in_channels,`
			`# hidden_size,`
			`# weight_ih_attr=ParamAttr(name='lstm_st1_fc2_w'),`
			`# bias_ih_attr=ParamAttr(name='lstm_st1_fc2_b'),`
			`# weight_hh_attr=ParamAttr(name='lstm_st1_out2_w'),`
			`# bias_hh_attr=ParamAttr(name='lstm_st1_out2_b'),`
			`# )`
			`# self.lstm2_fw = nn.LSTMCell(`
			`# hidden_size,`
			`# hidden_size,`
			`# weight_ih_attr=ParamAttr(name='lstm_st2_fc1_w'),`
			`# bias_ih_attr=ParamAttr(name='lstm_st2_fc1_b'),`
			`# weight_hh_attr=ParamAttr(name='lstm_st2_out1_w'),`
			`# bias_hh_attr=ParamAttr(name='lstm_st2_out1_b'),`
			`# )`
			`# self.lstm2_bw = nn.LSTMCell(`
			`# hidden_size,`
			`# hidden_size,`
			`# weight_ih_attr=ParamAttr(name='lstm_st2_fc2_w'),`
			`# bias_ih_attr=ParamAttr(name='lstm_st2_fc2_b'),`
			`# weight_hh_attr=ParamAttr(name='lstm_st2_out2_w'),`
			`# bias_hh_attr=ParamAttr(name='lstm_st2_out2_b'),`
			`# )`
			`self.lstm = nn.LSTM(`
			`in_channels, hidden_size, direction='bidirectional', num_layers=2)`

			`def forward(self, x):`
			`# fw_x, _ = self.lstm1_fw(x)`
			`# fw_x, _ = self.lstm2_fw(fw_x)`
			`#`
			`# # bw`
			`# bw_x, _ = self.lstm1_bw(x)`
			`# bw_x, _ = self.lstm2_bw(bw_x)`
			`# x = paddle.concat([fw_x, bw_x], axis=2)`
			`x, _ = self.lstm(x)`
			`return x`


			`class EncoderWithFC(nn.Layer):`
			`def __init__(self, in_channels, hidden_size):`
			`super(EncoderWithFC, self).__init__()`
			`self.out_channels = hidden_size`
			`weight_attr, bias_attr = get_para_bias_attr(`
			`l2_decay=0.00001, k=in_channels, name='reduce_encoder_fea')`
			`self.fc = nn.Linear(`
			`in_channels,`
			`hidden_size,`
			`weight_attr=weight_attr,`
			`bias_attr=bias_attr,`
			`name='reduce_encoder_fea')`

			`def forward(self, x):`
			`x = self.fc(x)`
			`return x`


			`class SequenceEncoder(nn.Layer):`
hidden_size添加默认参数 4 years ago			`def __init__(self, in_channels, encoder_type, hidden_size=48, **kwargs):`
dygraph first commit 4 years ago			`super(SequenceEncoder, self).__init__()`
			`self.encoder_reshape = EncoderWithReshape(in_channels)`
			`self.out_channels = self.encoder_reshape.out_channels`
			`if encoder_type == 'reshape':`
			`self.only_reshape = True`
			`else:`
			`support_encoder_dict = {`
			`'reshape': EncoderWithReshape,`
			`'fc': EncoderWithFC,`
			`'rnn': EncoderWithRNN`
			`}`
			`assert encoder_type in support_encoder_dict, '{} must in {}'.format(`
			`encoder_type, support_encoder_dict.keys())`

			`self.encoder = support_encoder_dict[encoder_type](`
			`self.encoder_reshape.out_channels, hidden_size)`
			`self.out_channels = self.encoder.out_channels`
			`self.only_reshape = False`

			`def forward(self, x):`
			`x = self.encoder_reshape(x)`
			`if not self.only_reshape:`
			`x = self.encoder(x)`
			`return x`