@@ -261,7 +261,7 @@ class BertOutput(nn.Cell):
     def construct(self, hidden_status, input_tensor):
         output = self.dense(hidden_status)
         output = self.dropout(output)
-        output = self.add(output, input_tensor)
+        output = self.add(input_tensor, output)
         output = self.layernorm(output)
         return output
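
The only change in this hunk is the operand order of the residual add. Element-wise
addition is commutative, so the swap is purely stylistic: it makes the line read as the
conventional residual form x + sublayer(x). A minimal NumPy sketch of why the output
cannot change (NumPy stands in for the MindSpore add op; shapes are illustrative):

    import numpy as np

    x = np.random.rand(2, 8).astype(np.float32)   # input_tensor
    fx = np.random.rand(2, 8).astype(np.float32)  # dense -> dropout output

    # Swapping the addends of an element-wise add cannot change the result.
    assert np.array_equal(fx + x, x + fx)
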
@@ -832,8 +832,7 @@ class CreateAttentionMaskFromInputMask(nn.Cell):
         if not self.input_mask_from_dataset:
             input_mask = self.input_mask
 
-        input_mask = self.cast(self.reshape(input_mask, self.shape), mstype.float32)
-        attention_mask = self.batch_matmul(self.broadcast_ones, input_mask)
+        attention_mask = self.cast(self.reshape(input_mask, self.shape), mstype.float32)
         return attention_mask
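
This hunk drops the explicit batch_matmul against a column of ones. The old code
expanded the reshaped mask into a full (batch_size, seq_length, seq_length) attention
mask; the new code keeps it at (batch_size, 1, seq_length) and leaves the expansion to
broadcasting where the mask is consumed. A minimal NumPy sketch of the equivalence,
assuming self.shape is (batch_size, 1, seq_length) and self.broadcast_ones is a
(batch_size, seq_length, 1) tensor of ones, as this class constructs them:

    import numpy as np

    batch_size, seq_length = 2, 4
    input_mask = np.random.randint(0, 2, (batch_size, seq_length)).astype(np.float32)

    # Old path: outer product with a column of ones -> (batch, seq, seq).
    broadcast_ones = np.ones((batch_size, seq_length, 1), dtype=np.float32)
    old = np.matmul(broadcast_ones, input_mask.reshape(batch_size, 1, seq_length))

    # New path: keep the (batch, 1, seq) mask; downstream broadcasting expands it.
    new = input_mask.reshape(batch_size, 1, seq_length)

    # Broadcasting the new mask to the old shape reproduces it exactly.
    assert np.array_equal(old, np.broadcast_to(new, old.shape))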