parent
df361d1d26
commit
6c21e556c4
@ -1,7 +0,0 @@
|
||||
add_subdirectory(kernels)
|
||||
|
||||
add_library(nlp OBJECT
|
||||
vocab.cc
|
||||
)
|
||||
|
||||
add_dependencies(nlp nlp-kernels)
|
@ -1,3 +0,0 @@
|
||||
add_library(nlp-kernels OBJECT
|
||||
lookup_op.cc
|
||||
)
|
@ -0,0 +1,7 @@
|
||||
add_subdirectory(kernels)
|
||||
|
||||
add_library(text OBJECT
|
||||
vocab.cc
|
||||
)
|
||||
|
||||
add_dependencies(text text-kernels)
|
@ -1,6 +1,7 @@
|
||||
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||
add_library(kernels-text OBJECT
|
||||
add_library(text-kernels OBJECT
|
||||
lookup_op.cc
|
||||
jieba_tokenizer_op.cc
|
||||
unicode_char_tokenizer_op.cc
|
||||
)
|
||||
)
|
@ -1,21 +0,0 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
This module is to support nlp augmentations. It includes two parts:
|
||||
c_transforms and py_transforms. C_transforms is a high performance
|
||||
image augmentation module which is developed with c++ opencv. Py_transforms
|
||||
provide more kinds of image augmentations which is developed with python PIL.
|
||||
"""
|
||||
from .utils import as_text, JiebaMode
|
||||
from . import c_transforms
|
@ -1,43 +0,0 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Some basic function for nlp
|
||||
"""
|
||||
from enum import IntEnum
|
||||
import numpy as np
|
||||
|
||||
|
||||
def as_text(array, encoding='utf8'):
|
||||
"""
|
||||
Convert data of array to unicode.
|
||||
|
||||
Args:
|
||||
array (numpy array): Data of array should be ASCII values of each character after converted.
|
||||
encoding (string): Indicating the charset for decoding.
|
||||
Returns:
|
||||
A 'str' object.
|
||||
|
||||
"""
|
||||
|
||||
if not isinstance(array, np.ndarray):
|
||||
raise ValueError('input should be a numpy array')
|
||||
|
||||
decode = np.vectorize(lambda x: x.decode(encoding))
|
||||
return decode(array)
|
||||
|
||||
|
||||
class JiebaMode(IntEnum):
|
||||
MIX = 0
|
||||
MP = 1
|
||||
HMM = 2
|
@ -1,79 +0,0 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Validators for TensorOps.
|
||||
"""
|
||||
from functools import wraps
|
||||
from ...transforms.validators import check_uint32
|
||||
|
||||
|
||||
def check_jieba_init(method):
|
||||
"""Wrapper method to check the parameters of jieba add word."""
|
||||
|
||||
@wraps(method)
|
||||
def new_method(self, *args, **kwargs):
|
||||
hmm_path, mp_path, model = (list(args) + 3 * [None])[:3]
|
||||
|
||||
if "hmm_path" in kwargs:
|
||||
hmm_path = kwargs.get("hmm_path")
|
||||
if "mp_path" in kwargs:
|
||||
mp_path = kwargs.get("mp_path")
|
||||
if hmm_path is None:
|
||||
raise ValueError(
|
||||
"the dict of HMMSegment in cppjieba is not provided")
|
||||
kwargs["hmm_path"] = hmm_path
|
||||
if mp_path is None:
|
||||
raise ValueError(
|
||||
"the dict of MPSegment in cppjieba is not provided")
|
||||
kwargs["mp_path"] = mp_path
|
||||
if model is not None:
|
||||
kwargs["model"] = model
|
||||
return method(self, **kwargs)
|
||||
return new_method
|
||||
|
||||
|
||||
def check_jieba_add_word(method):
|
||||
"""Wrapper method to check the parameters of jieba add word."""
|
||||
|
||||
@wraps(method)
|
||||
def new_method(self, *args, **kwargs):
|
||||
word, freq = (list(args) + 2 * [None])[:2]
|
||||
|
||||
if "word" in kwargs:
|
||||
word = kwargs.get("word")
|
||||
if "freq" in kwargs:
|
||||
freq = kwargs.get("freq")
|
||||
if word is None:
|
||||
raise ValueError("word is not provided")
|
||||
kwargs["word"] = word
|
||||
if freq is not None:
|
||||
check_uint32(freq)
|
||||
kwargs["freq"] = freq
|
||||
return method(self, **kwargs)
|
||||
return new_method
|
||||
|
||||
|
||||
def check_jieba_add_dict(method):
|
||||
"""Wrapper method to check the parameters of add dict"""
|
||||
|
||||
@wraps(method)
|
||||
def new_method(self, *args, **kwargs):
|
||||
user_dict = (list(args) + [None])[0]
|
||||
if "user_dict" in kwargs:
|
||||
user_dict = kwargs.get("user_dict")
|
||||
if user_dict is None:
|
||||
raise ValueError("user_dict is not provided")
|
||||
kwargs["user_dict"] = user_dict
|
||||
return method(self, **kwargs)
|
||||
return new_method
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue