/** * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /*! * \file audio_ops.h * \brief */ #ifndef OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_ #define OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_ #include "graph/operator_reg.h" namespace ge { /** *@brief Mel-Frequency Cepstral Coefficient (MFCC) calculation consists of taking the DCT-II of a log-magnitude mel-scale spectrogram . \n *@par Inputs: *Input "spectrogram" is a 3D tensor. Input "sample_rate" is a scalar. * @li spectrogram: A 3D float tensor. * @li sample_rate: The MFCC sample rate . \n *@par Attributes: *@li upper_frequency_limit: The highest frequency for calculation. *@li lower_frequency_limit: The lowest frequency for calculation. *@li filterbank_channel_count: Resolution of the Mel bank. *@li dct_coefficient_count: Number of output channels to produce per time slice . \n *@par Outputs: *y: A Tensor of type float32 . \n *@attention Constraints: *Mfcc runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator Mfcc . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(Mfcc) .INPUT(spectrogram, TensorType({DT_FLOAT})) .INPUT(sample_rate, TensorType({DT_INT32})) .OUTPUT(y, TensorType({DT_FLOAT})) .ATTR(upper_frequency_limit, Float, 4000) .ATTR(lower_frequency_limit, Float, 20) .ATTR(filterbank_channel_count, Int, 40) .ATTR(dct_coefficient_count, Int, 13) .OP_END_FACTORY_REG(Mfcc) /** *@brief Decodes and generates spectrogram using wav float tensor . \n *@par Inputs: *Input "x" is a 2D matrix. * x: A float tensor. Float representation of audio data . \n *@par Attributes: *@li window_size: Size of the spectrogram window. *@li stride: Size of the spectrogram stride. *@li magnitude_squared: If true, uses squared magnitude . \n *@par Outputs: *spectrogram: A 3D float Tensor . \n *@attention Constraints: *AudioSpectrogram runs on the Ascend AI CPU, which delivers poor performance . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator AudioSpectrogram . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(AudioSpectrogram) .INPUT(x, TensorType({DT_FLOAT})) .OUTPUT(spectrogram, TensorType({DT_FLOAT})) .REQUIRED_ATTR(window_size, Int) .REQUIRED_ATTR(stride, Int) .ATTR(magnitude_squared, Bool, false) .OP_END_FACTORY_REG(AudioSpectrogram) /** *@brief Decodes a 16-bit WAV file into a float tensor . \n *@par Inputs: *contents: A Tensor of type string. The WAV-encoded audio, usually from a file . \n *@par Attributes: *@li desired_channels: An optional int. Defaults to "-1". Number of sample channels wanted. *@li desired_samples: An optional int. Defaults to "-1". Length of audio requested . \n *@par Outputs: *@li *audio: A Tensor of type float32. *@li *sample_rate: A Tensor of type int32 . \n *@attention Constraints: *DecodeWav runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator DecodeWav . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeWav) .INPUT(contents, TensorType({DT_STRING})) .OUTPUT(audio, TensorType({DT_FLOAT})) .OUTPUT(sample_rate, TensorType({DT_INT32})) .ATTR(desired_channels, Int, -1) .ATTR(desired_samples, Int, -1) .OP_END_FACTORY_REG(DecodeWav) /** *@brief Encode audio data using the WAV file format . \n *@par Inputs: *Including: * @li audio: A Tensor of type DT_FLOAT. * @li sample_rate: A Tensor of type DT_INT32 . \n *@par Outputs: *contents: A Tensor of type DT_STRING . \n *@attention Constraints: *EncodeWav runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with tensorflow Operator EncodeWav . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(EncodeWav) .INPUT(audio, TensorType({DT_FLOAT})) .INPUT(sample_rate, TensorType({DT_INT32})) .OUTPUT(contents, TensorType({DT_STRING})) .OP_END_FACTORY_REG(EncodeWav) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_