/** * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /*! * \file candidate_sampling_ops.h * \brief */ #ifndef OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_ #define OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_ #include "graph/operator_reg.h" namespace ge { /** *@brief Generates labels for candidate sampling with a learned unigram distribution. \n *@par Inputs: *Input "true_classes" is a 2D matrix. *true_classes: A "batch_size * num_true" matrix, in which each row contains the IDs of the "num_true" "target_classes" in the corresponding original label. \n *@par Attributes: *@li num_true: Number of true labels per context. *@li num_sampled: Number of candidates to randomly sample. *@li unique: If "unique" is true, samples with rejection, so that all sampled candidates in a batch are unique. *This requires some approximation to estimate the post-rejection sampling probabilities. *@li range_max: The sampler will sample integers from the interval [0, range_max). *@li seed: If either "seed" or "seed2" are set to be non-zero. *@li seed2: A second seed to avoid seed collision. \n *@par Outputs: *@li sampled_candidates: A vector of length "num_sampled", in which each element is the ID of a sampled candidate. *@li true_expected_count: A "batch_size * num_true" matrix, representing the number of times each candidate is expected to occur in a batch of sampled candidates. If "unique" is true, then this is a probability. *@li sampled_expected_count: A vector of length "num_sampled", for each sampled candidate. *representing the number of times the candidate is expected to occur in a batch of sampled candidates. * If "unique" is true, then this is a probability. *@attention Constraints: *ThreadUnsafeUnigramCandidateSampler runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ThreadUnsafeUnigramCandidateSampler. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ThreadUnsafeUnigramCandidateSampler) .INPUT(true_classes, TensorType({ DT_INT64 })) .OUTPUT(sampled_candidates, TensorType({ DT_INT64 })) .OUTPUT(true_expected_count, TensorType({ DT_FLOAT })) .OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT })) .REQUIRED_ATTR(num_true, Int) .REQUIRED_ATTR(num_sampled, Int) .REQUIRED_ATTR(unique, Bool) .REQUIRED_ATTR(range_max, Int) .ATTR(seed, Int, 0) .ATTR(seed2, Int, 0) .OP_END_FACTORY_REG(ThreadUnsafeUnigramCandidateSampler) /** *@brief Generates labels for candidate sampling with a learned unigram distribution. \n *@par Inputs: *true_classes: A "batch_size * num_true" matrix, in which each row contains the IDs of the "num_true" "target_classes" in the corresponding original label. *Input "true_classes" is a 2D matrix. \n *@par Attributes: *@li num_true: Number of true labels per context. *@li num_sampled: Number of candidates to randomly sample. *@li unique: If "unique" is true, samples with rejection, so that all sampled candidates in a batch are unique. *This requires some approximation to estimate the post-rejection sampling probabilities. *@li range_max: The sampler will sample integers from the interval [0, range_max). *@li seed: If either "seed" or "seed2" are set to be non-zero. *@li seed2: A second seed to avoid seed collision. \n *@par Outputs: *@li sampled_candidates: A vector of length "num_sampled", in which each element is the ID of a sampled candidate. *@li true_expected_count: A "batch_size * num_true" matrix, representing the number of times each candidate is expected to occur in a batch of sampled candidates. *If "unique" is true, then this is a probability. *@li sampled_expected_count: A vector of length "num_sampled", for each sampled candidate representing the number of times. * the candidate is expected to occur in a batch of sampled candidates. *If "unique" is true, then this is a probability. \n *@attention Constraints: *UniformCandidateSampler runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator UniformCandidateSampler. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(UniformCandidateSampler) .INPUT(true_classes, TensorType({ DT_INT64 })) .OUTPUT(sampled_candidates, TensorType({ DT_INT64 })) .OUTPUT(true_expected_count, TensorType({ DT_FLOAT })) .OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT })) .REQUIRED_ATTR(num_true, Int) .REQUIRED_ATTR(num_sampled, Int) .REQUIRED_ATTR(unique, Bool) .REQUIRED_ATTR(range_max, Int) .ATTR(seed, Int, 0) .ATTR(seed2, Int, 0) .OP_END_FACTORY_REG(UniformCandidateSampler) /** *@brief Generates labels for candidate sampling with a learned unigram distribution. \n *@par Inputs: *true_classes: A "batch_size * num_true" matrix, in which each row contains the IDs of the "num_true" "target_classes" in the corresponding original label. * Input "true_classes" is a 2D matrix. \n *@par Attributes: *@li num_true: Number of true labels per context. *@li num_sampled: Number of candidates to randomly sample. *@li unique: If "unique" is true, samples with rejection, so that all sampled candidates in a batch are unique. This requires some approximation to estimate the post-rejection sampling probabilities. *@li range_max: The sampler will sample integers from the interval [0, range_max). *@li vocab_file: Each valid line in this file (which should have a CSV-like format) corresponds to a valid word ID. *IDs are in sequential order, starting from num_reserved_ids. *@li distortion: The distortion is used to skew the unigram probability distribution. Each weight is first raised to the distortion's power before adding to the internal unigram distribution. *@li num_reserved_ids: Optionally some reserved IDs can be added in the range [0, ..., num_reserved_ids) by the users. * One use case is that a special unknown word token is used as ID 0. *@li num_shards: A sampler can be used to sample from a subset of the original range. in order to speed up the whole computation through parallelism. *@li shard: A sampler can be used to sample from a subset of the original range in order to speed up the whole computation through parallelism. *@li unigrams: A list of unigram counts or probabilities, one per ID in sequential order. *@li seed: If either "seed" or "seed2" are set to be non-zero. *@li seed2: A second seed to avoid seed collision. \n *@par Outputs: *@li sampled_candidates: A vector of length "num_sampled", in which each element is the ID of a sampled candidate. *@li true_expected_count: A "batch_size * num_true" matrix, representing the number of times each candidate is expected to occur in a batch of sampled candidates. If "unique" is true, then this is a probability. *@li sampled_expected_count: A vector of length "num_sampled", for each sampled candidate representing the number of times the candidate is expected to occur in a batch of sampled candidates. If "unique" is true, then this is a probability. \n *@attention Constraints: * FixedUnigramCandidateSampler runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator FixedUnigramCandidateSampler. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(FixedUnigramCandidateSampler) .INPUT(true_classes, TensorType({ DT_INT64 })) .OUTPUT(sampled_candidates, TensorType({ DT_INT64 })) .OUTPUT(true_expected_count, TensorType({ DT_FLOAT })) .OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT })) .ATTR(num_true, Int, 0) .ATTR(num_sampled, Int, 0) .ATTR(unique, Bool, false) .ATTR(range_max, Int, 0) .ATTR(vocab_file, String, "") .ATTR(distortion, Float, 1.0) .ATTR(num_reserved_ids, Int, 0) .ATTR(num_shards, Int, 1) .ATTR(shard, Int, 0) .REQUIRED_ATTR(unigrams, ListFloat) .ATTR(seed, Int, 0) .ATTR(seed2, Int, 0) .OP_END_FACTORY_REG(FixedUnigramCandidateSampler) /** *@brief Generates labels for candidate sampling with a learned unigram distribution. \n *@par Inputs: *true_classes: A "batch_size * num_true" matrix, in which each row contains the IDs of the "num_true" "target_classes" in the corresponding original label. * Input "true_classes" is a 2D matrix. \n *@par Attributes: *@li num_true: Number of true labels per context. *@li num_sampled: Number of candidates to randomly sample. *@li unique: If "unique" is true, samples with rejection, so that all sampled candidates in a batch are unique. *This requires some approximation to estimate the post-rejection sampling probabilities. *@li range_max: The sampler will sample integers from the interval [0, range_max). *@li seed: If either "seed" or "seed2" are set to be non-zero. *@li seed2: A second seed to avoid seed collision. \n *@par Outputs: *@li sampled_candidates: A vector of length "num_sampled", in which each element is the ID of a sampled candidate. *@li true_expected_count: A "batch_size * num_true" matrix, representing the number of times each candidate is expected to occur in a batch of sampled candidates. *If "unique" is true, then this is a probability. *@li sampled_expected_count: A vector of length "num_sampled", for each sampled candidate representing the number of times the candidate is expected to occur in a batch of sampled candidates. *If "unique" is true, then this is a probability. \n *@attention Constraints: *LearnedUnigramCandidateSampler runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator LearnedUnigramCandidateSampler. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LearnedUnigramCandidateSampler) .INPUT(true_classes, TensorType({ DT_INT64 })) .OUTPUT(sampled_candidates, TensorType({ DT_INT64 })) .OUTPUT(true_expected_count, TensorType({ DT_FLOAT })) .OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT })) .REQUIRED_ATTR(num_true, Int) .REQUIRED_ATTR(num_sampled, Int) .REQUIRED_ATTR(unique, Bool) .REQUIRED_ATTR(range_max, Int) .ATTR(seed, Int, 0) .ATTR(seed2, Int, 0) .OP_END_FACTORY_REG(LearnedUnigramCandidateSampler) /** *@brief Generates labels for candidate sampling with a log-uniform distribution. \n *@par Inputs: *true_classes: A "batch_size * num_true" matrix, in which each row contains the IDs of the "num_true" "target_classes" in the corresponding original label. * Input "true_classes" is a 2D matrix. \n *@par Attributes: *@li num_true: Number of true labels per context. *@li num_sampled: Number of candidates to randomly sample. *@li unique: If "unique" is true, samples with rejection, so that all sampled candidates in a batch are unique. This requires some approximation to estimate the post-rejection sampling probabilities. *@li range_max: The sampler will sample integers from the interval [0, range_max). *@li seed: If either "seed" or "seed2" are set to be non-zero. *@li seed2: A second seed to avoid seed collision. \n *@par Outputs: *@li sampled_candidates: A vector of length "num_sampled", in which each element is the ID of a sampled candidate. *@li true_expected_count: A "batch_size * num_true" matrix, representing the number of times each candidate is expected to occur in a batch of sampled candidates. If "unique" is true, then this is a probability. *@li sampled_expected_count: A vector of length "num_sampled", for each sampled candidate representing the number of times the candidate is expected to occur in a batch of sampled candidates. *If "unique" is true, then this is a probability. \n *@attention Constraints: *LogUniformCandidateSampler runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator LogUniformCandidateSampler. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LogUniformCandidateSampler) .INPUT(true_classes, TensorType({ DT_INT64 })) .OUTPUT(sampled_candidates, TensorType({ DT_INT64 })) .OUTPUT(true_expected_count, TensorType({ DT_FLOAT })) .OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT })) .REQUIRED_ATTR(num_true, Int) .REQUIRED_ATTR(num_sampled, Int) .REQUIRED_ATTR(unique, Bool) .REQUIRED_ATTR(range_max, Int) .ATTR(seed, Int, 0) .ATTR(seed2, Int, 0) .OP_END_FACTORY_REG(LogUniformCandidateSampler) /** *@brief Generates labels for candidate sampling with a learned unigram distribution. \n *@par Inputs: *true_classes: A "batch_size * num_true" matrix, in which each row contains the IDs of the "num_true" "target_classes" in the corresponding original label. * Input "true_classes" is a 2D matrix. \n *@par Attributes: *@li num_true: Number of true labels per context. *@li num_sampled: Number of candidates to randomly sample. *@li unique: If "unique" is true, samples with rejection, so that all sampled candidates in a batch are unique. This requires some approximation to estimate the post-rejection sampling probabilities. *@li seed: If either "seed" or "seed2" are set to be non-zero. *@li seed2: A second seed to avoid seed collision. \n *@par Outputs: *@li sampled_candidates: A vector of length "num_sampled", in which each element is the ID of a sampled candidate. *@li true_expected_count: A "batch_size * num_true" matrix, representing the number of times each candidate is expected to occur in a batch of sampled candidates. *If "unique" is true, then this is a probability. *@li sampled_expected_count: A vector of length "num_sampled", for each sampled candidate representing the number of times the candidate is expected to occur in a batch of sampled candidates. If "unique" is true, then this is a probability. \n *@attention Constraints: *AllCandidateSampler runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator AllCandidateSampler. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(AllCandidateSampler) .INPUT(true_classes, TensorType({ DT_INT64 })) .OUTPUT(sampled_candidates, TensorType({ DT_INT64 })) .OUTPUT(true_expected_count, TensorType({ DT_FLOAT })) .OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT })) .REQUIRED_ATTR(num_true, Int) .REQUIRED_ATTR(num_sampled, Int) .REQUIRED_ATTR(unique, Bool) .ATTR(seed, Int, 0) .ATTR(seed2, Int, 0) .OP_END_FACTORY_REG(AllCandidateSampler) /** *@brief Computes the "ids" of the positions in "sampled_candidates" that match "true_labels". \n *@par Inputs: * @li Input "true_classes" is a 2D matrix. * @li true_classes: The "true_classes" output of UnpackSparseLabels. * @li sampled_candidates: The "sampled_candidates" output of CandidateSampler. \n *@par Attributes: *@li num_true: Number of true labels per context. *@li seed: If either "seed" or "seed2" are set to be non-zero. *@li seed2: A second seed to avoid seed collision. \n *@par Outputs: * @li indices: A vector of indices corresponding to rows of "true_candidates". * @li ids: A vector of IDs of positions in "sampled_candidates" that match a "true_label" for the row with the corresponding index in indices. * @li weights: A vector of the same length as "indices" and "ids", in which each element is -FLOAT_MAX. \n *@attention Constraints: *ComputeAccidentalHits runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator ComputeAccidentalHits. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ComputeAccidentalHits) .INPUT(true_classes, TensorType({ DT_INT64 })) .INPUT(sampled_candidates, TensorType({ DT_INT64 })) .OUTPUT(indices, TensorType({ DT_INT32 })) .OUTPUT(ids, TensorType({ DT_INT64 })) .OUTPUT(weights, TensorType({ DT_FLOAT })) .REQUIRED_ATTR(num_true, Int) .ATTR(seed, Int, 0) .ATTR(seed2, Int, 0) .OP_END_FACTORY_REG(ComputeAccidentalHits) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_