You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
292 lines
10 KiB
292 lines
10 KiB
# -*-coding:utf-8-*-
|
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from __future__ import print_function
|
|
|
|
import unittest
|
|
import numpy as np
|
|
from op_test import OpTest
|
|
import paddle.fluid.core as core
|
|
from paddle.fluid.op import Operator
|
|
import paddle.fluid.layers as layers
|
|
import paddle.fluid as fluid
|
|
import random
|
|
import six
|
|
from sys import version_info
|
|
|
|
|
|
def create_tdm_travel():
|
|
tree_travel = [[1, 3, 7, 14], [1, 3, 7, 15], [1, 3, 8, 16], [1, 3, 8, 17],
|
|
[1, 4, 9, 18], [1, 4, 9, 19], [1, 4, 10, 20],
|
|
[1, 4, 10, 21], [2, 5, 11, 22], [2, 5, 11, 23],
|
|
[2, 5, 12, 24], [2, 5, 12, 25], [2, 6, 13, 0]]
|
|
return tree_travel
|
|
|
|
|
|
def create_tdm_layer():
|
|
tree_layer = [[1, 2], [3, 4, 5, 6], [7, 8, 9, 10, 11, 12, 13],
|
|
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]]
|
|
return tree_layer
|
|
|
|
|
|
type_dict = {
|
|
"int32": int(core.VarDesc.VarType.INT32),
|
|
"int64": int(core.VarDesc.VarType.INT64)
|
|
}
|
|
|
|
|
|
class TestTDMSamplerOp(OpTest):
|
|
def setUp(self):
|
|
self.__class__.op_type = "tdm_sampler"
|
|
self.config()
|
|
|
|
self.tree_travel = create_tdm_travel()
|
|
self.tree_layer = create_tdm_layer()
|
|
|
|
output_0 = self.x_shape[0]
|
|
output_1 = len(self.neg_samples_num_list) + \
|
|
np.sum(self.neg_samples_num_list)
|
|
self.output_shape = (output_0, output_1)
|
|
self.layer_sample_nums = [1 + i for i in self.neg_samples_num_list]
|
|
|
|
layer_node_num_list = [len(i) for i in self.tree_layer]
|
|
tree_layer_offset_lod = [0]
|
|
tree_layer_flat = []
|
|
node_nums = 0
|
|
for layer_idx, layer_node in enumerate(layer_node_num_list):
|
|
tree_layer_flat += self.tree_layer[layer_idx]
|
|
node_nums += layer_node
|
|
tree_layer_offset_lod.append(node_nums)
|
|
|
|
travel_np = np.array(self.tree_travel).astype(self.tree_dtype)
|
|
layer_np = np.array(tree_layer_flat).astype(self.tree_dtype)
|
|
layer_np = layer_np.reshape([-1, 1])
|
|
|
|
self.x_np = np.random.randint(
|
|
low=0, high=13, size=self.x_shape).astype(self.x_type)
|
|
|
|
out = np.random.random(self.output_shape).astype(self.out_dtype)
|
|
label = np.random.random(self.output_shape).astype(self.out_dtype)
|
|
mask = np.random.random(self.output_shape).astype(self.out_dtype)
|
|
|
|
self.attrs = {
|
|
'neg_samples_num_list': self.neg_samples_num_list,
|
|
'output_positive': True,
|
|
'layer_offset_lod': tree_layer_offset_lod,
|
|
'seed': 0,
|
|
'dtype': type_dict[self.out_dtype]
|
|
}
|
|
self.inputs = {'X': self.x_np, 'Travel': travel_np, 'Layer': layer_np}
|
|
self.outputs = {'Out': out, 'Labels': label, 'Mask': mask}
|
|
|
|
def config(self):
|
|
"""set test shape & type"""
|
|
self.neg_samples_num_list = [0, 0, 0, 0]
|
|
self.x_shape = (10, 1)
|
|
self.x_type = 'int32'
|
|
self.tree_dtype = 'int32'
|
|
self.out_dtype = 'int32'
|
|
|
|
def test_check_output(self):
|
|
places = self._get_places()
|
|
for place in places:
|
|
outs, fetch_list = self._calc_output(place)
|
|
self.out = [np.array(out) for out in outs]
|
|
|
|
x_res = self.out[fetch_list.index('Out')]
|
|
label_res = self.out[fetch_list.index('Labels')]
|
|
mask_res = self.out[fetch_list.index('Mask')]
|
|
|
|
# check dtype
|
|
if self.out_dtype == 'int32':
|
|
assert x_res.dtype == np.int32
|
|
assert label_res.dtype == np.int32
|
|
assert mask_res.dtype == np.int32
|
|
elif self.out_dtype == 'int64':
|
|
assert x_res.dtype == np.int64
|
|
assert label_res.dtype == np.int64
|
|
assert mask_res.dtype == np.int64
|
|
|
|
x_res = x_res.reshape(self.output_shape)
|
|
label_res = label_res.reshape(self.output_shape)
|
|
mask_res = mask_res.reshape(self.output_shape)
|
|
|
|
layer_nums = len(self.neg_samples_num_list)
|
|
for batch_ids, x_batch in enumerate(x_res):
|
|
start_offset = 0
|
|
positive_travel = []
|
|
for layer_idx in range(layer_nums):
|
|
end_offset = start_offset + self.layer_sample_nums[layer_idx]
|
|
sampling_res = x_batch[start_offset:end_offset]
|
|
sampling_res_list = sampling_res.tolist()
|
|
positive_travel.append(sampling_res_list[0])
|
|
|
|
label_sampling_res = label_res[batch_ids][start_offset:
|
|
end_offset]
|
|
mask_sampling_res = mask_res[batch_ids][start_offset:end_offset]
|
|
|
|
# check unique
|
|
if sampling_res_list[0] != 0:
|
|
assert len(set(sampling_res_list)) == len(
|
|
sampling_res_list
|
|
), "len(set(sampling_res_list)): {}, len(sampling_res_list): {} , sample_res: {}, label_res:{}, mask_res: {}".format(
|
|
len(set(sampling_res_list)),
|
|
len(sampling_res_list), sampling_res,
|
|
label_sampling_res, mask_sampling_res)
|
|
# check legal
|
|
layer_node = self.tree_layer[layer_idx]
|
|
layer_node.append(0)
|
|
for sample in sampling_res_list:
|
|
assert (
|
|
sample in layer_node
|
|
), "sample: {}, layer_node: {} , sample_res: {}, label_res: {}, mask_res:{}".format(
|
|
sample, layer_node, sampling_res, label_sampling_res,
|
|
mask_sampling_res)
|
|
|
|
# check label
|
|
label_flag = 1
|
|
if sampling_res[0] == 0:
|
|
label_flag = 0
|
|
assert label_sampling_res[0] == label_flag
|
|
# check mask
|
|
padding_index = np.where(sampling_res == 0)
|
|
assert not np.sum(
|
|
mask_sampling_res[padding_index]
|
|
), "np.sum(mask_sampling_res[padding_index]): {} ".format(
|
|
np.sum(mask_sampling_res[padding_index]))
|
|
start_offset = end_offset
|
|
# check travel legal
|
|
assert self.tree_travel[int(self.x_np[
|
|
batch_ids])] == positive_travel
|
|
|
|
|
|
class TestCase1(TestTDMSamplerOp):
|
|
def config(self):
|
|
"""test input int64"""
|
|
self.neg_samples_num_list = [0, 0, 0, 0]
|
|
self.x_shape = (10, 1)
|
|
self.x_type = 'int64'
|
|
self.tree_dtype = 'int64'
|
|
self.out_dtype = 'int32'
|
|
|
|
|
|
class TestCase2(TestTDMSamplerOp):
|
|
def config(self):
|
|
"""test dtype int64"""
|
|
self.neg_samples_num_list = [0, 0, 0, 0]
|
|
self.x_shape = (10, 1)
|
|
self.x_type = 'int32'
|
|
self.tree_dtype = 'int32'
|
|
self.out_dtype = 'int64'
|
|
|
|
|
|
class TestCase3(TestTDMSamplerOp):
|
|
def config(self):
|
|
"""test all dtype int64"""
|
|
self.neg_samples_num_list = [0, 0, 0, 0]
|
|
self.x_shape = (10, 1)
|
|
self.x_type = 'int64'
|
|
self.tree_dtype = 'int64'
|
|
self.out_dtype = 'int64'
|
|
|
|
|
|
class TestCase4(TestTDMSamplerOp):
|
|
def config(self):
|
|
"""test one neg"""
|
|
self.neg_samples_num_list = [1, 1, 1, 1]
|
|
self.x_shape = (10, 1)
|
|
self.x_type = 'int64'
|
|
self.tree_dtype = 'int32'
|
|
self.out_dtype = 'int64'
|
|
|
|
|
|
class TestCase5(TestTDMSamplerOp):
|
|
def config(self):
|
|
"""test normal neg"""
|
|
self.neg_samples_num_list = [1, 2, 3, 4]
|
|
self.x_shape = (10, 1)
|
|
self.x_type = 'int64'
|
|
self.tree_dtype = 'int32'
|
|
self.out_dtype = 'int64'
|
|
|
|
|
|
class TestCase6(TestTDMSamplerOp):
|
|
def config(self):
|
|
"""test huge batchsize"""
|
|
self.neg_samples_num_list = [1, 2, 3, 4]
|
|
self.x_shape = (100, 1)
|
|
self.x_type = 'int64'
|
|
self.tree_dtype = 'int32'
|
|
self.out_dtype = 'int64'
|
|
|
|
|
|
class TestCase7(TestTDMSamplerOp):
|
|
def config(self):
|
|
"""test full neg"""
|
|
self.neg_samples_num_list = [1, 3, 6, 11]
|
|
self.x_shape = (10, 1)
|
|
self.x_type = 'int64'
|
|
self.tree_dtype = 'int32'
|
|
self.out_dtype = 'int64'
|
|
|
|
|
|
class TestTDMSamplerShape(unittest.TestCase):
|
|
def test_shape(self):
|
|
x = fluid.layers.data(name='x', shape=[1], dtype='int32', lod_level=1)
|
|
tdm_tree_travel = create_tdm_travel()
|
|
tdm_tree_layer = create_tdm_layer()
|
|
layer_node_num_list = [len(i) for i in tdm_tree_layer]
|
|
|
|
tree_layer_flat = []
|
|
for layer_idx, layer_node in enumerate(layer_node_num_list):
|
|
tree_layer_flat += tdm_tree_layer[layer_idx]
|
|
|
|
travel_array = np.array(tdm_tree_travel).astype('int32')
|
|
layer_array = np.array(tree_layer_flat).astype('int32')
|
|
|
|
neg_samples_num_list = [1, 2, 3, 4]
|
|
leaf_node_num = 13
|
|
|
|
sample, label, mask = fluid.contrib.layers.tdm_sampler(
|
|
x,
|
|
neg_samples_num_list,
|
|
layer_node_num_list,
|
|
leaf_node_num,
|
|
tree_travel_attr=fluid.ParamAttr(
|
|
initializer=fluid.initializer.NumpyArrayInitializer(
|
|
travel_array)),
|
|
tree_layer_attr=fluid.ParamAttr(
|
|
initializer=fluid.initializer.NumpyArrayInitializer(
|
|
layer_array)),
|
|
output_positive=True,
|
|
output_list=True,
|
|
seed=0,
|
|
tree_dtype='int32',
|
|
dtype='int32')
|
|
|
|
place = fluid.CPUPlace()
|
|
exe = fluid.Executor(place=place)
|
|
exe.run(fluid.default_startup_program())
|
|
|
|
feed = {
|
|
'x': np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9],
|
|
[10], [11], [12]]).astype('int32')
|
|
}
|
|
exe.run(feed=feed)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
unittest.main()
|