Move hapi to python/paddle root dir. (#26442)

* Move hapi from paddle/incubate to paddle

* Remove vision/datasets/utils.py and clean code

* Add sample code for conll05

* Print full path when saving model

* Fix sample code after parameter_list of SGD is changed to parameters

* Fix bug in wmt16 dataset
Committed by qingqing01 (commit f7fb4c2212, parent 9a1ea9b45d).

@@ -95,7 +95,6 @@ if (WITH_TESTING)
add_subdirectory(paddle/fluid/tests)
add_subdirectory(paddle/fluid/contrib/tests)
add_subdirectory(paddle/fluid/contrib/slim/tests)
add_subdirectory(paddle/incubate/hapi/tests)
endif()
install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR}
DESTINATION opt/paddle/share/wheels

@@ -256,8 +256,6 @@ from .device import get_device
# from .tensor.tensor import LoDTensor #DEFINE_ALIAS
# from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS
from . import incubate
from .incubate import hapi
from .fluid.dygraph.base import enable_dygraph as disable_static #DEFINE_ALIAS
from .fluid.dygraph.base import disable_dygraph as enable_static #DEFINE_ALIAS
from .fluid.framework import in_dygraph_mode as in_dynamic_mode #DEFINE_ALIAS
@@ -265,3 +263,9 @@ from .fluid.dygraph.base import no_grad #DEFINE_ALIAS
from . import jit
from . import static
# high-level api
from .hapi import Model
from .hapi import callbacks
import paddle.text
import paddle.vision

@@ -196,3 +196,14 @@ def cluster_files_reader(files_pattern,
yield line
return reader
def _check_exists_and_download(path, url, md5, module_name, download=True):
if path and os.path.exists(path):
return path
if download:
return paddle.dataset.common.download(url, module_name, md5)
else:
raise ValueError('{} not exists and auto download disabled'.format(
path))
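The new `_check_exists_and_download` helper above is what the relocated dataset loaders can call before reading files. A minimal sketch of the calling pattern (the URL, MD5, and module name below are placeholders for illustration, not real values):

import paddle

# hypothetical values, for illustration only
DEMO_URL = 'https://example.com/conll05/train.tar.gz'
DEMO_MD5 = 'd41d8cd98f00b204e9800998ecf8427e'

def _load_demo(path=None, download=True):
    # returns `path` when it already exists; otherwise downloads into
    # paddle's dataset cache, or raises ValueError if download=False
    return _check_exists_and_download(path, DEMO_URL, DEMO_MD5,
                                      'demo', download)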

@@ -36,7 +36,7 @@ import tarfile
import gzip
from collections import defaultdict
import paddle.dataset.common
import paddle
import paddle.compat as cpt
__all__ = [

@@ -13,9 +13,11 @@
# limitations under the License.
# TODO: define the functions to manipulate devices
import re
from paddle.fluid import core
from paddle.fluid import framework
import re
from paddle.fluid.dygraph.parallel import ParallelEnv
__all__ = [
'get_cudnn_version',
@@ -81,8 +83,8 @@ def set_device(device):
.. code-block:: python
import paddle
paddle.enable_imperative()
paddle.fluid.dygraph.set_device("gpu:0")
paddle.disable_static()
paddle.set_device("cpu")
x1 = paddle.ones(name='x1', shape=[1, 2], dtype='int32')
x2 = paddle.zeros(name='x2', shape=[1, 2], dtype='int32')
data = paddle.stack([x1,x2], axis=1)
@@ -90,18 +92,28 @@ def set_device(device):
lower_device = device.lower()
if lower_device == 'cpu':
place = core.CPUPlace()
framework._set_expected_place(place)
elif lower_device == 'gpu':
if not core.is_compiled_with_cuda():
raise ValueError(
"The device should not be 'gpu', " \
"since PaddlePaddle is not compiled with CUDA")
place = core.CUDAPlace(ParallelEnv().dev_id)
else:
avaliable_device = ((lower_device == 'cpu') or
re.match(r'gpu:\d+', lower_device))
available_device = re.match(r'gpu:\d+', lower_device)
if not available_device:
raise ValueError(
"The device must be a string which is like 'cpu' or 'gpu:0'")
"The device must be a string which is like 'cpu', 'gpu' or 'gpu:0'"
)
if not core.is_compiled_with_cuda():
raise ValueError(
"The device should not be {}, since PaddlePaddle is " \
"not compiled with CUDA".format(avaliable_device))
device_info_list = device.split(':', 1)
device_id = device_info_list[1]
device_id = int(device_id)
place = core.CUDAPlace(device_id)
framework._set_expected_place(place)
framework._set_expected_place(place)
return place
def get_device():
@@ -116,8 +128,8 @@ def get_device():
.. code-block:: python
import paddle
paddle.enable_imperative()
device = paddle.fluid.dygraph.get_device()
paddle.disable_static()
device = paddle.get_device()
"""
device = ''
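With this hunk, device selection moves from the old hapi helper to `paddle.set_device` / `paddle.get_device`. A minimal usage sketch based on the code above (the 'gpu' and 'gpu:N' forms require a CUDA build; otherwise a ValueError is raised):

import paddle

paddle.disable_static()
place = paddle.set_device('cpu')   # also accepts 'gpu' or 'gpu:0' on CUDA builds
print(paddle.get_device())         # prints 'cpu', or e.g. 'gpu:0'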

@@ -69,7 +69,7 @@ class ImperativeQuantAware(object):
from paddle.fluid.contrib.slim.quantization \
import ImperativeQuantAware
from paddle.incubate.hapi.vision.models \
from paddle.vision.models \
import resnet
model = resnet.resnet50(pretrained=True)

@@ -16,10 +16,12 @@ from __future__ import print_function
from __future__ import division
import numpy as np
import math
from .sampler import Sampler, SequenceSampler, RandomSampler
from .dataset import Dataset, IterableDataset
__all__ = ["BatchSampler"]
__all__ = ["BatchSampler", "DistributedBatchSampler"]
class BatchSampler(Sampler):
@@ -158,3 +160,185 @@ class _InfiniteIterableSampler(object):
def __iter__(self):
while True:
yield [None] * self.batch_size
class DistributedBatchSampler(BatchSampler):
"""Sampler that restricts data loading to a subset of the dataset.
In such case, each process can pass a DistributedBatchSampler instance
as a DataLoader sampler, and load a subset of the original dataset that
is exclusive to it.
.. note::
Dataset is assumed to be of constant size.
Args:
dataset(paddle.io.Dataset): this could be a `paddle.io.Dataset` implement
or other python object which implemented
`__len__` for BatchSampler to get sample
number of data source.
batch_size(int): sample indice number in a mini-batch indices.
num_replicas(int, optional): porcess number in distributed training.
If :attr:`num_replicas` is None, :attr:`num_replicas` will be
retrieved from :code:`paddle.fluid.dygraph.parallel.ParallenEnv`.
Default None.
rank(int, optional): the rank of the current process among :attr:`num_replicas`
processes. If :attr:`rank` is None, :attr:`rank` is retrieved from
:code:`paddle.fluid.dygraph.parallel.ParallenEnv`. Default None.
shuffle(bool): whther to shuffle indices order before genrating
batch indices. Default False.
drop_last(bool): whether drop the last incomplete batch dataset size
is not divisible by the batch size. Default False
Examples:
.. code-block:: python
import numpy as np
from paddle.io import Dataset, DistributedBatchSampler
# init with dataset
class RandomDataset(Dataset):
def __init__(self, num_samples):
self.num_samples = num_samples
def __getitem__(self, idx):
image = np.random.random([784]).astype('float32')
label = np.random.randint(0, 9, (1, )).astype('int64')
return image, label
def __len__(self):
return self.num_samples
dataset = RandomDataset(100)
sampler = DistributedBatchSampler(dataset, batch_size=64)
for data in sampler:
# do something
break
"""
def __init__(self,
dataset,
batch_size,
num_replicas=None,
rank=None,
shuffle=False,
drop_last=False):
self.dataset = dataset
assert isinstance(batch_size, int) and batch_size > 0, \
"batch_size should be a positive integer"
self.batch_size = batch_size
assert isinstance(shuffle, bool), \
"shuffle should be a boolean value"
self.shuffle = shuffle
assert isinstance(drop_last, bool), \
"drop_last should be a boolean number"
from paddle.fluid.dygraph.parallel import ParallelEnv
if num_replicas is not None:
assert isinstance(num_replicas, int) and num_replicas > 0, \
"num_replicas should be a positive integer"
self.nranks = num_replicas
else:
self.nranks = ParallelEnv().nranks
if rank is not None:
assert isinstance(rank, int) and rank >= 0, \
"rank should be a non-negative integer"
self.local_rank = rank
else:
self.local_rank = ParallelEnv().local_rank
self.drop_last = drop_last
self.epoch = 0
self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.nranks))
self.total_size = self.num_samples * self.nranks
def __iter__(self):
num_samples = len(self.dataset)
indices = np.arange(num_samples).tolist()
indices += indices[:(self.total_size - len(indices))]
assert len(indices) == self.total_size
if self.shuffle:
np.random.RandomState(self.epoch).shuffle(indices)
self.epoch += 1
# subsample
def _get_indices_by_batch_size(indices):
subsampled_indices = []
last_batch_size = self.total_size % (self.batch_size * self.nranks)
assert last_batch_size % self.nranks == 0
last_local_batch_size = last_batch_size // self.nranks
for i in range(self.local_rank * self.batch_size,
len(indices) - last_batch_size,
self.batch_size * self.nranks):
subsampled_indices.extend(indices[i:i + self.batch_size])
indices = indices[len(indices) - last_batch_size:]
subsampled_indices.extend(indices[
self.local_rank * last_local_batch_size:(
self.local_rank + 1) * last_local_batch_size])
return subsampled_indices
if self.nranks > 1:
indices = _get_indices_by_batch_size(indices)
assert len(indices) == self.num_samples
_sample_iter = iter(indices)
batch_indices = []
for idx in _sample_iter:
batch_indices.append(idx)
if len(batch_indices) == self.batch_size:
yield batch_indices
batch_indices = []
if not self.drop_last and len(batch_indices) > 0:
yield batch_indices
def __len__(self):
num_samples = self.num_samples
num_samples += int(not self.drop_last) * (self.batch_size - 1)
return num_samples // self.batch_size
def set_epoch(self, epoch):
"""
Sets the epoch number. When :attr:`shuffle=True`, this number is used
as the seed of the random ordering. By default users need not set it;
all replicas (workers) then use a different random ordering for each
epoch. If the same number is set at each epoch, this sampler yields
the same ordering in every epoch.
Arguments:
epoch (int): Epoch number.
Examples:
.. code-block:: python
import numpy as np
from paddle.io import Dataset, DistributedBatchSampler
# init with dataset
class RandomDataset(Dataset):
def __init__(self, num_samples):
self.num_samples = num_samples
def __getitem__(self, idx):
image = np.random.random([784]).astype('float32')
label = np.random.randint(0, 9, (1, )).astype('int64')
return image, label
def __len__(self):
return self.num_samples
dataset = RandomDataset(100)
sampler = DistributedBatchSampler(dataset, batch_size=64)
for epoch in range(10):
sampler.set_epoch(epoch)
"""
self.epoch = epoch
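Two details of the sampler are worth spelling out: `__len__` is a ceiling division (with `drop_last=False` the count is padded so a final short batch is included), and `_get_indices_by_batch_size` hands out whole batches round-robin across ranks, then splits the final short batch evenly. A standalone sketch of the same index math for two replicas (plain Python, no Paddle needed; checked against the logic above):

import math

# 10 samples, batch_size=4, nranks=2; local_rank selects the shard
batch_size, nranks, local_rank = 4, 2, 0
num_samples = math.ceil(10 / nranks)            # 5 per replica
total_size = num_samples * nranks               # 10 (already a multiple of nranks)
indices = list(range(total_size))

last = total_size % (batch_size * nranks)       # 2 leftover indices
last_local = last // nranks                     # 1 per replica
sub = []
for i in range(local_rank * batch_size, total_size - last,
               batch_size * nranks):
    sub.extend(indices[i:i + batch_size])       # whole batches, round-robin
tail = indices[total_size - last:]
sub.extend(tail[local_rank * last_local:(local_rank + 1) * last_local])
print(sub)  # rank 0 -> [0, 1, 2, 3, 8]; rank 1 -> [4, 5, 6, 7, 9]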

@@ -18,6 +18,7 @@ import unittest
import paddle.fluid as fluid
from paddle.io import BatchSampler, Dataset, Sampler, SequenceSampler, RandomSampler
from paddle.io import DistributedBatchSampler
class RandomDataset(Dataset):
@@ -194,5 +195,15 @@ class TestBatchSamplerWithSamplerShuffle(unittest.TestCase):
pass
class TestDistributedBatchSamplerWithSampler(TestBatchSampler):
def init_batch_sampler(self):
dataset = RandomDataset(1000, 10)
bs = DistributedBatchSampler(
dataset=dataset,
batch_size=self.batch_size,
drop_last=self.drop_last)
return bs
if __name__ == '__main__':
unittest.main()

@@ -13,34 +13,15 @@
# limitations under the License.
from . import logger
from . import progressbar
from . import callbacks
from . import download
from . import model
from .model import *
from . import datasets
from . import distributed
from . import vision
from . import text
from . import utils
from . import device
from .device import *
from .dygraph_layer_patch import monkey_patch_layer
logger.setup_logger()
__all__ = [
'callbacks',
'datasets',
'distributed',
'download',
'vision',
'text',
'utils',
] + model.__all__ + device.__all__
__all__ = ['callbacks'] + model.__all__
monkey_patch_layer()

@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from paddle.fluid.dygraph.parallel import ParallelEnv
from .progressbar import ProgressBar
@@ -117,10 +119,10 @@ class Callback(object):
.. code-block:: python
from paddle.incubate.hapi.callbacks import Callback
import paddle
# build a simple model checkpoint callback
class ModelCheckpoint(Callback):
class ModelCheckpoint(paddle.callbacks.Callback):
def __init__(self, save_freq=1, save_dir=None):
self.save_freq = save_freq
self.save_dir = save_dir
@@ -147,12 +149,12 @@ class Callback(object):
- 'verbose': an integer. Verbose mode is 0, 1 or 2.
0 = silent, 1 = progress bar, 2 = one line per epoch.
- 'metrics': a list of str. Names of metrics, including 'loss'
and the names of hapi.Metric.
and the names of paddle.metric.Metric.
"""
self.params = params
def set_model(self, model):
"""model is instance of hapi.Model.
"""model is instance of paddle.Model.
"""
self.model = model
@@ -168,7 +170,7 @@
Args:
logs (dict): The logs is a dict or None. The keys of logs
passed by hapi.Model contains 'loss', metric names and
passed by paddle.Model contains 'loss', metric names and
`batch_size`.
"""
@@ -177,10 +179,10 @@
Args:
logs (dict): The logs is a dict or None. The keys of logs
passed by hapi.Model contains 'steps' and 'metrics',
passed by paddle.Model contains 'steps' and 'metrics',
The `steps` is number of total steps of validation dataset.
The `metrics` is a list of str including 'loss' and the names
of hapi.Metric.
of paddle.metric.Metric.
"""
def on_eval_end(self, logs=None):
@@ -188,7 +190,7 @@
Args:
logs (dict): The logs is a dict or None. The `logs` passed by
hapi.Model is a dict contains 'loss', metrics and 'batch_size'
paddle.Model is a dict contains 'loss', metrics and 'batch_size'
of last batch of validation dataset.
"""
@@ -212,7 +214,7 @@
Args:
epoch (int): The index of epoch.
logs (dict): The logs is a dict or None. The `logs` passed by
hapi.Model is None.
paddle.Model is None.
"""
def on_epoch_end(self, epoch, logs=None):
@@ -221,7 +223,7 @@
Args:
epoch (int): The index of epoch.
logs (dict): The logs is a dict or None. The `logs` passed by
hapi.Model is a dict, contains 'loss', metrics and 'batch_size'
paddle.Model is a dict, contains 'loss', metrics and 'batch_size'
of last batch.
"""
@@ -231,7 +233,7 @@
Args:
step (int): The index of step (or iteration).
logs (dict): The logs is a dict or None. The `logs` passed by
hapi.Model is empty.
paddle.Model is empty.
"""
def on_train_batch_end(self, step, logs=None):
@@ -240,7 +242,7 @@
Args:
step (int): The index of step (or iteration).
logs (dict): The logs is a dict or None. The `logs` passed by
hapi.Model is a dict, contains 'loss', metrics and 'batch_size'
paddle.Model is a dict, contains 'loss', metrics and 'batch_size'
of current batch.
"""
@@ -250,7 +252,7 @@
Args:
step (int): The index of step (or iteration).
logs (dict): The logs is a dict or None. The `logs` passed by
hapi.Model is empty.
paddle.Model is empty.
"""
def on_eval_batch_end(self, step, logs=None):
@@ -259,7 +261,7 @@
Args:
step (int): The index of step (or iteration).
logs (dict): The logs is a dict or None. The `logs` passed by
hapi.Model is a dict, contains 'loss', metrics and 'batch_size'
paddle.Model is a dict, contains 'loss', metrics and 'batch_size'
of current batch.
"""
@@ -292,23 +294,22 @@ class ProgBarLogger(Callback):
.. code-block:: python
import paddle
import paddle.fluid as fluid
import paddle.incubate.hapi as hapi
from paddle.static import InputSpec
inputs = [hapi.Input([-1, 1, 28, 28], 'float32', 'image')]
labels = [hapi.Input([None, 1], 'int64', 'label')]
inputs = [InputSpec([-1, 1, 28, 28], 'float32', 'image')]
labels = [InputSpec([None, 1], 'int64', 'label')]
train_dataset = hapi.datasets.MNIST(mode='train')
train_dataset = paddle.vision.datasets.MNIST(mode='train')
model = hapi.Model(hapi.vision.LeNet(classifier_activation=None),
model = paddle.Model(paddle.vision.LeNet(classifier_activation=None),
inputs, labels)
optim = fluid.optimizer.Adam(0.001)
optim = paddle.optimizer.Adam(0.001)
model.prepare(optimizer=optim,
loss=paddle.nn.CrossEntropyLoss(),
metrics=paddle.metric.Accuracy())
callback = hapi.callbacks.ProgBarLogger(log_freq=10)
callback = paddle.callbacks.ProgBarLogger(log_freq=10)
model.fit(train_dataset, batch_size=64, callbacks=callback)
"""
@@ -428,23 +429,22 @@ class ModelCheckpoint(Callback):
.. code-block:: python
import paddle
import paddle.fluid as fluid
import paddle.incubate.hapi as hapi
from paddle.static import InputSpec
inputs = [hapi.Input([-1, 1, 28, 28], 'float32', 'image')]
labels = [hapi.Input([None, 1], 'int64', 'label')]
inputs = [InputSpec([-1, 1, 28, 28], 'float32', 'image')]
labels = [InputSpec([None, 1], 'int64', 'label')]
train_dataset = hapi.datasets.MNIST(mode='train')
train_dataset = paddle.vision.datasets.MNIST(mode='train')
model = hapi.Model(hapi.vision.LeNet(classifier_activation=None),
model = paddle.Model(paddle.vision.LeNet(classifier_activation=None),
inputs, labels)
optim = fluid.optimizer.Adam(0.001)
optim = paddle.optimizer.Adam(0.001)
model.prepare(optimizer=optim,
loss=paddle.nn.CrossEntropyLoss(),
metrics=paddle.metric.Accuracy())
callback = hapi.callbacks.ModelCheckpoint(save_dir='./temp')
callback = paddle.callbacks.ModelCheckpoint(save_dir='./temp')
model.fit(train_dataset, batch_size=64, callbacks=callback)
"""
@@ -461,11 +461,11 @@ class ModelCheckpoint(Callback):
def on_epoch_end(self, epoch, logs=None):
if self._is_save() and self.epoch % self.save_freq == 0:
path = '{}/{}'.format(self.save_dir, epoch)
print('save checkpoint at {}'.format(path))
print('save checkpoint at {}'.format(os.path.abspath(path)))
self.model.save(path)
def on_train_end(self, logs=None):
if self._is_save():
path = '{}/final'.format(self.save_dir)
print('save checkpoint at {}'.format(path))
print('save checkpoint at {}'.format(os.path.abspath(path)))
self.model.save(path)

@@ -16,8 +16,7 @@ import warnings
import paddle.fluid as fluid
from paddle.fluid.framework import in_dygraph_mode
from .device import _get_device
from paddle.fluid.framework import _current_expected_place as _get_device
def monkey_patch_layer():

@@ -66,6 +66,7 @@ class ProgressBar(object):
return terminal_size(80, 24)
terminal_width, _ = get_terminal_size()
terminal_width = terminal_width if terminal_width > 0 else 80
max_width = min(int(terminal_width * 0.6), terminal_width - 50)
return max_width

@@ -12,10 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from . import hapi
__all__ = []
__all__ += hapi.__all__
__all__ += ["reader"]
from ..fluid.contrib import reader

@@ -1,66 +0,0 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
__all__ = ['set_device', ]
# TODO(qingqing01): remove or refine _global_device, set_device and get_device
# after the core framework supports these functions.
_global_device = None
def set_device(device):
"""
Args:
device (str): specify device type, 'cpu' or 'gpu'.
Returns:
fluid.CUDAPlace or fluid.CPUPlace: Created GPU or CPU place.
Examples:
.. code-block:: python
import paddle.incubate.hapi as hapi
input = hapi.set_device('gpu')
"""
assert isinstance(device, six.string_types) and device.lower() in ['cpu', 'gpu'], \
"Expected device in ['cpu', 'gpu'], but got {}".format(device)
device = fluid.CUDAPlace(ParallelEnv().dev_id) \
if device.lower() == 'gpu' and fluid.is_compiled_with_cuda() \
else fluid.CPUPlace()
global _global_device
_global_device = device
return device
def _get_device():
"""
Return global device.
"""
if _global_device is not None:
device = _global_device
else:
if fluid.is_compiled_with_cuda():
device = fluid.CUDAPlace(ParallelEnv().dev_id)
else:
device = fluid.CPUPlace()
return device


@@ -1,46 +0,0 @@
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py")
string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}")
foreach(TEST_OP ${DIST_TEST_OPS})
list(REMOVE_ITEM TEST_OPS ${TEST_OP})
endforeach()
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
set_tests_properties(test_dataset_imdb PROPERTIES TIMEOUT 150)
function(py_dist_test TARGET_NAME)
if(WITH_TESTING)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_dist_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(WITH_COVERAGE AND WITH_GPU AND WITH_NCCL AND NOT WIN32)
add_test(NAME ${TARGET_NAME}
COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
FLAGS_cpu_deterministic=true NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1
PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_dist_test_ENVS}
COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data
${PYTHON_EXECUTABLE} -u ${py_dist_test_SRCS} ${py_dist_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
# No unit test should exceed 10 minutes.
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=DIST")
endif()
endif()
endfunction()
foreach(src ${DIST_TEST_OPS})
message(STATUS ${src})
py_dist_test(${src} SRCS ${src}.py)
endforeach()

@@ -1,124 +0,0 @@
# copyright (c) 2020 paddlepaddle authors. all rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import unittest
import numpy as np
import shutil
import tempfile
from paddle import fluid
from paddle.nn import Conv2d, Pool2D, Linear, ReLU, Sequential, Softmax
from paddle.incubate.hapi.utils import uncombined_weight_to_state_dict
class LeNetDygraph(fluid.dygraph.Layer):
def __init__(self, num_classes=10, classifier_activation='softmax'):
super(LeNetDygraph, self).__init__()
self.num_classes = num_classes
self.features = Sequential(
Conv2d(
1, 6, 3, stride=1, padding=1),
ReLU(),
Pool2D(2, 'max', 2),
Conv2d(
6, 16, 5, stride=1, padding=0),
ReLU(),
Pool2D(2, 'max', 2))
if num_classes > 0:
self.fc = Sequential(
Linear(400, 120), Linear(120, 84), Linear(84, 10),
Softmax()) #Todo: accept any activation
def forward(self, inputs):
x = self.features(inputs)
if self.num_classes > 0:
x = fluid.layers.flatten(x, 1)
x = self.fc(x)
return x
class TestUncombinedWeight2StateDict(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.save_dir = tempfile.mkdtemp()
@classmethod
def tearDownClass(cls):
shutil.rmtree(cls.save_dir)
def test_infer(self):
start_prog = fluid.Program()
train_prog = fluid.Program()
x = fluid.data(name='x', shape=[None, 1, 28, 28], dtype='float32')
with fluid.program_guard(train_prog, start_prog):
with fluid.unique_name.guard():
x = fluid.data(
name='x', shape=[None, 1, 28, 28], dtype='float32')
model = LeNetDygraph()
output = model.forward(x)
excutor = fluid.Executor()
excutor.run(start_prog)
test_prog = train_prog.clone(for_test=True)
fluid.io.save_params(excutor, self.save_dir, test_prog)
rand_x = np.random.rand(1, 1, 28, 28).astype('float32')
out = excutor.run(program=test_prog,
feed={'x': rand_x},
fetch_list=[output.name],
return_numpy=True)
state_dict = uncombined_weight_to_state_dict(self.save_dir)
key2key_dict = {
'features.0.weight': 'conv2d_0.w_0',
'features.0.bias': 'conv2d_0.b_0',
'features.3.weight': 'conv2d_1.w_0',
'features.3.bias': 'conv2d_1.b_0',
'fc.0.weight': 'linear_0.w_0',
'fc.0.bias': 'linear_0.b_0',
'fc.1.weight': 'linear_1.w_0',
'fc.1.bias': 'linear_1.b_0',
'fc.2.weight': 'linear_2.w_0',
'fc.2.bias': 'linear_2.b_0'
}
fluid.enable_imperative()
dygraph_model = LeNetDygraph()
converted_state_dict = dygraph_model.state_dict()
for k1, k2 in key2key_dict.items():
converted_state_dict[k1] = state_dict[k2]
dygraph_model.set_dict(converted_state_dict)
dygraph_model.eval()
dy_out = dygraph_model(fluid.dygraph.to_variable(rand_x))
np.testing.assert_allclose(dy_out.numpy(), out[0], atol=1e-5)
if __name__ == '__main__':
unittest.main()

@@ -1,221 +0,0 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import inspect
import numpy as np
from collections import OrderedDict
from paddle import fluid
from paddle.fluid.framework import Variable
from paddle.fluid.executor import global_scope
__all__ = ['uncombined_weight_to_state_dict']
def uncombined_weight_to_state_dict(weight_dir):
"""
Convert uncombined weights, as saved by `fluid.io.save_params` or `fluid.io.save_persistables`, to a state_dict
Args:
weight_dir (str): weight directory path.
Returns:
OrderedDict: weight dict.
Examples:
.. code-block:: python
import os
from paddle import fluid
from paddle.nn import Conv2D, Pool2D, Linear, ReLU, Sequential
from paddle.incubate.hapi.utils import uncombined_weight_to_state_dict
class LeNetDygraph(fluid.dygraph.Layer):
def __init__(self, num_classes=10, classifier_activation='softmax'):
super(LeNetDygraph, self).__init__()
self.num_classes = num_classes
self.features = Sequential(
Conv2D(
1, 6, 3, stride=1, padding=1),
ReLU(),
Pool2D(2, 'max', 2),
Conv2D(
6, 16, 5, stride=1, padding=0),
ReLU(),
Pool2D(2, 'max', 2))
if num_classes > 0:
self.fc = Sequential(
Linear(400, 120),
Linear(120, 84),
Linear(
84, 10, act=classifier_activation))
def forward(self, inputs):
x = self.features(inputs)
if self.num_classes > 0:
x = fluid.layers.flatten(x, 1)
x = self.fc(x)
return x
# save weight use fluid.io.save_params
save_dir = 'temp'
if not os.path.exists(save_dir):
os.makedirs(save_dir)
start_prog = fluid.Program()
train_prog = fluid.Program()
x = fluid.data(name='x', shape=[None, 1, 28, 28], dtype='float32')
with fluid.program_guard(train_prog, start_prog):
with fluid.unique_name.guard():
x = fluid.data(
name='x', shape=[None, 1, 28, 28], dtype='float32')
model = LeNetDygraph()
output = model.forward(x)
excutor = fluid.Executor()
excutor.run(start_prog)
test_prog = train_prog.clone(for_test=True)
fluid.io.save_params(excutor, save_dir, test_prog)
# convert uncombined weight to state dict
state_dict = uncombined_weight_to_state_dict(save_dir)
key2key_dict = {
'features.0.weight': 'conv2d_0.w_0',
'features.0.bias': 'conv2d_0.b_0',
'features.3.weight': 'conv2d_1.w_0',
'features.3.bias': 'conv2d_1.b_0',
'fc.0.weight': 'linear_0.w_0',
'fc.0.bias': 'linear_0.b_0',
'fc.1.weight': 'linear_1.w_0',
'fc.1.bias': 'linear_1.b_0',
'fc.2.weight': 'linear_2.w_0',
'fc.2.bias': 'linear_2.b_0'
}
fluid.enable_imperative()
dygraph_model = LeNetDygraph()
converted_state_dict = dygraph_model.state_dict()
for k1, k2 in key2key_dict.items():
converted_state_dict[k1] = state_dict[k2]
# dygraph model load state dict which converted from uncombined weight
dygraph_model.set_dict(converted_state_dict)
"""
def _get_all_params_name(dir):
params_name = []
dir = os.path.expanduser(dir)
dir_len = len(dir)
for root, _, fnames in sorted(os.walk(dir, followlinks=True)):
for fname in sorted(fnames):
path = os.path.join(root[dir_len:], fname)
params_name.append(path)
return params_name
class Load(fluid.dygraph.Layer):
def __init__(self):
super(Load, self).__init__()
def forward(self, filename):
weight = self.create_parameter(
shape=[1],
dtype='float32',
default_initializer=fluid.initializer.ConstantInitializer(0.0))
self._helper.append_op(
type='load',
inputs={},
outputs={'Out': [weight]},
attrs={'file_path': filename})
return weight
params_name_list = _get_all_params_name(weight_dir)
if not fluid.in_dygraph_mode():
dygraph_enabled = False
fluid.enable_imperative()
else:
dygraph_enabled = True
load = Load()
state_dict = OrderedDict()
for param_name in params_name_list:
param_path = os.path.join(weight_dir, param_name)
weight = load(param_path)
try:
weight = weight.numpy()
except Exception as e:
print(e)
state_dict[param_name] = weight
if not dygraph_enabled:
fluid.disable_imperative()
return state_dict
def to_list(value):
if value is None:
return value
if isinstance(value, (list, tuple)):
return list(value)
return [value]
def to_numpy(var):
assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable"
if isinstance(var, fluid.core.VarBase):
return var.numpy()
t = global_scope().find_var(var.name).get_tensor()
return np.array(t)
def flatten_list(l):
assert isinstance(l, list), "not a list"
outl = []
splits = []
for sl in l:
assert isinstance(sl, list), "sub content not a list"
splits.append(len(sl))
outl += sl
return outl, splits
def restore_flatten_list(l, splits):
outl = []
for split in splits:
assert len(l) >= split, "list length invalid"
sl, l = l[:split], l[split:]
outl.append(sl)
return outl
def extract_args(func):
if hasattr(inspect, 'getfullargspec'):
return inspect.getfullargspec(func)[0]
else:
return inspect.getargspec(func)[0]
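For reference, the deleted `flatten_list` / `restore_flatten_list` helpers round-trip a nested list of equal structure; a tiny sketch of the removed behavior:

# flatten_list records each sublist's length in `splits`,
# restore_flatten_list uses it to rebuild the original nesting
nested = [[1, 2], [3], [4, 5, 6]]
flat, splits = flatten_list(nested)   # ([1, 2, 3, 4, 5, 6], [2, 1, 3])
assert restore_flatten_list(flat, splits) == nested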

@@ -18,6 +18,7 @@ __all__ = [
'IterableDataset',
'TensorDataset',
'BatchSampler',
'DistributedBatchSampler',
# 'Transform',
'DataLoader',
'get_worker_info',
@@ -43,7 +44,7 @@ __all__ = [
from ..fluid.io import DataLoader
from ..fluid.dataloader import Dataset, IterableDataset, BatchSampler, get_worker_info, \
TensorDataset, Sampler, SequenceSampler, RandomSampler
TensorDataset, Sampler, SequenceSampler, RandomSampler, DistributedBatchSampler
from ..fluid.io import load, save, load_program_state, set_program_state, \
load_inference_model, save_inference_model, batch
from ..reader import shuffle, buffered, cache, chain, firstn, compose, map_readers, xmap_readers
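Since `DistributedBatchSampler` is now exported from `paddle.io`, it can be plugged into a `DataLoader` directly. A minimal sketch, assuming the `RandomDataset` class defined in the sampler docstring earlier and that `DataLoader` accepts a `batch_sampler` argument (as the re-exported `fluid.io.DataLoader` does):

import paddle
from paddle.io import DataLoader, DistributedBatchSampler

paddle.disable_static()
dataset = RandomDataset(100)   # from the docstring example above
sampler = DistributedBatchSampler(dataset, batch_size=64, shuffle=True)
loader = DataLoader(dataset, batch_sampler=sampler)
for image, label in loader:
    break  # each rank iterates only over its own shard of the dataset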

@@ -202,12 +202,11 @@ class Accuracy(Metric):
.. code-block:: python
import paddle
import paddle.incubate.hapi as hapi
paddle.disable_static()
train_dataset = hapi.datasets.MNIST(mode='train')
train_dataset = paddle.vision.datasets.MNIST(mode='train')
model = hapi.Model(hapi.vision.LeNet(classifier_activation=None))
model = paddle.Model(paddle.vision.LeNet(classifier_activation=None))
optim = paddle.optimizer.Adam(
learning_rate=0.001, parameters=model.parameters())
model.prepare(
@@ -336,7 +335,6 @@ class Precision(Metric):
import paddle
import paddle.nn as nn
import paddle.incubate.hapi as hapi
class Data(paddle.io.Dataset):
def __init__(self):
@@ -352,7 +350,7 @@ class Precision(Metric):
return self.n
paddle.disable_static()
model = hapi.Model(nn.Sequential(
model = paddle.Model(nn.Sequential(
nn.Linear(10, 1),
nn.Sigmoid()
))
@@ -471,7 +469,6 @@ class Recall(Metric):
import paddle
import paddle.nn as nn
import paddle.incubate.hapi as hapi
class Data(paddle.io.Dataset):
def __init__(self):
@@ -487,7 +484,7 @@ class Recall(Metric):
return self.n
paddle.disable_static()
model = hapi.Model(nn.Sequential(
model = paddle.Model(nn.Sequential(
nn.Linear(10, 1),
nn.Sigmoid()
))
@@ -617,7 +614,6 @@ class Auc(Metric):
import numpy as np
import paddle
import paddle.nn as nn
import paddle.incubate.hapi as hapi
class Data(paddle.io.Dataset):
def __init__(self):
@@ -633,9 +629,9 @@ class Auc(Metric):
return self.n
paddle.disable_static()
model = hapi.Model(nn.Sequential(
nn.Linear(10, 2, act='softmax'),
))
model = paddle.Model(nn.Sequential(
nn.Linear(10, 2), nn.Softmax())
)
optim = paddle.optimizer.Adam(
learning_rate=0.001, parameters=model.parameters())

@@ -1,6 +1,41 @@
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py")
string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}")
foreach(TEST_OP ${DIST_TEST_OPS})
list(REMOVE_ITEM TEST_OPS ${TEST_OP})
endforeach()
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
function(py_dist_test TARGET_NAME)
if(WITH_TESTING)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_dist_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(WITH_COVERAGE AND WITH_GPU AND WITH_NCCL AND NOT WIN32)
add_test(NAME ${TARGET_NAME}
COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
FLAGS_cpu_deterministic=true NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1
PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_dist_test_ENVS}
COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data
${PYTHON_EXECUTABLE} -u ${py_dist_test_SRCS} ${py_dist_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
# No unit test should exceed 10 minutes.
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=DIST")
endif()
endif()
endfunction()
foreach(src ${DIST_TEST_OPS})
message(STATUS ${src})
py_dist_test(${src} SRCS ${src}.py)
endforeach()

@@ -20,14 +20,15 @@ import unittest
import numpy as np
import contextlib
from paddle import fluid
import paddle
import paddle.fluid as fluid
from paddle.incubate.hapi import Model, Input, set_device
from paddle import Model, set_device
from paddle.static import InputSpec as Input
from paddle.nn.layer.loss import CrossEntropyLoss
from paddle.incubate.hapi.vision.models import LeNet
from paddle.metric import Accuracy
from paddle.incubate.hapi.callbacks import ProgBarLogger
from paddle.incubate.hapi.datasets import MNIST
from paddle.vision.models import LeNet
from paddle.vision.datasets import MNIST
class MnistDataset(MNIST):
@@ -76,7 +77,7 @@ class TestDistTraning(unittest.TestCase):
val_dataset = MnistDataset(mode='test')
test_dataset = MnistDataset(mode='test', return_label=False)
cbk = ProgBarLogger(50)
cbk = paddle.callbacks.ProgBarLogger(50)
model.fit(train_dataset,
val_dataset,
epochs=2,

@@ -20,14 +20,15 @@ import unittest
import numpy as np
import contextlib
from paddle import fluid
import paddle
import paddle.fluid as fluid
from paddle.incubate.hapi import Model, Input, set_device
from paddle import Model, set_device
from paddle.static import InputSpec as Input
from paddle.nn.layer.loss import CrossEntropyLoss
from paddle.incubate.hapi.vision.models import LeNet
from paddle.metric import Accuracy
from paddle.incubate.hapi.callbacks import ProgBarLogger
from paddle.incubate.hapi.datasets import MNIST
from paddle.vision.models import LeNet
from paddle.vision.datasets import MNIST
class MnistDataset(MNIST):
@@ -75,7 +76,7 @@ class TestDistTraning(unittest.TestCase):
val_dataset = MnistDataset(mode='test')
test_dataset = MnistDataset(mode='test', return_label=False)
cbk = ProgBarLogger(50)
cbk = paddle.callbacks.ProgBarLogger(50)
model.fit(train_dataset,
val_dataset,
epochs=2,

