From 9ba231d3d1c13923bb42a02a88f92bce2fc6afd1 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Wed, 1 Mar 2017 15:49:39 +0800
Subject: [PATCH] Complete inferencer.

---
Reviewer note (not part of the commit message): this copy differs from the
original commit in two added lines. InferenceEngine.infer() now builds its
accumulators as `[[] for _ in result]`; the original `[[]] * len(result)`
repeated one shared list, so every output field collected every other
field's batches. The `try:` comment in mnist.py was also reworded. The
`index` post-image hashes below still name the original blobs.

 demo/mnist/api_train_v2.py           | 13 +++++++++++++
 python/paddle/v2/dataset/mnist.py    | 29 ++++++++++++++--------------
 python/paddle/v2/inferencer.py       | 27 +++++++++++++++-----------
 python/paddle/v2/reader/decorator.py | 20 ++++++++++++++++---
 4 files changed, 61 insertions(+), 28 deletions(-)

diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py
index a59b30ccdb..637596e7bc 100644
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@@ -44,6 +44,19 @@ def main():
             batch_size=32),
         event_handler=event_handler)
 
+    # output is a softmax layer. It returns probabilities.
+    # Shape should be (100, 10)
+    probs = paddle.infer(
+        output=inference,
+        parameters=parameters,
+        reader=paddle.reader.batched(
+            paddle.reader.limited(
+                paddle.reader.map_readers(lambda item: (item[0], ),
+                                          paddle.dataset.mnist.test()),
+                limit=100),
+            batch_size=32))
+    print probs.shape
+
 
 if __name__ == '__main__':
     main()
diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py
index 1512a3c318..ebcdff78b3 100644
--- a/python/paddle/v2/dataset/mnist.py
+++ b/python/paddle/v2/dataset/mnist.py
@@ -35,24 +35,25 @@ def reader_creator(image_filename, label_filename, buffer_size):
         l = subprocess.Popen([zcat_cmd, label_filename], stdout=subprocess.PIPE)
         l.stdout.read(8)  # skip some magic bytes
 
-        while True:
-            labels = numpy.fromfile(
-                l.stdout, 'ubyte', count=buffer_size).astype("int")
+        try:  # the consumer may stop reading early; always terminate zcat.
+            while True:
+                labels = numpy.fromfile(
+                    l.stdout, 'ubyte', count=buffer_size).astype("int")
 
-            if labels.size != buffer_size:
-                break  # numpy.fromfile returns empty slice after EOF.
+                if labels.size != buffer_size:
+                    break  # numpy.fromfile returns empty slice after EOF.
 
-            images = numpy.fromfile(
-                m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape(
-                    (buffer_size, 28 * 28)).astype('float32')
+                images = numpy.fromfile(
+                    m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape(
+                        (buffer_size, 28 * 28)).astype('float32')
 
-            images = images / 255.0 * 2.0 - 1.0
+                images = images / 255.0 * 2.0 - 1.0
 
-            for i in xrange(buffer_size):
-                yield images[i, :], int(labels[i])
-
-        m.terminate()
-        l.terminate()
+                for i in xrange(buffer_size):
+                    yield images[i, :], int(labels[i])
+        finally:
+            m.terminate()
+            l.terminate()
 
     return reader
 
diff --git a/python/paddle/v2/inferencer.py b/python/paddle/v2/inferencer.py
index 36a8ee3711..33f5ad1c07 100644
--- a/python/paddle/v2/inferencer.py
+++ b/python/paddle/v2/inferencer.py
@@ -16,18 +16,18 @@ class InferenceEngine(object):
         for param in gm.getParameters():
             val = param.getBuf(api.PARAMETER_VALUE)
             name = param.getName()
-            assert isinstance(val, api.Matrix)
-            val.copyFromNumpyMat(parameters.get(name))
+            assert isinstance(val, api.Vector)
+            val.copyFromNumpyArray(parameters.get(name).flatten())
         self.__gradient_machine__ = gm
         self.__data_types__ = topo.data_type()
 
     def iter_infer(self, reader, reader_dict=None):
+        if reader_dict is None:
+            reader_dict = self.default_reader_dict()
         feeder = DataFeeder(self.__data_types__, reader_dict)
-        out_args = api.Arguments.createArguments(0)
         self.__gradient_machine__.start()
         for data_batch in reader():
-            yield self.__gradient_machine__.forwardTest(
-                feeder(data_batch), out_args, api.PASS_TEST)
+            yield self.__gradient_machine__.forwardTest(feeder(data_batch))
         self.__gradient_machine__.finish()
 
     def iter_infer_field(self, field, **kwargs):
@@ -35,12 +35,17 @@ class InferenceEngine(object):
             yield [each_result[field] for each_result in result]
 
     def infer(self, field='value', **kwargs):
-        retv = []
-        for result in itertools.izip(
-                self.iter_infer_field(
-                    field=field, **kwargs)):
-            retv.append(numpy.concatenate(result))
-        return retv
+        retv = None
+        for result in self.iter_infer_field(field=field, **kwargs):
+            if retv is None:
+                retv = [[] for _ in result]
+            for i, item in enumerate(result):
+                retv[i].append(item)
+        retv = [numpy.concatenate(out) for out in retv]
+        if len(retv) == 1:
+            return retv[0]
+        else:
+            return retv
 
     def default_reader_dict(self):
         reader_dict = dict()
diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/v2/reader/decorator.py
index 5687f118ce..fe5acbdff5 100644
--- a/python/paddle/v2/reader/decorator.py
+++ b/python/paddle/v2/reader/decorator.py
@@ -14,13 +14,13 @@
 
 __all__ = [
     'map_readers', 'buffered', 'compose', 'chain', 'shuffle',
-    'ComposeNotAligned', 'batched'
+    'ComposeNotAligned', 'batched', 'limited'
 ]
 
-from Queue import Queue
-from threading import Thread
 import itertools
 import random
+from Queue import Queue
+from threading import Thread
 
 
 def map_readers(func, *readers):
@@ -213,3 +213,17 @@ def batched(reader, batch_size):
             yield batch
 
     return batched_reader
+
+
+def limited(reader, limit):
+    """
+    Limit the max number of samples that reader could return.
+    """
+
+    def limited_reader():
+        for i, item in enumerate(reader()):
+            if i == limit:
+                break
+            yield item
+
+    return limited_reader