LangDaoAI commented 3 years ago

few-shot EFL 模型推理部署后，出现 `DataType of equal Op's duplicable Variable Y must be consistent` 的异常：

我使用的部署推理脚本如下：

请求Team验证，比较急，感谢！

LangDaoAI commented 3 years ago

以下是我这边客户端：

coding='utf-8'

import json
import traceback
from functools import partial

import paddle
from flask import Flask, request, Response
import numpy as np
import paddlenlp as ppnlp
from paddlenlp.data import Tuple, Pad
from paddle import inference
import paddle.nn.functional as F
from gevent.pywsgi import WSGIServer

from data import processor_dict
from task_label_description import TASK_LABELS_DESC, honorcrisis_labels

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)
# Keep non-ASCII characters (e.g. Chinese labels) unescaped in JSON output.
app.config['JSON_AS_ASCII'] = False

# Key used to look up the data processor and the label descriptions.
task_name = "honorcrisis"

def convert_example(example, tokenizer, max_seq_length=512, is_test=False):
    """Tokenize one sentence pair into model inputs.

    Args:
        example: Mapping with the keys "sentence1" and "sentence2".
        tokenizer: Callable invoked with ``text``, ``text_pair``,
            ``max_seq_len`` and ``truncation_strategy``; it must return a
            mapping containing "input_ids" and "token_type_ids".
        max_seq_length: Forwarded to the tokenizer as ``max_seq_len``.
        is_test: Accepted for interface compatibility; not used here.

    Returns:
        A ``(src_ids, token_type_ids)`` tuple taken from the tokenizer output.
    """
    sentence1 = example["sentence1"]
    sentence2 = example["sentence2"]
    encoded_inputs = tokenizer(
        text=sentence1,
        text_pair=sentence2,
        max_seq_len=max_seq_length,
        # Only the first sentence is shortened when the pair is too long.
        truncation_strategy="only_first")

    src_ids = encoded_inputs["input_ids"]
    token_type_ids = encoded_inputs["token_type_ids"]

    return src_ids, token_type_ids

def create_dataloader(dataset, batch_size=1, batchify_fn=None, trans_fn=None):
    """Wrap ``dataset`` in a non-shuffling ``paddle.io.DataLoader``.

    Args:
        dataset: Dataset object; it must provide ``map`` when ``trans_fn``
            is given.
        batch_size: Number of samples per batch.
        batchify_fn: Passed to the loader as ``collate_fn``.
        trans_fn: Optional per-sample transform applied through
            ``dataset.map`` before batching.

    Returns:
        A ``paddle.io.DataLoader`` created with ``return_list=True``.
    """
    if trans_fn:
        dataset = dataset.map(trans_fn)

    # shuffle=False keeps the samples in their original order.
    batch_sampler = paddle.io.BatchSampler(
        dataset, batch_size=batch_size, shuffle=False)

    return paddle.io.DataLoader(
        dataset=dataset,
        batch_sampler=batch_sampler,
        collate_fn=batchify_fn,
        return_list=True)

@app.route("/", methods=['GET', 'POST'])
@app.route("/cn/crisis", methods=['GET', 'POST'])
def crisis_cls_client():
    """Classify the text carried by the current request.

    The text is read from the "text" query parameter (GET), the "text"
    field of a JSON body, or the "content" field of a form / other POST
    body. Relies on the module-level ``predictor`` and ``tokenizer``
    objects created in the ``__main__`` section.

    Returns:
        A JSON ``Response``. On success the body has status '200' and the
        predicted label, its description and its probability; on any
        exception the body has status '500' and the exception text.
    """
    try:
        text = []
        if request.method == "GET":
            text.append(request.args.get("text"))
        if request.method == "POST":
            # content_type is None when the client sends no Content-Type
            # header; treat that like an unknown type instead of failing.
            content_type = request.content_type or ""
            if content_type.startswith('application/json'):
                text.append(request.json.get('text'))
            elif content_type.startswith('multipart/form-data'):
                text.append(request.form.get('content'))
            else:
                text.append(request.values.get("content"))

        test_ds = [{'sentence': sentence} for sentence in text]

        processor = processor_dict[task_name]()
        test_ds = processor.get_test_datasets(test_ds, TASK_LABELS_DESC[task_name])

        # [src_ids, token_type_ids]
        predict_batchify_fn = lambda samples, fn=Tuple(
            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # src_ids
            Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # token_type_ids
        ): [data for data in fn(samples)]

        predict_trans_func = partial(
            convert_example,
            tokenizer=tokenizer,
            max_seq_length=256,
            is_test=True)

        test_data_loader = create_dataloader(
            test_ds,
            batch_size=1,
            batchify_fn=predict_batchify_fn,
            trans_fn=predict_trans_func)

        y_pred_labels, prediction_probs = predictor.predict(
            test_data_loader,
            task_label_description=TASK_LABELS_DESC[task_name])

        top_label = y_pred_labels[0]
        result = {
            '类别': top_label,
            '类别描述': honorcrisis_labels[top_label],
            # The probabilities come back as a numpy array; convert the
            # numpy scalar to a plain float so json.dumps can serialize it.
            # NOTE(review): the index assumes labels are 1-based integer
            # strings - confirm against TASK_LABELS_DESC.
            '概率': float(prediction_probs[0][int(top_label) - 1]),
        }
        return Response(
            json.dumps({'status': '200', 'result': result}, ensure_ascii=False),
            mimetype='application/json')

    except Exception as e:
        # Top-level request boundary: log the traceback and report the
        # failure in the JSON body rather than letting the worker crash.
        tb = traceback.format_exc()
        print('handle_request_failed\t%s\t%s' % (tb, str(e)))
        return Response(json.dumps({'status': '500', 'result': str(e)}), mimetype='application/json')

class Predictor(object):
    """Runs an exported static-graph model through Paddle Inference."""

    def __init__(self, model_file, params_file, device, max_seq_length):
        """Create the inference predictor and cache its I/O handles.

        Args:
            model_file: Path of the exported model file.
            params_file: Path of the exported parameters file.
            device: One of "gpu", "cpu" or "xpu"; any other value leaves
                the inference config at its defaults.
            max_seq_length: Stored on the instance; not used by ``predict``.
        """
        self.max_seq_length = max_seq_length

        config = inference.Config(model_file, params_file)
        if device == "gpu":
            # set GPU configs accordingly
            config.enable_use_gpu(100, 0)
        elif device == "cpu":
            # set CPU configs accordingly,
            # such as enable_mkldnn, set_cpu_math_library_num_threads
            config.disable_gpu()
        elif device == "xpu":
            # set XPU configs accordingly
            config.enable_xpu(100)
        config.switch_use_feed_fetch_ops(False)
        self.predictor = inference.create_predictor(config)

        self.input_handles = [
            self.predictor.get_input_handle(name)
            for name in self.predictor.get_input_names()
        ]

        self.output_handle = self.predictor.get_output_handle(
            self.predictor.get_output_names()[0])

    @staticmethod
    def _as_int64_array(batch_field):
        """Return ``batch_field`` as a contiguous int64 numpy array.

        The data loader may yield framework tensors rather than numpy
        arrays; those expose ``numpy()``, which is used when present.
        """
        if hasattr(batch_field, "numpy"):
            batch_field = batch_field.numpy()
        return np.ascontiguousarray(batch_field, dtype="int64")

    @paddle.no_grad()
    def predict(self, data_loader, task_label_description):
        """Predict one label per example.

        Args:
            data_loader: Iterable of ``(src_ids, token_type_ids)`` batches.
            task_label_description: Mapping whose keys are the labels; its
                key order defines the class index order.

        Returns:
            ``(y_preds, probs)``: the list of predicted labels and a numpy
            array of softmax probabilities with one row per example.
        """
        index2label = dict(enumerate(task_label_description.keys()))
        class_num = len(task_label_description)

        all_prediction_probs = []

        for batch in data_loader:
            src_ids, token_type_ids = batch

            # copy_from_cpu expects numpy arrays. Feeding anything else, or
            # a dtype other than the int64 the model was exported with, is
            # the likely source of "DataType of ... must be consistent".
            self.input_handles[0].copy_from_cpu(self._as_int64_array(src_ids))
            self.input_handles[1].copy_from_cpu(
                self._as_int64_array(token_type_ids))
            self.predictor.run()

            # copy_to_cpu already returns a numpy array; it has no .numpy().
            all_prediction_probs.append(self.output_handle.copy_to_cpu())

        all_prediction_probs = np.concatenate(all_prediction_probs, axis=0)

        # Group the rows per example: class_num rows of 2 scores each.
        # NOTE(review): assumes the loader yields class_num consecutive
        # rows for every example - confirm against the data processor.
        all_prediction_probs = np.reshape(all_prediction_probs, (-1, class_num, 2))

        # Keep the score at index 1 of each pair.
        prediction_pos_probs = all_prediction_probs[:, :, 1]
        prediction_pos_probs = np.reshape(prediction_pos_probs, (-1, class_num))

        probs = F.softmax(paddle.to_tensor(prediction_pos_probs))

        y_pred_index = np.argmax(prediction_pos_probs, axis=-1)
        y_preds = [index2label[idx] for idx in y_pred_index]

        return y_preds, probs.numpy()

if __name__ == "__main__":
    # predictor and tokenizer are module-level names; the request handler
    # reads them as globals.
    predictor = Predictor(
        "./static_graph_test/static_graph_params.pdmodel",
        "./static_graph_test/static_graph_params.pdiparams",
        'gpu',
        256)
    tokenizer = ppnlp.transformers.ErnieGramTokenizer.from_pretrained('ernie-gram-zh')
    # Serve the Flask app on all interfaces, port 8021, until interrupted.
    http_server = WSGIServer(('0.0.0.0', 8021), app)
    http_server.serve_forever()

LangDaoAI commented 3 years ago

【更新】我用2.0.7 版本的原生EFL代码也复现了，我马上贴一下新做的客户端和导出static的脚本：

LangDaoAI commented 3 years ago

【efl_client.py】如下：

coding='utf-8'

import json
import traceback
from functools import partial

import paddle
from flask import Flask, request, Response
import numpy as np
import paddlenlp as ppnlp
from paddlenlp.data import Tuple, Pad
from paddle import inference
import paddle.nn.functional as F
from gevent.pywsgi import WSGIServer

from data import create_dataloader, convert_example, processor_dict
from task_label_description import TASK_LABELS_DESC

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)
# Keep non-ASCII characters (e.g. Chinese labels) unescaped in JSON output.
app.config['JSON_AS_ASCII'] = False

# Key used to look up the data processor and the label descriptions.
task_name = "tnews"

@app.route("/", methods=['GET', 'POST'])
@app.route("/cn/efl", methods=['GET', 'POST'])
def crisis_cls_client():
    """Classify the text carried by the current request.

    The text is read from the "text" query parameter (GET), the "text"
    field of a JSON body, or the "content" field of a form / other POST
    body. Relies on the module-level ``predictor`` and ``tokenizer``
    objects created in the ``__main__`` section.

    Returns:
        A JSON ``Response``. On success the body has status '200' and the
        predicted label; on any exception the body has status '500' and
        the exception text.
    """
    try:
        text = []
        if request.method == "GET":
            text.append(request.args.get("text"))
        if request.method == "POST":
            # content_type is None when the client sends no Content-Type
            # header; treat that like an unknown type instead of failing.
            content_type = request.content_type or ""
            if content_type.startswith('application/json'):
                text.append(request.json.get('text'))
            elif content_type.startswith('multipart/form-data'):
                text.append(request.form.get('content'))
            else:
                text.append(request.values.get("content"))

        test_ds = [{'sentence': sentence} for sentence in text]

        processor = processor_dict[task_name]()
        test_ds = processor.get_test_datasets(test_ds, TASK_LABELS_DESC[task_name])

        # [src_ids, token_type_ids]
        predict_batchify_fn = lambda samples, fn=Tuple(
            Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64"),  # src_ids
            Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype="int64"),  # token_type_ids
        ): [data for data in fn(samples)]

        predict_trans_func = partial(
            convert_example,
            tokenizer=tokenizer,
            max_seq_length=256,
            is_test=True)

        test_data_loader = create_dataloader(
            test_ds,
            batch_size=1,
            batchify_fn=predict_batchify_fn,
            trans_fn=predict_trans_func)

        # Only the labels are reported; the probabilities are not used here.
        y_pred_labels, prediction_probs = predictor.predict(
            test_data_loader,
            task_label_description=TASK_LABELS_DESC[task_name])

        return Response(
            json.dumps({'status': '200', 'result': {'类别': y_pred_labels[0]}},
                       ensure_ascii=False), mimetype='application/json')

    except Exception as e:
        # Top-level request boundary: log the traceback and report the
        # failure in the JSON body rather than letting the worker crash.
        tb = traceback.format_exc()
        print('handle_request_failed\t%s\t%s' % (tb, str(e)))
        return Response(json.dumps({'status': '500', 'result': str(e)}), mimetype='application/json')

class Predictor(object):
    """Runs an exported static-graph model through Paddle Inference."""

    def __init__(self, model_file, params_file, device, max_seq_length):
        """Create the inference predictor and cache its I/O handles.

        Args:
            model_file: Path of the exported model file.
            params_file: Path of the exported parameters file.
            device: One of "gpu", "cpu" or "xpu"; any other value leaves
                the inference config at its defaults.
            max_seq_length: Stored on the instance; not used by ``predict``.
        """
        self.max_seq_length = max_seq_length

        config = inference.Config(model_file, params_file)
        if device == "gpu":
            # set GPU configs accordingly
            config.enable_use_gpu(100, 0)
        elif device == "cpu":
            # set CPU configs accordingly,
            # such as enable_mkldnn, set_cpu_math_library_num_threads
            config.disable_gpu()
        elif device == "xpu":
            # set XPU configs accordingly
            config.enable_xpu(100)
        config.switch_use_feed_fetch_ops(False)
        self.predictor = inference.create_predictor(config)

        self.input_handles = [
            self.predictor.get_input_handle(name)
            for name in self.predictor.get_input_names()
        ]

        self.output_handle = self.predictor.get_output_handle(
            self.predictor.get_output_names()[0])

    @staticmethod
    def _as_int64_array(batch_field):
        """Return ``batch_field`` as a contiguous int64 numpy array.

        The data loader may yield framework tensors rather than numpy
        arrays; those expose ``numpy()``, which is used when present.
        """
        if hasattr(batch_field, "numpy"):
            batch_field = batch_field.numpy()
        return np.ascontiguousarray(batch_field, dtype="int64")

    @paddle.no_grad()
    def predict(self, data_loader, task_label_description):
        """Predict one label per example.

        Args:
            data_loader: Iterable of ``(src_ids, token_type_ids)`` batches.
            task_label_description: Mapping whose keys are the labels; its
                key order defines the class index order.

        Returns:
            ``(y_preds, probs)``: the list of predicted labels and a numpy
            array of softmax probabilities with one row per example.
        """
        index2label = dict(enumerate(task_label_description.keys()))
        class_num = len(task_label_description)

        all_prediction_probs = []

        for batch in data_loader:
            src_ids, token_type_ids = batch

            # copy_from_cpu expects numpy arrays. Feeding anything else, or
            # a dtype other than the int64 the model was exported with, is
            # the likely source of "DataType of ... must be consistent".
            self.input_handles[0].copy_from_cpu(self._as_int64_array(src_ids))
            self.input_handles[1].copy_from_cpu(
                self._as_int64_array(token_type_ids))
            self.predictor.run()

            # copy_to_cpu already returns a numpy array; it has no .numpy().
            all_prediction_probs.append(self.output_handle.copy_to_cpu())

        all_prediction_probs = np.concatenate(all_prediction_probs, axis=0)

        # Group the rows per example: class_num rows of 2 scores each.
        # NOTE(review): assumes the loader yields class_num consecutive
        # rows for every example - confirm against the data processor.
        all_prediction_probs = np.reshape(all_prediction_probs, (-1, class_num, 2))

        # Keep the score at index 1 of each pair.
        prediction_pos_probs = all_prediction_probs[:, :, 1]
        prediction_pos_probs = np.reshape(prediction_pos_probs, (-1, class_num))

        probs = F.softmax(paddle.to_tensor(prediction_pos_probs))

        y_pred_index = np.argmax(prediction_pos_probs, axis=-1)
        y_preds = [index2label[idx] for idx in y_pred_index]

        return y_preds, probs.numpy()

if __name__ == "__main__":
    # predictor and tokenizer are module-level names; the request handler
    # reads them as globals.
    predictor = Predictor(
        "./static_graph/static_graph_params.pdmodel",
        "./static_graph/static_graph_params.pdiparams",
        'gpu',
        256)
    tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0')
    # Serve the Flask app on all interfaces, port 8022, until interrupted.
    http_server = WSGIServer(('0.0.0.0', 8022), app)
    http_server.serve_forever()

LangDaoAI commented 3 years ago

【export静态模型脚本】如下：

import argparse
import os

import paddle
from paddlenlp.transformers import ErnieForSequenceClassification

parser = argparse.ArgumentParser()
parser.add_argument(
    "--params_path",
    type=str,
    default='./checkpoints/model_150/model_state.pdparams',
    help="The path to model parameters to be loaded.")
parser.add_argument(
    "--output_path",
    type=str,
    default='./static_graph/static_graph_params',
    help="The path of model parameter in static graph to be saved.")

args = parser.parse_args()

if __name__ == "__main__":
    model = ErnieForSequenceClassification.from_pretrained(
        'ernie-1.0', num_classes=2)

    if args.params_path and os.path.isfile(args.params_path):
        state_dict = paddle.load(args.params_path)
        model.set_dict(state_dict)
        print("Loaded parameters from %s" % args.params_path)
    else:
        # Without a checkpoint the export still succeeds, but it contains
        # only the weights from_pretrained loaded; make that visible.
        print("WARNING: no parameter file at %s; exporting the model "
              "without fine-tuned parameters" % args.params_path)
    model.eval()

    # Convert to static graph with specific input description
    model = paddle.jit.to_static(
        model,
        input_spec=[
            paddle.static.InputSpec(
                shape=[None, None], dtype="int64"),  # input_ids
            paddle.static.InputSpec(
                shape=[None, None], dtype="int64")  # segment_ids
        ])
    # Save in static graph model.
    paddle.jit.save(model, args.output_path)