PaddlePaddle / PaddleNLP

👑 Easy-to-use and powerful NLP and LLM library with 🤗 Awesome model zoo, supporting a wide range of NLP tasks from research to industrial applications, including 🗂 Text Classification, 🔍 Neural Search, ❓ Question Answering, ℹ️ Information Extraction, 📄 Document Intelligence, 💌 Sentiment Analysis etc.
https://paddlenlp.readthedocs.io
Apache License 2.0

After deploying the few-shot EFL model for inference, the exception `DataType of equal Op's duplicable Variable Y must be consistent` is raised #827

Closed (LangDaoAI closed this issue 3 years ago)

LangDaoAI commented 3 years ago

After deploying the few-shot EFL model for inference, the following exception appears: `DataType of equal Op's duplicable Variable Y must be consistent`:

(screenshot: error traceback)

The deployment/inference script I am using is as follows:

(screenshot: deployment/inference script)

Could the team please verify this? It is fairly urgent, thanks!

LangDaoAI commented 3 years ago

The following is my client code:

```python
# coding='utf-8'

import json
import traceback
from functools import partial

import paddle
from flask import Flask, request, Response
import numpy as np
import paddlenlp as ppnlp
from paddlenlp.data import Tuple, Pad
from paddle import inference
import paddle.nn.functional as F
from gevent.pywsgi import WSGIServer

from data import processor_dict
from task_label_description import TASK_LABELS_DESC, honorcrisis_labels

app = Flask(__name__)
app.config['JSON_AS_ASCII'] = False

task_name = "honorcrisis"


def convert_example(example, tokenizer, max_seq_length=512, is_test=False):
    sentence1 = example["sentence1"]
    sentence2 = example["sentence2"]
    encoded_inputs = tokenizer(
        text=sentence1,
        text_pair=sentence2,
        max_seq_len=max_seq_length,
        truncation_strategy="only_first")

    src_ids = encoded_inputs["input_ids"]
    token_type_ids = encoded_inputs["token_type_ids"]

    return src_ids, token_type_ids


def create_dataloader(dataset, batch_size=1, batchify_fn=None, trans_fn=None):
    if trans_fn:
        dataset = dataset.map(trans_fn)

    batch_sampler = paddle.io.BatchSampler(
        dataset, batch_size=batch_size, shuffle=False)

    return paddle.io.DataLoader(
        dataset=dataset,
        batch_sampler=batch_sampler,
        collate_fn=batchify_fn,
        return_list=True)


@app.route("/", methods=['GET', 'POST'])
@app.route("/cn/crisis", methods=['GET', 'POST'])
def crisis_cls_client():
    try:
        text = []
        if request.method == "GET":
            text.append(request.args.get("text"))
        if request.method == "POST":
            if request.content_type.startswith('application/json'):
                text.append(request.json.get('text'))
            elif request.content_type.startswith('multipart/form-data'):
                text.append(request.form.get('content'))
            else:
                text.append(request.values.get("content"))

        test_ds = []
        for sentence in text:
            new_sentence = dict()
            new_sentence['sentence'] = sentence
            test_ds.append(new_sentence)

        processor = processor_dict[task_name]()
        test_ds = processor.get_test_datasets(test_ds, TASK_LABELS_DESC[task_name])

        # [src_ids, token_type_ids]
        predict_batchify_fn = lambda samples, fn=Tuple(
            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # src_ids
            Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # token_type_ids
        ): [data for data in fn(samples)]

        predict_trans_func = partial(
            convert_example,
            tokenizer=tokenizer,
            max_seq_length=256,
            is_test=True)

        test_data_loader = create_dataloader(
            test_ds,
            batch_size=1,
            batchify_fn=predict_batchify_fn,
            trans_fn=predict_trans_func)

        y_pred_labels, prediction_probs = predictor.predict(
            test_data_loader, task_label_description=TASK_LABELS_DESC[task_name])

        return Response(
            json.dumps({
                'status': '200',
                'result': {
                    '类别': y_pred_labels[0],
                    '类别描述': honorcrisis_labels[y_pred_labels[0]],
                    '概率': prediction_probs[0][int(y_pred_labels[0]) - 1]
                }
            }, ensure_ascii=False),
            mimetype='application/json')

    except Exception as e:
        tb = traceback.format_exc()
        print('handle_request_failed\t%s\t%s' % (tb, str(e)))
        return Response(json.dumps({'status': '500', 'result': str(e)}),
                        mimetype='application/json')


class Predictor(object):
    def __init__(self, model_file, params_file, device, max_seq_length):
        self.max_seq_length = max_seq_length

        config = inference.Config(model_file, params_file)
        if device == "gpu":
            # set GPU configs accordingly
            config.enable_use_gpu(100, 0)
        elif device == "cpu":
            # set CPU configs accordingly,
            # such as enable_mkldnn, set_cpu_math_library_num_threads
            config.disable_gpu()
        elif device == "xpu":
            # set XPU configs accordingly
            config.enable_xpu(100)
        config.switch_use_feed_fetch_ops(False)
        self.predictor = inference.create_predictor(config)

        self.input_handles = [
            self.predictor.get_input_handle(name)
            for name in self.predictor.get_input_names()
        ]

        self.output_handle = self.predictor.get_output_handle(
            self.predictor.get_output_names()[0])

    @paddle.no_grad()
    def predict(self, data_loader, task_label_description):
        index2label = {
            idx: label
            for idx, label in enumerate(task_label_description.keys())
        }

        class_num = len(task_label_description)

        all_prediction_probs = []

        for batch in data_loader:
            src_ids, token_type_ids = batch

            # copy_from_cpu expects numpy arrays, so convert the batch tensors.
            self.input_handles[0].copy_from_cpu(src_ids.numpy())
            self.input_handles[1].copy_from_cpu(token_type_ids.numpy())
            self.predictor.run()
            # copy_to_cpu already returns a numpy array.
            prediction_probs = self.output_handle.copy_to_cpu()

            all_prediction_probs.append(prediction_probs)

        all_prediction_probs = np.concatenate(all_prediction_probs, axis=0)
        all_prediction_probs = np.reshape(all_prediction_probs, (-1, class_num, 2))

        prediction_pos_probs = all_prediction_probs[:, :, 1]
        prediction_pos_probs = np.reshape(prediction_pos_probs, (-1, class_num))

        prediction_pos_probs1 = paddle.to_tensor(prediction_pos_probs)
        probs = F.softmax(prediction_pos_probs1)

        y_pred_index = np.argmax(prediction_pos_probs, axis=-1)
        y_preds = [index2label[idx] for idx in y_pred_index]

        return y_preds, probs.numpy()


if __name__ == "__main__":
    predictor = Predictor("./static_graph_test/static_graph_params.pdmodel",
                          "./static_graph_test/static_graph_params.pdiparams",
                          'gpu', 256)
    tokenizer = ppnlp.transformers.ErnieGramTokenizer.from_pretrained('ernie-gram-zh')
    http_server = WSGIServer(('0.0.0.0', 8021), app)
    http_server.serve_forever()
```

LangDaoAI commented 3 years ago

[Update] I have also reproduced this with the stock EFL code from version 2.0.7. I will post the newly written client and the static-graph export script right away:

(screenshot: reproduction with the 2.0.7 EFL code)

LangDaoAI commented 3 years ago

[efl_client.py] is as follows:

```python
# coding='utf-8'

import json
import traceback
from functools import partial

import paddle
from flask import Flask, request, Response
import numpy as np
import paddlenlp as ppnlp
from paddlenlp.data import Tuple, Pad
from paddle import inference
import paddle.nn.functional as F
from gevent.pywsgi import WSGIServer

from data import create_dataloader, convert_example, processor_dict
from task_label_description import TASK_LABELS_DESC

app = Flask(__name__)
app.config['JSON_AS_ASCII'] = False

task_name = "tnews"


@app.route("/", methods=['GET', 'POST'])
@app.route("/cn/efl", methods=['GET', 'POST'])
def crisis_cls_client():
    try:
        text = []
        if request.method == "GET":
            text.append(request.args.get("text"))
        if request.method == "POST":
            if request.content_type.startswith('application/json'):
                text.append(request.json.get('text'))
            elif request.content_type.startswith('multipart/form-data'):
                text.append(request.form.get('content'))
            else:
                text.append(request.values.get("content"))

        test_ds = []
        for sentence in text:
            new_sentence = dict()
            new_sentence['sentence'] = sentence
            test_ds.append(new_sentence)

        processor = processor_dict[task_name]()
        test_ds = processor.get_test_datasets(test_ds, TASK_LABELS_DESC[task_name])

        # [src_ids, token_type_ids], padded as int64 to match the export dtype
        predict_batchify_fn = lambda samples, fn=Tuple(
            Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64"),  # src_ids
            Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype="int64"),  # token_type_ids
        ): [data for data in fn(samples)]

        predict_trans_func = partial(
            convert_example,
            tokenizer=tokenizer,
            max_seq_length=256,
            is_test=True)

        test_data_loader = create_dataloader(
            test_ds,
            batch_size=1,
            batchify_fn=predict_batchify_fn,
            trans_fn=predict_trans_func)

        y_pred_labels, prediction_probs = predictor.predict(
            test_data_loader, task_label_description=TASK_LABELS_DESC[task_name])

        return Response(
            json.dumps({'status': '200', 'result': {'类别': y_pred_labels[0]}},
                       ensure_ascii=False),
            mimetype='application/json')

    except Exception as e:
        tb = traceback.format_exc()
        print('handle_request_failed\t%s\t%s' % (tb, str(e)))
        return Response(json.dumps({'status': '500', 'result': str(e)}),
                        mimetype='application/json')


class Predictor(object):
    def __init__(self, model_file, params_file, device, max_seq_length):
        self.max_seq_length = max_seq_length

        config = inference.Config(model_file, params_file)
        if device == "gpu":
            # set GPU configs accordingly
            config.enable_use_gpu(100, 0)
        elif device == "cpu":
            # set CPU configs accordingly,
            # such as enable_mkldnn, set_cpu_math_library_num_threads
            config.disable_gpu()
        elif device == "xpu":
            # set XPU configs accordingly
            config.enable_xpu(100)
        config.switch_use_feed_fetch_ops(False)
        self.predictor = inference.create_predictor(config)

        self.input_handles = [
            self.predictor.get_input_handle(name)
            for name in self.predictor.get_input_names()
        ]

        self.output_handle = self.predictor.get_output_handle(
            self.predictor.get_output_names()[0])

    @paddle.no_grad()
    def predict(self, data_loader, task_label_description):
        index2label = {
            idx: label
            for idx, label in enumerate(task_label_description.keys())
        }

        class_num = len(task_label_description)

        all_prediction_probs = []

        for batch in data_loader:
            src_ids, token_type_ids = batch

            # copy_from_cpu expects numpy arrays, so convert the batch tensors.
            self.input_handles[0].copy_from_cpu(src_ids.numpy())
            self.input_handles[1].copy_from_cpu(token_type_ids.numpy())
            self.predictor.run()
            # copy_to_cpu already returns a numpy array.
            prediction_probs = self.output_handle.copy_to_cpu()

            all_prediction_probs.append(prediction_probs)

        all_prediction_probs = np.concatenate(all_prediction_probs, axis=0)
        all_prediction_probs = np.reshape(all_prediction_probs, (-1, class_num, 2))

        prediction_pos_probs = all_prediction_probs[:, :, 1]
        prediction_pos_probs = np.reshape(prediction_pos_probs, (-1, class_num))

        prediction_pos_probs1 = paddle.to_tensor(prediction_pos_probs)
        probs = F.softmax(prediction_pos_probs1)

        y_pred_index = np.argmax(prediction_pos_probs, axis=-1)
        y_preds = [index2label[idx] for idx in y_pred_index]

        return y_preds, probs.numpy()


if __name__ == "__main__":
    predictor = Predictor("./static_graph/static_graph_params.pdmodel",
                          "./static_graph/static_graph_params.pdiparams",
                          'gpu', 256)
    tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0')
    http_server = WSGIServer(('0.0.0.0', 8022), app)
    http_server.serve_forever()
```

LangDaoAI commented 3 years ago

The static-graph export script is as follows:

```python
import argparse
import os

import paddle
from paddlenlp.transformers import ErnieForSequenceClassification

parser = argparse.ArgumentParser()
parser.add_argument("--params_path", type=str,
                    default='./checkpoints/model_150/model_state.pdparams',
                    help="The path to model parameters to be loaded.")
parser.add_argument("--output_path", type=str,
                    default='./static_graph/static_graph_params',
                    help="The path of model parameter in static graph to be saved.")
args = parser.parse_args()

if __name__ == "__main__":
    model = ErnieForSequenceClassification.from_pretrained(
        'ernie-1.0', num_classes=2)

    if args.params_path and os.path.isfile(args.params_path):
        state_dict = paddle.load(args.params_path)
        model.set_dict(state_dict)
        print("Loaded parameters from %s" % args.params_path)
    model.eval()

    # Convert to static graph with specific input description.
    model = paddle.jit.to_static(
        model,
        input_spec=[
            paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # input_ids
            paddle.static.InputSpec(shape=[None, None], dtype="int64")   # segment_ids
        ])
    # Save the static graph model.
    paddle.jit.save(model, args.output_path)
```
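As a quick sanity check (not part of the original scripts), the exported model can be loaded and run directly with the same inference API the clients use; the key point is that the feeds must be `int64` numpy arrays to match the `InputSpec` above:

```python
import numpy as np
from paddle import inference

# Minimal smoke test for the exported static graph (paths are the defaults above).
config = inference.Config("./static_graph/static_graph_params.pdmodel",
                          "./static_graph/static_graph_params.pdiparams")
config.disable_gpu()
predictor = inference.create_predictor(config)

input_names = predictor.get_input_names()
# Dummy token ids; the dtype must be int64 to match the exported InputSpec.
src_ids = np.array([[1, 5, 6, 7, 2]], dtype="int64")
token_type_ids = np.zeros_like(src_ids)  # zeros_like keeps the int64 dtype

predictor.get_input_handle(input_names[0]).copy_from_cpu(src_ids)
predictor.get_input_handle(input_names[1]).copy_from_cpu(token_type_ids)
predictor.run()
logits = predictor.get_output_handle(predictor.get_output_names()[0]).copy_to_cpu()
print(logits.shape)  # (1, 2) for the binary EFL head
```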
tianxin1860 commented 3 years ago

Received. A static-graph-based deployment solution is in progress on our side.

LangDaoAI commented 3 years ago

Thanks to @tianxin1860 and the Baidu NLP team for helping to locate the problem. The issue has been confirmed; I will close it after fixing my client and verifying again.

LangDaoAI commented 3 years ago

(screenshots: updated client HTTP interface and successful response)

I have modified the client's HTTP interface and confirmed that it now works!