Closed LangDaoAI closed 3 years ago
以下是我这边客户端:
import json import traceback from functools import partial
import paddle from flask import Flask, request, Response import numpy as np import paddlenlp as ppnlp from paddlenlp.data import Tuple, Pad from paddle import inference import paddle.nn.functional as F from gevent.pywsgi import WSGIServer
from data import processor_dict from task_label_description import TASK_LABELS_DESC, honorcrisis_labels
# The pasted source read `Flask(name)`: markdown rendering stripped the dunder
# underscores. Flask must receive this module's import name.
app = Flask(__name__)
# Flask setting controlling ASCII-escaping of its own JSON output; the
# handler below builds responses with json.dumps(..., ensure_ascii=False).
app.config['JSON_AS_ASCII'] = False

# Key used to look up the processor in processor_dict and the label
# descriptions in TASK_LABELS_DESC.
task_name = "honorcrisis"
def convert_example(example, tokenizer, max_seq_length=512, is_test=False):
    """Tokenize one sentence pair into the two id sequences fed to the model.

    Args:
        example: Mapping providing "sentence1" and "sentence2".
        tokenizer: Callable accepting the keyword arguments ``text``,
            ``text_pair``, ``max_seq_len`` and ``truncation_strategy`` and
            returning a mapping with "input_ids" and "token_type_ids".
        max_seq_length: Forwarded to the tokenizer as ``max_seq_len``.
        is_test: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        A ``(src_ids, token_type_ids)`` tuple taken from the tokenizer output.
    """
    # "only_first" asks the tokenizer to truncate only the first sentence
    # when the pair exceeds max_seq_len.
    encoded_inputs = tokenizer(
        text=example["sentence1"],
        text_pair=example["sentence2"],
        max_seq_len=max_seq_length,
        truncation_strategy="only_first")
    return encoded_inputs["input_ids"], encoded_inputs["token_type_ids"]
def create_dataloader(dataset, batch_size=1, batchify_fn=None, trans_fn=None):
    """Wrap ``dataset`` in a non-shuffling ``paddle.io.DataLoader``.

    Args:
        dataset: Dataset object; must provide ``.map`` when ``trans_fn`` is
            given.
        batch_size: Number of samples per batch.
        batchify_fn: Collate function handed to the loader as ``collate_fn``.
        trans_fn: Optional per-sample transform applied via ``dataset.map``.

    Returns:
        A ``paddle.io.DataLoader`` created with ``return_list=True``.
    """
    if trans_fn:
        dataset = dataset.map(trans_fn)

    # shuffle=False keeps the batches in input order, so prediction i
    # corresponds to sample i.
    batch_sampler = paddle.io.BatchSampler(
        dataset, batch_size=batch_size, shuffle=False)

    return paddle.io.DataLoader(
        dataset=dataset,
        batch_sampler=batch_sampler,
        collate_fn=batchify_fn,
        return_list=True)
def _extract_request_text():
    """Return the text to classify from the current Flask request, or None.

    GET reads the ``text`` query argument. POST reads ``text`` from a JSON
    body, or ``content`` from form data / other bodies. For each source the
    other key (``content`` / ``text``) is accepted as a fallback, so clients
    do not fail only because they used the other transport's field name.
    """
    if request.method == "GET":
        return request.args.get("text") or request.args.get("content")
    if request.method != "POST":
        return None

    # content_type is None when the client sends no Content-Type header.
    content_type = request.content_type or ""
    if content_type.startswith('application/json'):
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict):
            return None
        return payload.get('text') or payload.get('content')
    if content_type.startswith('multipart/form-data'):
        return request.form.get('content') or request.form.get('text')
    return request.values.get("content") or request.values.get("text")


@app.route("/", methods=['GET', 'POST'])
@app.route("/cn/crisis", methods=['GET', 'POST'])
def crisis_cls_client():
    """Classify the text of the current request and return a JSON response.

    Reads the module-level ``predictor`` and ``tokenizer`` created in the
    ``__main__`` block. On success the body is
    ``{"status": "200", "result": {...}}``; on any failure the traceback is
    printed and the body is ``{"status": "500", "result": <message>}``.
    """
    try:
        sentence = _extract_request_text()
        # Reject missing input here rather than passing None to the tokenizer.
        if not isinstance(sentence, str) or not sentence.strip():
            raise ValueError(
                "missing text: send 'text' (GET / JSON) or 'content' (form)")

        processor = processor_dict[task_name]()
        test_ds = processor.get_test_datasets(
            [{'sentence': sentence}], TASK_LABELS_DESC[task_name])

        # Pads [src_ids, token_type_ids]. dtype="int64" matches the EFL
        # client in this thread, which pads both fields as int64.
        pad_fields = Tuple(
            Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64"),  # src_ids
            Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype="int64"),  # token_type_ids
        )
        predict_batchify_fn = lambda samples: list(pad_fields(samples))

        predict_trans_func = partial(
            convert_example,
            tokenizer=tokenizer,
            max_seq_length=256,
            is_test=True)

        test_data_loader = create_dataloader(
            test_ds,
            batch_size=1,
            batchify_fn=predict_batchify_fn,
            trans_fn=predict_trans_func)

        y_pred_labels, prediction_probs = predictor.predict(
            test_data_loader,
            task_label_description=TASK_LABELS_DESC[task_name])

        label = y_pred_labels[0]
        # NOTE(review): `int(label) - 1` assumes labels are 1-based numeric
        # strings in the same order as TASK_LABELS_DESC — confirm.
        # float() is required: a numpy float32 is not JSON-serializable.
        probability = float(prediction_probs[0][int(label) - 1])
        result = {
            '类别': label,
            '类别描述': honorcrisis_labels[label],
            '概率': probability,
        }
        return Response(
            json.dumps({'status': '200', 'result': result}, ensure_ascii=False),
            mimetype='application/json')
    except Exception as e:
        # Top-level boundary: log the traceback and report the failure in
        # the response body, keeping the original response shape.
        tb = traceback.format_exc()
        print('handle_request_failed\t%s\t%s' % (tb, str(e)))
        return Response(
            json.dumps({'status': '500', 'result': str(e)}, ensure_ascii=False),
            mimetype='application/json')
class Predictor(object):
    """Runs a static-graph model through Paddle Inference.

    Attributes are set in ``__init__``: ``predictor`` (the inference
    predictor), ``input_handles`` (one handle per model input, in the order
    reported by the predictor) and ``output_handle`` (the first output).
    """

    # The pasted source read `def init`: markdown rendering stripped the
    # dunder underscores, so the method was never used as the constructor.
    def __init__(self, model_file, params_file, device, max_seq_length):
        """Build the inference predictor.

        Args:
            model_file: Path to the exported ``.pdmodel`` file.
            params_file: Path to the exported ``.pdiparams`` file.
            device: "gpu", "cpu" or "xpu". Any other value leaves the
                config's device settings untouched.
            max_seq_length: Stored on the instance; not otherwise used here.
        """
        self.max_seq_length = max_seq_length

        config = inference.Config(model_file, params_file)
        if device == "gpu":
            # set GPU configs accordingly
            config.enable_use_gpu(100, 0)
        elif device == "cpu":
            # set CPU configs accordingly,
            # such as enable_mkldnn, set_cpu_math_library_num_threads
            config.disable_gpu()
        elif device == "xpu":
            # set XPU configs accordingly
            config.enable_xpu(100)
        config.switch_use_feed_fetch_ops(False)
        self.predictor = inference.create_predictor(config)

        self.input_handles = [
            self.predictor.get_input_handle(name)
            for name in self.predictor.get_input_names()
        ]
        self.output_handle = self.predictor.get_output_handle(
            self.predictor.get_output_names()[0])

    @staticmethod
    def _as_int64_array(values):
        """Convert one batch field to a contiguous int64 numpy array.

        Batches may arrive as framework tensors rather than ndarrays.
        NOTE(review): int64 matches the InputSpec dtype in the export script
        shown in this thread — confirm for other exported models.
        """
        if hasattr(values, "numpy"):
            values = values.numpy()
        return np.ascontiguousarray(values, dtype="int64")

    @paddle.no_grad()
    def predict(self, data_loader, task_label_description):
        """Predict one label per sample.

        Args:
            data_loader: Iterable yielding ``(src_ids, token_type_ids)``
                batches.
            task_label_description: Mapping whose keys are the labels; key
                order defines the class index.

        Returns:
            ``(y_preds, probs)``: a list of predicted labels and a numpy
            array of softmax probabilities with one column per label.

        Raises:
            ValueError: From ``np.concatenate`` if ``data_loader`` yields no
                batches.
        """
        index2label = dict(enumerate(task_label_description))
        class_num = len(task_label_description)

        all_scores = []
        for src_ids, token_type_ids in data_loader:
            self.input_handles[0].copy_from_cpu(self._as_int64_array(src_ids))
            self.input_handles[1].copy_from_cpu(
                self._as_int64_array(token_type_ids))
            self.predictor.run()
            # copy_to_cpu() is documented to return a numpy.ndarray, which
            # has no .numpy() method; np.asarray accepts either form.
            all_scores.append(np.asarray(self.output_handle.copy_to_cpu()))

        # Regroup the scores as (sample, label, 2) and keep column 1.
        # NOTE(review): column 1 is presumably the positive/entailment
        # score — confirm against training.
        scores = np.concatenate(all_scores, axis=0)
        scores = np.reshape(scores, (-1, class_num, 2))
        prediction_pos_probs = scores[:, :, 1]

        probs = F.softmax(paddle.to_tensor(prediction_pos_probs))
        y_pred_index = np.argmax(prediction_pos_probs, axis=-1)
        y_preds = [index2label[int(idx)] for idx in y_pred_index]
        return y_preds, probs.numpy()
# The pasted source read `if name == "main"`: markdown rendering stripped
# the dunder underscores, leaving an undefined name.
if __name__ == "__main__":
    # `predictor` and `tokenizer` are module globals read by the request
    # handler, so both must exist before the server starts.
    predictor = Predictor(
        "./static_graph_test/static_graph_params.pdmodel",
        "./static_graph_test/static_graph_params.pdiparams",
        'gpu',
        256)
    tokenizer = ppnlp.transformers.ErnieGramTokenizer.from_pretrained(
        'ernie-gram-zh')
    http_server = WSGIServer(('0.0.0.0', 8021), app)
    http_server.serve_forever()
【更新】我用 2.0.7 版本的原生 EFL 代码也复现了该问题,下面贴出新写的客户端和导出静态图(static graph)模型的脚本:
【efl_client.py】如下:
import json import traceback from functools import partial
import paddle from flask import Flask, request, Response import numpy as np import paddlenlp as ppnlp from paddlenlp.data import Tuple, Pad from paddle import inference import paddle.nn.functional as F from gevent.pywsgi import WSGIServer
from data import create_dataloader, convert_example, processor_dict from task_label_description import TASK_LABELS_DESC
# The pasted source read `Flask(name)`: markdown rendering stripped the dunder
# underscores. Flask must receive this module's import name.
app = Flask(__name__)
# Flask setting controlling ASCII-escaping of its own JSON output; the
# handler below builds responses with json.dumps(..., ensure_ascii=False).
app.config['JSON_AS_ASCII'] = False

# Key used to look up the processor in processor_dict and the label
# descriptions in TASK_LABELS_DESC.
task_name = "tnews"
def _extract_request_text():
    """Return the text to classify from the current Flask request, or None.

    GET reads the ``text`` query argument. POST reads ``text`` from a JSON
    body, or ``content`` from form data / other bodies. For each source the
    other key (``content`` / ``text``) is accepted as a fallback, so clients
    do not fail only because they used the other transport's field name.
    """
    if request.method == "GET":
        return request.args.get("text") or request.args.get("content")
    if request.method != "POST":
        return None

    # content_type is None when the client sends no Content-Type header.
    content_type = request.content_type or ""
    if content_type.startswith('application/json'):
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict):
            return None
        return payload.get('text') or payload.get('content')
    if content_type.startswith('multipart/form-data'):
        return request.form.get('content') or request.form.get('text')
    return request.values.get("content") or request.values.get("text")


@app.route("/", methods=['GET', 'POST'])
@app.route("/cn/efl", methods=['GET', 'POST'])
def crisis_cls_client():
    """Classify the text of the current request and return a JSON response.

    Reads the module-level ``predictor`` and ``tokenizer`` created in the
    ``__main__`` block. On success the body is
    ``{"status": "200", "result": {"类别": <label>}}``; on any failure the
    traceback is printed and the body is
    ``{"status": "500", "result": <message>}``.
    """
    try:
        sentence = _extract_request_text()
        # Reject missing input here rather than passing None to the tokenizer.
        if not isinstance(sentence, str) or not sentence.strip():
            raise ValueError(
                "missing text: send 'text' (GET / JSON) or 'content' (form)")

        processor = processor_dict[task_name]()
        test_ds = processor.get_test_datasets(
            [{'sentence': sentence}], TASK_LABELS_DESC[task_name])

        # Pads [src_ids, token_type_ids], both as int64.
        pad_fields = Tuple(
            Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64"),  # src_ids
            Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype="int64"),  # token_type_ids
        )
        predict_batchify_fn = lambda samples: list(pad_fields(samples))

        predict_trans_func = partial(
            convert_example,
            tokenizer=tokenizer,
            max_seq_length=256,
            is_test=True)

        test_data_loader = create_dataloader(
            test_ds,
            batch_size=1,
            batchify_fn=predict_batchify_fn,
            trans_fn=predict_trans_func)

        # The probabilities are not part of this endpoint's response.
        y_pred_labels, _ = predictor.predict(
            test_data_loader,
            task_label_description=TASK_LABELS_DESC[task_name])

        return Response(
            json.dumps({'status': '200', 'result': {'类别': y_pred_labels[0]}},
                       ensure_ascii=False),
            mimetype='application/json')
    except Exception as e:
        # Top-level boundary: log the traceback and report the failure in
        # the response body, keeping the original response shape.
        tb = traceback.format_exc()
        print('handle_request_failed\t%s\t%s' % (tb, str(e)))
        return Response(
            json.dumps({'status': '500', 'result': str(e)}, ensure_ascii=False),
            mimetype='application/json')
class Predictor(object):
    """Runs a static-graph model through Paddle Inference.

    Attributes are set in ``__init__``: ``predictor`` (the inference
    predictor), ``input_handles`` (one handle per model input, in the order
    reported by the predictor) and ``output_handle`` (the first output).
    """

    # The pasted source read `def init`: markdown rendering stripped the
    # dunder underscores, so the method was never used as the constructor.
    def __init__(self, model_file, params_file, device, max_seq_length):
        """Build the inference predictor.

        Args:
            model_file: Path to the exported ``.pdmodel`` file.
            params_file: Path to the exported ``.pdiparams`` file.
            device: "gpu", "cpu" or "xpu". Any other value leaves the
                config's device settings untouched.
            max_seq_length: Stored on the instance; not otherwise used here.
        """
        self.max_seq_length = max_seq_length

        config = inference.Config(model_file, params_file)
        if device == "gpu":
            # set GPU configs accordingly
            config.enable_use_gpu(100, 0)
        elif device == "cpu":
            # set CPU configs accordingly,
            # such as enable_mkldnn, set_cpu_math_library_num_threads
            config.disable_gpu()
        elif device == "xpu":
            # set XPU configs accordingly
            config.enable_xpu(100)
        config.switch_use_feed_fetch_ops(False)
        self.predictor = inference.create_predictor(config)

        self.input_handles = [
            self.predictor.get_input_handle(name)
            for name in self.predictor.get_input_names()
        ]
        self.output_handle = self.predictor.get_output_handle(
            self.predictor.get_output_names()[0])

    @staticmethod
    def _as_int64_array(values):
        """Convert one batch field to a contiguous int64 numpy array.

        Batches may arrive as framework tensors rather than ndarrays.
        NOTE(review): int64 matches the InputSpec dtype used by the export
        script — confirm if the model is re-exported differently.
        """
        if hasattr(values, "numpy"):
            values = values.numpy()
        return np.ascontiguousarray(values, dtype="int64")

    @paddle.no_grad()
    def predict(self, data_loader, task_label_description):
        """Predict one label per sample.

        Args:
            data_loader: Iterable yielding ``(src_ids, token_type_ids)``
                batches.
            task_label_description: Mapping whose keys are the labels; key
                order defines the class index.

        Returns:
            ``(y_preds, probs)``: a list of predicted labels and a numpy
            array of softmax probabilities with one column per label.

        Raises:
            ValueError: From ``np.concatenate`` if ``data_loader`` yields no
                batches.
        """
        index2label = dict(enumerate(task_label_description))
        class_num = len(task_label_description)

        all_scores = []
        for src_ids, token_type_ids in data_loader:
            self.input_handles[0].copy_from_cpu(self._as_int64_array(src_ids))
            self.input_handles[1].copy_from_cpu(
                self._as_int64_array(token_type_ids))
            self.predictor.run()
            # copy_to_cpu() is documented to return a numpy.ndarray, which
            # has no .numpy() method; np.asarray accepts either form.
            all_scores.append(np.asarray(self.output_handle.copy_to_cpu()))

        # Regroup the scores as (sample, label, 2) and keep column 1.
        # NOTE(review): column 1 is presumably the positive/entailment
        # score — confirm against training.
        scores = np.concatenate(all_scores, axis=0)
        scores = np.reshape(scores, (-1, class_num, 2))
        prediction_pos_probs = scores[:, :, 1]

        probs = F.softmax(paddle.to_tensor(prediction_pos_probs))
        y_pred_index = np.argmax(prediction_pos_probs, axis=-1)
        y_preds = [index2label[int(idx)] for idx in y_pred_index]
        return y_preds, probs.numpy()
# The pasted source read `if name == "main"`: markdown rendering stripped
# the dunder underscores, leaving an undefined name.
if __name__ == "__main__":
    # `predictor` and `tokenizer` are module globals read by the request
    # handler, so both must exist before the server starts.
    predictor = Predictor(
        "./static_graph/static_graph_params.pdmodel",
        "./static_graph/static_graph_params.pdiparams",
        'gpu',
        256)
    tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0')
    http_server = WSGIServer(('0.0.0.0', 8022), app)
    http_server.serve_forever()
【export静态模型脚本】如下:
import argparse import os
import paddle from paddlenlp.transformers import ErnieForSequenceClassification
# Command-line options: the dygraph checkpoint to load and the path prefix
# for the exported static-graph files.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--params_path",
    type=str,
    default='./checkpoints/model_150/model_state.pdparams',
    help="The path to model parameters to be loaded.")
parser.add_argument(
    "--output_path",
    type=str,
    default='./static_graph/static_graph_params',
    help="The path of model parameter in static graph to be saved.")
args = parser.parse_args()

# The pasted source read `if name == "main"`: markdown rendering stripped
# the dunder underscores, leaving an undefined name.
if __name__ == "__main__":
    model = ErnieForSequenceClassification.from_pretrained(
        'ernie-1.0', num_classes=2)

    if args.params_path and os.path.isfile(args.params_path):
        state_dict = paddle.load(args.params_path)
        model.set_dict(state_dict)
        print("Loaded parameters from %s" % args.params_path)
    else:
        # Without this message a mistyped path would silently export the
        # pretrained weights without the fine-tuned checkpoint.
        print("WARNING: %s not found; exporting 'ernie-1.0' weights without "
              "the fine-tuned checkpoint" % args.params_path)
    model.eval()

    # Convert to static graph with specific input description
    model = paddle.jit.to_static(
        model,
        input_spec=[
            paddle.static.InputSpec(
                shape=[None, None], dtype="int64"),  # input_ids
            paddle.static.InputSpec(
                shape=[None, None], dtype="int64")  # segment_ids
        ])
    # Save in static graph model.
    paddle.jit.save(model, args.output_path)
收到,基于静态图的部署方案我们正在进行中。
感谢 @tianxin1860 以及百度NLP团队协力定位问题, 问题已经确认, 我这边修改客户端以及再次确认后就关闭问题
修改了客户端HTTP接口,已经确认通过了!
Few-shot EFL 模型推理部署后,出现如下异常:`DataType of equal Op's duplicable Variable Y must be consistent`。
我使用的部署推理脚本如下:
请求Team验证, 比较急, 感谢!