Maybe your model was converted with the INT8_FP16 parameter in post_training_quantization.
INT8_FP16 is only supported on Armv8.2 machines.
INT8_FP32 is supported on armv7, armv8, x86_avx512f, and x86_avx512_vnni.
Please reconvert the model with INT8_FP32, and make sure that your machine supports x86_avx512f or x86_avx512_vnni.
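If you are unsure whether the target x86 machine actually has those instruction sets, a quick standalone check can save a reconversion round trip. The sketch below is not part of Bolt; it just queries the CPU with GCC/Clang's __builtin_cpu_supports (the "avx512vnni" feature string needs a reasonably recent compiler):

/* check_avx512.c -- standalone sketch, not Bolt code; build with: gcc check_avx512.c */
#include <stdio.h>

int main(void) {
    __builtin_cpu_init();
    /* the two x86 levels mentioned above: avx512f and avx512_vnni */
    printf("avx512f:    %s\n", __builtin_cpu_supports("avx512f") ? "yes" : "no");
    printf("avx512vnni: %s\n", __builtin_cpu_supports("avx512vnni") ? "yes" : "no");
    return 0;
}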
When I use "./post_training_quantization -p ppo_cnn_ptq_input.bolt -i INT8_FP32 -b true -q NOQUANT -c 0 -o false" to build a quantized model, I meet this problem:

[ERROR] thread 29537 file /home/xys/bolt-master/compute/tensor/src/cpu/x86/int8/convolution.cpp line 128: got an error: Not Match

Here is my code:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "bolt.h"  /* Bolt C API */

static ModelHandle model;

int main() {
    char *model_path = "ppo_cnn_int8_q.bolt";
    // ResultHandle result;
    AFFINITY_TYPE affinity = CPU_HIGH_PERFORMANCE;
    char *algorithm_path = NULL;
    model = CreateModel(model_path, affinity, algorithm_path);
    int num_inputs = GetNumInputsFromModel(model);
    // printf("num_inputs is: %d\n", num_inputs);
    if (num_inputs == 0) return 0;

    /* one 128-byte name buffer per input */
    char **name = (char **)malloc(sizeof(char *) * num_inputs);
    for (int i = 0; i < num_inputs; i++) {
        name[i] = (char *)malloc(sizeof(char) * 128);
    }

    int n, c, h, w;
    DATA_TYPE dt;
    DATA_FORMAT df;
    double total_time = 0, inf_time;
    GetInputDataInfoFromModel(model, num_inputs, name, &n, &c, &h, &w, &dt, &df);
    printf("num_inputs: %d\nname: %s\nn: %d\nc: %d\nh: %d\nw: %d\ndt: %d\ndf: %d\n",
           num_inputs, name[0], n, c, h, w, dt, df);
    // RunModel(model, result, 1, ...)
    n = 4;  /* override the batch size */
    PrepareModel(model, num_inputs, (const char **)name, &n, &c, &h, &w, &dt, &df);
    // FreeResultHandle(result);
    ResultHandle result = AllocAllResultHandle(model);

    /* one buffer per input, sized for two batches of n*c*h*w floats, filled with dummy data */
    float **data = (float **)malloc(sizeof(float *) * num_inputs);
    for (int i = 0; i < num_inputs; i++) {
        data[i] = (float *)malloc(sizeof(float) * n * c * h * w * 2);
        for (int j = 0; j < n * c * h * w; j++) {
            data[i][j] = (float)(j * 1.2);
        }
        for (int j = n * c * h * w; j < n * c * h * w * 2; j++) {
            data[i][j] = (float)((j - n * c * h * w) * 1.2);
        }
    }
    clock_t start, end;
    printf("after prepare model\n");
    int times = 128;
    for (int k = 0; k < times; k++) {
        /* refill the input buffers (already allocated above) with dummy data */
        for (int i = 0; i < num_inputs; i++) {
            for (int j = 0; j < n * c * h * w; j++) {
                data[i][j] = (float)(j * 1.2);
            }
            for (int j = n * c * h * w; j < n * c * h * w * 2; j++) {
                data[i][j] = (float)((j - n * c * h * w) * 1.2);
            }
        }
        start = clock();
        RunModel(model, result, num_inputs, (const char **)name, (void **)data);
        end = clock();
        inf_time = (double)(end - start) / CLOCKS_PER_SEC;
        total_time += (double)(end - start) / CLOCKS_PER_SEC;
        printf("infer time ================ %f\n", inf_time);
    }
    printf("after RunModel model\n");
    end = clock();
    // total_time = (double)(end - start) / CLOCKS_PER_SEC;
    printf("infer time avg================ %f\n", total_time / times);
    int num_outputs = GetNumOutputsFromResultHandle(result);
    float **output = NULL;
    float **outputFloat;
    void **outputData;
    // output = malloc(sizeof(float *) * num_inputs);
    // for (int i = 0; i <= num_inputs; i++) {
    //     *(output + i) = malloc(sizeof(float) * n * c * h * w);
    // }
    // GetOutputDataInfoFromResultHandle(model, num_outputs, name, &n, &c, &h, &w, &dt, &df);
    printf("num_outputs is: %d\n", num_outputs);
    outputData = (void **)malloc(sizeof(void *) * num_outputs);
    GetOutputDataFromResultHandle(result, num_outputs, outputData);
    outputFloat = (float **)outputData;
    float k[5];
    /*
    for (int j = 0; j < 4; j++) {
        printf("outputFloat[0][%d] ==== %f\n", j, outputFloat[0][j]);
    }
    for (int j = 0; j < 8; j++) {
        printf("outputFloat[1][%d] ==== %f\n", j, outputFloat[1][j]);
    }
    k[0] = outputFloat[0][0];
    for (int j = 0; j < 4; j++) {
        k[j + 1] = outputFloat[1][j];
    }
    for (int j = 0; j < 5; j++) {
        printf("k[%d] ==== %f\n", j, k[j]);
    }
    */
    // GetOutputDataInfoFromResultHandle(result, num_outputs, name, &n, &c, &h, &w, &dt, &df);
    // printf("length: %d\nname: %s", n * c * h * w, name[0]);
    // GetOutputDataFromResultHandle(result, num_outputs, (void **)output);
    // for (int i = 0; i <= n * c * h * w - 1; i++) {
    //     printf("%f", data[0][i]);
    // }
    // printf("\n");
    return 0;
}
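A side note on the snippet above: it never releases the handles or the buffers it allocates. Below is a minimal teardown sketch, assuming FreeResultHandle and DestroyModel from Bolt's C API (bolt.h) behave as their names suggest; it would sit just before the final return 0:

/* teardown sketch -- fragment, belongs at the end of main() above */
FreeResultHandle(result);
DestroyModel(model);
for (int i = 0; i < num_inputs; i++) {
    free(data[i]);   /* input buffers allocated in main() */
    free(name[i]);   /* input name buffers */
}
free(data);
free(name);
free(outputData);    /* only the pointer array; the output tensors belong to the result handle */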
When I use Bolt quantization and run inference with the file ppo_cnn_int8_q.bolt, I meet this problem:
[ERROR] thread 24754 file /home/xys/bolt-master/inference/engine/include/factory.hpp line 242: this library not support to inference float16/int8+float16, please recompile with --fp16=on. Only Armv8.2+ cpu and gpu support.
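For the Armv8.2 side of that requirement: on an aarch64 Linux device you can ask the kernel whether the CPU reports half-precision support before rebuilding Bolt with --fp16=on. This is again a standalone sketch, not Bolt code; HWCAP_FPHP and HWCAP_ASIMDHP are the Linux hwcap bits for Armv8.2 FP16 and it only compiles on aarch64:

/* check_fp16.c -- aarch64 Linux only; build with: gcc check_fp16.c */
#include <stdio.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>

int main(void) {
    unsigned long hwcap = getauxval(AT_HWCAP);
    /* Armv8.2 half-precision: scalar (FPHP) and Advanced SIMD (ASIMDHP) */
    printf("fphp:    %s\n", (hwcap & HWCAP_FPHP) ? "yes" : "no");
    printf("asimdhp: %s\n", (hwcap & HWCAP_ASIMDHP) ? "yes" : "no");
    return 0;
}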