deepjavalibrary / djl

An Engine-Agnostic Deep Learning Framework in Java
https://djl.ai
Apache License 2.0
4.11k stars 653 forks source link

to add paddlepaddle demo about ALC #1042

Closed endlesshh closed 3 years ago

endlesshh commented 3 years ago

first I download the ALC module as the guid said,then I Translat the words to ids, but when I to predict ,I got a Exception .whats wrong with this? can you give me some more demos about NLP use the paddlehub ?

` private Map<String,String> word2id_dict = new HashMap<String,String>(); private Map<String,String> id2word_dict = new HashMap<String,String>(); private Map<String,String> label2id_dict= new HashMap<String,String>(); private Map<String,String> id2label_dict= new HashMap<String,String>(); private Map<String,String> word_replace_dict= new HashMap<String,String>(); private String oov_id;

/** {@inheritDoc} */
@Override
public void prepare(NDManager manager, Model model) throws IOException {
   try (InputStream is = model.getArtifact("lac/word.dic").openStream()) {
        List<String> words = Utils.readLines(is, true);
        words.stream().filter(word -> (word != null && word !="")).forEach(word -> { 
            String[] ws = word.split("  ");
            if(ws.length == 1){
                word2id_dict.put("", ws[0]); //文字是key,id是value
                id2word_dict.put(ws[0],"");
            }else{
                word2id_dict.put(ws[1], ws[0]); //文字是key,id是value
                id2word_dict.put(ws[0], ws[1]);
            } 
        }); 
    }
   try (InputStream is = model.getArtifact("lac/tag.dic").openStream()) {
       List<String> words = Utils.readLines(is, true);
       words.stream().filter(word -> (word != null && word !="")).forEach(word -> {
           String[] ws = word.split("   ");
           label2id_dict.put(ws[1], ws[0]); //文字是key,id是value
           id2label_dict.put(ws[0], ws[1]);
       }); 
   }
   try (InputStream is = model.getArtifact("lac/q2b.dic").openStream()) {
       List<String> words = Utils.readLines(is, true);
       words.stream().forEach(word -> { 
           if(StrUtil.isBlank(word)){ 
               word_replace_dict.put(" "," "); //文字是key,id是value  
           }else{  
               String[] ws = word.split("   ");
               if(ws.length == 1){
                   if(ws[0] != null){
                       word_replace_dict.put(ws[0],""); //文字是key,id是value    
                   }else{
                       word_replace_dict.put("", ws[1]); //文字是key,id是value    
                   }
               }else{
                   word_replace_dict.put(ws[0], ws[1]); //文字是key,id是value    
               }

           } 
       });  
   }
   oov_id = word2id_dict.get("OOV");
}

/** {@inheritDoc} */
@Override
public NDList processInput(TranslatorContext ctx, String[] inputs) {

    NDManager inputManager = ctx.getNDManager();

    NDList inputList = new NDList();  
    List<Integer> lod = new ArrayList<>(0);
    lod.add(new Integer(0));
    List<Integer> sh = tokenizeSingleString(inputManager,inputs,lod); 
    int size = Long.valueOf(lod.get(lod.size()-1)).intValue();
    float[] array=new float[size];
    for(int i=0;i<size;i++){
        if(sh.size() > i){
            array[i]=sh.get(i);
        }else{
            array[i]=0;
        }

    } 
    return new NDList(inputManager.create(array,new Shape(lod.get(lod.size()-1),1)).toType(DataType.INT32,true)); 

}
private NDArray tokenizeSingle(NDManager manager,String[] inputs,List<Integer> lod) {
    List<Integer> word_ids = new ArrayList<>(); 
    for(int i=0;i<inputs.length;i++){
        String input = inputs[i];
        String[] s = input.replace(" ", "").split(""); 
        for(String word : s) {  
            String newword = word_replace_dict.get(word); 
            word = StrUtil.isBlank(newword)?word:newword; 
            String word_id = word2id_dict.get(word);  
            word_ids.add(Integer.valueOf(StrUtil.isBlank(word_id)?oov_id:word_id));
        }
        lod.add(word_ids.size()+lod.get(i));
    }  
  return manager.create(word_ids.stream().mapToLong(l -> Long.valueOf(l)).toArray());

}
private List<Integer> tokenizeSingleString(NDManager manager,String[] inputs,List<Integer> lod) {
    List<Integer> word_ids = new ArrayList<>(); 
    for(int i=0;i<inputs.length;i++){
        String input = inputs[i];
        String[] s = input.replace(" ", "").split(""); 
        for(String word : s) {  
            String newword = word_replace_dict.get(word); 
            word = StrUtil.isBlank(newword)?word:newword; 
            String word_id = word2id_dict.get(word);  
            word_ids.add(Integer.valueOf(StrUtil.isBlank(word_id)?oov_id:word_id));
        }
        lod.add(word_ids.size()+lod.get(i));
    }  
    /*NDList outputList = new NDList();
    outputList.add(manager.create(word_ids.stream().mapToLong(l -> Long.valueOf(l)).toArray()));*/ 
    //return manager.create(word_ids.stream().mapToLong(l -> Long.valueOf(l)).toArray());
    return word_ids;
}
private NDArray stackInputs(List<NDList> tokenizedInputs, int index, String inputName) {
    NDArray stacked =
            NDArrays.stack(
                    tokenizedInputs
                            .stream()
                            .map(list -> list.get(index).expandDims(0))
                            .collect(Collectors.toCollection(NDList::new)));
    stacked.setName(inputName);
    return stacked;
}
/** {@inheritDoc} */
@Override
public NDList processOutput(TranslatorContext ctx, NDList list) throws IOException {

    return list;
}`

C++ Traceback (most recent call last):

Not support stack backtrace yet.


Error Message Summary:

InvalidArgumentError: Variable value (input) of OP(fluid.layers.embedding) expected >= 0 and < 20941, but got 485331305657. Please check input value. [Hint: Expected ids[i] < row_number, but received ids[i]:485331305657 >= row_number:20941.] (at D:\shixiaowei02\Paddle\paddle/fluid/operators/lookup_table_op.h:72)

endlesshh commented 3 years ago

this maybe need to change the paddle code