zjc0516 / language-detection

Automatically exported from code.google.com/p/language-detection
0 stars 0 forks source link

How does priority work in the prior map? #61

Open GoogleCodeExporter opened 8 years ago

GoogleCodeExporter commented 8 years ago

Hello guys.

I'm trying to understand how priority works in the prior map. At first glance it 
sounds easy. 

Here is the code I have:

package Tests.Language_detection_console;

import com.cybozu.labs.langdetect.*;

import java.util.*;

/**
 * Small console experiment around the langdetect {@code Detector}: it hands the
 * detector a hand-written prior map and prints the language reported for a
 * fixed sample string.
 */
public class Language_detection_console
{
  /** Language codes that are each entered into the prior map with weight 0.01. */
  private static final String[] LOW_PRIOR_LANGS =
  {
    "es", "et", "fa", "fi", "fr", "he", "hi", "hu", "id", "it",
    "ja", "ko", "lt", "lv", "ml", "nl", "no", "pl", "pt", "ro",
    "ru", "sk", "sl", "sv", "th", "tr", "uk", "ur", "vi"
  };

  /** Prior weights passed to every detector created by this class. */
  private final static HashMap<String, Double> text_prior_map = build_prior_map();

  /**
   * Builds the prior map: "en" is weighted 0.3, every code in
   * {@link #LOW_PRIOR_LANGS} is weighted 0.01.
   *
   * @return a freshly populated map of language code to prior weight
   */
  private static HashMap<String, Double> build_prior_map()
  {
    HashMap<String, Double> weights = new HashMap<>();
    weights.put( "en", 0.3 );
    for( String code : LOW_PRIOR_LANGS )
    {
      weights.put( code, 0.01 );
    }
    return weights;
  }

  /**
   * Loads the language profiles and fixes the factory seed to 0, but only when
   * the factory currently reports an empty language list. A
   * {@code LangDetectException} is reported on stderr and otherwise ignored.
   */
  public Language_detection_console()
  {
    try
    {
      boolean profiles_missing = DetectorFactory.getLangList().isEmpty();
      if( profiles_missing )
      {
        DetectorFactory.loadProfile( "resources/Language_detector/profiles" );
        DetectorFactory.setSeed( 0 );
      }
    }
    catch( LangDetectException e )
    {
      System.err.println( e );
    }
  }

  /**
   * Runs one detection on the sample text "Hello" using the class-wide prior
   * map and prints the detector's answer to stdout. A
   * {@code LangDetectException} is reported on stderr and otherwise ignored.
   */
  public void detect_text_language()
  {
    try
    {
      Detector detector = DetectorFactory.create();
      // NOTE(review): how strongly these priors influence the final result is
      // decided inside the library - confirm against Detector's implementation.
      detector.setPriorMap( text_prior_map );
      detector.append( "Hello" );

      System.out.println( detector.detect() );
    }
    catch( LangDetectException e )
    {
      System.err.println( e );
    }
  }
}

In the output I get "fi 0.9076997887406691". English is not in the output list; 
why does this happen?

Original issue reported on code.google.com by Hro...@gmail.com on 18 Nov 2013 at 5:58