Open DevShivmohan opened 1 year ago
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.*;
/**
 * Plain data holder describing one student of the sample data set.
 *
 * <p>All accessors, the all-arguments constructor and the no-arguments constructor
 * are generated by the Lombok annotations below; no hand-written members exist.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
class Student{
/** Numeric identifier of the student. */
private int id;
/** Student name; this is the field the n-gram search matches against. */
private String name;
/** Free-text address of the student. */
private String address;
}
/**
 * Search result entry: a {@link Student} together with its match priority.
 *
 * <p>Instances are created by {@code NGramSearch.filterStudents}, which starts the
 * priority at 0 and raises it by one for every search token found in the student's name.
 * Accessors and constructors are generated by the Lombok annotations below.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
class FilterStudents{
/** The student this result refers to. */
private Student student;
/** Number of search tokens that matched; higher values sort first in the search result. */
private int priority;
}
/**
 * Demonstrates a simple n-gram based fuzzy search over an in-memory list of students.
 *
 * <p>The search keyword is split into character bigrams and every student is ranked by
 * the number of those bigrams that occur (case-insensitively) in the student's name.
 */
public class NGramSearch {
    /** N-gram size used by {@link #filterStudents(String)}. */
    private static final int SEARCH_TOKEN_SIZE = 2;

    /** Sample data set; filled once by the constructor. */
    private final List<Student> students = new ArrayList<>();

    /** Creates the search demo and populates the sample data set. */
    public NGramSearch() {
        students.add(new Student(1, "Shivmohan", "Siddharth nagar"));
        students.add(new Student(2, "Vipul", "Varansi"));
        students.add(new Student(3, "Ram kumar", "Sinch in india"));
        students.add(new Student(4, "Deepak", "Lalpur naugarh"));
        students.add(new Student(5, "Raman", "Buddh nagar india"));
        students.add(new Student(6, "Shivmohan", "Kushi nagar"));
        students.add(new Student(7, "d", "Nakha jungle"));
        students.add(new Student(8, "fr", "Kushi nagar"));
        students.add(new Student(9, "se", "Dift nagar"));
        students.add(new Student(10, "sddsd", "fatehpur"));
    }

    /**
     * Runs the demo: ranks all sample students against a keyword that matches no name
     * exactly and prints one result per line, best match first.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        var search = new NGramSearch();
        search.filterStudents("Shvimohn").forEach(System.out::println);
    }

    /**
     * Splits a keyword into its distinct character n-grams.
     *
     * @param tokenSize length of each n-gram (1 = unigram, 2 = bigram, 3 = trigram, ...)
     * @param keyword any search keyword
     * @return the distinct substrings of {@code keyword} of length {@code tokenSize};
     *         empty when the keyword is shorter than {@code tokenSize}
     * @throws IllegalArgumentException if {@code tokenSize} is less than 1
     * @throws NullPointerException if {@code keyword} is {@code null}
     */
    public Set<String> nGramTokens(int tokenSize, String keyword) {
        // A size of 0 would produce the empty token, which is contained in every string,
        // and a negative size would fail inside substring() with an unrelated message.
        if (tokenSize < 1) {
            throw new IllegalArgumentException("tokenSize must be >= 1, got " + tokenSize);
        }
        Objects.requireNonNull(keyword, "keyword");

        Set<String> ngrams = new HashSet<>();
        for (int i = 0; i <= keyword.length() - tokenSize; i++) {
            ngrams.add(keyword.substring(i, i + tokenSize));
        }
        return ngrams;
    }

    /**
     * Ranks every student by how many bigrams of the search keyword occur in the
     * student's name, ignoring case.
     *
     * <p>All students are returned, including those with no matching bigram (priority 0).
     * Students with equal priority keep the order of the underlying data set.
     *
     * @param address the search keyword; despite the parameter name it is compared
     *        against student names, not addresses
     * @return a new mutable list with one entry per student, sorted by descending priority
     * @throws NullPointerException if {@code address} is {@code null}
     */
    public List<FilterStudents> filterStudents(String address) {
        // Lower-case each token once up front. A list (not a set) is used so that tokens
        // differing only by case are still counted separately, as before.
        List<String> tokens = new ArrayList<>();
        for (String token : nGramTokens(SEARCH_TOKEN_SIZE, address)) {
            tokens.add(token.toLowerCase(Locale.ROOT));
        }

        // Sequential on purpose: the previous parallel streams appended to an
        // unsynchronized ArrayList and incremented the priority non-atomically,
        // which could drop results or undercount matches.
        List<FilterStudents> ranked = new ArrayList<>(students.size());
        for (Student student : students) {
            String name = student.getName().toLowerCase(Locale.ROOT);
            int priority = 0;
            for (String token : tokens) {
                if (name.contains(token)) {
                    priority++;
                }
            }
            ranked.add(new FilterStudents(student, priority));
        }

        // List.sort is stable, so ties stay in data-set order.
        ranked.sort(Comparator.comparingInt(FilterStudents::getPriority).reversed());
        return ranked;
    }
}
-- select * from hfj_spidx_string;
-- Lists string-index rows ('given' / 'name') of Patient resources together with the
-- 'gender' token row of the same resource, best SIMILARITY match first.
-- NOTE(review): 056 is an unquoted numeric literal, so the leading zero is presumably
-- lost when it is concatenated - confirm whether '056' was intended.
-- NOTE(review): the parts are joined with an empty string (''), not a space - confirm
-- that running the values together is intended.
SELECT
    hss.sp_id,
    hss.res_id,
    hss.sp_name,
    hss.sp_value_exact,
    hst.sp_name,
    hst.sp_value
FROM hfj_spidx_string AS hss
INNER JOIN hfj_spidx_token AS hst
    ON hss.res_id = hst.res_id
WHERE hss.sp_name IN ('given', 'name')
  AND hst.sp_name IN ('gender')
  AND hss.res_type = 'Patient'
ORDER BY SIMILARITY(
    hss.sp_value_exact || '' || hss.res_id || '' || hst.sp_value,
    'jam' || '' || 056 || '' || 'male'
) DESC;
-- Returns the stored resource text of Patient resources, joined to their name/address
-- string-index rows and their 'gender' token row, best SIMILARITY match first.
-- NOTE(review): 056 is an unquoted numeric literal, so the leading zero is presumably
-- lost when it is concatenated - confirm whether '056' was intended.
-- NOTE(review): the parts are joined with an empty string (''), not a space - confirm
-- that running the values together is intended.
SELECT
    hrv.res_id,
    hrv.res_text_vc
FROM hfj_res_ver AS hrv
INNER JOIN hfj_spidx_string AS hss
    ON hrv.res_id = hss.res_id
INNER JOIN hfj_spidx_token AS hst
    ON hss.res_id = hst.res_id
WHERE hss.sp_name IN (
          'given',
          'name',
          'address-postalcode',
          'address-state',
          'address',
          'address-city'
      )
  AND hst.sp_name IN ('gender')
  AND hss.res_type = 'Patient'
ORDER BY SIMILARITY(
    hss.sp_value_exact || '' || hss.res_id || '' || hst.sp_value,
    'jam' || '' || 056 || '' || 'male' || '' || 'Siddharth nagar'
) DESC;
-- Returns name/address string-index values and the stored resource text of male
-- Patient resources whose 'birthdate' lower bound lies between 1900-02-01 and
-- 2000-05-04, ordered by SIMILARITY of the indexed string value to 'shiv'.
SELECT
    hss.sp_name,
    hss.sp_value_exact,
    hrv.res_id,
    hrv.res_text_vc
FROM hfj_res_ver AS hrv
INNER JOIN hfj_spidx_string AS hss
    ON hss.res_id = hrv.res_id
INNER JOIN hfj_spidx_token AS hst
    ON hst.res_id = hrv.res_id
INNER JOIN hfj_spidx_date AS hsd
    ON hsd.res_id = hrv.res_id
-- Author's note on the name list below: replace with blank quote.
WHERE hss.sp_name IN ('given', 'name', 'address-postalcode', 'address', 'address-city')
  AND hst.sp_name = 'gender'
  AND hst.sp_value = 'male'
  AND hss.res_type = 'Patient'
  AND hsd.sp_value_low BETWEEN '1900-02-01' AND '2000-05-04'
  AND hsd.sp_name = 'birthdate'
-- Author's note on the search string below: concat with all values, like 'shiv'.
ORDER BY SIMILARITY(hss.sp_value_exact, 'shiv') DESC;
Implement an n-gram algorithm for search in MySQL
examples
This query also returns the records with the most matches first (in descending order).
In MySQL by default
In Java n-gram implementation
For example purpose data set
Enabling n-gram (trigram) search in PostgreSQL
CREATE EXTENSION pg_trgm;
select <column_1>,<column_2>,<column_n> from <table_name> order by SIMILARITY(<column_1> || ' ' || <column_2> || ' ' || <column_n>,<search_string>) desc;
select * from test order by SIMILARITY(name || ' ' || bio,'HARRY') desc;