Open surendransuri opened 4 months ago
Hi @surendransuri ,
Please check the `namedEntities` output and confirm whether any entities are being recognized at all in your text:
https://learn.microsoft.com/en-us/azure/search/cognitive-search-skill-entity-recognition
I have tried using the EntityRecognitionSkill, combined with the text split skill and the embedding skill, to extract entities.
In the output I am not getting the extracted entities; the fields for people, skills, locations, etc. all show null values.
# PFB (please find below) the code:
# Index schema. `chunk_id` is the document key; `vector` holds a
# 1536-dimension embedding searched through the "myHnswProfile" profile.
fields = [
    SearchField(
        name="parent_id",
        type=SearchFieldDataType.String,
        sortable=True,
        filterable=True,
        facetable=True,
    ),
    SearchField(
        name="chunk_id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
        analyzer_name="keyword",
    ),
    SearchField(
        name="chunk",
        type=SearchFieldDataType.String,
        sortable=False,
        filterable=False,
        facetable=False,
    ),
    SearchField(
        name="vector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        vector_search_dimensions=1536,
        vector_search_profile_name="myHnswProfile",
    ),
    SearchField(name="title", type=SearchFieldDataType.String),
    SearchField(name="url", type=SearchFieldDataType.String),
    SearchField(name="last_modified", type=SearchFieldDataType.DateTimeOffset),
    # The entity recognition skill emits each entity category as an array of
    # strings, so the receiving fields must be string collections; a single
    # String field cannot hold the skill output.
    SearchField(
        name="people",
        type=SearchFieldDataType.Collection(SearchFieldDataType.String),
    ),
    SearchField(
        name="skills",
        type=SearchFieldDataType.Collection(SearchFieldDataType.String),
    ),
]
# Entity recognition over the document text. The "persons" and "skills"
# outputs are stored under the skill context as "people" and "skills".
entity_skill = EntityRecognitionSkill(
    description="Skill Used to detect entities from the document",
    # No trailing slash: outputs are addressed as <context>/<target_name>,
    # i.e. /document/content/people and /document/content/skills.
    context="/document/content",
    # NOTE(review): "PhoneNumber", "Address" and "Skill" are documented for
    # the V3 entity recognition skill only - confirm the skill version that
    # the SDK sends supports them.
    categories=[
        "Person",
        "Email",
        "Location",
        "Organization",
        "PhoneNumber",
        "Address",
        "Skill",
    ],
    inputs=[
        InputFieldMappingEntry(name="text", source="/document/content"),
        # NOTE(review): /document/language has to be produced by an earlier
        # skill (not visible in this snippet) - confirm it exists.
        InputFieldMappingEntry(name="languageCode", source="/document/language"),
    ],
    outputs=[
        OutputFieldMappingEntry(name="persons", target_name="people"),
        OutputFieldMappingEntry(name="skills", target_name="skills"),
    ],
)
# Project each chunk under /document/pages into its own search document in
# the target index, linked to its source document through `parent_id`.
index_projections = SearchIndexerIndexProjections(
    selectors=[
        SearchIndexerIndexProjectionSelector(
            target_index_name=index_name,
            parent_key_field_name="parent_id",
            # "/*" enumerates the individual pages; without it the context
            # does not match the per-page "/document/pages/*/vector" source.
            source_context="/document/pages/*",
            mappings=[
                InputFieldMappingEntry(name="chunk", source="/document/pages/*"),
                InputFieldMappingEntry(name="vector", source="/document/pages/*/vector"),
                InputFieldMappingEntry(name="title", source="/document/metadata_storage_name"),
                # Entity outputs are read from under /document/content, so
                # every chunk of a document receives the same values.
                InputFieldMappingEntry(name="people", source="/document/content/people"),
                InputFieldMappingEntry(name="skills", source="/document/content/skills"),
            ],
        ),
    ],
    parameters=SearchIndexerIndexProjectionsParameters(
        projection_mode=IndexProjectionMode.SKIP_INDEXING_PARENT_DOCUMENTS
    ),
)
# Indexer that ties the data source, the skillset and the target index together.
indexer = SearchIndexer(
    name=indexer_name,
    description="Indexer to index documents and generate embeddings",
    skillset_name=skillset_name,
    target_index_name=index_name,
    data_source_name=data_source.name,
    # Map the metadata_storage_name field to the title field in the index to display the PDF title in the search results
    # NOTE(review): no field_mappings argument follows this comment in the
    # snippet - confirm whether one was intended here.
)