❌ create_final_entities /lib/python3.12/site-packages/numpy/core/fromnumeric.py:59: FutureWarning: 'DataFrame.swapaxes' is deprecated and will be removed in a future version. Please use 'DataFrame.transpose' instead. #441
(graphrag-venv) root@UBANTU:/home/tom/tomAI/graphrag/ragtest# python -m graphrag.index --root . -vv
🚀 Reading settings from settings.yaml
Using default configuration: {
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_chat",
"model": "tom/x-ai-vqw",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:11434/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"parallelization": {
"stagger": 0.3,
"num_threads": 50
},
"async_mode": "threaded",
"root_dir": ".",
"reporting": {
"type": "file",
"base_dir": "output/${timestamp}/reports",
"storage_account_blob_url": null
},
"storage": {
"type": "file",
"base_dir": "output/${timestamp}/artifacts",
"storage_account_blob_url": null
},
"cache": {
"type": "file",
"base_dir": "cache",
"storage_account_blob_url": null
},
"input": {
"type": "file",
"file_type": "text",
"base_dir": "input",
"storage_account_blob_url": null,
"encoding": "utf-8",
"file_pattern": ".*\\.txt$",
"file_filter": null,
"source_column": null,
"timestamp_column": null,
"timestamp_format": null,
"text_column": "text",
"title_column": null,
"document_attribute_columns": []
},
"embed_graph": {
"enabled": false,
"num_walks": 10,
"walk_length": 40,
"window_size": 2,
"iterations": 3,
"random_seed": 597832,
"strategy": null
},
"embeddings": {
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"parallelization": {
"stagger": 0.3,
"num_threads": 50
},
"async_mode": "threaded",
"batch_size": 16,
"batch_max_tokens": 8191,
"target": "required",
"skip": [],
"vector_store": null,
"strategy": null
},
"chunks": {
"size": 300,
"overlap": 100,
"group_by_columns": [
"id"
],
"strategy": null
},
"snapshots": {
"graphml": false,
"raw_entities": false,
"top_level_nodes": false
},
"entity_extraction": {
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_chat",
"model": "tom/x-ai-vqw",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:11434/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"parallelization": {
"stagger": 0.3,
"num_threads": 50
},
"async_mode": "threaded",
"prompt": "prompts/entity_extraction.txt",
"entity_types": [
"organization",
"person",
"geo",
"event"
],
"max_gleanings": 0,
"strategy": null
},
"summarize_descriptions": {
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_chat",
"model": "tom/x-ai-vqw",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:11434/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"parallelization": {
"stagger": 0.3,
"num_threads": 50
},
"async_mode": "threaded",
"prompt": "prompts/summarize_descriptions.txt",
"max_length": 500,
"strategy": null
},
"community_reports": {
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_chat",
"model": "tom/x-ai-vqw",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:11434/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"parallelization": {
"stagger": 0.3,
"num_threads": 50
},
"async_mode": "threaded",
"prompt": null,
"max_length": 2000,
"max_input_length": 8000,
"strategy": null
},
"claim_extraction": {
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_chat",
"model": "tom/x-ai-vqw",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:11434/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"parallelization": {
"stagger": 0.3,
"num_threads": 50
},
"async_mode": "threaded",
"enabled": false,
"prompt": "prompts/claim_extraction.txt",
"description": "Any claims or facts that could be relevant to information discovery.",
"max_gleanings": 0,
"strategy": null
},
"cluster_graph": {
"max_cluster_size": 10,
"strategy": null
},
"umap": {
"enabled": false
},
"local_search": {
"text_unit_prop": 0.5,
"community_prop": 0.1,
"conversation_history_max_turns": 5,
"top_k_entities": 10,
"top_k_relationships": 10,
"max_tokens": 12000,
"llm_max_tokens": 2000
},
"global_search": {
"max_tokens": 12000,
"data_max_tokens": 12000,
"map_max_tokens": 1000,
"reduce_max_tokens": 2000,
"concurrency": 32
},
"encoding_model": "cl100k_base",
"skip_workflows": []
}
Final Config: {
"extends": null,
"input": {
"file_type": "text",
"type": "file",
"storage_account_blob_url": null,
"base_dir": "input",
"file_pattern": ".*\\.txt$",
"file_filter": null,
"post_process": null,
"encoding": "utf-8",
"title_text_length": null
},
"reporting": {
"type": "file",
"base_dir": "output/${timestamp}/reports"
},
"storage": {
"type": "file",
"base_dir": "output/${timestamp}/artifacts"
},
"cache": {
"type": "file",
"base_dir": "cache"
},
"root_dir": ".",
"workflows": [
{
"name": "create_base_documents",
"steps": null,
"config": {
"document_attribute_columns": []
}
},
{
"name": "create_final_documents",
"steps": null,
"config": {
"document_raw_content_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"skip_raw_content_embedding": true
}
},
{
"name": "create_base_text_units",
"steps": null,
"config": {
"chunk_by": [
"id"
],
"text_chunk": {
"strategy": {
"type": "tokens",
"chunk_size": 300,
"chunk_overlap": 100,
"group_by_columns": [
"id"
]
}
}
}
},
{
"name": "join_text_units_to_entity_ids",
"steps": null,
"config": null
},
{
"name": "join_text_units_to_relationship_ids",
"steps": null,
"config": null
},
{
"name": "create_final_text_units",
"steps": null,
"config": {
"text_unit_text_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"covariates_enabled": false,
"skip_text_unit_embedding": true
}
},
{
"name": "create_base_extracted_entities",
"steps": null,
"config": {
"graphml_snapshot": false,
"raw_entity_snapshot": false,
"entity_extract": {
"stagger": 0.3,
"num_threads": 50,
"async_mode": "threaded",
"strategy": {
"type": "graph_intelligence",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_chat",
"model": "tom/x-ai-vqw",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:11434/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"extraction_prompt": "\n-Goal-\nGiven a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those
types from the text and all relationships among the identified entities.\n\n-Steps-\n1. Identify all entities. For each identified entity, extract the following information:\n-
entity_name: Name of the entity, capitalized\n- entity_type: One of the following types: [{entity_types}]\n- entity_description: Comprehensive description of the entity's attributes
and activities\nFormat each entity as (\"entity\"{tuple_delimiter}{tuple_delimiter}{tuple_delimiter}\n\n2. From the entities identified
in step 1, identify all pairs of (source_entity, target_entity) that are clearly related to each other.\nFor each pair of related entities, extract the following information:\n-
source_entity: name of the source entity, as identified in step 1\n- target_entity: name of the target entity, as identified in step 1\n- relationship_description: explanation as to
why you think the source entity and the target entity are related to each other\n- relationship_strength: a numeric score indicating strength of the relationship between the source
entity and target entity\n Format each relationship as
(\"relationship\"{tuple_delimiter}{tuple_delimiter}{tuple_delimiter}{tuple_delimiter})\n\n3. Return
output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use {record_delimiter} as the list delimiter.\n\n4. When finished, output
{completion_delimiter}\n\n######################\n-Examples-\n######################\nExample 1:\n\nEntity_types: \nText:\nwhile Alex clenched his jaw, the buzz of frustration dull
against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was
an unspoken rebellion against Cruz's narrowing vision of control and order.\n\nThen Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device
with something akin to reverence. \u201cIf this tech can be understood...\" Taylor said, their voice quieter, \"It could change the game for us. For all of us.\u201d\n\nThe underlying
dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes
locked with Taylor's, a wordless clash of wills softening into an uneasy truce.\n\nIt was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They
had all been brought here by different paths\n################\nOutput:\n(\"entity\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Alex is a character who
experiences frustration and is observant of the dynamics among other
characters.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Taylor\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Taylor is portrayed with authoritarian certainty and shows a
moment of reverence towards a device, indicating a change in
perspective.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Jordan\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Jordan shares a commitment to discovery and has a significant
interaction with Taylor regarding a device.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Cruz\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Cruz is associated with a vision of
control and order, influencing the dynamics among other characters.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"The
Device\"{tuple_delimiter}\"technology\"{tuple_delimiter}\"The Device is central to the story, with potential game-changing implications, and is revered by
Taylor.\"){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"Taylor\"{tuple_delimiter}\"Alex is affected by Taylor's authoritarian certainty and observes
changes in Taylor's attitude towards the device.\"{tuple_delimiter}7){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"Jordan\"{tuple_delimiter}\"Alex
and Jordan share a commitment to discovery, which contrasts with Cruz's
vision.\"{tuple_delimiter}6){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Taylor\"{tuple_delimiter}\"Jordan\"{tuple_delimiter}\"Taylor and Jordan interact directly regarding
the device, leading to a moment of mutual respect and an uneasy
truce.\"{tuple_delimiter}8){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Jordan\"{tuple_delimiter}\"Cruz\"{tuple_delimiter}\"Jordan's commitment to discovery is in rebellion
against Cruz's vision of control and order.\"{tuple_delimiter}5){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Taylor\"{tuple_delimiter}\"The
Device\"{tuple_delimiter}\"Taylor shows reverence towards the device, indicating its importance and potential
impact.\"{tuple_delimiter}9){completion_delimiter}\n#############################\nExample 2:\n\nEntity_types: \nText:\nThey were no longer mere operatives; they had become guardians
of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established protocols\u2014it
demanded a new perspective, a new resolve.\n\nTension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a
portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential
peril.\n\nTheir connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter
instincts gained precedence\u2014 the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce
hummed with the newfound frequency of their daring, a tone set not by the
earthly\n#############\nOutput:\n(\"entity\"{tuple_delimiter}\"Washington\"{tuple_delimiter}\"location\"{tuple_delimiter}\"Washington is a location where communications are being
received, indicating its importance in the decision-making process.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Operation:
Dulce\"{tuple_delimiter}\"mission\"{tuple_delimiter}\"Operation: Dulce is described as a mission that has evolved to interact and prepare, indicating a significant shift in objectives
and activities.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"The team\"{tuple_delimiter}\"organization\"{tuple_delimiter}\"The team is portrayed as a group of individuals who
have transitioned from passive observers to active participants in a mission, showing a dynamic change in their role.\"){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"The
team\"{tuple_delimiter}\"Washington\"{tuple_delimiter}\"The team receives communications from Washington, which influences their decision-making
process.\"{tuple_delimiter}7){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"The team\"{tuple_delimiter}\"Operation: Dulce\"{tuple_delimiter}\"The team is directly involved in
Operation: Dulce, executing its evolved objectives and activities.\"{tuple_delimiter}9){completion_delimiter}\n#############################\nExample 3:\n\nEntity_types:
\nText:\ntheir voice slicing through the buzz of activity. \"Control may be an illusion when facing an intelligence that literally writes its own rules,\" they stated stoically,
casting a watchful eye over the flurry of data.\n\n\"It's like it's learning to communicate,\" offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe
and anxiety. \"This gives talking to strangers' a whole new meaning.\"\n\nAlex surveyed his team\u2014each face a study in concentration, determination, and not a small measure of
trepidation. \"This might well be our first contact,\" he acknowledged, \"And we need to be ready for whatever answers back.\"\n\nTogether, they stood on the edge of the unknown,
forging humanity's response to a message from the heavens. The ensuing silence was palpable\u2014a collective introspection about their role in this grand cosmic play, one that could
rewrite human history.\n\nThe encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny
anticipation\n#############\nOutput:\n(\"entity\"{tuple_delimiter}\"Sam Rivera\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Sam Rivera is a member of a team working on communicating
with an unknown intelligence, showing a mix of awe and anxiety.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Alex is the
leader of a team attempting first contact with an unknown intelligence, acknowledging the significance of their
task.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Control\"{tuple_delimiter}\"concept\"{tuple_delimiter}\"Control refers to the ability to manage or govern, which is
challenged by an intelligence that writes its own rules.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Intelligence\"{tuple_delimiter}\"concept\"{tuple_delimiter}\"Intelligence
here refers to an unknown entity capable of writing its own rules and learning to communicate.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"First
Contact\"{tuple_delimiter}\"event\"{tuple_delimiter}\"First Contact is the potential initial communication between humanity and an unknown
intelligence.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Humanity's Response\"{tuple_delimiter}\"event\"{tuple_delimiter}\"Humanity's Response is the collective action taken
by Alex's team in response to a message from an unknown intelligence.\"){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Sam
Rivera\"{tuple_delimiter}\"Intelligence\"{tuple_delimiter}\"Sam Rivera is directly involved in the process of learning to communicate with the unknown
intelligence.\"{tuple_delimiter}9){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"First Contact\"{tuple_delimiter}\"Alex leads the team that might be
making the First Contact with the unknown intelligence.\"{tuple_delimiter}10){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"Humanity's
Response\"{tuple_delimiter}\"Alex and his team are the key figures in Humanity's Response to the unknown
intelligence.\"{tuple_delimiter}8){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Control\"{tuple_delimiter}\"Intelligence\"{tuple_delimiter}\"The concept of Control is
challenged by the Intelligence that writes its own rules.\"{tuple_delimiter}7){completion_delimiter}\n#############################\n-Real Data-\n######################\nEntity_types:
{entity_types}\nText: {input_text}\n######################\nOutput:",
"max_gleanings": 0,
"encoding_name": "cl100k_base",
"prechunked": true
},
"entity_types": [
"organization",
"person",
"geo",
"event"
]
}
}
},
{
"name": "create_summarized_entities",
"steps": null,
"config": {
"graphml_snapshot": false,
"summarize_descriptions": {
"stagger": 0.3,
"num_threads": 50,
"async_mode": "threaded",
"strategy": {
"type": "graph_intelligence",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_chat",
"model": "tom/x-ai-vqw",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:11434/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"summarize_prompt": "\nYou are a helpful assistant responsible for generating a comprehensive summary of the data provided below.\nGiven one or two entities,
and a list of descriptions, all related to the same entity or group of entities.\nPlease concatenate all of these into a single, comprehensive description. Make sure to include
information collected from all the descriptions.\nIf the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary.\nMake sure
it is written in third person, and include the entity names so we the have full context.\n\n#######\n-Data-\nEntities: {entity_name}\nDescription List:
{description_list}\n#######\nOutput:\n",
"max_summary_length": 500
}
}
}
},
{
"name": "create_base_entity_graph",
"steps": null,
"config": {
"graphml_snapshot": false,
"embed_graph_enabled": false,
"cluster_graph": {
"strategy": {
"type": "leiden",
"max_cluster_size": 10
}
},
"embed_graph": {
"strategy": {
"type": "node2vec",
"num_walks": 10,
"walk_length": 40,
"window_size": 2,
"iterations": 3,
"random_seed": 3
}
}
}
},
{
"name": "create_final_entities",
"steps": null,
"config": {
"entity_name_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"entity_name_description_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"skip_name_embedding": true,
"skip_description_embedding": false
}
},
{
"name": "create_final_relationships",
"steps": null,
"config": {
"relationship_description_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"skip_description_embedding": true
}
},
{
"name": "create_final_nodes",
"steps": null,
"config": {
"layout_graph_enabled": false,
"snapshot_top_level_nodes": false
}
},
{
"name": "create_final_communities",
"steps": null,
"config": null
},
{
"name": "create_final_community_reports",
"steps": null,
"config": {
"covariates_enabled": false,
"skip_title_embedding": true,
"skip_summary_embedding": true,
"skip_full_content_embedding": true,
"create_community_reports": {
"stagger": 0.3,
"num_threads": 50,
"async_mode": "threaded",
"strategy": {
"type": "graph_intelligence",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_chat",
"model": "tom/x-ai-vqw",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:11434/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"extraction_prompt": null,
"max_report_length": 2000,
"max_input_length": 8000
}
},
"community_report_full_content_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"community_report_summary_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"community_report_title_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
}
}
}
]
}
/home/tom/miniconda3/envs/graphrag-venv/lib/python3.12/site-packages/numpy/core/fromnumeric.py:59: FutureWarning: 'DataFrame.swapaxes' is deprecated and will be removed in a future
version. Please use 'DataFrame.transpose' instead.
return bound(*args, **kwds)
🚀 create_base_text_units
id chunk chunk_id document_ids n_tokens
0 680dd6d2a970a49082fa4f34bf63a34e The Project Gutenberg eBook of A Christmas Ca... 680dd6d2a970a49082fa4f34bf63a34e 300
1 95f1f8f5bdbf0bee3a2c6f2f4a4907f6 THE PROJECT GUTENBERG EBOOK A CHRISTMAS CAROL... 95f1f8f5bdbf0bee3a2c6f2f4a4907f6 300
2 3a450ed2b7fb1e5fce66f92698c13824 1958,\n 1962, 1964, 1966, 1967, 1969, 1971, 1... 3a450ed2b7fb1e5fce66f92698c13824 300
3 95b143eba145d91eacae7be3e4ebaf0c .\n Mr. Fezziwig, a kind-hearted, jovial old ... 95b143eba145d91eacae7be3e4ebaf0c 300
4 c390f1b92e2888f78b58f6af5b12afa0 debtors.\n Mrs. Cratchit, wife of Bob Cratch... c390f1b92e2888f78b58f6af5b12afa0 300
.. ... ... ... ... ...
226 972bb34ddd371530f06d006480526d3e harmless from all liability, costs and expens... 972bb34ddd371530f06d006480526d3e 300
227 2f918cd94d1825eb5cbdc2a9d3ce094e \nGutenberg Literary Archive Foundation was cr... 2f918cd94d1825eb5cbdc2a9d3ce094e 300
228 eec5fc1a2be814473698e220b303dc1b . Email contact links and up\nto date contact ... eec5fc1a2be814473698e220b303dc1b 300
229 535f6bed392a62760401b1d4f2aa5e2f compliance. To SEND\nDONATIONS or determine t... 535f6bed392a62760401b1d4f2aa5e2f 300
230 9e59af410db84b25757e3bf90e036f39 could be\nfreely shared with anyone. For fort... 9e59af410db84b25757e3bf90e036f39 155
[231 rows x 5 columns]
🚀 create_base_extracted_entities
entity_graph
0 <graphml xmlns="http://graphml.graphdrawing.or...
🚀 create_summarized_entities
entity_graph
0 <graphml xmlns="http://graphml.graphdrawing.or...
🚀 create_base_entity_graph
level clustered_graph
0 0 <graphml xmlns="http://graphml.graphdrawing.or...
1 1 <graphml xmlns="http://graphml.graphdrawing.or...
/home/tom/miniconda3/envs/graphrag-venv/lib/python3.12/site-packages/numpy/core/fromnumeric.py:59: FutureWarning: 'DataFrame.swapaxes' is deprecated and will be removed in a future
version. Please use 'DataFrame.transpose' instead.
return bound(*args, **kwds)
❌ create_final_entities
None
⠴ GraphRAG Indexer
├── Loading Input (InputFileType.text) - 1 files loaded (0 filtered) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00 0:00:00
├── create_base_text_units
├── create_base_extracted_entities
├── create_summarized_entities
├── create_base_entity_graph
└── create_final_entities
❌ Errors occurred during the pipeline run, see logs for more details.
(graphrag-venv) root@UBANTU:/home/tom/tomAI/graphrag/ragtest# python -m graphrag.index --root . -vv π Reading settings from settings.yaml Using default configuration: { "llm": { "api_key": "REDACTED, length 8", "type": "openai_chat", "model": "tom/x-ai-vqw", "max_tokens": 4000, "request_timeout": 180.0, "api_base": "http://localhost:11434/v1", "api_version": null, "proxy": null, "cognitive_services_endpoint": null, "deployment_name": null, "model_supports_json": true, "tokens_per_minute": 0, "requests_per_minute": 0, "max_retries": 1, "max_retry_wait": 10.0, "sleep_on_rate_limit_recommendation": true, "concurrent_requests": 25 }, "parallelization": { "stagger": 0.3, "num_threads": 50 }, "async_mode": "threaded", "root_dir": ".", "reporting": { "type": "file", "base_dir": "output/${timestamp}/reports", "storage_account_blob_url": null }, "storage": { "type": "file", "base_dir": "output/${timestamp}/artifacts", "storage_account_blob_url": null }, "cache": { "type": "file", "base_dir": "cache", "storage_account_blob_url": null }, "input": { "type": "file", "file_type": "text", "base_dir": "input", "storage_account_blob_url": null, "encoding": "utf-8", "file_pattern": ".\.txt$", "file_filter": null, "source_column": null, "timestamp_column": null, "timestamp_format": null, "text_column": "text", "title_column": null, "document_attribute_columns": [] }, "embed_graph": { "enabled": false, "num_walks": 10, "walk_length": 40, "window_size": 2, "iterations": 3, "random_seed": 597832, "strategy": null }, "embeddings": { "llm": { "api_key": "REDACTED, length 8", "type": "openai_embedding", "model": "mxbai-embed-large-v1", "max_tokens": 4000, "request_timeout": 180.0, "api_base": "http://localhost:9997/v1", "api_version": null, "proxy": null, "cognitive_services_endpoint": null, "deployment_name": null, "model_supports_json": null, "tokens_per_minute": 0, "requests_per_minute": 0, "max_retries": 1, "max_retry_wait": 10.0, "sleep_on_rate_limit_recommendation": true, 
"concurrent_requests": 1 }, "parallelization": { "stagger": 0.3, "num_threads": 50 }, "async_mode": "threaded", "batch_size": 16, "batch_max_tokens": 8191, "target": "required", "skip": [], "vector_store": null, "strategy": null }, "chunks": { "size": 300, "overlap": 100, "group_by_columns": [ "id" ], "strategy": null }, "snapshots": { "graphml": false, "raw_entities": false, "top_level_nodes": false }, "entity_extraction": { "llm": { "api_key": "REDACTED, length 8", "type": "openai_chat", "model": "tom/x-ai-vqw", "max_tokens": 4000, "request_timeout": 180.0, "api_base": "http://localhost:11434/v1", "api_version": null, "proxy": null, "cognitive_services_endpoint": null, "deployment_name": null, "model_supports_json": true, "tokens_per_minute": 0, "requests_per_minute": 0, "max_retries": 1, "max_retry_wait": 10.0, "sleep_on_rate_limit_recommendation": true, "concurrent_requests": 25 }, "parallelization": { "stagger": 0.3, "num_threads": 50 }, "async_mode": "threaded", "prompt": "prompts/entity_extraction.txt", "entity_types": [ "organization", "person", "geo", "event" ], "max_gleanings": 0, "strategy": null }, "summarize_descriptions": { "llm": { "api_key": "REDACTED, length 8", "type": "openai_chat", "model": "tom/x-ai-vqw", "max_tokens": 4000, "request_timeout": 180.0, "api_base": "http://localhost:11434/v1", "api_version": null, "proxy": null, "cognitive_services_endpoint": null, "deployment_name": null, "model_supports_json": true, "tokens_per_minute": 0, "requests_per_minute": 0, "max_retries": 1, "max_retry_wait": 10.0, "sleep_on_rate_limit_recommendation": true, "concurrent_requests": 25 }, "parallelization": { "stagger": 0.3, "num_threads": 50 }, "async_mode": "threaded", "prompt": "prompts/summarize_descriptions.txt", "max_length": 500, "strategy": null }, "community_reports": { "llm": { "api_key": "REDACTED, length 8", "type": "openai_chat", "model": "tom/x-ai-vqw", "max_tokens": 4000, "request_timeout": 180.0, "api_base": "http://localhost:11434/v1", 
"api_version": null, "proxy": null, "cognitive_services_endpoint": null, "deployment_name": null, "model_supports_json": true, "tokens_per_minute": 0, "requests_per_minute": 0, "max_retries": 1, "max_retry_wait": 10.0, "sleep_on_rate_limit_recommendation": true, "concurrent_requests": 25 }, "parallelization": { "stagger": 0.3, "num_threads": 50 }, "async_mode": "threaded", "prompt": null, "max_length": 2000, "max_input_length": 8000, "strategy": null }, "claim_extraction": { "llm": { "api_key": "REDACTED, length 8", "type": "openai_chat", "model": "tom/x-ai-vqw", "max_tokens": 4000, "request_timeout": 180.0, "api_base": "http://localhost:11434/v1", "api_version": null, "proxy": null, "cognitive_services_endpoint": null, "deployment_name": null, "model_supports_json": true, "tokens_per_minute": 0, "requests_per_minute": 0, "max_retries": 1, "max_retry_wait": 10.0, "sleep_on_rate_limit_recommendation": true, "concurrent_requests": 25 }, "parallelization": { "stagger": 0.3, "num_threads": 50 }, "async_mode": "threaded", "enabled": false, "prompt": "prompts/claim_extraction.txt", "description": "Any claims or facts that could be relevant to information discovery.", "max_gleanings": 0, "strategy": null }, "cluster_graph": { "max_cluster_size": 10, "strategy": null }, "umap": { "enabled": false }, "local_search": { "text_unit_prop": 0.5, "community_prop": 0.1, "conversation_history_max_turns": 5, "top_k_entities": 10, "top_k_relationships": 10, "max_tokens": 12000, "llm_max_tokens": 2000 }, "global_search": { "max_tokens": 12000, "data_max_tokens": 12000, "map_max_tokens": 1000, "reduce_max_tokens": 2000, "concurrency": 32 }, "encoding_model": "cl100k_base", "skip_workflows": [] } Final Config: { "extends": null, "input": { "file_type": "text", "type": "file", "storage_account_blob_url": null, "base_dir": "input", "file_pattern": ".\.txt$", "file_filter": null, "post_process": null, "encoding": "utf-8", "title_text_length": null }, "reporting": { "type": "file", 
"base_dir": "output/${timestamp}/reports" }, "storage": { "type": "file", "base_dir": "output/${timestamp}/artifacts" }, "cache": { "type": "file", "base_dir": "cache" }, "root_dir": ".", "workflows": [ { "name": "create_base_documents", "steps": null, "config": { "document_attribute_columns": [] } }, { "name": "create_final_documents", "steps": null, "config": { "document_raw_content_embed": { "strategy": { "type": "openai", "llm": { "api_key": "REDACTED, length 8", "type": "openai_embedding", "model": "mxbai-embed-large-v1", "max_tokens": 4000, "request_timeout": 180.0, "api_base": "http://localhost:9997/v1", "api_version": null, "proxy": null, "cognitive_services_endpoint": null, "deployment_name": null, "model_supports_json": null, "tokens_per_minute": 0, "requests_per_minute": 0, "max_retries": 1, "max_retry_wait": 10.0, "sleep_on_rate_limit_recommendation": true, "concurrent_requests": 1 }, "stagger": 0.3, "num_threads": 50, "batch_size": 16, "batch_max_tokens": 8191 } }, "skip_raw_content_embedding": true } }, { "name": "create_base_text_units", "steps": null, "config": { "chunk_by": [ "id" ], "text_chunk": { "strategy": { "type": "tokens", "chunk_size": 300, "chunk_overlap": 100, "group_by_columns": [ "id" ] } } } }, { "name": "join_text_units_to_entity_ids", "steps": null, "config": null }, { "name": "join_text_units_to_relationship_ids", "steps": null, "config": null }, { "name": "create_final_text_units", "steps": null, "config": { "text_unit_text_embed": { "strategy": { "type": "openai", "llm": { "api_key": "REDACTED, length 8", "type": "openai_embedding", "model": "mxbai-embed-large-v1", "max_tokens": 4000, "request_timeout": 180.0, "api_base": "http://localhost:9997/v1", "api_version": null, "proxy": null, "cognitive_services_endpoint": null, "deployment_name": null, "model_supports_json": null, "tokens_per_minute": 0, "requests_per_minute": 0, "max_retries": 1, "max_retry_wait": 10.0, "sleep_on_rate_limit_recommendation": true, "concurrent_requests": 
1 }, "stagger": 0.3, "num_threads": 50, "batch_size": 16, "batch_max_tokens": 8191 } }, "covariates_enabled": false, "skip_text_unit_embedding": true } }, { "name": "create_base_extracted_entities", "steps": null, "config": { "graphml_snapshot": false, "raw_entity_snapshot": false, "entity_extract": { "stagger": 0.3, "num_threads": 50, "async_mode": "threaded", "strategy": { "type": "graph_intelligence", "llm": { "api_key": "REDACTED, length 8", "type": "openai_chat", "model": "tom/x-ai-vqw", "max_tokens": 4000, "request_timeout": 180.0, "api_base": "http://localhost:11434/v1", "api_version": null, "proxy": null, "cognitive_services_endpoint": null, "deployment_name": null, "model_supports_json": true, "tokens_per_minute": 0, "requests_per_minute": 0, "max_retries": 1, "max_retry_wait": 10.0, "sleep_on_rate_limit_recommendation": true, "concurrent_requests": 25 }, "stagger": 0.3, "num_threads": 50, "extraction_prompt": "\n-Goal-\nGiven a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.\n\n-Steps-\n1. Identify all entities. For each identified entity, extract the following information:\n- entity_name: Name of the entity, capitalized\n- entity_type: One of the following types: [{entity_types}]\n- entity_description: Comprehensive description of the entity's attributes and activities\nFormat each entity as (\"entity\"{tuple_delimiter}{tuple_delimiter}{tuple_delimiter}\n\n2. From the entities identified
in step 1, identify all pairs of (source_entity, target_entity) that are clearly related to each other.\nFor each pair of related entities, extract the following information:\n-
source_entity: name of the source entity, as identified in step 1\n- target_entity: name of the target entity, as identified in step 1\n- relationship_description: explanation as to
why you think the source entity and the target entity are related to each other\n- relationship_strength: a numeric score indicating strength of the relationship between the source
entity and target entity\n Format each relationship as
(\"relationship\"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_strength>)\n\n3. Return
output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use {record_delimiter} as the list delimiter.\n\n4. When finished, output
{completion_delimiter}\n\n######################\n-Examples-\n######################\nExample 1:\n\nEntity_types: \nText:\nwhile Alex clenched his jaw, the buzz of frustration dull
against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was
an unspoken rebellion against Cruz's narrowing vision of control and order.\n\nThen Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device
with something akin to reverence. \u201cIf this tech can be understood...\" Taylor said, their voice quieter, \"It could change the game for us. For all of us.\u201d\n\nThe underlying
dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes
locked with Taylor's, a wordless clash of wills softening into an uneasy truce.\n\nIt was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They
had all been brought here by different paths\n################\nOutput:\n(\"entity\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Alex is a character who
experiences frustration and is observant of the dynamics among other
characters.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Taylor\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Taylor is portrayed with authoritarian certainty and shows a
moment of reverence towards a device, indicating a change in
perspective.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Jordan\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Jordan shares a commitment to discovery and has a significant
interaction with Taylor regarding a device.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Cruz\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Cruz is associated with a vision of
control and order, influencing the dynamics among other characters.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"The
Device\"{tuple_delimiter}\"technology\"{tuple_delimiter}\"The Device is central to the story, with potential game-changing implications, and is revered by
Taylor.\"){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"Taylor\"{tuple_delimiter}\"Alex is affected by Taylor's authoritarian certainty and observes
changes in Taylor's attitude towards the device.\"{tuple_delimiter}7){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"Jordan\"{tuple_delimiter}\"Alex
and Jordan share a commitment to discovery, which contrasts with Cruz's
vision.\"{tuple_delimiter}6){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Taylor\"{tuple_delimiter}\"Jordan\"{tuple_delimiter}\"Taylor and Jordan interact directly regarding
the device, leading to a moment of mutual respect and an uneasy
truce.\"{tuple_delimiter}8){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Jordan\"{tuple_delimiter}\"Cruz\"{tuple_delimiter}\"Jordan's commitment to discovery is in rebellion
against Cruz's vision of control and order.\"{tuple_delimiter}5){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Taylor\"{tuple_delimiter}\"The
Device\"{tuple_delimiter}\"Taylor shows reverence towards the device, indicating its importance and potential
impact.\"{tuple_delimiter}9){completion_delimiter}\n#############################\nExample 2:\n\nEntity_types: \nText:\nThey were no longer mere operatives; they had become guardians
of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established protocols\u2014it
demanded a new perspective, a new resolve.\n\nTension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a
portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential
peril.\n\nTheir connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter
instincts gained precedence\u2014 the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce
hummed with the newfound frequency of their daring, a tone set not by the
earthly\n#############\nOutput:\n(\"entity\"{tuple_delimiter}\"Washington\"{tuple_delimiter}\"location\"{tuple_delimiter}\"Washington is a location where communications are being
received, indicating its importance in the decision-making process.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Operation:
Dulce\"{tuple_delimiter}\"mission\"{tuple_delimiter}\"Operation: Dulce is described as a mission that has evolved to interact and prepare, indicating a significant shift in objectives
and activities.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"The team\"{tuple_delimiter}\"organization\"{tuple_delimiter}\"The team is portrayed as a group of individuals who
have transitioned from passive observers to active participants in a mission, showing a dynamic change in their role.\"){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"The
team\"{tuple_delimiter}\"Washington\"{tuple_delimiter}\"The team receives communications from Washington, which influences their decision-making
process.\"{tuple_delimiter}7){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"The team\"{tuple_delimiter}\"Operation: Dulce\"{tuple_delimiter}\"The team is directly involved in
Operation: Dulce, executing its evolved objectives and activities.\"{tuple_delimiter}9){completion_delimiter}\n#############################\nExample 3:\n\nEntity_types:
\nText:\ntheir voice slicing through the buzz of activity. \"Control may be an illusion when facing an intelligence that literally writes its own rules,\" they stated stoically,
casting a watchful eye over the flurry of data.\n\n\"It's like it's learning to communicate,\" offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe
and anxiety. \"This gives talking to strangers' a whole new meaning.\"\n\nAlex surveyed his team\u2014each face a study in concentration, determination, and not a small measure of
trepidation. \"This might well be our first contact,\" he acknowledged, \"And we need to be ready for whatever answers back.\"\n\nTogether, they stood on the edge of the unknown,
forging humanity's response to a message from the heavens. The ensuing silence was palpable\u2014a collective introspection about their role in this grand cosmic play, one that could
rewrite human history.\n\nThe encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny
anticipation\n#############\nOutput:\n(\"entity\"{tuple_delimiter}\"Sam Rivera\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Sam Rivera is a member of a team working on communicating
with an unknown intelligence, showing a mix of awe and anxiety.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Alex is the
leader of a team attempting first contact with an unknown intelligence, acknowledging the significance of their
task.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Control\"{tuple_delimiter}\"concept\"{tuple_delimiter}\"Control refers to the ability to manage or govern, which is
challenged by an intelligence that writes its own rules.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Intelligence\"{tuple_delimiter}\"concept\"{tuple_delimiter}\"Intelligence
here refers to an unknown entity capable of writing its own rules and learning to communicate.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"First
Contact\"{tuple_delimiter}\"event\"{tuple_delimiter}\"First Contact is the potential initial communication between humanity and an unknown
intelligence.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Humanity's Response\"{tuple_delimiter}\"event\"{tuple_delimiter}\"Humanity's Response is the collective action taken
by Alex's team in response to a message from an unknown intelligence.\"){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Sam
Rivera\"{tuple_delimiter}\"Intelligence\"{tuple_delimiter}\"Sam Rivera is directly involved in the process of learning to communicate with the unknown
intelligence.\"{tuple_delimiter}9){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"First Contact\"{tuple_delimiter}\"Alex leads the team that might be
making the First Contact with the unknown intelligence.\"{tuple_delimiter}10){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"Humanity's
Response\"{tuple_delimiter}\"Alex and his team are the key figures in Humanity's Response to the unknown
intelligence.\"{tuple_delimiter}8){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Control\"{tuple_delimiter}\"Intelligence\"{tuple_delimiter}\"The concept of Control is
challenged by the Intelligence that writes its own rules.\"{tuple_delimiter}7){completion_delimiter}\n#############################\n-Real Data-\n######################\nEntity_types:
{entity_types}\nText: {input_text}\n######################\nOutput:",
"max_gleanings": 0,
"encoding_name": "cl100k_base",
"prechunked": true
},
"entity_types": [
"organization",
"person",
"geo",
"event"
]
}
}
},
{
"name": "create_summarized_entities",
"steps": null,
"config": {
"graphml_snapshot": false,
"summarize_descriptions": {
"stagger": 0.3,
"num_threads": 50,
"async_mode": "threaded",
"strategy": {
"type": "graph_intelligence",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_chat",
"model": "tom/x-ai-vqw",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:11434/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"summarize_prompt": "\nYou are a helpful assistant responsible for generating a comprehensive summary of the data provided below.\nGiven one or two entities,
and a list of descriptions, all related to the same entity or group of entities.\nPlease concatenate all of these into a single, comprehensive description. Make sure to include
information collected from all the descriptions.\nIf the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary.\nMake sure
it is written in third person, and include the entity names so we the have full context.\n\n#######\n-Data-\nEntities: {entity_name}\nDescription List:
{description_list}\n#######\nOutput:\n",
"max_summary_length": 500
}
}
}
},
{
"name": "create_base_entity_graph",
"steps": null,
"config": {
"graphml_snapshot": false,
"embed_graph_enabled": false,
"cluster_graph": {
"strategy": {
"type": "leiden",
"max_cluster_size": 10
}
},
"embed_graph": {
"strategy": {
"type": "node2vec",
"num_walks": 10,
"walk_length": 40,
"window_size": 2,
"iterations": 3,
"random_seed": 3
}
}
}
},
{
"name": "create_final_entities",
"steps": null,
"config": {
"entity_name_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"entity_name_description_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"skip_name_embedding": true,
"skip_description_embedding": false
}
},
{
"name": "create_final_relationships",
"steps": null,
"config": {
"relationship_description_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"skip_description_embedding": true
}
},
{
"name": "create_final_nodes",
"steps": null,
"config": {
"layout_graph_enabled": false,
"snapshot_top_level_nodes": false
}
},
{
"name": "create_final_communities",
"steps": null,
"config": null
},
{
"name": "create_final_community_reports",
"steps": null,
"config": {
"covariates_enabled": false,
"skip_title_embedding": true,
"skip_summary_embedding": true,
"skip_full_content_embedding": true,
"create_community_reports": {
"stagger": 0.3,
"num_threads": 50,
"async_mode": "threaded",
"strategy": {
"type": "graph_intelligence",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_chat",
"model": "tom/x-ai-vqw",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:11434/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"extraction_prompt": null,
"max_report_length": 2000,
"max_input_length": 8000
}
},
"community_report_full_content_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"community_report_summary_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"community_report_title_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 8",
"type": "openai_embedding",
"model": "mxbai-embed-large-v1",
"max_tokens": 4000,
"request_timeout": 180.0,
"api_base": "http://localhost:9997/v1",
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 1,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 1
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
}
}
}
]
}
/home/tom/miniconda3/envs/graphrag-venv/lib/python3.12/site-packages/numpy/core/fromnumeric.py:59: FutureWarning: 'DataFrame.swapaxes' is deprecated and will be removed in a future
version. Please use 'DataFrame.transpose' instead.
return bound(*args, **kwds)
🚀 create_base_text_units
id chunk chunk_id document_ids n_tokens
0 680dd6d2a970a49082fa4f34bf63a34e The Project Gutenberg eBook of A Christmas Ca... 680dd6d2a970a49082fa4f34bf63a34e 300
1 95f1f8f5bdbf0bee3a2c6f2f4a4907f6 THE PROJECT GUTENBERG EBOOK A CHRISTMAS CAROL... 95f1f8f5bdbf0bee3a2c6f2f4a4907f6 300
2 3a450ed2b7fb1e5fce66f92698c13824 1958,\n 1962, 1964, 1966, 1967, 1969, 1971, 1... 3a450ed2b7fb1e5fce66f92698c13824 300
3 95b143eba145d91eacae7be3e4ebaf0c .\n Mr. Fezziwig, a kind-hearted, jovial old ... 95b143eba145d91eacae7be3e4ebaf0c 300
4 c390f1b92e2888f78b58f6af5b12afa0 debtors.\n Mrs. Cratchit, wife of Bob Cratch... c390f1b92e2888f78b58f6af5b12afa0 300
.. ... ... ... ... ...
226 972bb34ddd371530f06d006480526d3e harmless from all liability, costs and expens... 972bb34ddd371530f06d006480526d3e 300
227 2f918cd94d1825eb5cbdc2a9d3ce094e \nGutenberg Literary Archive Foundation was cr... 2f918cd94d1825eb5cbdc2a9d3ce094e 300
228 eec5fc1a2be814473698e220b303dc1b . Email contact links and up\nto date contact ... eec5fc1a2be814473698e220b303dc1b 300
229 535f6bed392a62760401b1d4f2aa5e2f compliance. To SEND\nDONATIONS or determine t... 535f6bed392a62760401b1d4f2aa5e2f 300
230 9e59af410db84b25757e3bf90e036f39 could be\nfreely shared with anyone. For fort... 9e59af410db84b25757e3bf90e036f39 155
[231 rows x 5 columns] 🚀 create_base_extracted_entities entity_graph 0 <graphml xmlns="http://graphml.graphdrawing.or... 🚀 create_summarized_entities entity_graph 0 <graphml xmlns="http://graphml.graphdrawing.or... 🚀 create_base_entity_graph level clustered_graph 0 0 <graphml xmlns="http://graphml.graphdrawing.or... 1 1 <graphml xmlns="http://graphml.graphdrawing.or... /home/tom/miniconda3/envs/graphrag-venv/lib/python3.12/site-packages/numpy/core/fromnumeric.py:59: FutureWarning: 'DataFrame.swapaxes' is deprecated and will be removed in a future version. Please use 'DataFrame.transpose' instead. return bound(*args, **kwds) ❌ create_final_entities None ⠴ GraphRAG Indexer ├── Loading Input (InputFileType.text) - 1 files loaded (0 filtered) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00 0:00:00 ├── create_base_text_units ├── create_base_extracted_entities ├── create_summarized_entities ├── create_base_entity_graph └── create_final_entities ❌ Errors occurred during the pipeline run, see logs for more details.