Skip to content

[Bug]: Keyword-based (full-text) search returns no results #17074

@Akshaybhure111

Description

@Akshaybhure111

Bug Description

from langchain_experimental.graph_transformers import LLMGraphTransformer
# Convert the first ten documents into graph documents, asking the
# transformer to extract node and relationship properties as well.
llm_transformer = LLMGraphTransformer(
    llm=llm,
    node_properties=True,
    relationship_properties=True,
)
graph_documents = llm_transformer.convert_to_graph_documents(docs[:10])

# Store the extracted graph documents in the graph.
# NOTE(review): the full-text index used later in this report targets the
# `__Entity__` label, which presumably comes from baseEntityLabel=True -- confirm.
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True,
)

# Initialize embeddings with debug information

print("Initializing SentenceTransformerEmbeddings...")
embeddings = SentenceTransformerEmbeddings(model_name="intfloat/e5-small-v2")
print(f"Embeddings model initialized: {embeddings.model_name}")

# Build a hybrid-search vector index from the `Document` nodes already in the
# graph; the `id` and `text` properties are the text that gets embedded and the
# vectors are stored in the `embedding` property.
vector_index = Neo4jVector.from_existing_graph(
    embedding=embeddings,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database="neo4j",
    node_label="Document",  # Adjust node_label as needed
    text_node_properties=["id", "text"],
    embedding_node_property="embedding",
    index_name="vector_index",
    # keyword_index_name="entity_index",
    search_type="hybrid",
    pre_delete_collection=True,
)
print("Neo4j vector index created successfully.")
def structured_retriever(question: str) -> str:
    """Collect graph relationships for the entities mentioned in *question*.

    Entities are extracted with ``entity_chain``. Each entity is turned into
    a full-text query by ``generate_full_text_query`` and looked up in the
    ``entity_index`` full-text index (at most two nodes per entity). For each
    matching node, its non-``MENTIONS`` relationships in both directions are
    rendered as ``source - TYPE -> target`` lines.

    The ``entity_index`` full-text index is queried by name, so it has to
    exist before this function is called.

    Args:
        question: Natural-language question to extract entities from.

    Returns:
        All relationship lines joined by newlines, or an empty string when
        no entity produced a match.
    """
    print(f"Received question: {question}")

    # Step 1: Retrieve entities from the entity chain.
    # Assumes the result exposes a `names` iterable of strings -- TODO confirm.
    entities = entity_chain.invoke({"question": question})
    print(f"Extracted entities: {entities.names}")

    # Step 2: Query the graph once per entity and gather every output line.
    # Lines are accumulated in a list and joined once at the end so that the
    # results of consecutive entities are separated by a newline instead of
    # being fused together.
    lines = []
    for entity1 in entities.names:
        # Generate the full-text query for the current entity
        full_text_query = generate_full_text_query(entity1)
        print(f"Generated full text query for entity '{entity1}': {full_text_query}")

        # Query the graph (the Cypher text is kept verbatim)
        print(f"Querying the graph for entity '{entity1}'...")
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity_index', $query, {limit:2})
        YIELD node,score
        CALL {
          WITH node
          MATCH (node)-[r:!MENTIONS]->(neighbor)
          RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
          UNION ALL
          WITH node
          MATCH (node)<-[r:!MENTIONS]-(neighbor)
          RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
        }
        RETURN output LIMIT 50
        """,
            {"query": full_text_query},
        )
        print(f"Graph query response for entity '{entity1}': {response}")

        lines.extend(el["output"] for el in response)

    # Final result
    result = "\n".join(lines)
    print(f"\nFinal result:\n{result}")
    return result
Please check the code above. I successfully get a response from the similarity (vector) search, but the keyword (full-text) search returns no answer.
However, once I run the query below, the keyword search does return an answer. Please suggest what I am missing.
# Create the `entity_index` full-text index over the `id` and `text`
# properties of `__Entity__` nodes (no-op if it already exists).
graph.query(
    "CREATE FULLTEXT INDEX entity_index IF NOT EXISTS "
    "FOR (n:__Entity__) ON EACH [n.id, n.text]"
)
thank you

Version

langchain

Steps to Reproduce

from langchain_experimental.graph_transformers import LLMGraphTransformer
# Convert the first ten documents into graph documents, asking the
# transformer to extract node and relationship properties as well.
llm_transformer = LLMGraphTransformer(
    llm=llm,
    node_properties=True,
    relationship_properties=True,
)
graph_documents = llm_transformer.convert_to_graph_documents(docs[:10])

# Store the extracted graph documents in the graph.
# NOTE(review): the full-text index used later in this report targets the
# `__Entity__` label, which presumably comes from baseEntityLabel=True -- confirm.
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True,
)

# Initialize embeddings with debug information

print("Initializing SentenceTransformerEmbeddings...")
embeddings = SentenceTransformerEmbeddings(model_name="intfloat/e5-small-v2")
print(f"Embeddings model initialized: {embeddings.model_name}")

# Build a hybrid-search vector index from the `Document` nodes already in the
# graph; the `id` and `text` properties are the text that gets embedded and the
# vectors are stored in the `embedding` property.
vector_index = Neo4jVector.from_existing_graph(
    embedding=embeddings,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database="neo4j",
    node_label="Document",  # Adjust node_label as needed
    text_node_properties=["id", "text"],
    embedding_node_property="embedding",
    index_name="vector_index",
    # keyword_index_name="entity_index",
    search_type="hybrid",
    pre_delete_collection=True,
)
print("Neo4j vector index created successfully.")
def structured_retriever(question: str) -> str:
    """Collect graph relationships for the entities mentioned in *question*.

    Entities are extracted with ``entity_chain``. Each entity is turned into
    a full-text query by ``generate_full_text_query`` and looked up in the
    ``entity_index`` full-text index (at most two nodes per entity). For each
    matching node, its non-``MENTIONS`` relationships in both directions are
    rendered as ``source - TYPE -> target`` lines.

    The ``entity_index`` full-text index is queried by name, so it has to
    exist before this function is called.

    Args:
        question: Natural-language question to extract entities from.

    Returns:
        All relationship lines joined by newlines, or an empty string when
        no entity produced a match.
    """
    print(f"Received question: {question}")

    # Step 1: Retrieve entities from the entity chain.
    # Assumes the result exposes a `names` iterable of strings -- TODO confirm.
    entities = entity_chain.invoke({"question": question})
    print(f"Extracted entities: {entities.names}")

    # Step 2: Query the graph once per entity and gather every output line.
    # Lines are accumulated in a list and joined once at the end so that the
    # results of consecutive entities are separated by a newline instead of
    # being fused together.
    lines = []
    for entity1 in entities.names:
        # Generate the full-text query for the current entity
        full_text_query = generate_full_text_query(entity1)
        print(f"Generated full text query for entity '{entity1}': {full_text_query}")

        # Query the graph (the Cypher text is kept verbatim)
        print(f"Querying the graph for entity '{entity1}'...")
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity_index', $query, {limit:2})
        YIELD node,score
        CALL {
          WITH node
          MATCH (node)-[r:!MENTIONS]->(neighbor)
          RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
          UNION ALL
          WITH node
          MATCH (node)<-[r:!MENTIONS]-(neighbor)
          RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
        }
        RETURN output LIMIT 50
        """,
            {"query": full_text_query},
        )
        print(f"Graph query response for entity '{entity1}': {response}")

        lines.extend(el["output"] for el in response)

    # Final result
    result = "\n".join(lines)
    print(f"\nFinal result:\n{result}")
    return result
Please check the code above. I successfully get a response from the similarity (vector) search, but the keyword (full-text) search returns no answer.
However, once I run the query below, the keyword search does return an answer. Please suggest what I am missing.
# Create the `entity_index` full-text index over the `id` and `text`
# properties of `__Entity__` nodes (no-op if it already exists).
graph.query(
    "CREATE FULLTEXT INDEX entity_index IF NOT EXISTS "
    "FOR (n:__Entity__) ON EACH [n.id, n.text]"
)
thank you

Relevant Logs/Tracebacks

The keyword search returns empty results; no error or traceback is raised.

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug (Something isn't working), triage (Issue needs to be triaged/prioritized)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions