From d3c352d718f044e78cec46b1aa7d4d711824ca09 Mon Sep 17 00:00:00 2001 From: soumaya Date: Fri, 24 Apr 2026 13:56:58 +0100 Subject: [PATCH 1/2] update langchain rag app final --- .../chatbot_api/pyproject.toml | 8 +++--- .../src/chains/hospital_cypher_chain.py | 4 +-- .../src/chains/hospital_review_chain.py | 24 ++++++++++++++++- .../source_code_final/docker-compose.yml | 3 ++- .../src/hospital_bulk_csv_write.py | 27 ++++++++++--------- 5 files changed, 45 insertions(+), 21 deletions(-) diff --git a/langchain-rag-app/source_code_final/chatbot_api/pyproject.toml b/langchain-rag-app/source_code_final/chatbot_api/pyproject.toml index 330db6ddb4..1707bebb66 100644 --- a/langchain-rag-app/source_code_final/chatbot_api/pyproject.toml +++ b/langchain-rag-app/source_code_final/chatbot_api/pyproject.toml @@ -4,12 +4,12 @@ version = "0.1" dependencies = [ "asyncio==3.4.3", "fastapi==0.109.0", - "langchain==0.1.0", - "langchain-openai==0.0.2", - "langchainhub==0.1.14", + "langchain==0.2.17", + "langchain-community==0.2.19", + "langchain-openai==0.1.25", + "langchainhub==0.1.21", "neo4j==5.14.1", "numpy==1.26.2", - "openai==1.7.2", "opentelemetry-api==1.22.0", "pydantic==2.5.1", "uvicorn==0.25.0" diff --git a/langchain-rag-app/source_code_final/chatbot_api/src/chains/hospital_cypher_chain.py b/langchain-rag-app/source_code_final/chatbot_api/src/chains/hospital_cypher_chain.py index 198a5df311..55586199d7 100644 --- a/langchain-rag-app/source_code_final/chatbot_api/src/chains/hospital_cypher_chain.py +++ b/langchain-rag-app/source_code_final/chatbot_api/src/chains/hospital_cypher_chain.py @@ -12,10 +12,9 @@ url=os.getenv("NEO4J_URI"), username=os.getenv("NEO4J_USERNAME"), password=os.getenv("NEO4J_PASSWORD"), + database=os.getenv("NEO4J_DATABASE", "neo4j"), ) -graph.refresh_schema() - cypher_generation_template = """ Task: Generate Cypher query for a Neo4j graph database. 
@@ -154,4 +153,5 @@ cypher_prompt=cypher_generation_prompt, validate_cypher=True, top_k=100, + allow_dangerous_requests=True, ) diff --git a/langchain-rag-app/source_code_final/chatbot_api/src/chains/hospital_review_chain.py b/langchain-rag-app/source_code_final/chatbot_api/src/chains/hospital_review_chain.py index 28ebe95513..e2b8b7f42b 100644 --- a/langchain-rag-app/source_code_final/chatbot_api/src/chains/hospital_review_chain.py +++ b/langchain-rag-app/source_code_final/chatbot_api/src/chains/hospital_review_chain.py @@ -7,16 +7,38 @@ PromptTemplate, SystemMessagePromptTemplate, ) -from langchain.vectorstores.neo4j_vector import Neo4jVector +from langchain_community.vectorstores import Neo4jVector from langchain_openai import ChatOpenAI, OpenAIEmbeddings +from neo4j import GraphDatabase HOSPITAL_QA_MODEL = os.getenv("HOSPITAL_QA_MODEL") +# langchain-community's create_new_index uses the deprecated +# db.index.vector.createNodeIndex procedure removed in Neo4j 5.27. +# Pre-create the index with the current syntax so from_existing_graph +# detects it and skips create_new_index. 
+_driver = GraphDatabase.driver( + os.getenv("NEO4J_URI"), + auth=(os.getenv("NEO4J_USERNAME"), os.getenv("NEO4J_PASSWORD")), +) +with _driver.session( + database=os.getenv("NEO4J_DATABASE", "neo4j") +) as _session: + _session.run( + "CREATE VECTOR INDEX reviews IF NOT EXISTS " + "FOR (n:Review) ON (n.embedding) " + "OPTIONS {indexConfig: {`vector.dimensions`: 1536, " + "`vector.similarity_function`: 'cosine'}}" + ) +_driver.close() +del _driver + neo4j_vector_index = Neo4jVector.from_existing_graph( embedding=OpenAIEmbeddings(), url=os.getenv("NEO4J_URI"), username=os.getenv("NEO4J_USERNAME"), password=os.getenv("NEO4J_PASSWORD"), + database=os.getenv("NEO4J_DATABASE", "neo4j"), index_name="reviews", node_label="Review", text_node_properties=[ diff --git a/langchain-rag-app/source_code_final/docker-compose.yml b/langchain-rag-app/source_code_final/docker-compose.yml index a68dd3a135..773402f913 100644 --- a/langchain-rag-app/source_code_final/docker-compose.yml +++ b/langchain-rag-app/source_code_final/docker-compose.yml @@ -13,7 +13,8 @@ services: env_file: - .env depends_on: - - hospital_neo4j_etl + hospital_neo4j_etl: + condition: service_completed_successfully ports: - "8000:8000" diff --git a/langchain-rag-app/source_code_final/hospital_neo4j_etl/src/hospital_bulk_csv_write.py b/langchain-rag-app/source_code_final/hospital_neo4j_etl/src/hospital_bulk_csv_write.py index fa7c6c161e..b3527ba430 100644 --- a/langchain-rag-app/source_code_final/hospital_neo4j_etl/src/hospital_bulk_csv_write.py +++ b/langchain-rag-app/source_code_final/hospital_neo4j_etl/src/hospital_bulk_csv_write.py @@ -42,12 +42,13 @@ def load_hospital_graph_from_csv() -> None: ) LOGGER.info("Setting uniqueness constraints on nodes") - with driver.session(database="neo4j") as session: + + with driver.session() as session: for node in NODES: session.execute_write(_set_uniqueness_constraints, node) LOGGER.info("Loading hospital nodes") - with driver.session(database="neo4j") as session: + with 
driver.session() as session: query = f""" LOAD CSV WITH HEADERS FROM '{HOSPITALS_CSV_PATH}' AS hospitals @@ -58,7 +59,7 @@ def load_hospital_graph_from_csv() -> None: _ = session.run(query, {}) LOGGER.info("Loading payer nodes") - with driver.session(database="neo4j") as session: + with driver.session() as session: query = f""" LOAD CSV WITH HEADERS FROM '{PAYERS_CSV_PATH}' AS payers @@ -68,7 +69,7 @@ def load_hospital_graph_from_csv() -> None: _ = session.run(query, {}) LOGGER.info("Loading physician nodes") - with driver.session(database="neo4j") as session: + with driver.session() as session: query = f""" LOAD CSV WITH HEADERS FROM '{PHYSICIANS_CSV_PATH}' AS physicians @@ -83,7 +84,7 @@ def load_hospital_graph_from_csv() -> None: _ = session.run(query, {}) LOGGER.info("Loading visit nodes") - with driver.session(database="neo4j") as session: + with driver.session() as session: query = f""" LOAD CSV WITH HEADERS FROM '{VISITS_CSV_PATH}' AS visits MERGE (v:Visit {{id: toInteger(visits.visit_id), @@ -106,7 +107,7 @@ def load_hospital_graph_from_csv() -> None: _ = session.run(query, {}) LOGGER.info("Loading patient nodes") - with driver.session(database="neo4j") as session: + with driver.session() as session: query = f""" LOAD CSV WITH HEADERS FROM '{PATIENTS_CSV_PATH}' AS patients @@ -120,7 +121,7 @@ def load_hospital_graph_from_csv() -> None: _ = session.run(query, {}) LOGGER.info("Loading review nodes") - with driver.session(database="neo4j") as session: + with driver.session() as session: query = f""" LOAD CSV WITH HEADERS FROM '{REVIEWS_CSV_PATH}' AS reviews @@ -134,7 +135,7 @@ def load_hospital_graph_from_csv() -> None: _ = session.run(query, {}) LOGGER.info("Loading 'AT' relationships") - with driver.session(database="neo4j") as session: + with driver.session() as session: query = f""" LOAD CSV WITH HEADERS FROM '{VISITS_CSV_PATH}' AS row MATCH (source: `Visit` {{ `id`: toInteger(trim(row.`visit_id`)) }}) @@ -145,7 +146,7 @@ def load_hospital_graph_from_csv() 
-> None: _ = session.run(query, {}) LOGGER.info("Loading 'WRITES' relationships") - with driver.session(database="neo4j") as session: + with driver.session() as session: query = f""" LOAD CSV WITH HEADERS FROM '{REVIEWS_CSV_PATH}' AS reviews MATCH (v:Visit {{id: toInteger(reviews.visit_id)}}) @@ -155,7 +156,7 @@ def load_hospital_graph_from_csv() -> None: _ = session.run(query, {}) LOGGER.info("Loading 'TREATS' relationships") - with driver.session(database="neo4j") as session: + with driver.session() as session: query = f""" LOAD CSV WITH HEADERS FROM '{VISITS_CSV_PATH}' AS visits MATCH (p:Physician {{id: toInteger(visits.physician_id)}}) @@ -165,7 +166,7 @@ def load_hospital_graph_from_csv() -> None: _ = session.run(query, {}) LOGGER.info("Loading 'COVERED_BY' relationships") - with driver.session(database="neo4j") as session: + with driver.session() as session: query = f""" LOAD CSV WITH HEADERS FROM '{VISITS_CSV_PATH}' AS visits MATCH (v:Visit {{id: toInteger(visits.visit_id)}}) @@ -178,7 +179,7 @@ def load_hospital_graph_from_csv() -> None: _ = session.run(query, {}) LOGGER.info("Loading 'HAS' relationships") - with driver.session(database="neo4j") as session: + with driver.session() as session: query = f""" LOAD CSV WITH HEADERS FROM '{VISITS_CSV_PATH}' AS visits MATCH (p:Patient {{id: toInteger(visits.patient_id)}}) @@ -188,7 +189,7 @@ def load_hospital_graph_from_csv() -> None: _ = session.run(query, {}) LOGGER.info("Loading 'EMPLOYS' relationships") - with driver.session(database="neo4j") as session: + with driver.session() as session: query = f""" LOAD CSV WITH HEADERS FROM '{VISITS_CSV_PATH}' AS visits MATCH (h:Hospital {{id: toInteger(visits.hospital_id)}}) From 9a38a20b78416de0dd59d0739ccba63104292824 Mon Sep 17 00:00:00 2001 From: Soumaya Mauthoor Date: Fri, 24 Apr 2026 14:07:12 +0100 Subject: [PATCH 2/2] Document risks of LLM database query generation Add comment about risks of LLM generating database queries --- 
.../chatbot_api/src/chains/hospital_cypher_chain.py | 1 + 1 file changed, 1 insertion(+) diff --git a/langchain-rag-app/source_code_final/chatbot_api/src/chains/hospital_cypher_chain.py b/langchain-rag-app/source_code_final/chatbot_api/src/chains/hospital_cypher_chain.py index 55586199d7..80bc4d3500 100644 --- a/langchain-rag-app/source_code_final/chatbot_api/src/chains/hospital_cypher_chain.py +++ b/langchain-rag-app/source_code_final/chatbot_api/src/chains/hospital_cypher_chain.py @@ -153,5 +153,6 @@ cypher_prompt=cypher_generation_prompt, validate_cypher=True, top_k=100, + # acknowledges the risk of letting an LLM generate and execute database queries allow_dangerous_requests=True, )