-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathservice.py
More file actions
157 lines (117 loc) · 5.38 KB
/
service.py
File metadata and controls
157 lines (117 loc) · 5.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
"""
FastAPI service exposing the 飞享IM Q&A chatbot.
Endpoints:
POST /ask – single-turn Q&A
POST /stream – streaming Q&A (answer sent incrementally as plain-text chunks)
GET /health – liveness probe
"""
import os
from collections import deque
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from config import DOCS_PERSIST_PATH
from graph import build_graph, QAState
from ingest import build_retriever, load_retriever
# ─── App lifecycle ────────────────────────────────────────────────────────────
# Module-level registry for objects created at startup. lifespan() stores the
# object returned by build_graph() under the "graph" key, the request handlers
# read it from there, and lifespan() empties the dict again on shutdown.
app_state: dict = {}
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Prepare the Q&A graph on startup and drop it again on shutdown.

    Chooses between running ingestion and loading a persisted knowledge base
    depending on whether ``DOCS_PERSIST_PATH`` exists, then publishes the graph
    built from the resulting retriever under ``app_state["graph"]``.
    """
    knowledge_base_present = os.path.exists(DOCS_PERSIST_PATH)
    if knowledge_base_present:
        print("Loading existing knowledge base...")
        retriever = load_retriever()
    else:
        print("Knowledge base not found – running ingestion...")
        retriever = build_retriever()

    graph = build_graph(retriever)
    app_state["graph"] = graph
    print("Q&A service ready.")

    # Code before the yield runs while the application starts up; code after
    # it runs when the application shuts down.
    yield
    app_state.clear()
# The ASGI application object. Startup and shutdown work is delegated to
# lifespan(); the title/description/version strings are passed to FastAPI as
# application metadata.
app = FastAPI(
    title="飞享IM 智能问答服务",
    description="基于 LangChain + LangGraph + Claude 的飞享IM知识助手",
    version="1.0.0",
    lifespan=lifespan,
)
# ─── Conversation memory ──────────────────────────────────────────────────────
# Keep the 3 most recent turns per user (one user message + one assistant
# message per turn = 6 stored messages).
MAX_HISTORY_TURNS = 3
# Per-user message history held in this module-level dict, keyed by user id:
# {userid: deque([{"role": "user"|"assistant", "content": "..."}])}
conversation_store: dict[str, deque] = {}
def get_history(userid: str) -> list[dict]:
    """Return the messages currently stored for *userid* as a new list.

    A user with no stored history yields an empty list. The returned list is
    a fresh copy of the container, so appending to or removing from it does
    not change what is stored.
    """
    stored = conversation_store.get(userid)
    if stored is None:
        return []
    return list(stored)
def save_exchange(userid: str, question: str, answer: str) -> None:
    """Record one question/answer pair in the history kept for *userid*.

    The per-user container is a deque bounded to ``MAX_HISTORY_TURNS * 2``
    messages and is created on first use; once it is full, adding new
    messages pushes the oldest ones out.
    """
    turns = conversation_store.get(userid)
    if turns is None:
        turns = deque(maxlen=MAX_HISTORY_TURNS * 2)
        conversation_store[userid] = turns

    # User message first, assistant reply second, matching the order in which
    # the exchange happened.
    turns.extend(
        (
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer},
        )
    )
# ─── Schemas ──────────────────────────────────────────────────────────────────
# Request body accepted by both POST /ask and POST /stream.
class AskRequest(BaseModel):
    # The question text to answer.
    question: str
    # Key under which conversation history is looked up and stored; requests
    # that omit it all share the "default" history.
    userid: str = "default"
# Response body returned by POST /ask.
class AskResponse(BaseModel):
    # Question as reported by the graph result; for a dict result without a
    # "question" key the handler falls back to the request's question.
    question: str
    # Answer text taken from the graph result.
    answer: str
    # Value of the graph result's "route" field. Its meaning is defined by the
    # graph module, which is not visible from this file.
    route: str
# ─── Endpoints ────────────────────────────────────────────────────────────────
@app.get("/health")
def health():
    """Liveness probe: unconditionally report an ``ok`` status."""
    payload = {"status": "ok"}
    return payload
@app.post("/ask", response_model=AskResponse)
def ask(req: AskRequest):
    """Answer a single question and remember the exchange for the user.

    The graph is invoked with the question plus the user's stored history.
    Its result may be either a plain dict or an object exposing
    ``question`` / ``answer`` / ``route`` attributes; both shapes are handled.
    """
    graph = app_state["graph"]
    state = QAState(question=req.question, history=get_history(req.userid))
    result = graph.invoke(state)

    dict_result = isinstance(result, dict)
    answer = result.get("answer", "") if dict_result else result.answer

    # History is keyed on the question exactly as the caller sent it, and is
    # written before the remaining fields are read from the result.
    save_exchange(req.userid, req.question, answer)

    if dict_result:
        question = result.get("question", req.question)
        route = result.get("route", "")
    else:
        question = result.question
        route = result.route
    return AskResponse(question=question, answer=answer, route=route)
def _iter_text_parts(content):
    """Yield the non-empty text fragments carried by one streamed chunk.

    ``content`` is handled when it is either a plain string or a list of
    content parts. Among list items only dicts tagged ``"type": "text"``
    contribute. Everything else (other part types, a missing or empty text
    value, any other content type) yields nothing.
    """
    if isinstance(content, str):
        if content:
            yield content
    elif isinstance(content, list):
        for part in content:
            if isinstance(part, dict) and part.get("type") == "text":
                # .get() so that a text part without a "text" key is skipped
                # instead of raising KeyError in the middle of the response.
                text = part.get("text")
                if text:
                    yield text


@app.post("/stream")
async def stream_ask(req: AskRequest):
    """Stream the answer incrementally as a plain-text response.

    Text emitted by the chat model inside the ``generate`` / ``reject`` nodes
    is forwarded piece by piece as it arrives; the ``fallback`` node's
    ``answer`` is emitted in one piece when that node finishes. The body
    consists of the raw text fragments only (``text/plain``, with no
    Server-Sent-Events ``data:`` framing).

    Once the event stream is exhausted, the concatenation of everything that
    was sent is stored as the assistant reply in the user's history.
    """
    graph = app_state["graph"]
    # History is read once, before streaming starts.
    history = get_history(req.userid)
    collected: list[str] = []

    async def event_generator():
        state = QAState(question=req.question, history=history)
        async for event in graph.astream_events(state, version="v2"):
            kind = event["event"]
            node = event.get("metadata", {}).get("langgraph_node", "")
            if kind == "on_chat_model_stream" and node in ("generate", "reject"):
                for text in _iter_text_parts(event["data"]["chunk"].content):
                    collected.append(text)
                    yield text
            elif kind == "on_chain_end" and node == "fallback":
                output = event["data"].get("output", {})
                answer = output.get("answer", "") if isinstance(output, dict) else ""
                if answer:
                    collected.append(answer)
                    yield answer
        # Reached only when the loop above finishes normally.
        # NOTE(review): a stream that is abandoned part-way (e.g. client
        # disconnect) presumably never gets here, so nothing is saved for
        # it. Confirm that this is the intended behavior.
        save_exchange(req.userid, req.question, "".join(collected))

    return StreamingResponse(
        event_generator(),
        media_type="text/plain; charset=utf-8",
        # Ask caches and buffering reverse proxies to pass chunks through
        # immediately rather than holding them back.
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
# ─── CLI convenience ──────────────────────────────────────────────────────────
# Lets the module be started directly as a script instead of through an
# external server command.
if __name__ == "__main__":
    # Imported here so that uvicorn is only needed when run as a script.
    import uvicorn

    # Serve the app on all network interfaces, port 8000, without auto-reload.
    uvicorn.run("service:app", host="0.0.0.0", port=8000, reload=False)