161 lines
4.8 KiB
Python
161 lines
4.8 KiB
Python
import re
import traceback

from bs4 import BeautifulSoup
from langchain.chains import ConversationalRetrievalChain
from langchain.docstore.document import Document as LDocument
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.vectorstores.faiss import FAISS

from db.supa import SupaService
from models import ChatAppBody, ChatBody
|
|
|
|
|
|
# Module-level Supabase client shared by every handler in this file.
supabase = SupaService()
|
|
|
|
|
|
async def chat_app_handler(body: ChatAppBody, jwt: str):
    """Answer a chat message about a website the user previously saved.

    Resolves the user from the JWT, loads the stored website HTML from
    Supabase, builds an in-memory Chroma index over it, and runs a
    conversational retrieval chain with the user's message and history.

    Args:
        body: Payload carrying the website ``id``, the ``user_message``
            and the prior conversation ``history``.
        jwt: Supabase JWT identifying the current user.

    Returns:
        A dict with ``bot_response`` and ``human_message``. Errors are
        reported through ``bot_response`` rather than raised, so the
        endpoint always yields a well-formed payload.
    """
    try:
        user = supabase.get_user(jwt)
        if not user:
            return {
                "bot_response": "You are not logged in",
                "human_message": body.user_message,
            }

        user_id = user.user.id
        website_response = supabase.find_website(body.id, user_id)
        website = website_response.data

        # An empty result set means the id does not exist or the row
        # belongs to a different user.
        if not website:
            return {
                "bot_response": "Website not found",
                "human_message": body.user_message,
            }

        website = website[0]
        text = website["html"].strip()

        result = [LDocument(page_content=text, metadata={"source": "test"})]
        token_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        doc = token_splitter.split_documents(result)

        print(f'Number of documents: {len(doc)}')

        # Ephemeral per-request vector store; nothing is persisted.
        vectorstore = Chroma.from_documents(doc, OpenAIEmbeddings())

        messages = [
            SystemMessagePromptTemplate.from_template("""You are PageAssist bot. Answer the question based on the following context from the webpage you are on.
Answer must be in markdown format.
-----------------
context:
{context}
"""),
            HumanMessagePromptTemplate.from_template("{question}")
        ]
        prompt = ChatPromptTemplate.from_messages(messages)

        chat = ConversationalRetrievalChain.from_llm(
            OpenAI(temperature=0, model_name="gpt-3.5-turbo"),
            vectorstore.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
            qa_prompt=prompt,
        )

        history = [(d["human_message"], d["bot_response"]) for d in body.history]

        response = chat({
            "question": body.user_message,
            "chat_history": history,
        })

        answer = response["answer"]
        # Strip only a short leading speaker label such as "AI:" or
        # "Answer:". The previous code cut at the first ":" anywhere,
        # which mangled answers that legitimately contain a colon.
        label = re.match(r"\s*[\w ]{1,20}:\s*", answer)
        if label:
            answer = answer[label.end():]
        answer = answer.strip()

        return {
            "bot_response": answer,
            "human_message": body.user_message,
        }
    except Exception:
        # Last-resort guard: log the full traceback (print(e) alone
        # discarded it) and keep the response shape stable for the client.
        traceback.print_exc()
        return {
            "bot_response": "Something went wrong please try again later",
            "human_message": body.user_message,
        }
|
|
|
|
|
|
async def chat_extension_handler(body: ChatBody):
    """Answer a chat message about the live page sent by the extension.

    Strips PageAssist's own injected DOM nodes from the posted HTML,
    extracts the visible text, indexes it in an in-memory Chroma store
    and runs a conversational retrieval chain over it.

    Args:
        body: Payload carrying the raw page ``html``, the ``user_message``
            and the prior conversation ``history``.

    Returns:
        A dict with ``bot_response`` and ``human_message``. Errors are
        reported through ``bot_response`` rather than raised.
    """
    try:
        soup = BeautifulSoup(body.html, 'lxml')

        # Remove elements injected by the extension itself so they do not
        # pollute the page text that is fed to the model.
        for tag, element_id in (
            ('iframe', 'pageassist-iframe'),
            ('div', 'pageassist-icon'),
            ('div', '__plasmo-loading__'),
        ):
            element = soup.find(tag, id=element_id)
            if element:
                element.decompose()

        text = soup.get_text().strip()

        result = [LDocument(page_content=text, metadata={"source": "test"})]
        token_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        doc = token_splitter.split_documents(result)

        print(f'Number of documents: {len(doc)}')

        # Ephemeral per-request vector store; nothing is persisted.
        vectorstore = Chroma.from_documents(doc, OpenAIEmbeddings())

        messages = [
            SystemMessagePromptTemplate.from_template("""You are PageAssist bot. Answer the question based on the following context from the webpage you are on.
Answer must be in markdown format.
-----------------
context:
{context}
"""),
            HumanMessagePromptTemplate.from_template("{question}")
        ]
        prompt = ChatPromptTemplate.from_messages(messages)

        chat = ConversationalRetrievalChain.from_llm(
            OpenAI(temperature=0, model_name="gpt-3.5-turbo"),
            vectorstore.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
            qa_prompt=prompt,
        )

        history = [(d["human_message"], d["bot_response"]) for d in body.history]

        response = chat({
            "question": body.user_message,
            "chat_history": history,
        })

        answer = response["answer"]
        # Strip only a short leading speaker label such as "AI:" or
        # "Answer:". The previous code cut at the first ":" anywhere,
        # which mangled answers that legitimately contain a colon.
        label = re.match(r"\s*[\w ]{1,20}:\s*", answer)
        if label:
            answer = answer[label.end():]
        answer = answer.strip()

        return {
            "bot_response": answer,
            "human_message": body.user_message,
        }
    except Exception:
        # Last-resort guard: log the full traceback (print(e) alone
        # discarded it) and keep the response shape stable for the client.
        traceback.print_exc()
        return {
            "bot_response": "Something went wrong please try again later",
            "human_message": body.user_message,
        }
|