2023-04-15 21:59:27 +05:30

161 lines
4.8 KiB
Python

import logging
import re

from bs4 import BeautifulSoup
from langchain.chains import ConversationalRetrievalChain
from langchain.docstore.document import Document as LDocument
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.vectorstores.faiss import FAISS

from db.supa import SupaService
from models import ChatBody, ChatAppBody
# Module-level SupaService instance, created once at import time.
# chat_app_handler uses it to resolve the caller from a JWT (get_user) and to
# look up the stored website record (find_website).
supabase = SupaService()
logger = logging.getLogger(__name__)

# Generic reply returned whenever a handler fails for any reason.
_FALLBACK_ERROR = "Something went wrong please try again later"

# System prompt for the question-answering step; `{context}` is filled in by
# the retrieval chain with the retrieved page chunk.
_QA_SYSTEM_TEMPLATE = """You are PageAssist bot. Answer the question based on the following context from the webpage you are on.
Answer must be in markdown format.
-----------------
context:
{context}
"""

# Speaker labels the model may echo at the very start of its reply
# (e.g. "AI: ..." or "PageAssist bot: ..."). Extend the alternation if other
# labels show up in practice.
_SPEAKER_LABEL = re.compile(
    r"^\s*(?:ai|assistant|bot|answer|response|pageassist(?:\s+bot)?)\s*:\s*",
    re.IGNORECASE,
)

# (tag name, element id) pairs that chat_extension_handler removes from the
# submitted page before extracting its text.
_INJECTED_ELEMENTS = (
    ("iframe", "pageassist-iframe"),
    ("div", "pageassist-icon"),
    ("div", "__plasmo-loading__"),
)


def _strip_speaker_label(answer: str) -> str:
    """Return *answer* without a leading speaker label and outer whitespace.

    Only a label at the very start of the text is removed. Colons inside the
    answer body (times such as ``10:30``, URLs, "Here are the steps:") are
    left untouched, so real content is never truncated.
    """
    return _SPEAKER_LABEL.sub("", answer, count=1).strip()


def _reply(body, bot_response: str) -> dict:
    """Build the response payload shared by every handler exit path."""
    return {
        "bot_response": bot_response,
        "human_message": body.user_message,
    }


def _answer_from_text(text: str, question: str, history) -> str:
    """Answer *question* using *text* as the only knowledge source.

    The text is split into chunks, embedded into a Chroma vector store and
    queried through a ConversationalRetrievalChain that retrieves a single
    chunk (``k=1``) as context.

    Args:
        text: Page content to answer from; surrounding whitespace is stripped.
        question: The user's current message.
        history: Iterable of previous turns, each subscriptable with the keys
            ``"human_message"`` and ``"bot_response"``.

    Returns:
        The chain's answer with any leading speaker label removed.
    """
    page = [LDocument(page_content=text.strip(), metadata={"source": "test"})]
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(page)
    logger.info("Number of documents: %d", len(chunks))

    # NOTE(review): a new vector store is built and embedded on every request;
    # whether Chroma isolates or shares its default collection between calls
    # depends on the installed chromadb version -- confirm.
    vectorstore = Chroma.from_documents(chunks, OpenAIEmbeddings())

    prompt = ChatPromptTemplate.from_messages([
        SystemMessagePromptTemplate.from_template(_QA_SYSTEM_TEMPLATE),
        HumanMessagePromptTemplate.from_template("{question}"),
    ])
    chain = ConversationalRetrievalChain.from_llm(
        OpenAI(temperature=0, model_name="gpt-3.5-turbo"),
        vectorstore.as_retriever(search_kwargs={"k": 1}),
        return_source_documents=True,
        qa_prompt=prompt,
    )

    chat_history = [
        (turn["human_message"], turn["bot_response"]) for turn in history
    ]
    response = chain({"question": question, "chat_history": chat_history})
    return _strip_speaker_label(response["answer"])


async def chat_app_handler(body: ChatAppBody, jwt: str):
    """Answer a question about a website stored for the authenticated user.

    Args:
        body: Request payload; ``id``, ``user_message`` and ``history`` are
            read from it.
        jwt: Token passed to ``supabase.get_user`` to identify the caller.

    Returns:
        A dict with ``bot_response`` (the answer, or a short message when the
        user is not logged in, the website is not found, or anything fails)
        and ``human_message`` (the user's message echoed back).
    """
    try:
        user = supabase.get_user(jwt)
        if not user:
            return _reply(body, "You are not logged in")

        websites = supabase.find_website(body.id, user.user.id).data
        # Truthiness check so an empty list and a missing (None) payload are
        # both reported as "not found" instead of surfacing as a generic error.
        if not websites:
            return _reply(body, "Website not found")

        # NOTE(review): the stored "html" field is used as-is, without the
        # BeautifulSoup cleanup chat_extension_handler applies -- presumably
        # it already holds extracted text; confirm.
        answer = _answer_from_text(
            websites[0]["html"], body.user_message, body.history
        )
        return _reply(body, answer)
    except Exception:
        # Top-level boundary: log with traceback and degrade to a generic reply.
        logger.exception("chat_app_handler failed")
        return _reply(body, _FALLBACK_ERROR)


async def chat_extension_handler(body: ChatBody):
    """Answer a question about the raw HTML page sent by the extension.

    Elements listed in ``_INJECTED_ELEMENTS`` are removed from the parsed page
    before its text is extracted, so they do not end up in the context.

    Args:
        body: Request payload; ``html``, ``user_message`` and ``history`` are
            read from it.

    Returns:
        A dict with ``bot_response`` (the answer, or a generic error message
        when anything fails) and ``human_message`` (the user's message echoed
        back).
    """
    try:
        soup = BeautifulSoup(body.html, "lxml")
        for tag_name, element_id in _INJECTED_ELEMENTS:
            node = soup.find(tag_name, id=element_id)
            if node:
                node.decompose()

        answer = _answer_from_text(
            soup.get_text(), body.user_message, body.history
        )
        return _reply(body, answer)
    except Exception:
        # Top-level boundary: log with traceback and degrade to a generic reply.
        logger.exception("chat_extension_handler failed")
        return _reply(body, _FALLBACK_ERROR)