from models import ChatBody, ChatAppBody
from bs4 import BeautifulSoup
from langchain.docstore.document import Document as LDocument
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from db.supa import SupaService

supabase = SupaService()


async def chat_app_handler(body: ChatAppBody, jwt: str):
    """Answer a chat message against a website the user has previously saved."""
    try:
        # Resolve the user from the JWT and look up the stored website HTML.
        user = supabase.get_user(jwt)
        if not user:
            return {
                "bot_response": "You are not logged in",
                "human_message": body.user_message,
            }
        user_id = user.user.id

        website_response = supabase.find_website(body.id, user_id)
        website = website_response.data
        if len(website) == 0:
            return {
                "bot_response": "Website not found",
                "human_message": body.user_message,
            }
        website = website[0]

        # Split the stored page text into chunks and index them in an
        # in-memory Chroma vector store.
        text = website["html"].strip()
        result = [LDocument(page_content=text, metadata={"source": "test"})]
        token_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        docs = token_splitter.split_documents(result)
        print(f"Number of documents: {len(docs)}")
        vectorstore = Chroma.from_documents(docs, OpenAIEmbeddings())

        # Build the QA prompt and the conversational retrieval chain.
        messages = [
            SystemMessagePromptTemplate.from_template(
                """You are PageAssist bot. Answer the question based on the following context from the webpage you are on. Answer must be in markdown format.
-----------------
context:
{context}
"""
            ),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
        prompt = ChatPromptTemplate.from_messages(messages)
        chat = ConversationalRetrievalChain.from_llm(
            OpenAI(temperature=0, model_name="gpt-3.5-turbo"),
            vectorstore.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
            qa_prompt=prompt,
        )

        # Replay the stored conversation and ask the new question.
        history = [(d["human_message"], d["bot_response"]) for d in body.history]
        response = chat({
            "question": body.user_message,
            "chat_history": history,
        })
        answer = response["answer"]
        # Drop any leading "Answer:"-style prefix the model may emit.
        answer = answer[answer.find(":") + 1:].strip()
        return {
            "bot_response": answer,
            "human_message": body.user_message,
        }
    except Exception as e:
        print(e)
        return {
            "bot_response": "Something went wrong, please try again later",
            "human_message": body.user_message,
        }


async def chat_extension_handler(body: ChatBody):
    """Answer a chat message against the raw HTML sent by the browser extension."""
    try:
        # Strip the extension's own UI elements before extracting the page text.
        soup = BeautifulSoup(body.html, "lxml")
        iframe = soup.find("iframe", id="pageassist-iframe")
        if iframe:
            iframe.decompose()
        div = soup.find("div", id="pageassist-icon")
        if div:
            div.decompose()
        div = soup.find("div", id="__plasmo-loading__")
        if div:
            div.decompose()

        # Split the visible page text into chunks and index them in an
        # in-memory Chroma vector store.
        text = soup.get_text().strip()
        result = [LDocument(page_content=text, metadata={"source": "test"})]
        token_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        docs = token_splitter.split_documents(result)
        print(f"Number of documents: {len(docs)}")
        vectorstore = Chroma.from_documents(docs, OpenAIEmbeddings())

        # Build the QA prompt and the conversational retrieval chain.
        messages = [
            SystemMessagePromptTemplate.from_template(
                """You are PageAssist bot. Answer the question based on the following context from the webpage you are on. Answer must be in markdown format.
-----------------
context:
{context}
"""
            ),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
        prompt = ChatPromptTemplate.from_messages(messages)
        chat = ConversationalRetrievalChain.from_llm(
            OpenAI(temperature=0, model_name="gpt-3.5-turbo"),
            vectorstore.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
            qa_prompt=prompt,
        )

        # Replay the stored conversation and ask the new question.
        history = [(d["human_message"], d["bot_response"]) for d in body.history]
        response = chat({
            "question": body.user_message,
            "chat_history": history,
        })
        answer = response["answer"]
        # Drop any leading "Answer:"-style prefix the model may emit.
        answer = answer[answer.find(":") + 1:].strip()
        return {
            "bot_response": answer,
            "human_message": body.user_message,
        }
    except Exception as e:
        print(e)
        return {
            "bot_response": "Something went wrong, please try again later",
            "human_message": body.user_message,
        }
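

# Hypothetical local smoke test (not part of the handlers above). It assumes
# ChatBody is a Pydantic-style model exposing the `html`, `user_message`, and
# `history` fields the handler reads, and that an OpenAI API key is configured
# in the environment; treat it as a sketch rather than a guaranteed-working script.
if __name__ == "__main__":
    import asyncio

    sample_body = ChatBody(
        html="<html><body><h1>PageAssist</h1><p>Chat with any web page.</p></body></html>",
        user_message="What does this page do?",
        history=[],
    )
    # The handlers are async, so drive one with asyncio.run for a quick check.
    print(asyncio.run(chat_extension_handler(sample_body)))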