|
|
import os |
|
|
import streamlit as st |
|
|
import random |
|
|
import time |
|
|
|
|
|
from langchain.chains import RetrievalQA |
|
|
from langchain.chat_models import ChatOpenAI |
|
|
from langchain.document_loaders import DataFrameLoader |
|
|
from langchain.embeddings import OpenAIEmbeddings |
|
|
from langchain.vectorstores import Chroma |
|
|
|
|
|
import pandas as pd |
|
|
|
|
|
from sklearn.model_selection import train_test_split |
|
|
|
|
|
|
|
|
# --- Dataset preparation -------------------------------------------------
# Read the ticket dataset from a CSV file in the working directory.
file_path = "dataset-tickets-multi-lang-4-20k.csv"
df = pd.read_csv(file_path)

# Keep only the rows whose 'language' column equals 'en'.
df = df.loc[df['language'] == 'en']

# Flag rows whose 'body' or 'answer' is not a str (e.g. missing values):
# such rows cannot be concatenated into the combined text built below.
body_is_text = df['body'].map(lambda value: isinstance(value, str))
answer_is_text = df['answer'].map(lambda value: isinstance(value, str))
non_string_body = df.index[~body_is_text]
non_string_answers = df.index[~answer_is_text]

# Drop every row flagged for either column.
non_string_ids = non_string_body.union(non_string_answers)
df = df.drop(index=non_string_ids)

# Combined text per ticket: "Question: <body> Answer: <answer>".
question_part = 'Question: ' + df['body']
answer_part = ' Answer: ' + df['answer']
df['q_and_a'] = question_part + answer_part

# Fixed-seed 80/20 split; only df_train is used further down in this script.
df_train, df_holdout = train_test_split(df, random_state=42, test_size=0.2)
|
|
|
|
|
|
|
|
# Directory passed to Chroma as its on-disk storage location.
persist_directory = './chroma_db'

# Wrap the training rows as LangChain documents, using the combined
# "q_and_a" text as each document's page content.
loader = DataFrameLoader(df_train, page_content_column="q_and_a")
documents = loader.load()

# The OpenAI key is read from the "openai_token" environment variable;
# the lookup yields None when the variable is not set.
openai_api_key = os.environ.get("openai_token")
|
|
|
|
|
|
|
|
@st.cache_resource
def get_vectordb():
    """Return the Chroma vector store used for ticket retrieval.

    The result is wrapped in ``st.cache_resource`` so the store is created
    once and shared across Streamlit reruns.

    If ``persist_directory`` already holds a populated collection, that
    collection is reopened as-is. Otherwise the module-level ``documents``
    are embedded with OpenAI embeddings and written to ``persist_directory``.

    NOTE: a reused store is not refreshed when the dataset changes; delete
    ``persist_directory`` to force a rebuild.

    Returns:
        A ``Chroma`` vector store bound to ``persist_directory``.
    """
    embedding = OpenAIEmbeddings(openai_api_key=os.getenv("openai_token"))

    # Chroma.from_documents inserts its input under fresh ids every time it
    # is called, so running it against an already-populated directory would
    # re-embed the whole training set and store duplicate entries on every
    # process start. Reuse what is on disk when it is there.
    if os.path.isdir(persist_directory):
        existing = Chroma(
            persist_directory=persist_directory,
            embedding_function=embedding,
        )
        # A directory left behind by an interrupted first run may contain an
        # empty collection; only reuse a store that actually has entries.
        if existing.get(limit=1)["ids"]:
            return existing

    return Chroma.from_documents(
        documents=documents,
        embedding=embedding,
        persist_directory=persist_directory,
    )
|
|
|
|
|
# Vector store handle (creation is cached by get_vectordb).
vectordb = get_vectordb()

# Chat model that writes the final answer.
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model_name=llm_name,
    temperature=0.7,
)

# Retrieval-augmented QA chain: the retriever is configured with k=5 and
# the chat model answers from what it returns.
retriever = vectordb.as_retriever(search_kwargs={"k": 5})
qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever)
|
|
|
|
|
|
|
|
def response_generator(prompt):
    """Answer ``prompt`` with the QA chain and stream the reply in pieces.

    The chain is invoked once with ``{"query": prompt}`` and its ``'result'``
    text is then emitted chunk by chunk with a short pause between chunks.

    Args:
        prompt: The user's question.

    Yields:
        str: Consecutive chunks of the answer, each holding one word plus
        the whitespace around it. Concatenating all chunks reproduces the
        answer text exactly (a whitespace-only answer yields nothing).
    """
    # Local import so this function does not depend on edits elsewhere in
    # the file.
    import re

    response = qa_chain({"query": prompt})['result']

    # str.split() would collapse newlines and indentation, flattening lists
    # and code blocks in the answer. Keep each word's original whitespace so
    # the rendered markdown matches what the model wrote.
    for chunk in re.findall(r"\s*\S+\s*", response):
        yield chunk
        time.sleep(0.05)
|
|
|
|
|
# --- Chat UI: header and conversation history ----------------------------
st.title("Technical Support Chatbot")

# Create the message list in session state the first time the script runs
# for this session.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Redraw every stored message on each rerun of the script.
for past_message in st.session_state.messages:
    speaker = past_message["role"]
    text = past_message["content"]
    with st.chat_message(speaker):
        st.markdown(text)
|
|
|
|
|
|
|
|
# --- Chat UI: handle a newly submitted question --------------------------
prompt = st.chat_input("Enter your question here")
if prompt:
    # Record the question, then show it.
    st.session_state.messages.append(dict(role="user", content=prompt))
    with st.chat_message("user"):
        st.markdown(prompt)

    # Stream the generated answer into an assistant bubble;
    # st.write_stream's return value is kept as the assistant message.
    with st.chat_message("assistant"):
        answer_stream = response_generator(prompt)
        response = st.write_stream(answer_stream)

    st.session_state.messages.append(dict(role="assistant", content=response))