# Install the packages
! pip3 install --upgrade google-cloud-aiplatform
! pip3 install "shapely<2.0.0"
! pip install langchain
! pip install pypdf
! pip install pydantic==1.10.8
! pip install chromadb==0.3.26
! pip install "langchain[docarray]"
! pip install typing-inspect==0.8.0 typing_extensions==4.5.0
9 Semantic search: Star Wars
In this notebook, we will embed the script of the 1977 Star Wars film “A New Hope”, then use Vertex AI language models to ‘chat’ with the data.
We will use the following technologies:
- Vertex AI Generative Studio
- LangChain, a framework for building applications with large language models
- The open-source Chroma vector store database
We will apply the following approaches:
- Retrieval Augmented Generation (RAG). Using RAG, we retrieve the passages of the script most relevant to a question and feed them to the model, asking it to base its answer on those details (a minimal sketch of this flow follows below).
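At its core, the RAG pattern is: embed the question, retrieve the most similar chunks of the source text, then ask the model to answer using only those chunks. A toy, self-contained sketch of the pattern (word overlap stands in for real embeddings; no Vertex AI calls involved):

# Toy illustration of the RAG pattern; word overlap stands in for real embeddings
def embed(text):
    return set(text.lower().split())

def retrieve(question, chunks, k=2):
    # rank chunks by overlap with the question and keep the top k
    q = embed(question)
    return sorted(chunks, key=lambda c: len(q & embed(c)), reverse=True)[:k]

chunks = [
    "Luke Skywalker is a farm boy on Tatooine.",
    "The Death Star is a battle station.",
    "Han Solo pilots the Millennium Falcon.",
]
question = "Who is Luke Skywalker?"
context = retrieve(question, chunks)
prompt = f"Answer using only this context:\n{context}\n\nQuestion: {question}"
print(prompt)  # in the notebook proper, a prompt like this goes to the Vertex AI model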
9.0.1 What is an embedding?
To feed text, images, or audio to machine learning models, we first have to convert them into numerical values the model can understand.
In this example, embeddings convert the text of the film script into vectors of floating-point numbers whose closeness in the ‘embedding space’ reflects semantic similarity. We accomplish this using a trained model from Vertex AI that has learned that words like “Lightsaber” and “Jedi” belong close together. This means we can embed the script while preserving the semantic relationships between its words and passages.
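As a toy illustration (the numbers below are made up, not real model output), embeddings of related concepts point in similar directions, so their dot product is larger than that of unrelated concepts:

import numpy as np

# Hypothetical 4-dimensional "embeddings"; real models return hundreds of dimensions
lightsaber = np.array([0.8, 0.1, 0.3, 0.5])
jedi = np.array([0.7, 0.2, 0.4, 0.4])
tax_return = np.array([-0.3, 0.9, -0.2, 0.1])

print(np.dot(lightsaber, jedi))        # 0.90 -- related concepts score higher
print(np.dot(lightsaber, tax_return))  # -0.16 -- unrelated concepts score lower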
9.0.2 Application flow
# Automatically restart kernel after installs so that your environment can access the new packages
import IPython
app = IPython.Application.instance()
app.kernel.do_shutdown(True)
from google.colab import auth
auth.authenticate_user()
9.0.3 SDK and Project Initialization
#Fill in your GCP project_id and region
PROJECT_ID = "<..>"
REGION = "<..>"
import vertexai
vertexai.init(project=PROJECT_ID, location=REGION)
9.0.4 Import Langchain tools
# Utils
import time
from typing import List
# Langchain
import langchain
from pydantic import BaseModel
print(f"LangChain version: {langchain.__version__}")
# Vertex AI
from google.cloud import aiplatform
from langchain.chat_models import ChatVertexAI
from langchain.embeddings import VertexAIEmbeddings
from langchain.llms import VertexAI
from langchain.schema import HumanMessage, SystemMessage
print(f"Vertex AI SDK version: {aiplatform.__version__}")
10 Import data
!wget https://assets.scriptslug.com/live/pdf/scripts/star-wars-episode-iv-a-new-hope-1977.pdf
from langchain.llms import VertexAI
from langchain import PromptTemplate, LLMChain
from langchain.document_loaders import PyPDFLoader
# Copy the file path of the downloaded script.
# In Colab, it should appear as below.
loader = PyPDFLoader("/content/star-wars-episode-iv-a-new-hope-1977.pdf")
doc = loader.load()
10.0.1 Text splitters
Language models limit how much text can be passed in a single input, so it is good practice to use text splitters to break documents into manageable ‘chunks’.
Smaller chunks can also improve vector store retrieval, since a focused chunk is more likely to match a specific query.
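To see what chunk_size and chunk_overlap actually do, here is a quick toy run of the splitter with deliberately tiny values (not the settings we use for the script below); consecutive chunks should share some overlapping text:

from langchain.text_splitter import RecursiveCharacterTextSplitter

demo_splitter = RecursiveCharacterTextSplitter(chunk_size=30, chunk_overlap=10)
demo_text = "A long time ago in a galaxy far, far away, a young farm boy dreamed of adventure."
for chunk in demo_splitter.split_text(demo_text):
    print(repr(chunk))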
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=150
)

splits = text_splitter.split_documents(doc)
len(splits)
from vertexai.preview.language_models import TextEmbeddingModel
model = TextEmbeddingModel.from_pretrained("textembedding-gecko@001")
10.0.2 Embeddings example
As a simple example, we will use the Vertex AI SDK and the embedding model to compute embedding vectors for a few short sentences.
We then calculate the dot product of the resulting arrays of floats. Sentences with similar meanings should produce a higher dot product.
import numpy as np
def text_embedding() -> None:
    """Text embedding with a Large Language Model."""
    texts = ["I like dogs", "Canines are my favourite", "What is life?"]
    embeddings = []
    for text in texts:
        embeddings.append(model.get_embeddings([text]))
    vectors = [next(iter(e)).values for e in embeddings]
    print(f"Dot product of '{texts[0]}' and '{texts[1]}': {np.dot(vectors[0], vectors[1])}")
    print(f"Dot product of '{texts[0]}' and '{texts[2]}': {np.dot(vectors[0], vectors[2])}")
text_embedding()
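Note that a raw dot product also reflects vector length, not just direction. If you want a scale-independent comparison, cosine similarity divides out the norms; a small optional helper (not part of the original notebook) that could replace the np.dot calls above:

def cosine_similarity(a, b):
    # dot product normalised by the vectors' lengths, giving a score in [-1, 1]
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))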
from langchain.vectorstores import Chroma
# Clear any previous vector store
!rm -rf ./docs/chroma
Let’s set up a vector database using the open-source Chroma.
from langchain.embeddings import VertexAIEmbeddings
persist_directory = 'docs/chroma/'
embeddings = VertexAIEmbeddings()

vectordb = Chroma.from_documents(
    documents=splits[0:4],
    embedding=embeddings,
    persist_directory=persist_directory
)
print(vectordb._collection.count())
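Before wiring the vector store into a chain, we can sanity-check retrieval directly. A minimal sketch (the query string is just an example):

# Fetch the chunks most similar to a query and peek at their contents
docs = vectordb.similarity_search("Who is Luke Skywalker?", k=2)
for d in docs:
    print(d.page_content[:200])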
10.0.3 Retrieval
from langchain.chains import RetrievalQA
llm = VertexAI(
    model_name="text-bison@001",
    max_output_tokens=1024,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,
)
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever()
)
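With the default chain type ("stuff", which simply stuffs the retrieved chunks into the prompt), we can already ask a question; a quick smoke test before we customise the prompt in the next section (the question is just an example):

result = qa_chain({"query": "Who is Princess Leia?"})
print(result["result"])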
10.0.4 Prompt
from langchain.prompts import PromptTemplate
# Build prompt
template = """Use the following pieces of context to answer the question at the end. \
If you don't know the answer, just say that you don't know, \
don't try to make up an answer. Use six sentences maximum. \
Keep the answer as concise as possible.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
# Run chain
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

question = "Who is Luke Skywalker?"
result = qa_chain({"query": question})
result["result"]
10.0.5 Checking for hallucinations
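One way to check an answer for hallucinations is to look at what was actually retrieved. Because we created the chain with return_source_documents=True, each result also carries the script chunks the answer was grounded in; a small sketch using the result from the previous query:

# Inspect the retrieved chunks behind the last answer
for doc in result["source_documents"]:
    print(doc.metadata.get("page"), doc.page_content[:120])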
question = "Where is France?"
result = qa_chain({"query": question})
result["result"]

question = "How does Obi Wan know Darth Vader?"
result = qa_chain({"query": question})
result["result"]
10.0.6 Chat
# Build prompt
from langchain.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end. \
If you don't know the answer, just say that you don't know, \
don't try to make up an answer. \
Use four sentences maximum. \
Write with the enthusiasm of a true fan for the material. \
Add detail to your answers from the story.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)
# Run chain
from langchain.chains import RetrievalQA
question = "What are the major topics in the film?"

qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

result = qa_chain({"query": question})
result["result"]
10.0.7 Memory
For an effective chat, the chain needs to remember its previous responses, so that follow-up questions such as “How?” can be interpreted in context.
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)
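The buffer starts out empty and fills with alternating human and AI messages as the chain below runs. At any point you can inspect it; a small sketch (not part of the original notebook):

# Peek at the conversation history held by the memory object
for message in memory.load_memory_variables({})["chat_history"]:
    print(type(message).__name__, ":", message.content)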
from langchain.chains import ConversationalRetrievalChain
retriever = vectordb.as_retriever()

qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory
)
question = "Does Obi Wan know Darth Vader?"
result = qa({"question": question})
result["answer"]

question = "How?"
result = qa({"question": question})
result["answer"]

question = "Why did they cease to be friends?"
result = qa({"question": question})
result["answer"]
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatVertexAI
from langchain.document_loaders import PyPDFLoader
def load_db(file, chain_type, k):
    # load documents
    loader = PyPDFLoader(file)
    documents = loader.load()
    # split documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    docs = text_splitter.split_documents(documents)
    # define embedding
    embeddings = VertexAIEmbeddings()
    # create vector database from data
    db = DocArrayInMemorySearch.from_documents(docs, embeddings)
    # define retriever
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
    # create a chatbot chain. Memory is managed externally.
    qa = ConversationalRetrievalChain.from_llm(
        llm=VertexAI(temperature=0.1, max_output_tokens=1024),
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )
    return qa
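Because memory is managed outside this chain, each call passes the running chat_history explicitly and appends the new exchange to it. A minimal usage sketch (the question text is just an example):

qa = load_db("/content/star-wars-episode-iv-a-new-hope-1977.pdf", "stuff", 4)
chat_history = []
question = "Who destroys the Death Star?"
result = qa({"question": question, "chat_history": chat_history})
chat_history.append((question, result["answer"]))
print(result["answer"])
print(result["generated_question"])  # the standalone question the chain searched with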
import panel as pn
import param
class cbfs(param.Parameterized):
    chat_history = param.List([])
    answer = param.String("")
    db_query = param.String("")
    db_response = param.List([])

    def __init__(self, **params):
        super(cbfs, self).__init__(**params)
        self.panels = []
        self.loaded_file = "/content/star-wars-episode-iv-a-new-hope-1977.pdf"
        self.qa = load_db(self.loaded_file, "stuff", 4)

    def call_load_db(self, count):
        if count == 0 or file_input.value is None:  # init or no file specified
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")
        else:
            file_input.save("temp.pdf")  # local copy
            self.loaded_file = file_input.filename
            button_load.button_style = "outline"
            self.qa = load_db("temp.pdf", "stuff", 4)
            button_load.button_style = "solid"
        self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)
        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.extend([(query, result["answer"])])
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer']
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600))
        ])
        inp.value = ''  # clears loading indicator when cleared
        return pn.WidgetBox(*self.panels, scroll=True)

    @param.depends('db_query')
    def get_lquest(self):
        if not self.db_query:
            return pn.Column(
                pn.Row(pn.pane.Markdown(f"Last question to DB:")),
                pn.Row(pn.pane.Str("no DB accesses so far"))
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown(f"DB query:")),
            pn.pane.Str(self.db_query)
        )

    @param.depends('db_response')
    def get_sources(self):
        if not self.db_response:
            return
        rlist = [pn.Row(pn.pane.Markdown(f"Result of DB lookup:"))]
        for doc in self.db_response:
            rlist.append(pn.Row(pn.pane.Str(doc)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist = [pn.Row(pn.pane.Markdown(f"Current Chat History variable"))]
        for exchange in self.chat_history:
            rlist.append(pn.Row(pn.pane.Str(exchange)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self, count=0):
        self.chat_history = []
        return
pn.extension()
cb = cbfs()

file_input = pn.widgets.FileInput(accept='.pdf')
button_load = pn.widgets.Button(name="Load DB", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')
button_clearhistory.on_click(cb.clr_history)
inp = pn.widgets.TextInput(placeholder='Enter text here…')

bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp)

tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation, loading_indicator=True, height=300),
    pn.layout.Divider(),
)
tab2 = pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources),
)
tab3 = pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)
tab4 = pn.Column(
    pn.Row(file_input, button_load, bound_button_load),
    pn.Row(button_clearhistory, pn.pane.Markdown("Clears chat history. Can use to start a new topic")),
    pn.layout.Divider(),
)
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# Chat with your data')),
    pn.Tabs(('Conversation', tab1), ('Database', tab2), ('Chat History', tab3), ('Configure', tab4))
)
dashboard
With thanks to Deeplearning.ai’s excellent LangChain Chat With Your Data course.