# Install the packages
! pip3 install --upgrade google-cloud-aiplatform
! pip3 install "shapely<2.0.0"
! pip install langchain
! pip install pypdf
! pip install pydantic==1.10.8
! pip install chromadb==0.3.26
! pip install langchain[docarray]
! pip install typing-inspect==0.8.0 typing_extensions==4.5.0

9 Semantic search: Star Wars
In this notebook, we will embed the script for the 1977 Star Wars film “A New Hope”, then use Vertex AI language models to ‘chat’ with the data.
We will use the following technologies:
- Vertex AI Generative AI Studio
- LangChain, a framework for building applications with large language models
- The open-source Chroma vector store database

We will apply the following approaches:
- Retrieval Augmented Generation (RAG). Using RAG, we retrieve the most relevant passages from the data, feed them to the model, and ask it to ground its answers in those details.

9.0.1 What is an embedding?
To feed text, images, or audio to machine learning models, we first have to convert them into numerical values the model can understand.
In this example, embeddings convert the text of the film script into vectors of floating-point numbers whose relative positions capture semantic similarity. We accomplish this with a trained model (from Vertex AI) that has learned that “Lightsaber” and “Jedi” should sit close together in the ‘embedding space’, so embedding the script preserves the semantic relationships between its words and passages.
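To make the idea concrete, here is a toy sketch with made-up three-dimensional vectors (real embedding models return hundreds of dimensions): related concepts produce a higher dot product than unrelated ones.
import numpy as np
# Made-up "embeddings" purely for illustration, not real model outputs
lightsaber = np.array([0.9, 0.1, 0.2])
jedi = np.array([0.8, 0.2, 0.1])
tax_form = np.array([0.1, 0.9, 0.7])
print(np.dot(lightsaber, jedi))      # 0.76 -- related concepts score higher
print(np.dot(lightsaber, tax_form))  # 0.32 -- unrelated concepts score lower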

9.0.2 Application flow

# Automatically restart kernel after installs so that your environment can access the new packages
import IPython
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

from google.colab import auth
auth.authenticate_user()

9.0.3 SDK and Project Initialization
#Fill in your GCP project_id and region
PROJECT_ID = "<..>"
REGION = "<..>"
import vertexai
vertexai.init(project=PROJECT_ID, location=REGION)

9.0.4 Import LangChain tools
# Utils
import time
from typing import List
# Langchain
import langchain
from pydantic import BaseModel
print(f"LangChain version: {langchain.__version__}")
# Vertex AI
from google.cloud import aiplatform
from langchain.chat_models import ChatVertexAI
from langchain.embeddings import VertexAIEmbeddings
from langchain.llms import VertexAI
from langchain.schema import HumanMessage, SystemMessage
print(f"Vertex AI SDK version: {aiplatform.__version__}")10 Import data
!wget https://assets.scriptslug.com/live/pdf/scripts/star-wars-episode-iv-a-new-hope-1977.pdf

from langchain.llms import VertexAI
from langchain import PromptTemplate, LLMChain
from langchain.document_loaders import PyPDFLoader
# Copy the file path of the downloaded script.
# In Colab, it should appear as below.
loader = PyPDFLoader("/content/star-wars-episode-iv-a-new-hope-1977.pdf")
doc = loader.load()

10.0.1 Text splitters
Language models often constrain how much text can be passed as a single input, so it is good practice to use text splitters to break documents into manageable ‘chunks’.
Smaller chunks also tend to improve vector store retrieval, since a focused chunk is more likely to match a specific query.
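As a quick illustration of what chunk_size and chunk_overlap control, here is a small sketch (the sample string and sizes are arbitrary and far smaller than the values we use for the script below):
from langchain.text_splitter import RecursiveCharacterTextSplitter
demo_splitter = RecursiveCharacterTextSplitter(chunk_size=20, chunk_overlap=5)
# Each chunk is at most ~20 characters; neighbouring chunks may share a few characters
print(demo_splitter.split_text("A long time ago in a galaxy far, far away..."))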
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=150
)
splits = text_splitter.split_documents(doc)
len(splits)

from vertexai.preview.language_models import TextEmbeddingModel
model = TextEmbeddingModel.from_pretrained("textembedding-gecko@001")

10.0.2 Embeddings example
As a simple example of embedding sentences, we will use the Vertex AI SDK and the embedding model to compute vectors for a few short sentences.
We then calculate the dot product of the resulting arrays of floats; sentences that are similar should produce higher dot products.
import numpy as np

def text_embedding() -> None:
    """Text embedding with a Large Language Model."""
    texts = ["I like dogs", "Canines are my favourite", "What is life?"]
    embeddings = []
    for text in texts:
        embeddings.append(model.get_embeddings([text]))
    vectors = [next(iter(e)).values for e in embeddings]
    print(f"Dot product of '{texts[0]}' and '{texts[1]}': {np.dot(vectors[0], vectors[1])}")
    print(f"Dot product of '{texts[0]}' and '{texts[2]}': {np.dot(vectors[0], vectors[2])}")

text_embedding()

from langchain.vectorstores import Chroma
# Clear any previous vector store
!rm -rf ./docs/chroma

Let’s set up a vector database using the open-source Chroma.
from langchain.embeddings import VertexAIEmbeddings
persist_directory = 'docs/chroma/'
embeddings = VertexAIEmbeddings()
vectordb = Chroma.from_documents(
    documents=splits[0:4],  # only the first few chunks are indexed here; pass `splits` to index the full script
    embedding=embeddings,
    persist_directory=persist_directory
)
print(vectordb._collection.count())

10.0.3 Retrieval
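Before wiring the retriever into a question-answering chain, it can be useful to inspect what a raw similarity search against the vector store returns. A minimal sketch (the query string is only an illustration):
# Peek at the nearest-neighbour matches for a sample query
matches = vectordb.similarity_search("Who is Luke Skywalker?", k=2)
for m in matches:
    print(m.page_content[:200])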
from langchain.chains import RetrievalQA
llm = VertexAI(
    model_name="text-bison@001",
    max_output_tokens=1024,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,
)
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever()
)
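As a quick smoke test of this default RetrievalQA chain before we customise its prompt (the question is just an example):
result = qa_chain({"query": "Who is Obi Wan Kenobi?"})
print(result["result"])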
10.0.4 Prompt

from langchain.prompts import PromptTemplate
# Build prompt
template = """Use the following pieces of context to answer the question at the end. \
If you don't know the answer, just say that you don't know, \
don't try to make up an answer. Use six sentences maximum. \
Keep the answer as concise as possible.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Run chain
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

question = "Who is Luke Skywalker?"
result = qa_chain({"query": question})
result["result"]10.0.5 Checking for hallucinations
question = "Where is France?"
result = qa_chain({"query": question})
result["result"]question = "How does Obi Wan know Darth Vader?"
result = qa_chain({"query": question})
result["result"]10.0.6 Chat
# Build prompt
from langchain.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end. \
If you don't know the answer, just say that you don't know, \
don't try to make up an answer. \
Use four sentences maximum. \
Write with the enthusiasm of a true fan for the material. \
Add detail to your answers from the story.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)
# Run chain
from langchain.chains import RetrievalQA
question = "What are the major topics in the film?"
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)
result = qa_chain({"query": question})
result["result"]10.0.7 Memory
For an effective chat, we need the chain to remember its previous responses, so that follow-up questions such as "How?" can be resolved against earlier answers.
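As a small illustration of how ConversationBufferMemory accumulates conversation turns (the sample strings are made up):
from langchain.memory import ConversationBufferMemory
demo_memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
demo_memory.save_context({"input": "Hi"}, {"output": "Hello there!"})
# The stored messages come back under the "chat_history" key
print(demo_memory.load_memory_variables({}))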
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

from langchain.chains import ConversationalRetrievalChain
retriever=vectordb.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory
)

question = "Does Obi Wan know Darth Vader?"
result = qa({"question": question})
result['answer']

question = "How?"
result = qa({"question": question})
result["answer"]question = "Why did they cease to be friends?"
result = qa({"question": question})
result["answer"]from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatVertexAI
from langchain.document_loaders import PyPDFLoader

def load_db(file, chain_type, k):
    # load documents
    loader = PyPDFLoader(file)
    documents = loader.load()
    # split documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    docs = text_splitter.split_documents(documents)
    # define embedding
    embeddings = VertexAIEmbeddings()
    # create vector database from data
    db = DocArrayInMemorySearch.from_documents(docs, embeddings)
    # define retriever
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
    # create a chatbot chain. Memory is managed externally.
    qa = ConversationalRetrievalChain.from_llm(
        llm=VertexAI(temperature=0.1, max_output_tokens=1024),
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )
    return qa
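A minimal usage sketch of load_db, assuming the script PDF is still at the Colab path used earlier. Because memory is managed outside the chain, chat_history is passed explicitly:
qa_demo = load_db("/content/star-wars-episode-iv-a-new-hope-1977.pdf", "stuff", 4)
demo_result = qa_demo({"question": "Who is Princess Leia?", "chat_history": []})
print(demo_result["answer"])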
import panel as pn
import param
class cbfs(param.Parameterized):
    chat_history = param.List([])
    answer = param.String("")
    db_query = param.String("")
    db_response = param.List([])

    def __init__(self, **params):
        super(cbfs, self).__init__(**params)
        self.panels = []
        self.loaded_file = "/content/star-wars-episode-iv-a-new-hope-1977.pdf"
        self.qa = load_db(self.loaded_file, "stuff", 4)
    def call_load_db(self, count):
        if count == 0 or file_input.value is None:  # init or no file specified
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")
        else:
            file_input.save("temp.pdf")  # local copy
            self.loaded_file = file_input.filename
            button_load.button_style = "outline"
            self.qa = load_db("temp.pdf", "stuff", 4)
            button_load.button_style = "solid"
            self.clr_history()
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")
    def convchain(self, query):
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)
        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.extend([(query, result["answer"])])
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer']
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600))
        ])
        inp.value = ''  # clear the input box once the query has been handled
        return pn.WidgetBox(*self.panels, scroll=True)
    @param.depends('db_query')
    def get_lquest(self):
        if not self.db_query:
            return pn.Column(
                pn.Row(pn.pane.Markdown("Last question to DB:")),
                pn.Row(pn.pane.Str("no DB accesses so far"))
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown("DB query:")),
            pn.pane.Str(self.db_query)
        )
    @param.depends('db_response')
    def get_sources(self):
        if not self.db_response:
            return
        rlist = [pn.Row(pn.pane.Markdown("Result of DB lookup:"))]
        for doc in self.db_response:
            rlist.append(pn.Row(pn.pane.Str(doc)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)
    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist = [pn.Row(pn.pane.Markdown("Current Chat History variable"))]
        for exchange in self.chat_history:
            rlist.append(pn.Row(pn.pane.Str(exchange)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self, count=0):
        self.chat_history = []
        return

pn.extension()
cb = cbfs()
file_input = pn.widgets.FileInput(accept='.pdf')
button_load = pn.widgets.Button(name="Load DB", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')
button_clearhistory.on_click(cb.clr_history)
inp = pn.widgets.TextInput( placeholder='Enter text here…')
bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp)
tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation, loading_indicator=True, height=300),
    pn.layout.Divider(),
)
tab2 = pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources),
)
tab3 = pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)
tab4 = pn.Column(
    pn.Row(file_input, button_load, bound_button_load),
    pn.Row(button_clearhistory, pn.pane.Markdown("Clears chat history. Can use to start a new topic")),
    pn.layout.Divider(),
)
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# Chat with your data')),
    pn.Tabs(('Conversation', tab1), ('Database', tab2), ('Chat History', tab3), ('Configure', tab4))
)
dashboard

With thanks to DeepLearning.AI's excellent LangChain: Chat with Your Data course.