! pip install --upgrade google-cloud-aiplatform
# LangChain
! pip install langchain
! pip install pypdf
! pip install pydantic==1.10.8
! pip install typing-inspect==0.8.0 typing_extensions==4.5.0
# Hugging Face transformers necessary for ConversationTokenBufferMemory
! pip install transformers
5 Memory
In many applications, it is essential that LLMs remember prior interactions and context.
LangChain provides several helper functions to manage and manipulate previous chat messages.
# Automatically restart kernel after installs so that your environment can access the new packages
import IPython

app = IPython.Application.instance()
app.kernel.do_shutdown(True)
This optional cell wraps outputs, which can make them easier to digest.
from IPython.display import HTML, display

def set_css():
    display(HTML('''
    <style>
    pre {
        white-space: pre-wrap;
    }
    </style>
    '''))

get_ipython().events.register('pre_run_cell', set_css)
If you’re on Colab, authenticate via the following cell.
from google.colab import auth
auth.authenticate_user()
5.0.1 Initialize the SDK
# Add your project id and the project's region
PROJECT_ID = "<..>"
REGION = "<..>"

from google.cloud import aiplatform

aiplatform.init(project=PROJECT_ID, location=REGION)
# Utils
import time
from typing import List
# LangChain
import langchain
from pydantic import BaseModel
print(f"LangChain version: {langchain.__version__}")
# Vertex AI
from google.cloud import aiplatform
from langchain.chat_models import ChatVertexAI
from langchain.llms import VertexAI
from langchain.schema import HumanMessage, SystemMessage
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
print(f"Vertex AI SDK version: {aiplatform.__version__}")
# LLM model
llm = VertexAI(
    model_name="text-bison@001",
    max_output_tokens=1024,
    temperature=0.2,
    top_p=0.8,
    top_k=40,
    verbose=True,
)
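Optionally, you can run a quick smoke test to confirm the endpoint is reachable before moving on; the prompt here is arbitrary and the reply will vary.
# Optional smoke test: confirm the model responds (output will vary)
print(llm("What day comes after Friday?"))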
5.0.2 ConversationBufferWindowMemory
Keeps a list of the interactions of the conversation over time, using only the last K interactions. This is useful for maintaining a sliding window of the most recent interactions, so the buffer does not grow too large.
from langchain.memory import ConversationBufferWindowMemory

memory = ConversationBufferWindowMemory(k=3)
memory.save_context({"input": "Hi"},
                    {"output": "How are you?"})
memory.save_context({"input": "Fine thanks"},
                    {"output": "Great"})
memory.load_memory_variables({})
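To see the window in action, you can save two more exchanges; with k=3, the oldest one ("Hi" / "How are you?") should then fall out of the history. The extra exchanges below are made up for illustration.
memory.save_context({"input": "Any plans today?"},
                    {"output": "Just chatting"})
memory.save_context({"input": "Sounds fun"},
                    {"output": "It is!"})
# With k=3 and four exchanges saved, the first one is now outside the window
print(memory.load_memory_variables({}))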
5.0.3 ConversationTokenBufferMemory
This feature instead keeps a buffer of recent interactions in memory based on token length, rather than number of interactions.
from langchain.memory import ConversationTokenBufferMemory

memory = ConversationTokenBufferMemory(llm=llm, max_token_limit=100)
memory.save_context({"input": "All alone, she dreams of the stars!"},
                    {"output": "As she should!"})
memory.save_context({"input": "Baking cookies today?"},
                    {"output": "Behold the cookies!"})
memory.save_context({"input": "Chatbots everywhere?"},
                    {"output": "Certainly!"})
memory.load_memory_variables({})
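To make the pruning visible, you can repeat the exercise with a much tighter budget. This is a sketch reusing the llm from above; small_memory and the 20-token limit are just for illustration, and the exact cut-off depends on how the model counts tokens, so treat the result as indicative.
small_memory = ConversationTokenBufferMemory(llm=llm, max_token_limit=20)
small_memory.save_context({"input": "All alone, she dreams of the stars!"},
                          {"output": "As she should!"})
small_memory.save_context({"input": "Chatbots everywhere?"},
                          {"output": "Certainly!"})
# With such a tight budget, only the most recent exchange should survive
print(small_memory.load_memory_variables({}))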
5.0.4 Conversation summaries
LangChain carries forward summaries of chat messages and flushes memory after a specified number of interactions or tokens.
Let’s first look at using the former, ConversationBufferWindowMemory. We set verbose=True to show the prompts and information carried forward by the LLM.
from langchain.memory import ConversationBufferWindowMemory

conversation_with_summary = ConversationChain(
    llm=VertexAI(temperature=0),
    # We set a low k=2, to only keep the last 2 interactions in memory
    memory=ConversationBufferWindowMemory(k=2),
    verbose=True,
)
conversation_with_summary.predict(input="My favourite sport is fencing. Any tips for how I can go pro?")
conversation_with_summary.predict(input="What equipment do I need?")
conversation_with_summary.predict(input="Who are the greats of the sport I can emulate?")
# Since we have now passed k=2, the LLM will be unable to answer
conversation_with_summary.predict(input="What is my favourite sport?")
5.0.4.1 Keep the conversation going with summaries
Ensures conversational memory up to a specified token length.
from langchain.chains import ConversationChain
from langchain.memory import ConversationSummaryBufferMemory

conversation_with_summary = ConversationChain(
    llm=llm,
    # Change max_token_limit here after running through the conversation.
    memory=ConversationSummaryBufferMemory(llm=llm, max_token_limit=400),
    verbose=True,
)
conversation_with_summary.predict(input="Hi, how are you?")
5.0.5 ConversationSummaryBufferMemory
Ensures conversational memory endures by summarizing old interactions to help inform chat within a new window. It uses token length to determine when to ‘flush’ the interactions.
input="I'm learning the Rust programming language") conversation_with_summary.predict(
input="What's the best book to help me?") conversation_with_summary.predict(
# Notice the buffer here is updated and clears the earlier exchanges
input="Wish me luck!") conversation_with_summary.predict(
The following cell should generate a reply that is clearly generic and missing the previous context of someone trying to learn Rust. Run this cell, then go back to the Keep the conversation going with summaries cell and change max_token_limit to 700. Then re-run the entire conversation and notice how the model relates its output about learning Haskell to the context of someone trying to learn Rust.
input="Would knowing Haskell help me?") conversation_with_summary.predict(
5.0.6 Summary
In this notebook, we explored various approaches to memory in conversations:
ConversationBufferWindowMemory
ConversationSummaryBufferMemory
ConversationTokenBufferMemory
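All three are interchangeable behind ConversationChain’s memory parameter. As a closing sketch, reusing the llm defined earlier, with an arbitrary prompt:
from langchain.chains import ConversationChain
from langchain.memory import (
    ConversationBufferWindowMemory,
    ConversationSummaryBufferMemory,
    ConversationTokenBufferMemory,
)

# Any of the three memory implementations can back the same chain
for mem in [
    ConversationBufferWindowMemory(k=2),
    ConversationTokenBufferMemory(llm=llm, max_token_limit=100),
    ConversationSummaryBufferMemory(llm=llm, max_token_limit=100),
]:
    chain = ConversationChain(llm=llm, memory=mem)
    print(type(mem).__name__, "->", chain.predict(input="Hi!"))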