Building Agents with Memory
Learn how to add persistent memory to your AI agents. This guide covers patterns that work with any agent framework.
What You'll Build
By the end of this guide, your agent will:
- Remember information across conversations
- Retrieve relevant context automatically
- Learn from interactions over time
The Memory-Enhanced Agent Pattern
Here's the core pattern for adding memory to any agent:
┌──────────────────────────────────────────────────────────┐
│                        Your Agent                         │
│  ┌─────────┐    ┌──────────┐    ┌─────────────────┐       │
│  │  User   │───▶│  Memory  │───▶│    Main LLM     │       │
│  │  Query  │    │ Retrieval│    │ (with context)  │       │
│  └─────────┘    └──────────┘    └─────────────────┘       │
│       │                                  │                │
│       │                                  ▼                │
│       │                         ┌─────────────────┐       │
│       └────────────────────────▶│  Memory Store   │       │
│             (save new)          │    (persist)    │       │
│                                 └─────────────────┘       │
└────────────────────────────────────────────────────────────┘
Step 1: Basic Agent with Memory
Let's build a simple agent that remembers user information:
import os
from openai import OpenAI
import requests
# Setup
MEMORY_API_KEY = os.environ["TENSORHEART_API_KEY"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
MEMORY_BASE_URL = "https://api.memory.tensorheart.com/v1"
openai = OpenAI(api_key=OPENAI_API_KEY)
def retrieve_memories(query: str, max_results: int = 5) -> list[str]:
    """Fetch relevant memories for this query."""
    response = requests.post(
        f"{MEMORY_BASE_URL}/query",
        headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
        json={"context": query, "max_memories": max_results}
    )
    data = response.json()
    return [m["text"] for m in data.get("data", {}).get("memories", [])]
def save_memory(text: str, metadata: dict | None = None):
    """Store a new memory."""
    requests.post(
        f"{MEMORY_BASE_URL}/memories",
        headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
        json={"text": text, "metadata": metadata or {}}
    )
def agent(user_message: str) -> str:
    """Process a user message with memory-enhanced context."""
    # 1. Retrieve relevant memories
    memories = retrieve_memories(user_message)

    # 2. Build context-aware prompt
    memory_context = "\n".join(f"- {m}" for m in memories) if memories else "No relevant memories."
    system_prompt = f"""You are a helpful assistant with memory.

What you know about this user:
{memory_context}

Use this information naturally in your responses. If you learn something new about the user, mention it so it can be saved."""

    # 3. Get response from main LLM
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message}
        ]
    )
    return response.choices[0].message.content
# Example usage
print(agent("Hi, I'm Alex and I work at Stripe"))
# → "Nice to meet you, Alex! Working at Stripe must be exciting..."

# Persist what we just learned so it can be retrieved later
save_memory("Alex works at Stripe")

# Later...
print(agent("What kind of projects should I work on?"))
# → Retrieves the saved fact that Alex works at Stripe and suggests fintech projects
Step 2: Auto-Extract Memories
Instead of manually saving memories, use extraction to automatically capture important information:
def extract_and_save_memories(conversation: str):
    """Automatically extract memories from a conversation."""
    response = requests.post(
        f"{MEMORY_BASE_URL}/query/extract",
        headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
        json={
            "content": conversation,
            "content_type": "conversation"
        }
    )
    return response.json().get("data", {}).get("memories", [])
def agent_with_auto_memory(user_message: str, conversation_history: list) -> str:
    """Agent that automatically learns from conversations."""
    # Retrieve relevant memories
    memories = retrieve_memories(user_message)

    # Build prompt with memories
    memory_context = "\n".join(f"- {m}" for m in memories) if memories else "None"
    messages = [
        {"role": "system", "content": f"You are a helpful assistant.\n\nUser context:\n{memory_context}"},
        *conversation_history,
        {"role": "user", "content": user_message}
    ]
    response = openai.chat.completions.create(model="gpt-4o", messages=messages)
    assistant_reply = response.choices[0].message.content

    # Auto-extract memories from this exchange
    exchange = f"User: {user_message}\nAssistant: {assistant_reply}"
    extract_and_save_memories(exchange)

    return assistant_reply
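One possible way to drive this across a session is a simple loop that appends each exchange to conversation_history between turns; the messages below are purely illustrative:
history = []
for user_message in [
    "Hi, I'm Alex and I work at Stripe",
    "What kind of projects should I work on?",
]:
    reply = agent_with_auto_memory(user_message, history)
    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": reply})
    print(reply)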
Step 3: Memory Spaces for Multi-User Agents
When your agent serves multiple users, use memory spaces to keep each user's memories isolated from the others:
def retrieve_user_memories(user_id: str, query: str) -> list[str]:
    """Get memories for a specific user."""
    response = requests.post(
        f"{MEMORY_BASE_URL}/query",
        headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
        json={
            "context": query,
            "space_id": f"user_{user_id}"  # Isolate by user
        }
    )
    return [m["text"] for m in response.json().get("data", {}).get("memories", [])]

def save_user_memory(user_id: str, text: str):
    """Save a memory for a specific user."""
    requests.post(
        f"{MEMORY_BASE_URL}/memories",
        headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
        json={
            "text": text,
            "space_id": f"user_{user_id}"
        }
    )
def multi_user_agent(user_id: str, message: str) -> str:
    """Agent that maintains separate memory per user."""
    memories = retrieve_user_memories(user_id, message)
    # ... rest of agent logic
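One way the elided logic could be filled in, reusing the Step 1 pattern with the per-user helpers (a sketch, not the only option):
def multi_user_agent(user_id: str, message: str) -> str:
    """Agent that maintains separate memory per user (full sketch)."""
    memories = retrieve_user_memories(user_id, message)
    memory_context = "\n".join(f"- {m}" for m in memories) if memories else "None"
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": f"You are a helpful assistant.\n\nUser context:\n{memory_context}"},
            {"role": "user", "content": message}
        ]
    )
    reply = response.choices[0].message.content
    # Persist anything new to this user's space (be selective in practice)
    save_user_memory(user_id, f"Discussed: {message[:100]}")
    return reply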
Integration Patterns
Pattern 1: Tool-Based Memory
Give your agent memory as a tool it can call:
memory_tools = [
    {
        "type": "function",
        "function": {
            "name": "remember",
            "description": "Save important information about the user for later",
            "parameters": {
                "type": "object",
                "properties": {
                    "fact": {"type": "string", "description": "The fact to remember"}
                },
                "required": ["fact"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "recall",
            "description": "Search your memory for relevant information",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "What to search for"}
                },
                "required": ["query"]
            }
        }
    }
]
# Agent can now decide when to remember and recall
response = openai.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=memory_tools
)
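The tool definitions only tell the model what it may call; your code still has to execute those calls. A minimal dispatch loop might look like this, wiring remember and recall to the save_memory and retrieve_memories helpers from Step 1 (the function name and control flow are illustrative):
import json

def run_with_memory_tools(messages: list) -> str:
    """Let the model call remember/recall, execute them, and return the final reply."""
    response = openai.chat.completions.create(
        model="gpt-4o", messages=messages, tools=memory_tools
    )
    msg = response.choices[0].message
    while msg.tool_calls:
        messages.append(msg)  # keep the assistant's tool-call turn in context
        for call in msg.tool_calls:
            args = json.loads(call.function.arguments)
            if call.function.name == "remember":
                save_memory(args["fact"])
                result = "Saved."
            else:  # "recall"
                found = retrieve_memories(args["query"])
                result = "\n".join(found) if found else "Nothing found."
            messages.append({"role": "tool", "tool_call_id": call.id, "content": result})
        response = openai.chat.completions.create(
            model="gpt-4o", messages=messages, tools=memory_tools
        )
        msg = response.choices[0].message
    return msg.content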
Pattern 2: Automatic Background Memory
Run memory operations in the background without blocking:
import asyncio
import aiohttp
async def save_memory_async(text: str):
    """Non-blocking memory save."""
    async with aiohttp.ClientSession() as session:
        await session.post(
            f"{MEMORY_BASE_URL}/memories",
            headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
            json={"text": text}
        )

async def agent_async(message: str) -> str:
    # Get memories (we need these before responding)
    memories = await retrieve_memories_async(message)

    # Generate response
    response = await generate_response(message, memories)

    # Save new memories in background (don't wait)
    asyncio.create_task(
        extract_and_save_async(f"User: {message}\nAssistant: {response}")
    )
    return response
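agent_async assumes async versions of the retrieval and extraction helpers. Here is one possible shape for them with aiohttp, mirroring the synchronous endpoints used earlier (generate_response is left as a placeholder for whatever LLM call you make):
async def retrieve_memories_async(query: str) -> list[str]:
    """Non-blocking memory retrieval."""
    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"{MEMORY_BASE_URL}/query",
            headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
            json={"context": query}
        ) as resp:
            data = await resp.json()
    return [m["text"] for m in data.get("data", {}).get("memories", [])]

async def extract_and_save_async(conversation: str):
    """Non-blocking memory extraction and save."""
    async with aiohttp.ClientSession() as session:
        await session.post(
            f"{MEMORY_BASE_URL}/query/extract",
            headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
            json={"content": conversation, "content_type": "conversation"}
        )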
Pattern 3: Conversation Summary Memory
For long conversations, periodically summarize and store:
def summarize_and_store(conversation_history: list, user_id: str):
    """Summarize conversation and store key points."""
    # Summarize with LLM
    summary_prompt = """Summarize the key facts learned about the user in this conversation.
Format as a list of discrete facts, one per line."""
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": summary_prompt},
            {"role": "user", "content": str(conversation_history)}
        ]
    )

    # Extract and save each fact
    facts = response.choices[0].message.content.strip().split("\n")
    for fact in facts:
        if fact.strip():
            save_user_memory(user_id, fact.strip())
# Call periodically or at end of session
summarize_and_store(history, user_id="user_123")
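A simple trigger, for example, is to summarize every few exchanges rather than on every turn; the interval below is arbitrary:
SUMMARY_INTERVAL = 10  # messages; tune for your use case
if history and len(history) % SUMMARY_INTERVAL == 0:
    summarize_and_store(history, user_id="user_123")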
Framework Integration Examples
With LangChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain
class TensorheartMemory(ConversationBufferMemory):
    """LangChain memory backed by Tensorheart."""
    # Declare fields rather than assigning them in __init__, since LangChain
    # memory classes are pydantic models that reject undeclared attributes.
    api_key: str = ""
    space_id: str | None = None

    def load_memory_variables(self, inputs: dict) -> dict:
        query = inputs.get("input", "")
        memories = retrieve_memories(query)  # From Tensorheart
        return {"history": "\n".join(memories)}

    def save_context(self, inputs: dict, outputs: dict):
        # Save to Tensorheart
        exchange = f"Human: {inputs['input']}\nAI: {outputs['output']}"
        extract_and_save_memories(exchange)
# Use with LangChain
memory = TensorheartMemory(api_key=MEMORY_API_KEY)
chain = ConversationChain(llm=ChatOpenAI(), memory=memory)
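Usage then looks like any other LangChain conversation (assuming the chain above):
print(chain.predict(input="Hi, I'm Alex and I work at Stripe"))
print(chain.predict(input="What kind of projects should I work on?"))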
With CrewAI
from crewai import Agent, Task, Crew
from crewai.tools import tool  # @tool decorator; import path can vary by CrewAI version

def create_memory_agent():
    """Create a CrewAI agent with Tensorheart memory."""
    @tool
    def search_memory(query: str) -> str:
        """Search long-term memory for relevant information."""
        memories = retrieve_memories(query)
        return "\n".join(memories) if memories else "No relevant memories found."

    @tool
    def save_to_memory(fact: str) -> str:
        """Save an important fact to long-term memory."""
        save_memory(fact)
        return f"Saved: {fact}"

    return Agent(
        role="Research Assistant",
        goal="Help users with research while remembering key information",
        backstory="An assistant that recalls what it learns about users across sessions",
        tools=[search_memory, save_to_memory],
        memory=True  # Enable built-in memory too
    )
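To run it, give the agent a task and a crew as usual; the task description below is only illustrative:
agent = create_memory_agent()
task = Task(
    description="Suggest project ideas for the user, recalling anything already known about them.",
    expected_output="A short list of tailored project ideas.",
    agent=agent
)
crew = Crew(agents=[agent], tasks=[task])
print(crew.kickoff())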
Best Practices
1. Be Selective About What to Remember
# Good: Specific, useful facts
save_memory("User prefers Python for data analysis")
save_memory("User's project deadline is March 15, 2024")
# Avoid: Vague or temporary information
# save_memory("User asked about something")
# save_memory("User said hello")
2. Use Metadata for Organization
save_memory(
    text="User is allergic to peanuts",
    metadata={
        "category": "health",
        "importance": "critical",
        "source": "onboarding"
    }
)