
Building Agents with Memory

Learn how to add persistent memory to your AI agents. This guide covers patterns that work with any agent framework.

What You'll Build

By the end of this guide, your agent will:

  • Remember information across conversations
  • Retrieve relevant context automatically
  • Learn from interactions over time

The Memory-Enhanced Agent Pattern

Here's the core pattern for adding memory to any agent:

┌───────────────────────────────────────────────────────┐
│                      Your Agent                        │
│  ┌─────────┐     ┌───────────┐     ┌─────────────────┐ │
│  │  User   │────▶│  Memory   │────▶│    Main LLM     │ │
│  │  Query  │     │ Retrieval │     │ (with context)  │ │
│  └─────────┘     └───────────┘     └─────────────────┘ │
│       │                │                               │
│       │                ▼                               │
│       │         ┌─────────────────┐                    │
│       └────────▶│  Memory Store   │                    │
│      (save new) │    (persist)    │                    │
│                 └─────────────────┘                    │
└───────────────────────────────────────────────────────┘

Step 1: Basic Agent with Memory

Let's build a simple agent that remembers user information:

import os
from openai import OpenAI
import requests

# Setup
MEMORY_API_KEY = os.environ["TENSORHEART_API_KEY"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
MEMORY_BASE_URL = "https://api.memory.tensorheart.com/v1"

openai = OpenAI(api_key=OPENAI_API_KEY)

def retrieve_memories(query: str, max_results: int = 5, relevance_threshold: float = None) -> list[str]:
    """Fetch relevant memories for this query."""
    payload = {"context": query, "max_memories": max_results}
    if relevance_threshold is not None:
        payload["relevance_threshold"] = relevance_threshold  # Used in Best Practices below
    response = requests.post(
        f"{MEMORY_BASE_URL}/query",
        headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
        json=payload
    )
    data = response.json()
    return [m["text"] for m in data.get("data", {}).get("memories", [])]

def save_memory(text: str, metadata: dict = None):
    """Store a new memory."""
    requests.post(
        f"{MEMORY_BASE_URL}/memories",
        headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
        json={"text": text, "metadata": metadata or {}}
    )

def agent(user_message: str) -> str:
    """Process a user message with memory-enhanced context."""

    # 1. Retrieve relevant memories
    memories = retrieve_memories(user_message)

    # 2. Build context-aware prompt
    memory_context = "\n".join(f"- {m}" for m in memories) if memories else "No relevant memories."

    system_prompt = f"""You are a helpful assistant with memory.

What you know about this user:
{memory_context}

Use this information naturally in your responses. If you learn something new about the user, mention it so it can be saved."""

    # 3. Get response from main LLM
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message}
        ]
    )

    return response.choices[0].message.content

# Example usage
print(agent("Hi, I'm Alex and I work at Stripe"))
# → "Nice to meet you, Alex! Working at Stripe must be exciting..."

# Save what we learned so later calls can recall it (Step 2 automates this)
save_memory("User's name is Alex; works at Stripe")

# Later...
print(agent("What kind of projects should I work on?"))
# → Remembers Alex works at Stripe, suggests fintech projects

Step 2: Auto-Extract Memories

Instead of manually saving memories, use extraction to automatically capture important information:

def extract_and_save_memories(conversation: str):
    """Automatically extract memories from a conversation."""
    response = requests.post(
        f"{MEMORY_BASE_URL}/query/extract",
        headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
        json={
            "content": conversation,
            "content_type": "conversation"
        }
    )
    return response.json().get("data", {}).get("memories", [])

def agent_with_auto_memory(user_message: str, conversation_history: list) -> str:
    """Agent that automatically learns from conversations."""

    # Retrieve relevant memories
    memories = retrieve_memories(user_message)

    # Build prompt with memories
    memory_context = "\n".join(f"- {m}" for m in memories) if memories else "None"

    messages = [
        {"role": "system", "content": f"You are a helpful assistant.\n\nUser context:\n{memory_context}"},
        *conversation_history,
        {"role": "user", "content": user_message}
    ]

    response = openai.chat.completions.create(model="gpt-4o", messages=messages)
    assistant_reply = response.choices[0].message.content

    # Auto-extract memories from this exchange
    exchange = f"User: {user_message}\nAssistant: {assistant_reply}"
    extract_and_save_memories(exchange)

    return assistant_reply
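
The caller keeps the running conversation history between turns. A minimal usage sketch (the message text is hypothetical):

history = []
reply = agent_with_auto_memory("I'm preparing a talk on vector databases", history)
history += [
    {"role": "user", "content": "I'm preparing a talk on vector databases"},
    {"role": "assistant", "content": reply},
]
# On the next turn, pass the updated history back in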

Step 3: Memory Spaces for Multi-User Agents

When your agent serves multiple users, use spaces to keep memories separate:

def retrieve_user_memories(user_id: str, query: str) -> list[str]:
    """Get memories for a specific user."""
    response = requests.post(
        f"{MEMORY_BASE_URL}/query",
        headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
        json={
            "context": query,
            "space_id": f"user_{user_id}"  # Isolate by user
        }
    )
    return [m["text"] for m in response.json().get("data", {}).get("memories", [])]

def save_user_memory(user_id: str, text: str):
    """Save a memory for a specific user."""
    requests.post(
        f"{MEMORY_BASE_URL}/memories",
        headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
        json={
            "text": text,
            "space_id": f"user_{user_id}"
        }
    )

def multi_user_agent(user_id: str, message: str) -> str:
    """Agent that maintains separate memory per user."""
    memories = retrieve_user_memories(user_id, message)
    # ... rest of agent logic
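
The elided agent logic is the same retrieve → prompt → respond flow from Step 1, scoped to the user's space. One way to fill it in (the save-every-turn heuristic is illustrative, not required):

def multi_user_agent(user_id: str, message: str) -> str:
    """Agent that maintains separate memory per user."""
    memories = retrieve_user_memories(user_id, message)
    memory_context = "\n".join(f"- {m}" for m in memories) if memories else "None"

    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": f"You are a helpful assistant.\n\nUser context:\n{memory_context}"},
            {"role": "user", "content": message}
        ]
    )
    reply = response.choices[0].message.content

    # Persist the exchange into this user's space for future turns
    save_user_memory(user_id, f"User: {message}\nAssistant: {reply}")
    return reply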

Integration Patterns

Pattern 1: Tool-Based Memory

Give your agent memory as a tool it can call:

memory_tools = [
    {
        "type": "function",
        "function": {
            "name": "remember",
            "description": "Save important information about the user for later",
            "parameters": {
                "type": "object",
                "properties": {
                    "fact": {"type": "string", "description": "The fact to remember"}
                },
                "required": ["fact"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "recall",
            "description": "Search your memory for relevant information",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "What to search for"}
                },
                "required": ["query"]
            }
        }
    }
]

# Agent can now decide when to remember and recall
response = openai.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=memory_tools
)
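
When the model decides to use a tool, the response carries tool calls that your code dispatches to the Step 1 helpers and feeds back. A minimal dispatch sketch, assuming a single round of tool calls:

import json

message = response.choices[0].message
if message.tool_calls:
    messages.append(message)  # The assistant turn that requested the tools
    for call in message.tool_calls:
        args = json.loads(call.function.arguments)
        if call.function.name == "remember":
            save_memory(args["fact"])
            result = f"Saved: {args['fact']}"
        else:  # "recall"
            memories = retrieve_memories(args["query"])
            result = "\n".join(memories) if memories else "No relevant memories found."
        messages.append({"role": "tool", "tool_call_id": call.id, "content": result})

    # Let the model finish its reply with the tool results in context
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        tools=memory_tools
    )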

Pattern 2: Automatic Background Memory

Run memory operations in the background without blocking:

import asyncio
import aiohttp

async def save_memory_async(text: str):
    """Non-blocking memory save."""
    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"{MEMORY_BASE_URL}/memories",
            headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
            json={"text": text}
        ) as resp:
            resp.raise_for_status()

async def agent_async(message: str) -> str:
    # Get memories (we need these before responding)
    memories = await retrieve_memories_async(message)

    # Generate response (generate_response is your own async LLM call,
    # e.g. the prompt-building from Step 1)
    response = await generate_response(message, memories)

    # Save new memories in background (don't wait)
    asyncio.create_task(
        extract_and_save_async(f"User: {message}\nAssistant: {response}")
    )

    return response
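
retrieve_memories_async and extract_and_save_async aren't defined above; here is a minimal sketch of both, mirroring the synchronous helpers and assuming the same /query and /query/extract endpoints:

async def retrieve_memories_async(query: str, max_results: int = 5) -> list[str]:
    """Async counterpart of retrieve_memories."""
    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"{MEMORY_BASE_URL}/query",
            headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
            json={"context": query, "max_memories": max_results}
        ) as resp:
            data = await resp.json()
    return [m["text"] for m in data.get("data", {}).get("memories", [])]

async def extract_and_save_async(conversation: str):
    """Async counterpart of extract_and_save_memories."""
    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"{MEMORY_BASE_URL}/query/extract",
            headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
            json={"content": conversation, "content_type": "conversation"}
        ) as resp:
            resp.raise_for_status()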

Pattern 3: Conversation Summary Memory

For long conversations, periodically summarize and store:

def summarize_and_store(conversation_history: list, user_id: str):
    """Summarize conversation and store key points."""

    # Summarize with LLM
    summary_prompt = """Summarize the key facts learned about the user in this conversation.
Format as a list of discrete facts, one per line."""

    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": summary_prompt},
            {"role": "user", "content": str(conversation_history)}
        ]
    )

    # Extract and save each fact
    facts = response.choices[0].message.content.strip().split("\n")
    for fact in facts:
        if fact.strip():
            save_user_memory(user_id, fact.strip())

# Call periodically or at end of session
summarize_and_store(history, user_id="user_123")

Framework Integration Examples

With LangChain

from typing import Optional

from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain

class TensorheartMemory(ConversationBufferMemory):
    """LangChain memory backed by Tensorheart."""

    # Declared as fields because LangChain memory classes are pydantic models
    api_key: str = ""
    space_id: Optional[str] = None

    def load_memory_variables(self, inputs: dict) -> dict:
        query = inputs.get("input", "")
        memories = retrieve_memories(query)  # From Tensorheart
        return {"history": "\n".join(memories)}

    def save_context(self, inputs: dict, outputs: dict) -> None:
        # Save to Tensorheart
        exchange = f"Human: {inputs['input']}\nAI: {outputs['output']}"
        extract_and_save_memories(exchange)

# Use with LangChain
memory = TensorheartMemory(api_key=MEMORY_API_KEY)
chain = ConversationChain(llm=ChatOpenAI(), memory=memory)

With CrewAI

from crewai import Agent, Task, Crew
from crewai.tools import tool  # Tool decorator; import path may vary by crewai version

def create_memory_agent():
    """Create a CrewAI agent with Tensorheart memory."""

    @tool
    def search_memory(query: str) -> str:
        """Search long-term memory for relevant information."""
        memories = retrieve_memories(query)
        return "\n".join(memories) if memories else "No relevant memories found."

    @tool
    def save_to_memory(fact: str) -> str:
        """Save an important fact to long-term memory."""
        save_memory(fact)
        return f"Saved: {fact}"

    return Agent(
        role="Research Assistant",
        goal="Help users with research while remembering key information",
        backstory="You assist users over long-running projects and rely on stored memories.",
        tools=[search_memory, save_to_memory],
        memory=True  # Enable built-in memory too
    )
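
To run it, wrap the agent in a task and crew as usual. A hypothetical example using the imports above (the task description is illustrative):

agent = create_memory_agent()
task = Task(
    description="Research vector databases and summarize the findings for the user",
    expected_output="A short summary tailored to what is known about the user",
    agent=agent
)
crew = Crew(agents=[agent], tasks=[task])
result = crew.kickoff()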

Best Practices

1. Be Selective About What to Remember

# Good: Specific, useful facts
save_memory("User prefers Python for data analysis")
save_memory("User's project deadline is March 15, 2024")

# Avoid: Vague or temporary information
# save_memory("User asked about something")
# save_memory("User said hello")

2. Use Metadata for Organization

save_memory(
    text="User is allergic to peanuts",
    metadata={
        "category": "health",
        "importance": "critical",
        "source": "onboarding"
    }
)

3. Set Appropriate Thresholds

# For precise, factual queries
memories = retrieve_memories(query, relevance_threshold=0.8)

# For exploratory, context-building queries
memories = retrieve_memories(query, relevance_threshold=0.5)

4. Handle Memory Gracefully

def agent_with_fallback(message: str) -> str:
    try:
        memories = retrieve_memories(message)
    except Exception:
        memories = []  # Continue without memories if service unavailable

    # Agent still works, just without memory context
    return generate_response(message, memories)
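
A related safeguard, not shown in the helpers above, is to give the HTTP calls a short timeout so a slow memory service can't stall the agent. With the requests-based helpers that might look like this (the 2-second budget is illustrative):

response = requests.post(
    f"{MEMORY_BASE_URL}/query",
    headers={"Authorization": f"Bearer {MEMORY_API_KEY}"},
    json={"context": query, "max_memories": 5},
    timeout=2  # Seconds; a timeout raises and triggers the fallback above
)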

Next Steps