The tutorial demonstrates how to build a retrieval pipeline using RAG-Anything that processes text, tables, equations, and images within Google Colab. The process starts by configuring the environment, installing dependencies, and securely entering an OpenAI API key to ensure the notebook runs safely.
In this article
Installing RAG-Anything Dependencies
The setup script installs the necessary libraries and fixes a known conflict with the Pillow package. It also imports modules for plotting, PDF generation, and OpenAI access.
import os
import re
import sys
import json
import time
import shutil
import hashlib
import asyncio
import inspect
import getpass
import subprocess
import importlib
import importlib.metadata
from pathlib import Path
from typing import List, Dict, Any
def run_shell(cmd, check=True):
    """Echo a shell command, run it, and return its exit code.

    Args:
        cmd: Command line passed verbatim to the system shell
            (``shell=True``), so only pass trusted, hard-coded strings.
        check: When true, a non-zero exit code raises instead of returning.

    Returns:
        The command's exit code (always 0 when ``check`` is true and the
        call returns).

    Raises:
        RuntimeError: If ``check`` is true and the command exits non-zero.
    """
    print(f"\n$ {cmd}")
    result = subprocess.run(cmd, shell=True, text=True)
    if check and result.returncode != 0:
        # Include the exit code so a failed install is easier to diagnose.
        raise RuntimeError(
            f"Command failed: {cmd} (exit code {result.returncode})"
        )
    return result.returncode
print("=" * 80)
print("RAG-Anything Advanced Colab Tutorial")
print("=" * 80)
print("\n[1/10] Installing dependencies...")


def _purge_pil_modules():
    """Remove every already-imported PIL module from ``sys.modules``.

    Done so the next ``import PIL`` loads the freshly installed Pillow
    instead of a copy imported earlier in the session.
    """
    for module_name in list(sys.modules):
        if module_name == "PIL" or module_name.startswith("PIL."):
            del sys.modules[module_name]


# Invoke pip through the running interpreter so packages land in the same
# environment the notebook imports from; a bare `pip` on PATH may belong to a
# different Python. Fall back to plain `pip` if the interpreter path is unknown.
_PIP = f'"{sys.executable}" -m pip' if sys.executable else "pip"

_purge_pil_modules()
run_shell(
    f"{_PIP} -q install -U "
    '"raganything[image,text]" '
    '"openai>=1.0.0" '
    '"python-dotenv" '
    '"reportlab" '
    '"pandas" '
    '"matplotlib" '
    '"tabulate"'
)
# Pin Pillow after the main install, which may have pulled a different version.
run_shell(f'{_PIP} -q install --no-cache-dir --force-reinstall "pillow==11.3.0"')
_purge_pil_modules()
# Make the import system notice the files pip just wrote.
importlib.invalidate_caches()

# Best-effort report only: a failure to read the version must not stop setup.
try:
    print("Pillow version:", importlib.metadata.version("Pillow"))
except Exception as e:
    print("Could not read Pillow version:", repr(e))
# These imports are deliberately placed after the install step above so they
# resolve against the packages that were just installed.
print("\n[2/10] Importing libraries...")
# Data handling and plotting.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display
# PDF generation.
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.units import inch
# OpenAI async client plus the RAG-Anything / LightRAG entry points.
from openai import AsyncOpenAI
from raganything import RAGAnything, RAGAnythingConfig
from lightrag.utils import EmbeddingFunc
print("Imports successful.")
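The script removes any already-imported PIL modules from memory both before and after installing, so a stale Pillow copy cannot conflict with the pinned version, and then invalidates Python's import caches so the newly installed packages are found. It prints the installed Pillow version and imports the tools used for data handling and document generation.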
Configuring Directories and Runtime Variables
The code defines paths for assets, output, logs, and working storage. It creates these folders if they do not exist and clears the working directory before starting.
print("\n[3/10] Preparing directories and runtime settings...")

# Use /content when it exists (the Colab case); otherwise work under the
# current directory.
BASE_DIR = Path("/content/raganything_advanced_tutorial") if Path("/content").exists() else Path.cwd() / "raganything_advanced_tutorial"
ASSET_DIR = BASE_DIR / "assets"
OUTPUT_DIR = BASE_DIR / "output"
WORKING_DIR = BASE_DIR / "rag_storage"
LOG_DIR = BASE_DIR / "logs"

RESET_STORAGE = True
RUN_FULL_DOCUMENT_PARSE = False
PARSER_FOR_FULL_PARSE = "mineru"
PARSE_METHOD = "auto"

# Wipe previous storage first, then create everything once. This avoids
# creating WORKING_DIR only to delete and recreate it immediately.
if RESET_STORAGE and WORKING_DIR.exists():
    shutil.rmtree(WORKING_DIR)
for d in [BASE_DIR, ASSET_DIR, OUTPUT_DIR, WORKING_DIR, LOG_DIR]:
    d.mkdir(parents=True, exist_ok=True)

# Runtime settings exported as environment variables (log location, summary
# language, LLM caching, concurrency, chunking, timeout).
# NOTE(review): presumably read by RAG-Anything / LightRAG - confirm.
os.environ.update(
    {
        "LOG_DIR": str(LOG_DIR),
        "SUMMARY_LANGUAGE": "English",
        "ENABLE_LLM_CACHE": "false",
        "ENABLE_LLM_CACHE_FOR_EXTRACT": "false",
        "MAX_ASYNC": "2",
        "CHUNK_SIZE": "900",
        "CHUNK_OVERLAP_SIZE": "120",
        "TIMEOUT": "240",
    }
)

# Drop any pre-existing OpenAI / LLM binding settings so that only the key
# entered interactively later in the notebook is used.
for var in [
    "OPENAI_API_KEY",
    "OPENAI_ORG_ID",
    "OPENAI_ORGANIZATION",
    "OPENAI_PROJECT",
    "OPENAI_DEFAULT_HEADERS",
    "LLM_BINDING_API_KEY",
    "LLM_BINDING_HOST",
]:
    os.environ.pop(var, None)

print(f"Base directory: {BASE_DIR}")
print(f"Assets directory: {ASSET_DIR}")
print(f"Storage directory: {WORKING_DIR}")
print("\n[4/10] Entering OpenAI API key securely...")


def clean_api_key(raw_value: str) -> str:
    """Normalize a pasted API key by removing common copy/paste wrapping.

    Handles, in order: surrounding whitespace, a ``Bearer`` prefix in any
    letter case, surrounding quotes or backticks in any combination, a
    ``NAME=value`` assignment (everything up to the first ``=`` is dropped),
    embedded whitespace, and non-ASCII characters.

    Args:
        raw_value: The text as entered by the user; ``None`` or empty is
            treated as an empty string.

    Returns:
        The cleaned key, or ``""`` if nothing usable remains.
    """
    # str.strip() with a character set removes any mix of these from both
    # ends, so nested wrappers such as "'key'" are fully unwrapped.
    wrapper_chars = "'\"` \t\r\n"

    key = str(raw_value or "").strip()
    key = re.sub(r"bearer\s+", "", key, flags=re.IGNORECASE)
    key = key.strip(wrapper_chars)
    if "=" in key:
        # Pasted as an assignment, e.g. OPENAI_API_KEY="sk-...".
        key = key.split("=", 1)[1].strip(wrapper_chars)
    key = re.sub(r"\s+", "", key)
    # Drops anything non-ASCII, e.g. smart quotes or zero-width characters.
    return key.encode("ascii", errors="ignore").decode("ascii").strip()
# Read the key without echoing it, then normalize what was pasted.
OPENAI_API_KEY_RAW = getpass.getpass("Paste your OpenAI API key here. Input is hidden: ")
OPENAI_API_KEY = clean_api_key(OPENAI_API_KEY_RAW)
if not OPENAI_API_KEY:
    raise ValueError(
        "No API key was captured. Paste the key into the hidden input box and press Enter."
    )

print("Captured key length:", len(OPENAI_API_KEY))
# The preview shows 12 + 6 characters. Only print it when that is at most half
# of the value; otherwise a short paste would be written to the notebook
# output almost or entirely in full.
if len(OPENAI_API_KEY) >= 2 * (12 + 6):
    print("Captured key prefix:", OPENAI_API_KEY[:12] + "...")
    print("Captured key suffix:", "..." + OPENAI_API_KEY[-6:])
else:
    print("Captured key is too short to preview safely; no part of it is shown.")
# Model choices for chat, vision and embeddings; chat and vision share a model.
LLM_MODEL = VISION_MODEL = "gpt-4o-mini"
EMBEDDING_MODEL, EMBEDDING_DIM = "text-embedding-3-small", 1536

# Single async client, authenticated with the key captured above.
openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)

# Mirror the selections into the environment (values must be strings).
os.environ.update(
    {
        "LLM_MODEL": LLM_MODEL,
        "VISION_MODEL": VISION_MODEL,
        "EMBEDDING_MODEL": EMBEDDING_MODEL,
        "EMBEDDING_DIM": str(EMBEDDING_DIM),
    }
)
print("Testing OpenAI chat API with the captured key...")
try:
test_response = await openai_client.chat.completions.create(
model=LLM_MODEL,
messages=[{"role": "user", "content": "Reply with exactly: ok"}],
temperature=0,
)
print("Chat API test response:", test_response.choices[0].message.content)
except Exception as e:
raise RuntimeError(
"The key was captured, but OpenAI rejected the request or the account/model access failed. "
"Check billing, project permissions, and make sure this is an OpenAI Platform API key."
) from e
print("\nTesting OpenAI embedding API...")
try:
test_embedding = await openai_client.embeddings.create(
model=EMBEDDING_MODEL,
input=["RAG-Anything embedding test"],
)
print("Embedding vector length:", len(test_embedding.data[0].embedding))
except Exception as e:
raise RuntimeError(
"Chat worked, but embeddings failed. Make sure your API key has permission for embeddings."
) from e
print("OpenAI API key is working.")
print(f"Chat model: {LLM_MODEL}")
print(f"Vision model: {VISION_MODEL}")
print(f"Embedding model: {EMBEDDING_MODEL}")
print(f"Embedding dimension: {EMBEDDING_DIM}")
The script sets environment variables for language, caching, and chunking. It clears any existing OpenAI environment variables to prevent conflicts. Users must paste their API key into a hidden box. The code strips formatting characters like “Bearer ” and quotes before validating the key.
It then tests the connection by sending a simple chat request and generating an embedding vector. If either step fails, the script stops and displays an error message pointing to billing or permission issues.
Generating a Synthetic Multimodal Report
The tutorial creates sample data to test the system. It generates a table and a chart showing query volume and accuracy trends over six months.
print("\n[5/10] Creating a synthetic multimodal report...")

# Six months of made-up service metrics, one column per metric.
monthly_data = pd.DataFrame(
    {
        "Month": ["Jan", "Feb", "Mar", "Apr", "May", "Jun"],
        "Query Volume": [1200, 1700, 2100, 2600, 3300, 4100],
        "Hybrid Accuracy": [0.71, 0.74, 0.79, 0.83, 0.87, 0.91],
        "Average Latency ms": [980, 920, 850, 790, 760, 730],
    }
)
# Markdown rendering of the same table, without the index column.
table_md = monthly_data.to_markdown(index=False)

# Line chart: raw query volume plus accuracy scaled by 4000 so that both
# series can share one y-axis.
month_axis = monthly_data["Month"]
chart_series = (
    (monthly_data["Query Volume"], "o", "Query Volume"),
    (monthly_data["Hybrid Accuracy"] * 4000, "s", "Hybrid Accuracy scaled"),
)
plt.figure(figsize=(8, 4.8))
for series_values, series_marker, series_label in chart_series:
    plt.plot(month_axis, series_values, marker=series_marker, label=series_label)
plt.title("Multimodal RAG Usage and Quality Trend")
plt.xlabel("Month")
plt.ylabel("Volume / Scaled Accuracy")
plt.legend()
plt.grid(True, alpha=0.3)

# Caption in the top-left corner, positioned in axes-relative coordinates.
caption_style = dict(
    transform=plt.gca().transAxes,
    fontsize=9,
    verticalalignment="top",
    bbox=dict(boxstyle="round", alpha=0.15),
)
plt.text(0.02, 0.95, "Synthetic figure: usage rises while latency falls", **caption_style)

chart_path = ASSET_DIR / "raganything_quality_trend.png"
plt.tight_layout()
pltSource Read original →



