PyGraphistry Implementation Workflow for Interactive Graph Intelligence Pipelines in Security Analytics and Risk Investigation

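A tutorial demonstrates how to construct an interactive graph analytics workflow in Google Colab using PyGraphistry. The process involves creating a synthetic enterprise access dataset, converting it into nodes and edges, and layering risk scores, anomaly indicators, centrality metrics, and community detection on top.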

By AI Maestro · June 29, 2026 · 5 min read
PyGraphistry Implementation Workflow for Interactive Graph Intelligence Pipelines in Security Analytics and Risk Investigation

A tutorial demonstrates how to construct an interactive graph analytics workflow in Google Colab using PyGraphistry. The process involves creating a synthetic enterprise access dataset, converting it into nodes and edges, and layering risk scores, anomaly indicators, centrality metrics, and community detection. Without configured credentials, the system generates local interactive visualisations. The goal is to show how graph intelligence aids in spotting suspicious users, risky devices, IP relationships, sensitive services, and high-risk behavioural patterns within a security context.

Installing PyGraphistry and dependencies

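The setup cell installs PyGraphistry and the supporting libraries for analytics, visualisation, and machine learning. It then creates an output directory, seeds the random number generators, and reads Graphistry credentials from Colab secrets or environment variables. When no credentials are available, the notebook skips Graphistry Hub uploads and relies on local analytics and local visualisation instead. A small helper, `nid`, prefixes each identifier with its entity type (for example `user:user_001`) so that nodes of different entity types never share an ID.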

import os, sys, subprocess, warnings, textwrap, json, math, random
# Install a blanket "ignore" filter: every warning raised after this point is
# suppressed (presumably to keep the notebook output quiet during the tutorial).
warnings.filterwarnings("ignore")
def pip_install(packages):
    """Install or upgrade *packages* with pip inside the running interpreter.

    Args:
        packages: A requirement specifier string, or an iterable of them
            (for example ``["pandas", "graphistry[networkx]"]``).

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # A bare string would otherwise be unpacked into single characters.
    if isinstance(packages, str):
        packages = [packages]
    requirements = list(packages)
    if not requirements:
        # pip fails when given no requirement at all, and check=True would turn
        # that into an exception; an empty request is simply a no-op.
        return
    # Use `sys.executable -m pip` so the packages land in the environment of
    # the interpreter that is running this code.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "-U", *requirements],
        check=True,
    )
# Install (or upgrade, because pip_install passes -U) the third-party packages
# this tutorial relies on. This runs before the imports that follow so that
# those imports can succeed on a fresh runtime.
pip_install([
   "graphistry[networkx,umap-learn]",  # graphistry plus its networkx and umap-learn extras
   "pandas",
   "numpy",
   "networkx",
   "scikit-learn",
   "pyvis",
   "matplotlib",
   "pyarrow"
])
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import graphistry
from pathlib import Path
from IPython.display import display, HTML, IFrame
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.decomposition import PCA
from pyvis.network import Network
# Artifacts go under Colab's /content workspace when that directory exists.
# Outside Colab (for example a local Jupyter session) fall back to the current
# working directory: creating /content at the filesystem root usually fails
# with PermissionError, which would stop the notebook before any analysis runs.
_colab_root = Path("/content")
OUT_DIR = (_colab_root if _colab_root.is_dir() else Path.cwd()) / "pygraphistry_advanced_tutorial"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# A single seed drives both NumPy's Generator and the stdlib `random` module.
SEED = 42
rng = np.random.default_rng(SEED)
random.seed(SEED)

# Banner describing what the tutorial does.
print("=" * 100)
print("PyGraphistry Advanced Colab Tutorial")
print("=" * 100)
print("This tutorial builds an enterprise-style access graph, computes graph analytics,")
print("creates suspicious subgraphs, exports graph artifacts, and optionally uploads")
print("interactive visualizations to Graphistry Hub if credentials are available.")
print("=" * 100)
def colab_secret(name, default=""):
    """Look up the setting *name*, preferring a Colab secret over the environment.

    Resolution order: a non-empty Colab secret, then the environment variable
    of the same name, then *default*. An empty result also yields *default*.

    Args:
        name: Name of the Colab secret / environment variable.
        default: Value returned when nothing non-empty is found.

    Returns:
        The resolved value, or *default*.
    """
    resolved = os.environ.get(name, default)
    try:
        from google.colab import userdata
        from_colab = userdata.get(name)
    except Exception:
        # Best effort: any failure while importing or querying Colab's secret
        # store just means no secret is used.
        from_colab = None
    if from_colab:
        resolved = from_colab
    return resolved if resolved else default
# Connection settings and credentials, each resolved through colab_secret with
# the default given as the second argument.
GRAPHISTRY_SERVER = colab_secret("GRAPHISTRY_SERVER", "hub.graphistry.com")
GRAPHISTRY_PROTOCOL = colab_secret("GRAPHISTRY_PROTOCOL", "https")
GRAPHISTRY_USERNAME = colab_secret("GRAPHISTRY_USERNAME", "")
GRAPHISTRY_PASSWORD = colab_secret("GRAPHISTRY_PASSWORD", "")
GRAPHISTRY_PERSONAL_KEY_ID = colab_secret("GRAPHISTRY_PERSONAL_KEY_ID", "")
GRAPHISTRY_PERSONAL_KEY_SECRET = colab_secret("GRAPHISTRY_PERSONAL_KEY_SECRET", "")

# Keyword arguments shared by every register() call below.
_hub_endpoint = {"api": 3, "protocol": GRAPHISTRY_PROTOCOL, "server": GRAPHISTRY_SERVER}
_has_personal_key = bool(GRAPHISTRY_PERSONAL_KEY_ID and GRAPHISTRY_PERSONAL_KEY_SECRET)
_has_password_login = bool(GRAPHISTRY_USERNAME and GRAPHISTRY_PASSWORD)

# REGISTERED becomes True only after a credentialed register() call returns.
REGISTERED = False
try:
    if _has_personal_key:
        # A personal key pair takes precedence over username/password.
        graphistry.register(
            personal_key_id=GRAPHISTRY_PERSONAL_KEY_ID,
            personal_key_secret=GRAPHISTRY_PERSONAL_KEY_SECRET,
            **_hub_endpoint,
        )
        REGISTERED = True
        print("Graphistry registered with personal key credentials.")
    elif _has_password_login:
        graphistry.register(
            username=GRAPHISTRY_USERNAME,
            password=GRAPHISTRY_PASSWORD,
            **_hub_endpoint,
        )
        REGISTERED = True
        print("Graphistry registered with username/password credentials.")
    else:
        # No credentials: configure the endpoint only and explain how to add them.
        graphistry.register(**_hub_endpoint)
        for _hint in (
            "No Graphistry credentials found. Local analytics will run; Graphistry .plot() uploads will be skipped.",
            "To enable live Graphistry plots, add Colab secrets:",
            "GRAPHISTRY_PERSONAL_KEY_ID and GRAPHISTRY_PERSONAL_KEY_SECRET",
            "or GRAPHISTRY_USERNAME and GRAPHISTRY_PASSWORD",
        ):
            print(_hint)
except Exception as err:
    # Registration is optional; on any failure continue without Graphistry uploads.
    REGISTERED = False
    print("Graphistry registration was not completed:", repr(err))
    print("Continuing with local analytics and local HTML visualization.")
def nid(kind, value):
    """Return the node identifier ``"<kind>:<value>"``.

    Prefixing the value with its entity kind keeps equal raw values of
    different kinds apart, e.g. ``nid("user", "a")`` vs ``nid("device", "a")``.
    """
    return "{}:{}".format(kind, value)

Generating enterprise access dataset

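The script creates a synthetic dataset representing an enterprise environment. It includes users, devices, IP addresses, services, roles, and geographic locations. The code simulates normal activity alongside suspicious behaviour by seeding a small set of compromised user accounts, risky devices, and risky IP addresses. Every event receives a risk score that rises with those signals as well as with off-hours access, impossible travel, sensitive services, and privileged roles.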

# Sizes of the synthetic environment.
n_users = 55
n_devices = 42
n_ips = 36
n_services = 15  # equals len(services) below
n_roles = 7  # equals len(roles) below
n_geos = 10  # equals len(geos) below
n_events = 2200

# Entity identifiers.
users = [f"user_{i:03d}" for i in range(n_users)]
devices = [f"device_{i:03d}" for i in range(n_devices)]
# One 10.x.y.z address per index; the last octet is a random draw from 1..254
# (the upper bound of rng.integers is exclusive).
ips = [f"10.{i // 255}.{i % 255}.{rng.integers(1, 255)}" for i in range(1, n_ips + 1)]
services = [
    "salesforce", "snowflake", "github", "jira", "slack",
    "vpn", "okta", "aws_console", "gcp_console", "databricks",
    "hris", "email", "crm", "vault", "payments_api",
]
roles = ["employee", "analyst", "engineer", "manager", "admin", "contractor", "service_account"]
geos = ["IN", "US", "GB", "DE", "SG", "AE", "BR", "NL", "AU", "JP"]

# Seeded "ground truth" used to inject suspicious behaviour into the events.
privileged_users = set(rng.choice(users, size=7, replace=False))
# Draw compromised accounts from the non-privileged users, kept in `users`
# order. Building the pool with a set difference would make its order depend
# on per-process string hash randomisation, so the chosen accounts would
# change between interpreter runs even though rng is seeded.
compromised_users = set(
    rng.choice(
        [u for u in users if u not in privileged_users],
        size=4,
        replace=False,
    )
)
risky_devices = set(rng.choice(devices, size=5, replace=False))
risky_ips = set(rng.choice(ips, size=5, replace=False))
sensitive_services = {"aws_console", "gcp_console", "vault", "payments_api", "snowflake"}
user_role = {}
for u in users:
if u in privileged_users:
user_role[u] = rng.choice(["admin", "manager", "engineer"], p=[0.55, 0.2, 0.25])
elif rng.random() < 0.08: user_role[u] = "contractor" else: user_role[u] = rng.choice(["employee", "analyst", "engineer"], p=[0.45, 0.25, 0.30]) user_home_geo = {u: rng.choice(geos, p=[0.30, 0.22, 0.10, 0.08, 0.08, 0.05, 0.04, 0.04, 0.04, 0.05]) for u in users} device_owner = {d: rng.choice(users) for d in devices} base_time = pd.Timestamp("2026-06-01 00:00:00") events = [] for i in range(n_events): if rng.random() < 0.18: user = rng.choice(list(compromised_users)) else: user = rng.choice(users) if user in compromised_users and rng.random() < 0.42: device = rng.choice(list(risky_devices)) else: owned = [d for d, owner in device_owner.items() if owner == user] device = rng.choice(owned if owned and rng.random() < 0.78 else devices) if user in compromised_users and rng.random() < 0.50: ip = rng.choice(list(risky_ips)) else: ip = rng.choice(ips) if user in compromised_users and rng.random() < 0.45: service = rng.choice(list(sensitive_services)) else: service = rng.choice(services) role = user_role[user] home_geo = user_home_geo[user] geo = home_geo if rng.random() < 0.88 else rng.choice([g for g in geos if g != home_geo]) hour = int(rng.integers(0, 24)) minute = int(rng.integers(0, 60)) timestamp = base_time + pd.Timedelta(days=int(rng.integers(0, 10)), hours=hour, minutes=minute) impossible_travel = int(geo != home_geo and rng.random() < 0.65) off_hours = int(hour < 6 or hour > 21)
service_sensitivity = 1.0 if service in sensitive_services else 0.25
privileged = int(role in ["admin", "manager", "service_account"])
compromised = int(user in compromised_users)
risky_infra = int(device in risky_devices or ip in risky_ips)
risk_score = (
0.08
+ 0.22 * compromised
+ 0.18 * risky_infra
+ 0.17 * impossible_travel
+ 0.13 * off_hours
+ 0.15 * service_sensitivity
+ 0.07 * privileged
+ rng.normal(0, 0.06)
)
risk_score = float(np.clip(risk_score, 0.0, 1.0))
success_probability = 0.96 - 0.45 * risk_score
is_success = bool(rng.random() < success_probability) amount = float(np.round(np.exp(rng.normal(7.0 + 1.4 * service_sensitivity, 0.8)), 2)) if service not in {"payments_api", "vault", "snowflake"}: amount = float(np.round(amount * rng.uniform(0.01, 0.10), 2)) events.append({ "event_id": f"evt_{i:05d}", "timestamp": timestamp, "user": user, "device": device, "ip": ip, "service": service, "role": role, "geo": geo, "home_geo": home_geo, "is_success": is_success, "off_hours": bool(off_hours), "impossible_travel": bool(impossible_travel), "risk_score": risk_score, "amount": amount, "is_seeded_compromise": bool
Scroll to Top