#!/usr/bin/env python3
"""
Dendrite to Synapse PostgreSQL migration script.

Assumes:
- Both databases are PostgreSQL
- Synapse DB already initialized (run Synapse once to create schema)
- Dendrite DB is the source of truth
- Only local data is migrated (no federation state)

Usage:
    python3 migrate.py \
        --dendrite-db "postgresql://user:pass@localhost/dendrite" \
        --synapse-db "postgresql://user:pass@localhost/synapse" \
        --server-name "example.com" \
        [--dendrite-media-path /path/to/dendrite/media] \
        [--synapse-media-path /path/to/synapse/media] \
        [--phase 1,2,3,4,5,6,7] \
        [--dry-run]
"""

import argparse
import json
import logging
import os
import shutil
import sys
import hashlib
import base64
from typing import Any

import psycopg2
import psycopg2.extras

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
log = logging.getLogger("dendrite2synapse")


class Migrator:
    """Copies local Dendrite data into an already-initialized Synapse database.

    Holds one read-only connection to the Dendrite (source) database and one
    writable connection to the Synapse (destination) database, plus in-memory
    caches that resolve Dendrite's numeric IDs (NIDs) to Matrix identifiers.
    """

    def __init__(
        self,
        dendrite_dsn: str,
        synapse_dsn: str,
        server_name: str,
        dendrite_media_path: str | None = None,
        synapse_media_path: str | None = None,
        dry_run: bool = False,
    ):
        """Open both database connections.

        :param dendrite_dsn: libpq DSN of the source (Dendrite) database.
        :param synapse_dsn: libpq DSN of the destination (Synapse) database.
        :param server_name: local Matrix server name (e.g. "example.com").
        :param dendrite_media_path: root of Dendrite's media store (optional).
        :param synapse_media_path: root of Synapse's media store (optional).
        :param dry_run: when True, phases run but never commit to Synapse.
        """
        self.server_name = server_name
        self.dendrite_media_path = dendrite_media_path
        self.synapse_media_path = synapse_media_path
        self.dry_run = dry_run
        # Source opened read-only so the migration can never corrupt Dendrite.
        self.src = psycopg2.connect(dendrite_dsn)
        self.src.set_session(readonly=True)
        self.dst = psycopg2.connect(synapse_dsn)
        # Caches for NID resolution
        self._event_type_cache: dict[int, str] = {}
        self._state_key_cache: dict[int, str] = {}
        self._room_nid_cache: dict[int, str] = {}  # room_nid -> room_id
        self._room_id_nid_cache: dict[str, int] = {}  # room_id -> room_nid
        self._room_version_cache: dict[int, str] = {}  # room_nid -> room_version
        self._user_nid_cache: dict[int, str] = {}  # state_key_nid -> user_id (for membership)

    def close(self):
        """Close both database connections."""
        self.src.close()
        self.dst.close()

    # ── NID Resolution ──────────────────────────────────────────────

    def _load_event_types(self):
        """Load event_type_nid -> event_type mapping."""
        with self.src.cursor() as cur:
            cur.execute("SELECT event_type_nid, event_type FROM roomserver_event_types")
            self._event_type_cache = dict(cur.fetchall())
        log.info("Loaded %d event types", len(self._event_type_cache))

    def _load_state_keys(self):
        """Load event_state_key_nid -> event_state_key mapping."""
        with self.src.cursor() as cur:
            cur.execute(
                "SELECT event_state_key_nid, event_state_key FROM roomserver_event_state_keys"
            )
            self._state_key_cache = dict(cur.fetchall())
        log.info("Loaded %d state keys", len(self._state_key_cache))

    def _load_rooms(self):
        """Load room_nid <-> room_id and room_version mappings."""
        with self.src.cursor() as cur:
            cur.execute("SELECT room_nid, room_id, room_version FROM roomserver_rooms")
            for nid, rid, ver in cur.fetchall():
                self._room_nid_cache[nid] = rid
                self._room_id_nid_cache[rid] = nid
                self._room_version_cache[nid] = ver
        log.info("Loaded %d rooms", len(self._room_nid_cache))

    def _resolve_event_type(self, nid: int) -> str:
        # Fallback keeps the migration moving on a dangling NID rather than crashing.
        return self._event_type_cache.get(nid, f"unknown.type.{nid}")

    def _resolve_state_key(self, nid: int) -> str:
        return self._state_key_cache.get(nid, "")

    def _resolve_room_nid(self, nid: int) -> str:
        return self._room_nid_cache.get(nid, f"!unknown:{self.server_name}")

    def _get_room_version(self, room_nid: int) -> str:
        # Default "10" if the room is unknown; maps to format version 3 below.
        return self._room_version_cache.get(room_nid, "10")

    @staticmethod
    def _room_version_to_format(room_version: str) -> int:
        """Map Matrix room version to Synapse event format version.

        v1-2 -> 1 (EventFormatVersions.ROOM_V1_V2)
        v3 -> 2 (EventFormatVersions.ROOM_V3)
        v4-10, MSC variants -> 3 (EventFormatVersions.ROOM_V4_PLUS)
        v11+, Hydra -> 4 (EventFormatVersions.ROOM_V11_HYDRA_PLUS)
        """
        try:
            v = int(room_version)
        except (ValueError, TypeError):
            # MSC or unknown variants - assume v3 format
            return 3
        if v <= 2:
            return 1
        if v == 3:
            return 2
        if v <= 10:
            return 3
        return 4  # v11+
    def load_nid_caches(self):
        """Load all NID lookup caches from Dendrite."""
        self._load_event_types()
        self._load_state_keys()
        self._load_rooms()

    # ── Phase 1: Users & Profiles ───────────────────────────────────

    def migrate_users(self):
        """Phase 1: migrate accounts, profiles, devices and access tokens.

        Upserts into Synapse's users/profiles/devices tables keyed on their
        natural unique constraints, so re-running this phase is safe.
        """
        log.info("=== Phase 1: Users & Profiles ===")
        with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur:
            src_cur.execute(
                "SELECT localpart, server_name, created_ts, password_hash, "
                "appservice_id, is_deactivated, account_type FROM userapi_accounts"
            )
            accounts = src_cur.fetchall()
        log.info("Found %d accounts to migrate", len(accounts))
        with self.dst.cursor() as dst_cur:
            for acc in accounts:
                user_id = f"@{acc['localpart']}:{acc['server_name']}"
                # account_type: 1=user, 2=guest, 3=admin, 4=appservice
                is_guest = 1 if acc["account_type"] == 2 else 0
                is_admin = 1 if acc["account_type"] == 3 else 0
                deactivated = 1 if acc["is_deactivated"] else 0
                # Dendrite stores created_ts in milliseconds, Synapse in seconds
                created_ts = acc["created_ts"] // 1000 if acc["created_ts"] else 0
                dst_cur.execute(
                    """
                    INSERT INTO users (name, password_hash, creation_ts, admin,
                                       is_guest, appservice_id, deactivated)
                    VALUES (%s, %s, %s, %s, %s, %s, %s)
                    ON CONFLICT (name) DO UPDATE SET
                        password_hash = EXCLUDED.password_hash,
                        creation_ts = EXCLUDED.creation_ts,
                        admin = EXCLUDED.admin,
                        is_guest = EXCLUDED.is_guest,
                        deactivated = EXCLUDED.deactivated
                    """,
                    (
                        user_id,
                        acc["password_hash"],
                        created_ts,
                        is_admin,
                        is_guest,
                        acc["appservice_id"],
                        deactivated,
                    ),
                )
            log.info("Migrated %d user accounts", len(accounts))
            # Profiles
            src_cur2 = self.src.cursor(cursor_factory=psycopg2.extras.DictCursor)
            src_cur2.execute(
                "SELECT localpart, server_name, display_name, avatar_url FROM userapi_profiles"
            )
            profiles = src_cur2.fetchall()
            src_cur2.close()
            for prof in profiles:
                user_id = f"@{prof['localpart']}:{prof['server_name']}"
                # Synapse profiles: user_id is localpart (UNIQUE), full_user_id is @user:server (NOT NULL since v77)
                dst_cur.execute(
                    """
                    INSERT INTO profiles (user_id, full_user_id, displayname, avatar_url)
                    VALUES (%s, %s, %s, %s)
                    ON CONFLICT (user_id) DO UPDATE SET
                        full_user_id = EXCLUDED.full_user_id,
                        displayname = EXCLUDED.displayname,
                        avatar_url = EXCLUDED.avatar_url
                    """,
                    (prof["localpart"], user_id, prof["display_name"], prof["avatar_url"]),
                )
            log.info("Migrated %d profiles", len(profiles))
            # Devices
            src_cur3 = self.src.cursor(cursor_factory=psycopg2.extras.DictCursor)
            src_cur3.execute(
                "SELECT device_id, localpart, server_name, display_name, "
                "created_ts, last_seen_ts, ip, user_agent, access_token FROM userapi_devices"
            )
            devices = src_cur3.fetchall()
            src_cur3.close()
            # Determine starting id for access_tokens (fresh DB = 1)
            dst_cur.execute("SELECT COALESCE(MAX(id), 0) FROM access_tokens")
            next_token_id = dst_cur.fetchone()[0] + 1
            for dev in devices:
                user_id = f"@{dev['localpart']}:{dev['server_name']}"
                dst_cur.execute(
                    """
                    INSERT INTO devices (user_id, device_id, display_name, last_seen, ip, user_agent)
                    VALUES (%s, %s, %s, %s, %s, %s)
                    ON CONFLICT (user_id, device_id) DO UPDATE SET
                        display_name = EXCLUDED.display_name,
                        last_seen = EXCLUDED.last_seen,
                        ip = EXCLUDED.ip,
                        user_agent = EXCLUDED.user_agent
                    """,
                    (
                        user_id,
                        dev["device_id"],
                        dev["display_name"],
                        dev["last_seen_ts"],
                        dev["ip"],
                        dev["user_agent"],
                    ),
                )
                # Preserve existing client sessions — without this clients are forced
                # to re-login, losing device_id continuity and with it the local
                # Megolm key store on many clients (critical for E2EE history).
                # NOTE(review): assumes Dendrite's stored access_token is usable
                # as-is by Synapse (same token format, not hashed) — confirm
                # against the deployed Dendrite version before relying on this.
                if dev["access_token"]:
                    dst_cur.execute(
                        """
                        INSERT INTO access_tokens (id, user_id, device_id, token, valid_until_ms)
                        VALUES (%s, %s, %s, %s, NULL)
                        ON CONFLICT (token) DO NOTHING
                        """,
                        (next_token_id, user_id, dev["device_id"], dev["access_token"]),
                    )
                    next_token_id += 1
            log.info("Migrated %d devices (+ access tokens)", len(devices))
        if not self.dry_run:
            self.dst.commit()
        log.info("Phase 1 complete")

    # ── Phase 2: Rooms ──────────────────────────────────────────────

    def migrate_rooms(self):
        """Phase 2: migrate rooms, their creators, and room aliases."""
        log.info("=== Phase 2: Rooms ===")
        # Get rooms
        with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur:
            src_cur.execute("SELECT room_nid, room_id, room_version FROM roomserver_rooms")
            rooms = src_cur.fetchall()
        log.info("Found %d rooms to migrate", len(rooms))
        # Find room creators from m.room.create events
        creators = {}
        with self.src.cursor() as src_cur:
            # event_type_nid 1 = m.room.create
            src_cur.execute(
                """
                SELECT r.room_id, ej.event_json
                FROM roomserver_events e
                JOIN roomserver_rooms r ON e.room_nid = r.room_nid
                JOIN roomserver_event_json ej ON e.event_nid = ej.event_nid
                WHERE e.event_type_nid = 1
                """
            )
            for room_id, event_json_str in src_cur.fetchall():
                try:
                    ev = json.loads(event_json_str)
                    # Creator is in content.creator or sender field
                    creator = ev.get("content", {}).get("creator") or ev.get("sender", "")
                    creators[room_id] = creator
                except (json.JSONDecodeError, TypeError):
                    pass
        with self.dst.cursor() as dst_cur:
            for room in rooms:
                room_id = room["room_id"]
                creator = creators.get(room_id, "")
                dst_cur.execute(
                    """
                    INSERT INTO rooms (room_id, creator, room_version, is_public, has_auth_chain_index)
                    VALUES (%s, %s, %s, false, true)
                    ON CONFLICT (room_id) DO UPDATE SET
                        creator = EXCLUDED.creator,
                        room_version = EXCLUDED.room_version
                    """,
                    (room_id, creator, room["room_version"]),
                )
            log.info("Migrated %d rooms", len(rooms))
            # Room aliases
            src_cur2 = self.src.cursor(cursor_factory=psycopg2.extras.DictCursor)
            src_cur2.execute(
                "SELECT alias, room_id, creator_id FROM roomserver_room_aliases"
            )
            aliases = src_cur2.fetchall()
            src_cur2.close()
            for alias in aliases:
                dst_cur.execute(
                    """
                    INSERT INTO room_aliases (room_alias, room_id, creator)
                    VALUES (%s, %s, %s)
                    ON CONFLICT (room_alias) DO NOTHING
                    """,
                    (alias["alias"], alias["room_id"], alias["creator_id"]),
                )
                # Also add the server for each alias
                # Extract server from alias: #room:server.com -> server.com
                # room_alias_servers has no unique constraint, check existence first
                alias_server = alias["alias"].split(":")[-1] if ":" in alias["alias"] else self.server_name
                dst_cur.execute(
                    "SELECT 1 FROM room_alias_servers WHERE room_alias = %s AND server = %s",
                    (alias["alias"], alias_server),
                )
                if not dst_cur.fetchone():
                    dst_cur.execute(
                        "INSERT INTO room_alias_servers (room_alias, server) VALUES (%s, %s)",
                        (alias["alias"], alias_server),
                    )
            log.info("Migrated %d room aliases", len(aliases))
        if not self.dry_run:
            self.dst.commit()
        log.info("Phase 2 complete")
roomserver_room_aliases" ) aliases = src_cur2.fetchall() src_cur2.close() for alias in aliases: dst_cur.execute( """ INSERT INTO room_aliases (room_alias, room_id, creator) VALUES (%s, %s, %s) ON CONFLICT (room_alias) DO NOTHING """, (alias["alias"], alias["room_id"], alias["creator_id"]), ) # Also add the server for each alias # Extract server from alias: #room:server.com -> server.com # room_alias_servers has no unique constraint, check existence first alias_server = alias["alias"].split(":")[-1] if ":" in alias["alias"] else self.server_name dst_cur.execute( "SELECT 1 FROM room_alias_servers WHERE room_alias = %s AND server = %s", (alias["alias"], alias_server), ) if not dst_cur.fetchone(): dst_cur.execute( "INSERT INTO room_alias_servers (room_alias, server) VALUES (%s, %s)", (alias["alias"], alias_server), ) log.info("Migrated %d room aliases", len(aliases)) if not self.dry_run: self.dst.commit() log.info("Phase 2 complete") # ── Phase 3: Events ───────────────────────────────────────────── def migrate_events(self): log.info("=== Phase 3: Events ===") # Fetch all events with their JSON, ordered by event_nid (creation order) with self.src.cursor( name="events_cursor", cursor_factory=psycopg2.extras.DictCursor ) as src_cur: src_cur.itersize = 5000 src_cur.execute( """ SELECT e.event_nid, e.event_id, e.room_nid, e.event_type_nid, e.event_state_key_nid, e.depth, e.is_rejected, e.auth_event_nids, ej.event_json FROM roomserver_events e JOIN roomserver_event_json ej ON e.event_nid = ej.event_nid ORDER BY e.event_nid ASC """ ) # Track stream ordering (global sequential) stream_ordering = 0 batch: list[tuple] = [] batch_json: list[tuple] = [] batch_state: list[tuple] = [] batch_edges: list[tuple] = [] batch_auth: list[tuple] = [] event_count = 0 rejected_count = 0 for row in src_cur: if row["is_rejected"]: rejected_count += 1 continue event_id = row["event_id"] room_id = self._resolve_room_nid(row["room_nid"]) event_type = 
self._resolve_event_type(row["event_type_nid"]) state_key_nid = row["event_state_key_nid"] depth = row["depth"] event_json_str = row["event_json"] # Parse event JSON for sender, origin_server_ts, content try: ev = json.loads(event_json_str) except (json.JSONDecodeError, TypeError): log.warning("Skipping event %s: invalid JSON", event_id) continue sender = ev.get("sender", "") origin_server_ts = ev.get("origin_server_ts", 0) content = json.dumps(ev.get("content", {})) contains_url = "url" in ev.get("content", {}) # State key: 0 means not a state event in Dendrite (nid 0 doesn't exist) # nid 1 = empty string (which IS a valid state key) is_state_event = state_key_nid != 0 state_key = self._resolve_state_key(state_key_nid) if is_state_event else None # Ordering stream_ordering += 1 # topological_ordering = depth (Synapse uses depth directly) topo = depth # internal_metadata: stream_ordering and outlier are loaded from # events table columns, NOT from this JSON. This JSON stores only # supplementary flags like soft_failed, out_of_band_membership, etc. # For migrated events, empty object is correct. 
internal_metadata = "{}" # Determine format_version from room version # v1-2 -> 1, v3 -> 2, v4-10 -> 3, v11+ -> 4 room_version_str = ev.get("room_version", "") or "" # Room version might be in the create event content or we look it up format_version = self._room_version_to_format( self._get_room_version(row["room_nid"]) ) # Rejection reason rejection_reason = None batch.append(( topo, # topological_ordering event_id, event_type, # type room_id, content, "", # unrecognized_keys True, # processed - these are already-processed events False, # outlier depth, origin_server_ts, origin_server_ts, # received_ts (use origin_server_ts as approximation) sender, contains_url, "master", # instance_name stream_ordering, state_key, rejection_reason, )) batch_json.append(( event_id, room_id, internal_metadata, event_json_str, format_version, )) # State events get an entry in state_events if is_state_event: batch_state.append(( event_id, room_id, event_type, state_key, "", # prev_state (no longer written per schema v67) )) # Event auth chain entries auth_nids = row["auth_event_nids"] or [] # We need to resolve auth_event_nids to event_ids # We'll do this in a second pass since we need event_nid -> event_id mapping event_count += 1 if len(batch) >= 5000: self._flush_events(batch, batch_json, batch_state) batch.clear() batch_json.clear() batch_state.clear() log.info(" ... 
migrated %d events so far", event_count) # Flush remaining if batch: self._flush_events(batch, batch_json, batch_state) log.info("Migrated %d events (%d rejected skipped)", event_count, rejected_count) # Second pass: event_edges and event_auth from the event JSON prev_events/auth_events self._migrate_event_graph() # Room depth self._migrate_room_depth() # Forward extremities self._migrate_forward_extremities() if not self.dry_run: self.dst.commit() log.info("Phase 3 complete (stream_ordering max = %d)", stream_ordering) return stream_ordering def _flush_events(self, batch, batch_json, batch_state): """Insert batches of events into Synapse tables.""" with self.dst.cursor() as dst_cur: psycopg2.extras.execute_values( dst_cur, """ INSERT INTO events (topological_ordering, event_id, type, room_id, content, unrecognized_keys, processed, outlier, depth, origin_server_ts, received_ts, sender, contains_url, instance_name, stream_ordering, state_key, rejection_reason) VALUES %s ON CONFLICT (event_id) DO NOTHING """, batch, page_size=1000, ) psycopg2.extras.execute_values( dst_cur, """ INSERT INTO event_json (event_id, room_id, internal_metadata, json, format_version) VALUES %s ON CONFLICT (event_id) DO NOTHING """, batch_json, page_size=1000, ) if batch_state: psycopg2.extras.execute_values( dst_cur, """ INSERT INTO state_events (event_id, room_id, type, state_key, prev_state) VALUES %s ON CONFLICT (event_id) DO NOTHING """, batch_state, page_size=1000, ) def _migrate_event_graph(self): """Build event_edges and event_auth from event JSON prev_events and auth_events.""" log.info("Building event graph (edges + auth)...") with self.dst.cursor(name="event_graph_cur") as dst_cur: dst_cur.itersize = 5000 dst_cur.execute("SELECT event_id, json FROM event_json ORDER BY event_id") batch_edges = [] batch_auth = [] count = 0 for event_id, event_json_str in dst_cur: try: ev = json.loads(event_json_str) except (json.JSONDecodeError, TypeError): continue # prev_events -> event_edges 
prev_events = ev.get("prev_events", []) for prev in prev_events: # In room v1/v2 prev_events is [[event_id, {hash}], ...] # In room v3+ prev_events is [event_id, ...] if isinstance(prev, list): prev_id = prev[0] else: prev_id = prev batch_edges.append((event_id, prev_id)) # auth_events -> event_auth auth_events = ev.get("auth_events", []) for auth in auth_events: if isinstance(auth, list): auth_id = auth[0] else: auth_id = auth batch_auth.append((event_id, auth_id, ev.get("room_id", ""))) count += 1 if len(batch_edges) >= 5000 or len(batch_auth) >= 5000: self._flush_event_graph(batch_edges, batch_auth) batch_edges.clear() batch_auth.clear() if batch_edges or batch_auth: self._flush_event_graph(batch_edges, batch_auth) log.info("Built event graph for %d events", count) def _flush_event_graph(self, edges, auth): with self.dst.cursor() as cur: if edges: psycopg2.extras.execute_values( cur, """ INSERT INTO event_edges (event_id, prev_event_id) VALUES %s ON CONFLICT (event_id, prev_event_id) DO NOTHING """, edges, page_size=1000, ) if auth: # event_auth has no unique constraint, deduplicate in memory psycopg2.extras.execute_values( cur, """ INSERT INTO event_auth (event_id, auth_id, room_id) VALUES %s """, auth, page_size=1000, ) def _migrate_room_depth(self): """Set room_depth for each room from migrated events.""" log.info("Setting room depth...") with self.dst.cursor() as cur: cur.execute( """ INSERT INTO room_depth (room_id, min_depth) SELECT room_id, MIN(depth) FROM events WHERE NOT outlier GROUP BY room_id ON CONFLICT (room_id) DO UPDATE SET min_depth = EXCLUDED.min_depth """ ) log.info("Set room depth for %d rooms", cur.rowcount) def _migrate_forward_extremities(self): """Set forward extremities from Dendrite's latest_event_nids.""" log.info("Setting forward extremities from Dendrite room state...") with self.src.cursor() as src_cur: # Dendrite tracks forward extremities per room as latest_event_nids src_cur.execute( "SELECT room_id, latest_event_nids FROM 
roomserver_rooms" ) rooms = src_cur.fetchall() # Build event_nid -> event_id mapping for extremities all_nids = set() for _, nids in rooms: if nids: all_nids.update(nids) nid_to_event_id: dict[int, str] = {} if all_nids: with self.src.cursor() as src_cur: # Fetch in batches nid_list = list(all_nids) src_cur.execute( "SELECT event_nid, event_id FROM roomserver_events WHERE event_nid = ANY(%s)", (nid_list,), ) nid_to_event_id = dict(src_cur.fetchall()) count = 0 with self.dst.cursor() as cur: for room_id, nids in rooms: if not nids: continue for nid in nids: event_id = nid_to_event_id.get(nid) if event_id: cur.execute( """ INSERT INTO event_forward_extremities (event_id, room_id) VALUES (%s, %s) ON CONFLICT (event_id, room_id) DO NOTHING """, (event_id, room_id), ) count += 1 log.info("Set %d forward extremities", count) # ── Phase 4: Room State ───────────────────────────────────────── def migrate_room_state(self): log.info("=== Phase 4: Room State ===") # Build current_state_events from Dendrite's syncapi_current_room_state with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur: src_cur.execute( """ SELECT room_id, event_id, type, sender, state_key, membership FROM syncapi_current_room_state """ ) current_state = src_cur.fetchall() log.info("Found %d current state events", len(current_state)) with self.dst.cursor() as dst_cur: for cs in current_state: dst_cur.execute( """ INSERT INTO current_state_events (event_id, room_id, type, state_key, membership) VALUES (%s, %s, %s, %s, %s) ON CONFLICT (room_id, type, state_key) DO UPDATE SET event_id = EXCLUDED.event_id, membership = EXCLUDED.membership """, (cs["event_id"], cs["room_id"], cs["type"], cs["state_key"], cs["membership"]), ) log.info("Migrated %d current state events", len(current_state)) # Build state groups - one per room for current state # Synapse uses state groups extensively; we create minimal groups from current state self._build_state_groups() if not self.dry_run: self.dst.commit() 
    def _build_state_groups(self):
        """Build incremental state groups per room.

        Walks events in each room by topological/stream order. Each state event
        creates a new state group (delta from previous). Non-state events share
        the most recent state group. This gives Synapse correct historical
        state lookups.
        """
        log.info("Building state groups (incremental per room)...")
        with self.dst.cursor() as dst_cur:
            # Get all rooms that have events
            dst_cur.execute("SELECT DISTINCT room_id FROM events ORDER BY room_id")
            rooms = [r[0] for r in dst_cur.fetchall()]
        # state_group_id is global: groups across all rooms share one ID space.
        state_group_id = 0
        total_groups = 0
        total_mappings = 0
        for room_idx, room_id in enumerate(rooms):
            # Server-side cursor so a large room's history is streamed, not loaded.
            with self.dst.cursor(name=f"sg_room_{room_idx}") as ev_cur:
                ev_cur.itersize = 2000
                ev_cur.execute(
                    """
                    SELECT event_id, type, state_key
                    FROM events
                    WHERE room_id = %s
                    ORDER BY topological_ordering ASC, stream_ordering ASC
                    """,
                    (room_id,),
                )
                current_sg = None
                prev_sg = None
                # Track running state: (type, state_key) -> event_id
                running_state: dict[tuple[str, str], str] = {}
                batch_groups: list[tuple] = []  # (id, room_id, event_id)
                batch_state: list[tuple] = []  # (sg, room_id, type, state_key, event_id)
                batch_edges: list[tuple] = []  # (sg, prev_sg)
                batch_mappings: list[tuple] = []  # (event_id, sg)
                for event_id, ev_type, state_key in ev_cur:
                    # A NULL state_key in the events table marks a non-state event.
                    is_state = state_key is not None
                    if is_state:
                        # State event: create new state group
                        state_group_id += 1
                        prev_sg = current_sg
                        current_sg = state_group_id
                        # Update running state
                        running_state[(ev_type, state_key)] = event_id
                        batch_groups.append((current_sg, room_id, event_id))
                        # Only store the delta (the changed state entry)
                        # Synapse resolves full state by walking state_group_edges
                        batch_state.append((
                            current_sg, room_id, ev_type, state_key, event_id
                        ))
                        if prev_sg is not None:
                            batch_edges.append((current_sg, prev_sg))
                        total_groups += 1
                    elif current_sg is None:
                        # Non-state event before any state - create initial empty group
                        state_group_id += 1
                        current_sg = state_group_id
                        batch_groups.append((current_sg, room_id, event_id))
                        total_groups += 1
                    # Map this event to current state group
                    if current_sg is not None:
                        batch_mappings.append((event_id, current_sg))
                        total_mappings += 1
                # Flush this room's data
                if batch_groups or batch_mappings:
                    self._flush_state_groups(
                        batch_groups, batch_state, batch_edges, batch_mappings
                    )
            if (room_idx + 1) % 100 == 0:
                log.info(" ... processed %d/%d rooms", room_idx + 1, len(rooms))
        log.info(
            "Built %d state groups, %d event mappings across %d rooms",
            total_groups, total_mappings, len(rooms),
        )

    def _flush_state_groups(self, groups, state, edges, mappings):
        """Batch-insert one room's state group rows into the four Synapse tables."""
        with self.dst.cursor() as cur:
            if groups:
                psycopg2.extras.execute_values(
                    cur,
                    """
                    INSERT INTO state_groups (id, room_id, event_id)
                    VALUES %s
                    ON CONFLICT (id) DO NOTHING
                    """,
                    groups,
                    page_size=1000,
                )
            if state:
                psycopg2.extras.execute_values(
                    cur,
                    """
                    INSERT INTO state_groups_state (state_group, room_id, type, state_key, event_id)
                    VALUES %s
                    ON CONFLICT DO NOTHING
                    """,
                    state,
                    page_size=1000,
                )
            if edges:
                psycopg2.extras.execute_values(
                    cur,
                    """
                    INSERT INTO state_group_edges (state_group, prev_state_group)
                    VALUES %s
                    ON CONFLICT DO NOTHING
                    """,
                    edges,
                    page_size=1000,
                )
            if mappings:
                psycopg2.extras.execute_values(
                    cur,
                    """
                    INSERT INTO event_to_state_groups (event_id, state_group)
                    VALUES %s
                    ON CONFLICT (event_id) DO NOTHING
                    """,
                    mappings,
                    page_size=1000,
                )

    # ── Phase 5: Membership ─────────────────────────────────────────

    def migrate_membership(self):
        """Phase 5: migrate room_memberships and local_current_membership."""
        log.info("=== Phase 5: Membership ===")
        # Use syncapi_current_room_state for membership events (type = m.room.member)
        # These have the headered event JSON with all needed fields
        with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur:
            src_cur.execute(
                """
                SELECT room_id, event_id, sender, state_key, membership, headered_event_json
                FROM syncapi_current_room_state
                WHERE type = 'm.room.member'
                """
            )
            memberships = src_cur.fetchall()
        log.info("Found %d membership entries", len(memberships))
        with self.dst.cursor() as dst_cur:
            for m in memberships:
                event_id = m["event_id"]
                room_id = m["room_id"]
                user_id = m["state_key"]  # state_key is the target user for membership
                sender = m["sender"]
                membership = m["membership"]
                # Extract display_name and avatar_url from event content
                display_name = None
                avatar_url = None
                try:
                    ev = json.loads(m["headered_event_json"])
                    content = ev.get("content", {})
                    display_name = content.get("displayname")
                    avatar_url = content.get("avatar_url")
                except (json.JSONDecodeError, TypeError):
                    pass
                # Get event_stream_ordering from events table
                dst_cur.execute(
                    "SELECT stream_ordering FROM events WHERE event_id = %s",
                    (event_id,),
                )
                so_row = dst_cur.fetchone()
                event_stream_ordering = so_row[0] if so_row else 0
                dst_cur.execute(
                    """
                    INSERT INTO room_memberships
                        (event_id, user_id, sender, room_id, membership, forgotten,
                         display_name, avatar_url, event_stream_ordering)
                    VALUES (%s, %s, %s, %s, %s, 0, %s, %s, %s)
                    ON CONFLICT (event_id) DO UPDATE SET
                        membership = EXCLUDED.membership,
                        display_name = EXCLUDED.display_name,
                        avatar_url = EXCLUDED.avatar_url
                    """,
                    (
                        event_id,
                        user_id,
                        sender,
                        room_id,
                        membership,
                        display_name,
                        avatar_url,
                        event_stream_ordering,
                    ),
                )
                # local_current_membership for local users
                # unique index on (user_id, room_id) via local_current_membership_idx
                if user_id.endswith(f":{self.server_name}"):
                    dst_cur.execute(
                        """
                        INSERT INTO local_current_membership
                            (room_id, user_id, event_id, membership, event_stream_ordering)
                        VALUES (%s, %s, %s, %s, %s)
                        ON CONFLICT (user_id, room_id) DO UPDATE SET
                            event_id = EXCLUDED.event_id,
                            membership = EXCLUDED.membership,
                            event_stream_ordering = EXCLUDED.event_stream_ordering
                        """,
                        (room_id, user_id, event_id, membership, event_stream_ordering),
                    )
        log.info("Migrated %d memberships", len(memberships))
        if not self.dry_run:
            self.dst.commit()
        log.info("Phase 5 complete")
    # ── Phase 6: Media ──────────────────────────────────────────────

    def migrate_media(self):
        """Phase 6: migrate local media + thumbnail metadata, then copy files."""
        log.info("=== Phase 6: Media ===")
        with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur:
            # Only media originated by this server (local media) is migrated.
            src_cur.execute(
                """
                SELECT media_id, media_origin, content_type, file_size_bytes,
                       creation_ts, upload_name, base64hash, user_id
                FROM mediaapi_media_repository
                WHERE media_origin = %s
                """,
                (self.server_name,),
            )
            media = src_cur.fetchall()
        log.info("Found %d local media entries", len(media))
        with self.dst.cursor() as dst_cur:
            for m in media:
                # Synapse creation_ts is in milliseconds
                # NOTE(review): creation_ts is copied through unconverted —
                # confirm Dendrite also stores milliseconds here.
                dst_cur.execute(
                    """
                    INSERT INTO local_media_repository
                        (media_id, media_type, media_length, created_ts, upload_name, user_id)
                    VALUES (%s, %s, %s, %s, %s, %s)
                    ON CONFLICT (media_id) DO NOTHING
                    """,
                    (
                        m["media_id"],
                        m["content_type"],
                        m["file_size_bytes"],
                        m["creation_ts"],
                        m["upload_name"],
                        m["user_id"],
                    ),
                )
        log.info("Migrated %d media metadata entries", len(media))
        # Thumbnails
        with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur:
            src_cur.execute(
                """
                SELECT media_id, media_origin, content_type, file_size_bytes,
                       creation_ts, width, height, resize_method
                FROM mediaapi_thumbnail
                WHERE media_origin = %s
                """,
                (self.server_name,),
            )
            thumbs = src_cur.fetchall()
        with self.dst.cursor() as dst_cur:
            for t in thumbs:
                dst_cur.execute(
                    """
                    INSERT INTO local_media_repository_thumbnails
                        (media_id, thumbnail_width, thumbnail_height, thumbnail_type,
                         thumbnail_method, thumbnail_length)
                    VALUES (%s, %s, %s, %s, %s, %s)
                    ON CONFLICT (media_id, thumbnail_width, thumbnail_height,
                                 thumbnail_type, thumbnail_method) DO NOTHING
                    """,
                    (
                        t["media_id"],
                        t["width"],
                        t["height"],
                        t["content_type"],
                        t["resize_method"],
                        t["file_size_bytes"],
                    ),
                )
        log.info("Migrated %d thumbnail entries", len(thumbs))
        if not self.dry_run:
            self.dst.commit()
        # Copy actual files if paths provided
        if self.dendrite_media_path and self.synapse_media_path:
            self._copy_media_files(media, thumbs)
        else:
            log.warning("Media paths not provided, skipping file copy. "
                        "Use --dendrite-media-path and --synapse-media-path to copy files.")
        log.info("Phase 6 complete")

    @staticmethod
    def _dendrite_media_path(base: str, b64hash: str) -> str | None:
        """Dendrite content path: {base}/{hash[0]}/{hash[1]}/{hash[2:]}/file"""
        if not b64hash or len(b64hash) < 3:
            return None
        return os.path.join(base, b64hash[0], b64hash[1], b64hash[2:], "file")

    @staticmethod
    def _dendrite_thumb_path(base: str, b64hash: str, width: int, height: int, method: str) -> str | None:
        """Dendrite thumbnail path: {base}/{hash[0]}/{hash[1]}/{hash[2:]}/thumbnail-{w}x{h}-{method}"""
        if not b64hash or len(b64hash) < 3:
            return None
        return os.path.join(
            base,
            b64hash[0],
            b64hash[1],
            b64hash[2:],
            f"thumbnail-{width}x{height}-{method}",
        )

    @staticmethod
    def _synapse_content_path(base: str, media_id: str) -> str | None:
        """Synapse content path: {base}/local_content/{id[0:2]}/{id[2:4]}/{id[4:]}"""
        if len(media_id) < 4:
            return None
        return os.path.join(base, "local_content", media_id[0:2], media_id[2:4], media_id[4:])

    @staticmethod
    def _synapse_thumb_path(
        base: str,
        media_id: str,
        width: int,
        height: int,
        content_type: str,
        method: str,
    ) -> str | None:
        """Synapse thumbnail path:
        {base}/local_thumbnails/{id[0:2]}/{id[2:4]}/{id[4:]}/{w}-{h}-{top}-{sub}-{method}
        """
        if len(media_id) < 4:
            return None
        # content_type e.g. "image/jpeg" -> top="image", sub="jpeg"
        parts = content_type.split("/", 1)
        top = parts[0] if parts else "application"
        sub = parts[1] if len(parts) > 1 else "octet-stream"
        return os.path.join(
            base,
            "local_thumbnails",
            media_id[0:2],
            media_id[2:4],
            media_id[4:],
            f"{width}-{height}-{top}-{sub}-{method}",
        )

    def _copy_media_files(self, media_rows, thumb_rows):
        """Copy media files + thumbnails from Dendrite to Synapse path structure.

        Dendrite content: {base}/{hash[0]}/{hash[1]}/{hash[2:]}/file
        Synapse content: {base}/local_content/{id[0:2]}/{id[2:4]}/{id[4:]}
        Dendrite thumbnails: {base}/{hash[0]}/{hash[1]}/{hash[2:]}/thumbnail-{w}x{h}-{method}
        Synapse thumbnails: {base}/local_thumbnails/{id[0:2]}/{id[2:4]}/{id[4:]}/{w}-{h}-{top}-{sub}-{method}
        """
        log.info("Copying media content files...")
        copied = 0
        skipped = 0
        errors = 0
        for m in media_rows:
            src = self._dendrite_media_path(self.dendrite_media_path, m["base64hash"])
            dst = self._synapse_content_path(self.synapse_media_path, m["media_id"])
            if not src or not dst:
                log.warning("Media %s: invalid hash or media_id, skipping", m["media_id"])
                errors += 1
                continue
            # Existing destination files are treated as already migrated.
            if os.path.exists(dst):
                skipped += 1
                continue
            if not os.path.exists(src):
                log.warning("Source missing: %s (media_id=%s)", src, m["media_id"])
                errors += 1
                continue
            if not self.dry_run:
                os.makedirs(os.path.dirname(dst), exist_ok=True)
                shutil.copy2(src, dst)
            copied += 1
        log.info("Content files: %d copied, %d skipped (exist), %d errors", copied, skipped, errors)
        # Build media_id -> base64hash lookup for thumbnails
        hash_lookup = {m["media_id"]: m["base64hash"] for m in media_rows}
        log.info("Copying thumbnail files...")
        t_copied = 0
        t_skipped = 0
        t_errors = 0
        for t in thumb_rows:
            media_id = t["media_id"]
            b64hash = hash_lookup.get(media_id)
            if not b64hash:
                t_errors += 1
                continue
            src = self._dendrite_thumb_path(
                self.dendrite_media_path,
                b64hash,
                t["width"],
                t["height"],
                t["resize_method"],
            )
            dst = self._synapse_thumb_path(
                self.synapse_media_path,
                media_id,
                t["width"],
                t["height"],
                t["content_type"],
                t["resize_method"],
            )
            if not src or not dst:
                t_errors += 1
                continue
            if os.path.exists(dst):
                t_skipped += 1
                continue
            if not os.path.exists(src):
                log.debug("Thumbnail missing: %s (media_id=%s)", src, media_id)
                t_errors += 1
                continue
            if not self.dry_run:
                os.makedirs(os.path.dirname(dst), exist_ok=True)
                shutil.copy2(src, dst)
            t_copied += 1
        log.info("Thumbnails: %d copied, %d skipped (exist), %d errors",
                 t_copied, t_skipped, t_errors)
    # ── Phase 7: Auxiliary ──────────────────────────────────────────

    def migrate_auxiliary(self):
        """Phase 7: receipts, redactions, room stats and stream positions."""
        log.info("=== Phase 7: Auxiliary Data ===")
        self._migrate_receipts()
        self._migrate_redactions()
        self._populate_room_stats()
        self._update_stream_positions()
        if not self.dry_run:
            self.dst.commit()
        log.info("Phase 7 complete")

    def _migrate_receipts(self):
        """Migrate read receipts."""
        with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur:
            src_cur.execute(
                """
                SELECT id, room_id, receipt_type, user_id, event_id, receipt_ts
                FROM syncapi_receipts
                """
            )
            receipts = src_cur.fetchall()
        log.info("Found %d receipts", len(receipts))
        with self.dst.cursor() as dst_cur:
            for r in receipts:
                # receipts_linearized - partial unique index WHERE thread_id IS NULL
                # Dendrite's receipt stream id is reused as Synapse's stream_id.
                dst_cur.execute(
                    """
                    INSERT INTO receipts_linearized
                        (stream_id, room_id, receipt_type, user_id, event_id, data,
                         instance_name, thread_id)
                    VALUES (%s, %s, %s, %s, %s, '{}', 'master', NULL)
                    ON CONFLICT (room_id, receipt_type, user_id) WHERE thread_id IS NULL
                    DO UPDATE SET event_id = EXCLUDED.event_id, stream_id = EXCLUDED.stream_id
                    """,
                    (r["id"], r["room_id"], r["receipt_type"], r["user_id"], r["event_id"]),
                )
                # receipts_graph - use partial unique index for NULL thread_id
                event_ids_json = json.dumps([r["event_id"]])
                dst_cur.execute(
                    """
                    INSERT INTO receipts_graph
                        (room_id, receipt_type, user_id, event_ids, data, thread_id)
                    VALUES (%s, %s, %s, %s, '{}', NULL)
                    ON CONFLICT (room_id, receipt_type, user_id) WHERE thread_id IS NULL
                    DO UPDATE SET event_ids = EXCLUDED.event_ids
                    """,
                    (r["room_id"], r["receipt_type"], r["user_id"], event_ids_json),
                )
        log.info("Migrated %d receipts", len(receipts))

    def _migrate_redactions(self):
        """Migrate redaction tracking."""
        with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur:
            src_cur.execute(
                "SELECT redaction_event_id, redacts_event_id, validated FROM roomserver_redactions"
            )
            redactions = src_cur.fetchall()
        log.info("Found %d redactions", len(redactions))
        with self.dst.cursor() as dst_cur:
            for r in redactions:
                # Get received_ts from the event
                dst_cur.execute(
                    "SELECT received_ts FROM events WHERE event_id = %s",
                    (r["redaction_event_id"],),
                )
                row = dst_cur.fetchone()
                received_ts = row[0] if row else 0
                dst_cur.execute(
                    """
                    INSERT INTO redactions (event_id, redacts, have_censored, received_ts)
                    VALUES (%s, %s, %s, %s)
                    ON CONFLICT (event_id) DO NOTHING
                    """,
                    (r["redaction_event_id"], r["redacts_event_id"], r["validated"], received_ts),
                )
        log.info("Migrated %d redactions", len(redactions))
redactions", len(redactions)) with self.dst.cursor() as dst_cur: for r in redactions: # Get received_ts from the event dst_cur.execute( "SELECT received_ts FROM events WHERE event_id = %s", (r["redaction_event_id"],), ) row = dst_cur.fetchone() received_ts = row[0] if row else 0 dst_cur.execute( """ INSERT INTO redactions (event_id, redacts, have_censored, received_ts) VALUES (%s, %s, %s, %s) ON CONFLICT (event_id) DO NOTHING """, (r["redaction_event_id"], r["redacts_event_id"], r["validated"], received_ts), ) log.info("Migrated %d redactions", len(redactions)) def _populate_room_stats(self): """Populate room_stats_current and room_stats_state from migrated data.""" log.info("Populating room stats...") with self.dst.cursor() as cur: # room_stats_current: count members by membership type cur.execute( """ INSERT INTO room_stats_current (room_id, current_state_events, joined_members, invited_members, left_members, banned_members, local_users_in_room, completed_delta_stream_id, knocked_members) SELECT r.room_id, COALESCE(cse.cnt, 0), COALESCE(jm.cnt, 0), COALESCE(im.cnt, 0), COALESCE(lm.cnt, 0), COALESCE(bm.cnt, 0), COALESCE(lu.cnt, 0), 0, 0 FROM rooms r LEFT JOIN ( SELECT room_id, COUNT(*) cnt FROM current_state_events GROUP BY room_id ) cse ON cse.room_id = r.room_id LEFT JOIN ( SELECT room_id, COUNT(*) cnt FROM room_memberships WHERE membership = 'join' GROUP BY room_id ) jm ON jm.room_id = r.room_id LEFT JOIN ( SELECT room_id, COUNT(*) cnt FROM room_memberships WHERE membership = 'invite' GROUP BY room_id ) im ON im.room_id = r.room_id LEFT JOIN ( SELECT room_id, COUNT(*) cnt FROM room_memberships WHERE membership = 'leave' GROUP BY room_id ) lm ON lm.room_id = r.room_id LEFT JOIN ( SELECT room_id, COUNT(*) cnt FROM room_memberships WHERE membership = 'ban' GROUP BY room_id ) bm ON bm.room_id = r.room_id LEFT JOIN ( SELECT lcm.room_id, COUNT(*) cnt FROM local_current_membership lcm WHERE lcm.membership = 'join' GROUP BY lcm.room_id ) lu ON lu.room_id = r.room_id ON 
CONFLICT (room_id) DO UPDATE SET current_state_events = EXCLUDED.current_state_events, joined_members = EXCLUDED.joined_members, invited_members = EXCLUDED.invited_members, left_members = EXCLUDED.left_members, banned_members = EXCLUDED.banned_members, local_users_in_room = EXCLUDED.local_users_in_room """ ) log.info("Populated room_stats_current: %d rows", cur.rowcount) # room_stats_state: extract from current state events cur.execute( """ INSERT INTO room_stats_state (room_id, name, canonical_alias, join_rules, history_visibility, encryption, avatar, guest_access, is_federatable, topic) SELECT r.room_id, MAX(CASE WHEN cse.type = 'm.room.name' THEN (SELECT e.content::jsonb->>'name' FROM events e WHERE e.event_id = cse.event_id) END), MAX(CASE WHEN cse.type = 'm.room.canonical_alias' THEN (SELECT e.content::jsonb->>'alias' FROM events e WHERE e.event_id = cse.event_id) END), MAX(CASE WHEN cse.type = 'm.room.join_rules' THEN (SELECT e.content::jsonb->>'join_rule' FROM events e WHERE e.event_id = cse.event_id) END), MAX(CASE WHEN cse.type = 'm.room.history_visibility' THEN (SELECT e.content::jsonb->>'history_visibility' FROM events e WHERE e.event_id = cse.event_id) END), MAX(CASE WHEN cse.type = 'm.room.encryption' THEN (SELECT e.content::jsonb->>'algorithm' FROM events e WHERE e.event_id = cse.event_id) END), MAX(CASE WHEN cse.type = 'm.room.avatar' THEN (SELECT e.content::jsonb->>'url' FROM events e WHERE e.event_id = cse.event_id) END), MAX(CASE WHEN cse.type = 'm.room.guest_access' THEN (SELECT e.content::jsonb->>'guest_access' FROM events e WHERE e.event_id = cse.event_id) END), TRUE, MAX(CASE WHEN cse.type = 'm.room.topic' THEN (SELECT e.content::jsonb->>'topic' FROM events e WHERE e.event_id = cse.event_id) END) FROM rooms r JOIN current_state_events cse ON cse.room_id = r.room_id WHERE cse.type IN ('m.room.name', 'm.room.canonical_alias', 'm.room.join_rules', 'm.room.history_visibility', 'm.room.encryption', 'm.room.avatar', 'm.room.guest_access', 
'm.room.topic') GROUP BY r.room_id ON CONFLICT (room_id) DO UPDATE SET name = EXCLUDED.name, canonical_alias = EXCLUDED.canonical_alias, join_rules = EXCLUDED.join_rules, history_visibility = EXCLUDED.history_visibility, encryption = EXCLUDED.encryption, avatar = EXCLUDED.avatar, guest_access = EXCLUDED.guest_access, topic = EXCLUDED.topic """ ) log.info("Populated room_stats_state: %d rows", cur.rowcount) def _update_stream_positions(self): """Update various stream position trackers so Synapse knows where to resume.""" log.info("Updating stream positions...") with self.dst.cursor() as cur: # Get max stream_ordering from events cur.execute("SELECT COALESCE(MAX(stream_ordering), 0) FROM events") max_stream = cur.fetchone()[0] # Update the events stream sequence if max_stream > 0: cur.execute( "SELECT setval('events_stream_seq', %s, true)", (max_stream,), ) # Get max receipt stream_id cur.execute("SELECT COALESCE(MAX(stream_id), 0) FROM receipts_linearized") max_receipt_stream = cur.fetchone()[0] # user_stats_current cur.execute( """ INSERT INTO user_stats_current (user_id, joined_rooms, completed_delta_stream_id) SELECT u.name, COALESCE(jr.cnt, 0), 0 FROM users u LEFT JOIN ( SELECT user_id, COUNT(*) cnt FROM local_current_membership WHERE membership = 'join' GROUP BY user_id ) jr ON jr.user_id = u.name ON CONFLICT (user_id) DO UPDATE SET joined_rooms = EXCLUDED.joined_rooms """ ) log.info("Updated user stats: %d rows", cur.rowcount) log.info("Stream positions: events=%d, receipts=%d", max_stream, max_receipt_stream) # ── Phase 8: E2EE Keys ───────────────────────────────────────── _CROSS_SIGNING_KEY_TYPE = {1: "master", 2: "self_signing", 3: "user_signing"} def migrate_e2ee(self): log.info("=== Phase 8: E2EE Keys ===") self._migrate_key_backup() self._migrate_device_keys() self._migrate_one_time_keys() self._migrate_fallback_keys() self._migrate_cross_signing_keys() self._migrate_cross_signing_sigs() self._migrate_device_inbox() self._migrate_device_lists_stream() 
if not self.dry_run: self.dst.commit() log.info("Phase 8 complete") def _migrate_key_backup(self): """Migrate server-side E2EE key backup (versions + room keys).""" with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur: src_cur.execute("SELECT user_id, version, algorithm, auth_data, etag, deleted FROM userapi_key_backup_versions") versions = src_cur.fetchall() log.info("Found %d key backup versions", len(versions)) with self.dst.cursor() as dst_cur: for v in versions: # Dendrite user_id is localpart, prepend @...:server user_id = v["user_id"] if v["user_id"].startswith("@") else f"@{v['user_id']}:{self.server_name}" # etag is text in Dendrite, bigint in Synapse try: etag = int(v["etag"]) except (ValueError, TypeError): etag = 0 dst_cur.execute( """ INSERT INTO e2e_room_keys_versions (user_id, version, algorithm, auth_data, deleted, etag) VALUES (%s, %s, %s, %s, %s, %s) ON CONFLICT (user_id, version) DO NOTHING """, (user_id, v["version"], v["algorithm"], v["auth_data"], v["deleted"], etag), ) log.info("Migrated %d key backup versions", len(versions)) with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur: src_cur.execute( "SELECT user_id, room_id, session_id, version, first_message_index, " "forwarded_count, is_verified, session_data FROM userapi_key_backups" ) keys = src_cur.fetchall() log.info("Found %d backed-up room keys", len(keys)) with self.dst.cursor() as dst_cur: for k in keys: user_id = k["user_id"] if k["user_id"].startswith("@") else f"@{k['user_id']}:{self.server_name}" dst_cur.execute( """ INSERT INTO e2e_room_keys (user_id, room_id, session_id, version, first_message_index, forwarded_count, is_verified, session_data) VALUES (%s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT (user_id, version, room_id, session_id) DO NOTHING """, (user_id, k["room_id"], k["session_id"], int(k["version"]), k["first_message_index"], k["forwarded_count"], k["is_verified"], k["session_data"]), ) log.info("Migrated %d backed-up room 
keys", len(keys)) def _migrate_device_keys(self): """Migrate E2E device keys.""" with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur: src_cur.execute( "SELECT user_id, device_id, ts_added_secs, key_json FROM keyserver_device_keys" ) keys = src_cur.fetchall() log.info("Found %d device keys", len(keys)) with self.dst.cursor() as dst_cur: for k in keys: dst_cur.execute( """ INSERT INTO e2e_device_keys_json (user_id, device_id, ts_added_ms, key_json) VALUES (%s, %s, %s, %s) ON CONFLICT (user_id, device_id) DO NOTHING """, (k["user_id"], k["device_id"], k["ts_added_secs"] * 1000, k["key_json"]), ) log.info("Migrated %d device keys", len(keys)) def _migrate_one_time_keys(self): """Migrate E2E one-time keys.""" with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur: src_cur.execute( "SELECT user_id, device_id, key_id, algorithm, ts_added_secs, key_json " "FROM keyserver_one_time_keys" ) keys = src_cur.fetchall() log.info("Found %d one-time keys", len(keys)) with self.dst.cursor() as dst_cur: for k in keys: dst_cur.execute( """ INSERT INTO e2e_one_time_keys_json (user_id, device_id, algorithm, key_id, ts_added_ms, key_json) VALUES (%s, %s, %s, %s, %s, %s) ON CONFLICT (user_id, device_id, algorithm, key_id) DO NOTHING """, (k["user_id"], k["device_id"], k["algorithm"], k["key_id"], k["ts_added_secs"] * 1000, k["key_json"]), ) log.info("Migrated %d one-time keys", len(keys)) def _migrate_fallback_keys(self): """Migrate fallback keys (used when OTKs exhausted; essential for new Olm sessions).""" with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur: src_cur.execute( "SELECT user_id, device_id, key_id, algorithm, key_json, used " "FROM keyserver_fallback_keys" ) keys = src_cur.fetchall() log.info("Found %d fallback keys", len(keys)) with self.dst.cursor() as dst_cur: for k in keys: dst_cur.execute( """ INSERT INTO e2e_fallback_keys_json (user_id, device_id, algorithm, key_id, key_json, used) VALUES (%s, %s, %s, 
%s, %s, %s) ON CONFLICT (user_id, device_id, algorithm) DO NOTHING """, (k["user_id"], k["device_id"], k["algorithm"], k["key_id"], k["key_json"], k["used"]), ) log.info("Migrated %d fallback keys", len(keys)) def _migrate_cross_signing_keys(self): """Migrate cross-signing keys (key_type int -> string).""" with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur: src_cur.execute("SELECT user_id, key_type, key_data FROM keyserver_cross_signing_keys") keys = src_cur.fetchall() log.info("Found %d cross-signing keys", len(keys)) with self.dst.cursor() as dst_cur: # Use the e2e_cross_signing_keys_sequence so post-migration writes # don't collide on the UNIQUE(stream_id) index. for k in keys: key_type_str = self._CROSS_SIGNING_KEY_TYPE.get(k["key_type"]) if not key_type_str: log.warning("Unknown cross-signing key_type %d for user %s", k["key_type"], k["user_id"]) continue dst_cur.execute("SELECT nextval('e2e_cross_signing_keys_sequence')") stream_id = dst_cur.fetchone()[0] dst_cur.execute( """ INSERT INTO e2e_cross_signing_keys (user_id, keytype, keydata, stream_id) VALUES (%s, %s, %s, %s) ON CONFLICT (user_id, keytype, stream_id) DO NOTHING """, (k["user_id"], key_type_str, k["key_data"], stream_id), ) log.info("Migrated %d cross-signing keys", len(keys)) def _migrate_cross_signing_sigs(self): """Migrate cross-signing signatures.""" with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur: src_cur.execute( "SELECT origin_user_id, origin_key_id, target_user_id, target_key_id, signature " "FROM keyserver_cross_signing_sigs" ) sigs = src_cur.fetchall() log.info("Found %d cross-signing signatures", len(sigs)) with self.dst.cursor() as dst_cur: for s in sigs: dst_cur.execute( """ INSERT INTO e2e_cross_signing_signatures (user_id, key_id, target_user_id, target_device_id, signature) VALUES (%s, %s, %s, %s, %s) ON CONFLICT DO NOTHING """, (s["origin_user_id"], s["origin_key_id"], s["target_user_id"], s["target_key_id"], s["signature"]), ) 
log.info("Migrated %d cross-signing signatures", len(sigs)) def _migrate_device_inbox(self): """Migrate pending to-device messages (undelivered m.room_key shares etc). Dendrite holds these in syncapi_send_to_device until a client syncs. These rows typically contain m.room.encrypted Olm messages carrying Megolm session keys — dropping them causes permanent key loss for recipients that were offline at migration time. """ with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur: src_cur.execute( "SELECT id, user_id, device_id, content " "FROM syncapi_send_to_device ORDER BY id" ) msgs = src_cur.fetchall() log.info("Found %d pending to-device messages", len(msgs)) if not msgs: return with self.dst.cursor() as dst_cur: # Use the device_inbox_sequence so Synapse's next stream positions # continue past our migrated range. for m in msgs: dst_cur.execute("SELECT nextval('device_inbox_sequence')") stream_id = dst_cur.fetchone()[0] dst_cur.execute( """ INSERT INTO device_inbox (user_id, device_id, stream_id, message_json, instance_name) VALUES (%s, %s, %s, %s, 'master') """, (m["user_id"], m["device_id"], stream_id, m["content"]), ) log.info("Migrated %d to-device messages", len(msgs)) def _migrate_device_lists_stream(self): """Bootstrap device_lists_stream for every local device. Without an entry here, clients never see the device in /sync's `device_lists.changed` field and rely on stale cached keys. One synthetic stream entry per local device forces a re-verification on the next sync, which fixes the "device key mismatch → refuse to decrypt" class of breakage after migration. """ with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur: # Only local devices (matching our server_name). Remote devices # live in device_lists_remote_cache and aren't streamed. 
src_cur.execute( """ SELECT DISTINCT k.user_id, k.device_id FROM keyserver_device_keys k WHERE k.user_id LIKE %s """, (f"%:{self.server_name}",), ) rows = src_cur.fetchall() log.info("Bootstrapping %d device_lists_stream entries", len(rows)) with self.dst.cursor() as dst_cur: for r in rows: dst_cur.execute("SELECT nextval('device_lists_sequence')") stream_id = dst_cur.fetchone()[0] dst_cur.execute( """ INSERT INTO device_lists_stream (stream_id, user_id, device_id, instance_name) VALUES (%s, %s, %s, 'master') """, (stream_id, r["user_id"], r["device_id"]), ) log.info("Inserted %d device_lists_stream rows", len(rows)) # ── Run ───────────────────────────────────────────────────────── def run(self, phases: list[int]): self.load_nid_caches() phase_map = { 1: ("Users & Profiles", self.migrate_users), 2: ("Rooms", self.migrate_rooms), 3: ("Events", self.migrate_events), 4: ("Room State", self.migrate_room_state), 5: ("Membership", self.migrate_membership), 6: ("Media", self.migrate_media), 7: ("Auxiliary", self.migrate_auxiliary), 8: ("E2EE Keys", self.migrate_e2ee), } for phase_num in sorted(phases): if phase_num in phase_map: name, func = phase_map[phase_num] log.info("Starting phase %d: %s", phase_num, name) try: func() except Exception: log.exception("Phase %d failed", phase_num) if not self.dry_run: self.dst.rollback() raise log.info("Migration complete!") # Print summary self._print_summary() def _print_summary(self): """Print migration summary counts.""" with self.dst.cursor() as cur: tables = [ "users", "profiles", "devices", "rooms", "room_aliases", "events", "event_json", "state_events", "current_state_events", "room_memberships", "local_current_membership", "local_media_repository", "receipts_linearized", "redactions", "e2e_room_keys_versions", "e2e_room_keys", "e2e_device_keys_json", "e2e_one_time_keys_json", "e2e_fallback_keys_json", "e2e_cross_signing_keys", "e2e_cross_signing_signatures", "device_inbox", "device_lists_stream", "access_tokens", ] 
log.info("=== Migration Summary ===") for table in tables: try: cur.execute(f"SELECT COUNT(*) FROM {table}") # noqa: S608 count = cur.fetchone()[0] log.info(" %-35s %d rows", table, count) except Exception: self.dst.rollback() PHASE_HELP = """\ Migration phases (use --phase to select): Phase 1 - Users & Profiles Migrates user accounts, display names, avatars, and devices. Dendrite tables: userapi_accounts, userapi_profiles, userapi_devices Synapse tables: users, profiles, devices Notes: Timestamps converted from milliseconds to seconds. Account type mapped to admin/guest flags. Passwords (bcrypt hashes) copied as-is. Phase 2 - Rooms Migrates room metadata and aliases. Dendrite tables: roomserver_rooms, roomserver_room_aliases Synapse tables: rooms, room_aliases, room_alias_servers Notes: Room creator extracted from m.room.create events in Dendrite. Phase 3 - Events Core migration: converts all non-rejected events with full denormalization of Dendrite's numeric IDs (NIDs) to text IDs. Dendrite tables: roomserver_events, roomserver_event_json, roomserver_event_types, roomserver_event_state_keys Synapse tables: events, event_json, state_events, event_edges, event_auth, event_forward_extremities, room_depth Notes: topological_ordering set to event depth (not a counter). Event format version derived from room version. Rejected events skipped. Forward extremities taken from Dendrite's latest_event_nids. Event graph (prev_events, auth_events) built from event JSON. Phase 4 - Room State Builds current state snapshot and incremental state groups for correct historical state lookups. Dendrite tables: syncapi_current_room_state Synapse tables: current_state_events, state_groups, state_groups_state, state_group_edges, event_to_state_groups Notes: One state group created per state event (delta chain). Every event mapped to the state group that was active when it was processed. Phase 5 - Membership Migrates room membership (joins, leaves, invites, bans). 
Dendrite tables: syncapi_current_room_state (type=m.room.member) Synapse tables: room_memberships, local_current_membership Notes: Display name and avatar extracted from membership event content. local_current_membership populated only for users on this server. Phase 6 - Media Migrates media metadata and optionally copies files between filesystem layouts. Requires --dendrite-media-path and --synapse-media-path to copy actual files; without them, only database metadata is migrated. Dendrite tables: mediaapi_media_repository, mediaapi_thumbnail Synapse tables: local_media_repository, local_media_repository_thumbnails File layout conversion: Content: {dendrite}/H/A/SH.../file -> {synapse}/local_content/me/di/a_id... Thumbnails: {dendrite}/H/A/SH.../thumbnail-WxH-method -> {synapse}/local_thumbnails/me/di/a_id.../W-H-type-subtype-method Phase 7 - Auxiliary Data Migrates read receipts, redactions, and populates statistics/caches that Synapse expects. Dendrite tables: syncapi_receipts, roomserver_redactions Synapse tables: receipts_linearized, receipts_graph, redactions, room_stats_current, room_stats_state, user_stats_current Notes: Updates the events_stream_seq sequence so Synapse continues from the correct position. Room stats populated from migrated membership data. Phase 8 - E2EE Keys Migrates end-to-end encryption data: server-side key backups, device keys, one-time keys, fallback keys, cross-signing keys and signatures, the to-device inbox, and a bootstrap of the device_lists_stream. This phase is critical for restoring encrypted message history and for keeping existing Olm/Megolm sessions alive across the migration. 
Dendrite tables: userapi_key_backup_versions, userapi_key_backups, keyserver_device_keys, keyserver_one_time_keys, keyserver_fallback_keys, keyserver_cross_signing_keys, keyserver_cross_signing_sigs, syncapi_send_to_device Synapse tables: e2e_room_keys_versions, e2e_room_keys, e2e_device_keys_json, e2e_one_time_keys_json, e2e_fallback_keys_json, e2e_cross_signing_keys, e2e_cross_signing_signatures, device_inbox, device_lists_stream Notes: - Cross-signing key_type int (1=master, 2=self_signing, 3=user_signing) converted to text; stream_id drawn from e2e_cross_signing_keys_sequence so post-migration writes don't collide on UNIQUE(stream_id). - Timestamps seconds -> milliseconds for device/OTK keys. - syncapi_send_to_device -> device_inbox preserves undelivered m.room.encrypted Olm messages (these carry m.room_key Megolm shares to offline devices; dropping them causes permanent key loss). - device_lists_stream is bootstrapped from local keyserver_device_keys so clients see every device as "changed" on next sync and re-verify against the migrated e2e_device_keys_json, preventing stale-cache device-key mismatches. Related: - Access tokens are copied alongside devices in Phase 1 so clients keep their existing sessions (no re-login -> device_id continuity preserved -> local Megolm store on each client stays valid). """ def main(): parser = argparse.ArgumentParser( description="Migrate a Dendrite PostgreSQL database to Synapse.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=PHASE_HELP + """\ prerequisites: 1. Both Dendrite and Synapse databases must be PostgreSQL. 2. Synapse must be initialized BEFORE running this script - start Synapse once against an empty database so it creates its schema (v93+), then stop it before migrating. 3. The Dendrite database should not be actively written to during migration. 
examples: # Full migration (all phases): %(prog)s \\ --dendrite-db "dbname=dendrite host=/run/postgresql" \\ --synapse-db "dbname=synapse host=/run/postgresql" \\ --server-name example.com # With media file copying: %(prog)s \\ --dendrite-db "dbname=dendrite host=/run/postgresql" \\ --synapse-db "dbname=synapse host=/run/postgresql" \\ --server-name example.com \\ --dendrite-media-path /var/lib/dendrite/media \\ --synapse-media-path /var/lib/synapse/media_store # Re-run only media phase: %(prog)s ... --phase 6 # Dry run (inspect without committing): %(prog)s ... --dry-run -v # Copy only media files (no database writes): %(prog)s --media-only \\ --dendrite-db "dbname=dendrite host=/run/postgresql" \\ --server-name example.com \\ --dendrite-media-path /var/lib/dendrite/media \\ --synapse-media-path /var/lib/synapse/media_store """, ) parser.add_argument( "--dendrite-db", required=True, metavar="DSN", help="Dendrite PostgreSQL connection string. Accepts any format " "supported by libpq: a URI like " '"postgresql://user:pass@host/dbname" or a keyword string like ' '"dbname=dendrite host=/run/postgresql". ' "This database is opened read-only.", ) parser.add_argument( "--synapse-db", metavar="DSN", help="Synapse PostgreSQL connection string (same formats as " "--dendrite-db). The Synapse schema must already exist - run " "Synapse once to create it. This database is written to.", ) parser.add_argument( "--server-name", metavar="NAME", help="The Matrix server name (e.g., example.com). Must match the " "server_name in both your Dendrite and Synapse configs. Used to " "construct full user IDs (@user:NAME) and to filter local media.", ) parser.add_argument( "--dendrite-media-path", metavar="DIR", help="Path to Dendrite's media_store directory (the directory " "containing hash-based subdirectories). Required together with " "--synapse-media-path to copy media files. 
Without both flags, " "only database metadata is migrated and files must be copied " "manually.", ) parser.add_argument( "--synapse-media-path", metavar="DIR", help="Path to Synapse's media_store directory (where local_content/ " "and local_thumbnails/ will be created). Required together with " "--dendrite-media-path.", ) parser.add_argument( "--phase", default="1,2,3,4,5,6,7,8", metavar="N[,N...]", help="Comma-separated list of phases to run, from 1-8 (default: all). " "Phases must be run in order on first migration. Individual phases " "can be re-run safely (idempotent via ON CONFLICT). " "Example: --phase 1,2,3 or --phase 6", ) parser.add_argument( "--media-only", action="store_true", help="Copy media files only, without touching any database. Reads " "media metadata (file hashes and IDs) from the Dendrite database " "to determine source/destination paths, but writes nothing to " "either database. Requires --dendrite-media-path and " "--synapse-media-path. --synapse-db is not needed in this mode.", ) parser.add_argument( "--dry-run", action="store_true", help="Run the migration without committing. All database changes are " "rolled back at the end of each phase. Media files are not " "copied. 
Useful for verifying the migration will succeed.", ) parser.add_argument( "--verbose", "-v", action="store_true", help="Enable debug logging (shows per-event warnings, SQL details).", ) args = parser.parse_args() if args.verbose: logging.getLogger().setLevel(logging.DEBUG) if not args.server_name: parser.error("--server-name is required") if args.media_only: if not args.dendrite_media_path or not args.synapse_media_path: parser.error("--media-only requires both --dendrite-media-path and --synapse-media-path") _run_media_only(args) else: if not args.synapse_db: parser.error("--synapse-db is required (unless using --media-only)") phases = [int(p.strip()) for p in args.phase.split(",")] migrator = Migrator( dendrite_dsn=args.dendrite_db, synapse_dsn=args.synapse_db, server_name=args.server_name, dendrite_media_path=args.dendrite_media_path, synapse_media_path=args.synapse_media_path, dry_run=args.dry_run, ) try: migrator.run(phases) finally: migrator.close() def _run_media_only(args): """Copy media files using Dendrite DB for path lookup, no Synapse DB needed.""" log.info("=== Media-only mode: copying files without database changes ===") src_conn = psycopg2.connect(args.dendrite_db) src_conn.set_session(readonly=True) try: with src_conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: cur.execute( "SELECT media_id, base64hash, content_type FROM mediaapi_media_repository " "WHERE media_origin = %s", (args.server_name,), ) media = cur.fetchall() cur.execute( "SELECT t.media_id, m.base64hash, t.width, t.height, " " t.resize_method, t.content_type " "FROM mediaapi_thumbnail t " "JOIN mediaapi_media_repository m " " ON t.media_id = m.media_id AND t.media_origin = m.media_origin " "WHERE t.media_origin = %s", (args.server_name,), ) thumbs = cur.fetchall() log.info("Found %d media files and %d thumbnails to copy", len(media), len(thumbs)) # Reuse static path methods from Migrator _dp = Migrator._dendrite_media_path _sp = Migrator._synapse_content_path _dt = 
Migrator._dendrite_thumb_path _st = Migrator._synapse_thumb_path copied, skipped, errors = 0, 0, 0 for m in media: src = _dp(args.dendrite_media_path, m["base64hash"]) dst = _sp(args.synapse_media_path, m["media_id"]) if not src or not dst: errors += 1 continue if os.path.exists(dst): skipped += 1 continue if not os.path.exists(src): log.warning("Source missing: %s (media_id=%s)", src, m["media_id"]) errors += 1 continue if not args.dry_run: os.makedirs(os.path.dirname(dst), exist_ok=True) shutil.copy2(src, dst) copied += 1 log.info("Content files: %d copied, %d skipped (exist), %d errors", copied, skipped, errors) t_copied, t_skipped, t_errors = 0, 0, 0 for t in thumbs: src = _dt(args.dendrite_media_path, t["base64hash"], t["width"], t["height"], t["resize_method"]) dst = _st(args.synapse_media_path, t["media_id"], t["width"], t["height"], t["content_type"], t["resize_method"]) if not src or not dst: t_errors += 1 continue if os.path.exists(dst): t_skipped += 1 continue if not os.path.exists(src): log.debug("Thumbnail missing: %s", src) t_errors += 1 continue if not args.dry_run: os.makedirs(os.path.dirname(dst), exist_ok=True) shutil.copy2(src, dst) t_copied += 1 log.info("Thumbnails: %d copied, %d skipped (exist), %d errors", t_copied, t_skipped, t_errors) finally: src_conn.close() if __name__ == "__main__": main()