fix: potentially fix E2EE from key backups

This commit is contained in:
2026-04-12 12:05:32 +02:00
parent 9693c714d6
commit 3b85872350
3 changed files with 192 additions and 18 deletions

View File

@@ -215,11 +215,15 @@ class Migrator:
src_cur3 = self.src.cursor(cursor_factory=psycopg2.extras.DictCursor)
src_cur3.execute(
"SELECT device_id, localpart, server_name, display_name, "
"created_ts, last_seen_ts, ip, user_agent FROM userapi_devices"
"created_ts, last_seen_ts, ip, user_agent, access_token FROM userapi_devices"
)
devices = src_cur3.fetchall()
src_cur3.close()
# Determine starting id for access_tokens (fresh DB = 1)
dst_cur.execute("SELECT COALESCE(MAX(id), 0) FROM access_tokens")
next_token_id = dst_cur.fetchone()[0] + 1
for dev in devices:
user_id = f"@{dev['localpart']}:{dev['server_name']}"
dst_cur.execute(
@@ -242,7 +246,21 @@ class Migrator:
),
)
log.info("Migrated %d devices", len(devices))
# Preserve existing client sessions — without this clients are forced
# to re-login, losing device_id continuity and with it the local
# Megolm key store on many clients (critical for E2EE history).
if dev["access_token"]:
dst_cur.execute(
"""
INSERT INTO access_tokens (id, user_id, device_id, token, valid_until_ms)
VALUES (%s, %s, %s, %s, NULL)
ON CONFLICT (token) DO NOTHING
""",
(next_token_id, user_id, dev["device_id"], dev["access_token"]),
)
next_token_id += 1
log.info("Migrated %d devices (+ access tokens)", len(devices))
if not self.dry_run:
self.dst.commit()
@@ -1387,8 +1405,11 @@ class Migrator:
self._migrate_key_backup()
self._migrate_device_keys()
self._migrate_one_time_keys()
self._migrate_fallback_keys()
self._migrate_cross_signing_keys()
self._migrate_cross_signing_sigs()
self._migrate_device_inbox()
self._migrate_device_lists_stream()
if not self.dry_run:
self.dst.commit()
@@ -1496,6 +1517,31 @@ class Migrator:
)
log.info("Migrated %d one-time keys", len(keys))
def _migrate_fallback_keys(self):
    """Migrate fallback keys (used when OTKs exhausted; essential for new Olm sessions)."""
    # Pull every fallback key out of Dendrite's keyserver table.
    with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as reader:
        reader.execute(
            "SELECT user_id, device_id, key_id, algorithm, key_json, used "
            "FROM keyserver_fallback_keys"
        )
        rows = reader.fetchall()
    log.info("Found %d fallback keys", len(rows))
    # Synapse keeps at most one fallback key per (user, device, algorithm),
    # so conflicts on that tuple are silently skipped.
    insert_sql = """
                INSERT INTO e2e_fallback_keys_json
                (user_id, device_id, algorithm, key_id, key_json, used)
                VALUES (%s, %s, %s, %s, %s, %s)
                ON CONFLICT (user_id, device_id, algorithm) DO NOTHING
                """
    with self.dst.cursor() as writer:
        for row in rows:
            params = (
                row["user_id"],
                row["device_id"],
                row["algorithm"],
                row["key_id"],
                row["key_json"],
                row["used"],
            )
            writer.execute(insert_sql, params)
    log.info("Migrated %d fallback keys", len(rows))
def _migrate_cross_signing_keys(self):
"""Migrate cross-signing keys (key_type int -> string)."""
with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as src_cur:
@@ -1505,13 +1551,15 @@ class Migrator:
log.info("Found %d cross-signing keys", len(keys))
with self.dst.cursor() as dst_cur:
# Need a stream_id sequence for cross-signing keys
stream_id = 1
# Use the e2e_cross_signing_keys_sequence so post-migration writes
# don't collide on the UNIQUE(stream_id) index.
for k in keys:
key_type_str = self._CROSS_SIGNING_KEY_TYPE.get(k["key_type"])
if not key_type_str:
log.warning("Unknown cross-signing key_type %d for user %s", k["key_type"], k["user_id"])
continue
dst_cur.execute("SELECT nextval('e2e_cross_signing_keys_sequence')")
stream_id = dst_cur.fetchone()[0]
dst_cur.execute(
"""
INSERT INTO e2e_cross_signing_keys
@@ -1521,7 +1569,6 @@ class Migrator:
""",
(k["user_id"], key_type_str, k["key_data"], stream_id),
)
stream_id += 1
log.info("Migrated %d cross-signing keys", len(keys))
def _migrate_cross_signing_sigs(self):
@@ -1549,6 +1596,80 @@ class Migrator:
)
log.info("Migrated %d cross-signing signatures", len(sigs))
def _migrate_device_inbox(self):
    """Migrate pending to-device messages (undelivered m.room_key shares etc).

    Dendrite holds these in syncapi_send_to_device until a client syncs.
    These rows typically contain m.room.encrypted Olm messages carrying
    Megolm session keys — dropping them causes permanent key loss for
    recipients that were offline at migration time.
    """
    # Read undelivered messages in their original delivery order.
    with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as reader:
        reader.execute(
            "SELECT id, user_id, device_id, content "
            "FROM syncapi_send_to_device ORDER BY id"
        )
        pending = reader.fetchall()
    log.info("Found %d pending to-device messages", len(pending))
    if not pending:
        return
    with self.dst.cursor() as writer:
        # Use the device_inbox_sequence so Synapse's next stream positions
        # continue past our migrated range.
        for msg in pending:
            writer.execute("SELECT nextval('device_inbox_sequence')")
            (stream_pos,) = writer.fetchone()
            writer.execute(
                """
                INSERT INTO device_inbox
                (user_id, device_id, stream_id, message_json, instance_name)
                VALUES (%s, %s, %s, %s, 'master')
                """,
                (msg["user_id"], msg["device_id"], stream_pos, msg["content"]),
            )
    log.info("Migrated %d to-device messages", len(pending))
def _migrate_device_lists_stream(self):
    """Bootstrap device_lists_stream for every local device.

    Without an entry here, clients never see the device in /sync's
    `device_lists.changed` field and rely on stale cached keys. One
    synthetic stream entry per local device forces a re-verification
    on the next sync, which fixes the "device key mismatch → refuse
    to decrypt" class of breakage after migration.
    """
    # Only local devices (matching our server_name). Remote devices
    # live in device_lists_remote_cache and aren't streamed.
    # NOTE(review): LIKE treats `_` as a single-char wildcard, so a
    # server_name containing underscores would match more broadly than
    # intended — confirm server_name has no LIKE metacharacters.
    with self.src.cursor(cursor_factory=psycopg2.extras.DictCursor) as reader:
        reader.execute(
            """
            SELECT DISTINCT k.user_id, k.device_id
            FROM keyserver_device_keys k
            WHERE k.user_id LIKE %s
            """,
            (f"%:{self.server_name}",),
        )
        local_devices = reader.fetchall()
    log.info("Bootstrapping %d device_lists_stream entries", len(local_devices))
    with self.dst.cursor() as writer:
        # One fresh stream id per device, drawn from Synapse's sequence so
        # later writes continue past the bootstrapped range.
        for entry in local_devices:
            writer.execute("SELECT nextval('device_lists_sequence')")
            (stream_pos,) = writer.fetchone()
            writer.execute(
                """
                INSERT INTO device_lists_stream
                (stream_id, user_id, device_id, instance_name)
                VALUES (%s, %s, %s, 'master')
                """,
                (stream_pos, entry["user_id"], entry["device_id"]),
            )
    log.info("Inserted %d device_lists_stream rows", len(local_devices))
# ── Run ─────────────────────────────────────────────────────────
def run(self, phases: list[int]):
@@ -1592,7 +1713,9 @@ class Migrator:
"local_media_repository", "receipts_linearized", "redactions",
"e2e_room_keys_versions", "e2e_room_keys",
"e2e_device_keys_json", "e2e_one_time_keys_json",
"e2e_fallback_keys_json",
"e2e_cross_signing_keys", "e2e_cross_signing_signatures",
"device_inbox", "device_lists_stream", "access_tokens",
]
log.info("=== Migration Summary ===")
for table in tables:
@@ -1671,18 +1794,36 @@ Migration phases (use --phase to select):
Phase 8 - E2EE Keys
Migrates end-to-end encryption data: server-side key backups, device keys,
one-time keys, cross-signing keys, and cross-signing signatures. This phase
is critical for restoring encrypted message history when clients use
server-side key backup.
one-time keys, fallback keys, cross-signing keys and signatures, the
to-device inbox, and a bootstrap of the device_lists_stream. This phase
is critical for restoring encrypted message history and for keeping
existing Olm/Megolm sessions alive across the migration.
Dendrite tables: userapi_key_backup_versions, userapi_key_backups,
keyserver_device_keys, keyserver_one_time_keys,
keyserver_cross_signing_keys, keyserver_cross_signing_sigs
keyserver_fallback_keys,
keyserver_cross_signing_keys, keyserver_cross_signing_sigs,
syncapi_send_to_device
Synapse tables: e2e_room_keys_versions, e2e_room_keys,
e2e_device_keys_json, e2e_one_time_keys_json,
e2e_cross_signing_keys, e2e_cross_signing_signatures
Notes: Dendrite stores cross-signing key_type as int (1=master, 2=self_signing,
3=user_signing); converted to text for Synapse. Timestamps converted from
seconds to milliseconds for device/OTK keys.
e2e_fallback_keys_json,
e2e_cross_signing_keys, e2e_cross_signing_signatures,
device_inbox, device_lists_stream
Notes:
- Cross-signing key_type int (1=master, 2=self_signing, 3=user_signing)
converted to text; stream_id drawn from e2e_cross_signing_keys_sequence
so post-migration writes don't collide on UNIQUE(stream_id).
- Timestamps seconds -> milliseconds for device/OTK keys.
- syncapi_send_to_device -> device_inbox preserves undelivered
m.room.encrypted Olm messages (these carry m.room_key Megolm shares
to offline devices; dropping them causes permanent key loss).
- device_lists_stream is bootstrapped from local keyserver_device_keys
so clients see every device as "changed" on next sync and re-verify
against the migrated e2e_device_keys_json, preventing stale-cache
device-key mismatches.
Related:
- Access tokens are copied alongside devices in Phase 1 so clients
keep their existing sessions (no re-login -> device_id continuity
preserved -> local Megolm store on each client stays valid).
"""