mirror your GitHub repos to tangled.org automatically
1import { Agent } from '@atproto/api'
2import type { OAuthSession } from '@atproto/oauth-client-node'
3import { now as tidNow } from '@atcute/tid'
4import { and, eq, sql } from 'drizzle-orm'
5import { repoMapping } from '../db/schema'
6import { useDb } from './db'
7import { installationOctokit } from './github-app'
8
/** NSID of the record collection this module reads and writes. */
const REPO_LEXICON = 'sh.tangled.repo'

/**
 * NSID of the knot procedure that creates a repo. It sits directly under the
 * record NSID and resolves to `sh.tangled.repo.create`.
 */
const REPO_CREATE_NSID = `${REPO_LEXICON}.create` as const

/** `limit` sent with each `listRecords` call while paging the collection. */
const LIST_RECORDS_PAGE_SIZE = 100
12
/**
 * The slice of a GitHub repository that is mirrored into the
 * `sh.tangled.repo` record. Deliberately small, so the merge helper can be
 * reasoned about and tested without depending on the full Octokit type.
 */
export interface GithubRepoMetadata {
  /** `owner/name` form of the repository name. */
  full_name: string
  /** Repository description; may be `null`. */
  description: string | null
  /** Repository homepage; may be `null`. */
  homepage: string | null
  /** Repository topics; optional because the field may be absent. */
  topics?: string[]
}
24
/**
 * Matches one or more leading
 * `[READ-ONLY] Mirror of https://github.com/<owner>/<repo>.` markers, each
 * together with the whitespace that follows it.
 *
 * The repo segment is matched lazily up to the first `.` that is followed by
 * whitespace or the end of the string. Repo names that contain dots
 * (`next.js`, `user.github.io`) are therefore consumed whole rather than being
 * cut at their first dot, which would leave a fragment of the name behind.
 */
const READ_ONLY_MARKER_PREFIX
  = /^(?:\[READ-ONLY\]\s*Mirror of https:\/\/github\.com\/[^\s/]+\/[^\s/]+?\.(?=\s|$)\s*)+/

/**
 * Strip our `[READ-ONLY] Mirror of ...` prefix from a description, if present.
 *
 * Idempotent: returns the original string when there is no prefix to remove.
 * Repeated prefixes are collapsed in one pass, which guards against
 * accumulating markers if a GitHub description ever round-trips back through
 * our marker (e.g. a user copy-pasted the tangled description into GitHub).
 *
 * A marker is only recognised when its closing `.` is followed by whitespace
 * or ends the string — the two shapes `buildReadOnlyDescription` emits.
 *
 * @param value Description text; `null`, `undefined` and `''` all yield `''`.
 * @returns The description without any leading read-only markers.
 */
export function stripReadOnlyMarker(value: string | null | undefined): string {
  if (!value) return ''
  return value.replace(READ_ONLY_MARKER_PREFIX, '')
}
42
/**
 * Compose the tangled-side `description` for a mirrored repo: the read-only
 * marker for `githubFullName`, followed by GitHub's own description when it
 * has one. The incoming description is passed through `stripReadOnlyMarker`
 * and trimmed first, so the result is always rebuilt from scratch rather than
 * layered on top of an earlier marker.
 *
 * @param githubFullName `owner/name` used to build the mirror URL.
 * @param githubDescription GitHub's current description, if any.
 * @returns The marker alone, or the marker, a space, and the description.
 */
export function buildReadOnlyDescription(githubFullName: string, githubDescription: string | null | undefined): string {
  const marker = `[READ-ONLY] Mirror of https://github.com/${githubFullName}.`
  const body = stripReadOnlyMarker(githubDescription).trim()
  if (body.length === 0) return marker
  return `${marker} ${body}`
}
52
/**
 * Produce the `sh.tangled.repo` record value to write to the PDS.
 *
 * Every key of `existing` is carried over first, so fields this module does
 * not manage survive a round-trip. On top of that:
 * - `$type`, `name`, `knot` and `repoDid` are always set from the lexicon
 *   constant and `base` (this also repairs a malformed existing record);
 * - `createdAt` keeps the existing value when it is a non-empty string and
 *   otherwise takes `base.createdAt`;
 * - `description` is rebuilt from GitHub's current description;
 * - `topics` is replaced only when GitHub supplied an array;
 * - `website` is set from a non-empty `homepage` and removed otherwise.
 *
 * @param existing Current record value, or `undefined` for the initial
 *   enrolment write.
 * @param base Identity fields for the record.
 * @param gh GitHub metadata to mirror.
 * @returns A new object; `existing` is not mutated.
 */
export function mergeRepoRecord(
  existing: Record<string, unknown> | undefined,
  base: { name: string, knot: string, repoDid: string, createdAt: string },
  gh: GithubRepoMetadata,
): Record<string, unknown> {
  const priorCreatedAt = existing?.createdAt
  const keepPriorCreatedAt = typeof priorCreatedAt === 'string' && priorCreatedAt !== ''

  // Spread first so unknown fields are retained; the keys listed after it
  // override whatever the existing record held.
  const result: Record<string, unknown> = {
    ...existing,
    $type: REPO_LEXICON,
    name: base.name,
    knot: base.knot,
    repoDid: base.repoDid,
    createdAt: keepPriorCreatedAt ? priorCreatedAt : base.createdAt,
    description: buildReadOnlyDescription(gh.full_name, gh.description),
  }

  // Absent topics leave any existing `topics` alone; an array (even an empty
  // one) replaces them.
  if (Array.isArray(gh.topics)) result.topics = gh.topics

  // A null or empty homepage clears `website` instead of leaving a stale one.
  if (gh.homepage) result.website = gh.homepage
  else delete result.website

  return result
}
83
/**
 * Knot used for users who have no `sh.tangled.knot` records.
 *
 * Still unconfirmed: PLAN.md "Open questions" #1 tracks checking with the
 * tangled team that this is the right appview-hosted default.
 */
const DEFAULT_KNOT = 'knot1.tangled.sh'
90
/** Outcome reported by `enrollRepo`. */
export interface EnrolResult {
  /**
   * `enrolled` — the enrolment flow ran to completion;
   * `already` — a mapping row existed and `force` was not set;
   * `skipped` — the repo was not eligible, see `reason`.
   */
  status:
    | 'enrolled'
    | 'already'
    | 'skipped'
  /** Why the repo was skipped. */
  reason?:
    | 'private'
    | 'fork'
    | 'no-identity'
}
95
/**
 * Pull a usable `repoDid` out of a knot response body.
 *
 * The body is untrusted JSON, so this yields `undefined` unless it is an
 * object whose `repoDid` is a non-empty string.
 */
function readRepoDid(payload: unknown): string | undefined {
  if (typeof payload !== 'object' || payload === null) return undefined
  const repoDid = (payload as { repoDid?: unknown }).repoDid
  return typeof repoDid === 'string' && repoDid.length > 0 ? repoDid : undefined
}

/**
 * Enroll a single GitHub repo on tangled.
 *
 * Flow:
 *  1. Return `already` if a `repo_mapping` row exists and `force` is not set.
 *  2. Fetch GitHub repo metadata via the install token. Skip private/fork.
 *  3. Pick a knot. v1 always uses `DEFAULT_KNOT`.
 *  4. Get a service-auth JWT for `(aud=did:web:<knot>, lxm=sh.tangled.repo.create)`.
 *  5. POST to `https://<knot>/xrpc/sh.tangled.repo.create` with
 *     `{ rkey, name, source, defaultBranch }`. The knot clones the repo from
 *     `source` and mints a `repoDid`.
 *  6. Write a `sh.tangled.repo` record on the user's PDS.
 *  7. Insert the `repo_mapping` row, or update it in place on a forced resync.
 *
 * The steps are not transactional: a failure after step 5 leaves the earlier
 * side effects in place.
 *
 * @returns `enrolled` on success, `already` when a mapping exists and `force`
 *   is not set, or `skipped` with a `reason` for private repos and forks.
 * @throws Error when GitHub's `full_name` is not `owner/name`, when the knot
 *   answers with a non-2xx status, or when its response carries no usable
 *   `repoDid`. Failures from the GitHub, PDS and database calls are not
 *   caught here and propagate to the caller.
 */
export async function enrollRepo(opts: {
  oauthSession: OAuthSession
  installationId: number
  githubRepoId: number
  /**
   * Used by the dashboard "Resync now" action. When true, ignore an existing
   * `repo_mapping` row and re-run the enrolment flow. Note this still performs
   * the knot procedure call, which mints a *new* `repoDid`; v1 then overwrites
   * the mapping with the new identity. A more surgical "poke the knot to
   * re-sync from source" path is a future improvement.
   */
  force?: boolean
}): Promise<EnrolResult> {
  const db = useDb()

  // 1. Existing mapping? Only its id is needed, for the in-place update below.
  const existing = await db.select({ id: repoMapping.id })
    .from(repoMapping)
    .where(and(
      eq(repoMapping.installationId, opts.installationId),
      eq(repoMapping.githubRepoId, opts.githubRepoId),
    ))
    .limit(1)
  const existingRow = existing[0]
  if (existingRow && !opts.force) {
    return { status: 'already' }
  }

  // 2. GitHub repo metadata.
  const octokit = await installationOctokit(opts.installationId)
  const { data: repo } = await octokit.request('GET /repositories/{repository_id}', {
    repository_id: opts.githubRepoId,
  })

  if (repo.private) return { status: 'skipped', reason: 'private' }
  if (repo.fork) return { status: 'skipped', reason: 'fork' }

  const [owner, name] = repo.full_name.split('/')
  if (!owner || !name) {
    throw new Error(`unexpected github full_name shape: ${repo.full_name}`)
  }

  // 3. Pick a knot. Users *can* configure additional knots; v1 always uses
  // the default. Wiring user choice through is dashboard work.
  const knot = DEFAULT_KNOT

  // 4. Service-auth JWT for the knot procedure, valid for 60 seconds.
  const agent = new Agent(opts.oauthSession)
  const aud = `did:web:${knot}`
  const exp = Math.floor(Date.now() / 1000) + 60
  const { data: { token } } = await agent.com.atproto.server.getServiceAuth({
    aud,
    lxm: REPO_CREATE_NSID,
    exp,
  })

  // 5. Knot procedure call. Tangled mints a repoDid here and starts cloning
  // from `source`.
  const rkey = tidNow()
  const sourceUrl = `https://github.com/${owner}/${name}`
  const knotResponse = await fetch(`https://${knot}/xrpc/${REPO_CREATE_NSID}`, {
    method: 'POST',
    headers: {
      'authorization': `Bearer ${token}`,
      'content-type': 'application/json',
    },
    body: JSON.stringify({
      rkey,
      name,
      source: sourceUrl,
      defaultBranch: repo.default_branch,
    }),
  })
  if (!knotResponse.ok) {
    const body = await knotResponse.text()
    throw new Error(`knot ${knot} returned ${knotResponse.status}: ${body}`)
  }
  // Validate the shape instead of trusting it: a `null` body or a non-string
  // `repoDid` must not reach the PDS record or the mapping row.
  const repoDid = readRepoDid(await knotResponse.json())
  if (!repoDid) {
    throw new Error(`knot ${knot} returned no repoDid`)
  }

  // 6. PDS record so the appview firehose discovers the repo. Includes the
  // read-only marker and current GitHub metadata from the start — no
  // follow-up metadata sync needed at enrolment time.
  // NOTE(review): `rkey` is freshly generated on every call, so a forced
  // resync writes a second record and nothing here removes the one written
  // by the earlier enrolment — confirm that is intended.
  const record = mergeRepoRecord(
    undefined,
    { name, knot, repoDid, createdAt: new Date().toISOString() },
    {
      full_name: repo.full_name,
      description: repo.description,
      homepage: repo.homepage,
      topics: repo.topics,
    },
  )
  await agent.com.atproto.repo.putRecord({
    repo: opts.oauthSession.did,
    collection: REPO_LEXICON,
    rkey,
    record,
  })

  // 7. Persist mapping. On a forced resync the row already exists; update
  // in place so we retain `lastSyncedRefs` (the worker uses it for ref-tip
  // dedupe) but refresh the tangled-side identifiers and clear any prior
  // error.
  const tangledFullName = `${opts.oauthSession.did}/${name}`
  if (existingRow) {
    await db.update(repoMapping)
      .set({
        githubFullName: repo.full_name,
        tangledRepoDid: repoDid,
        tangledFullName,
        knot,
        status: 'active',
        lastError: null,
        updatedAt: new Date(),
      })
      .where(eq(repoMapping.id, existingRow.id))
  }
  else {
    await db.insert(repoMapping).values({
      installationId: opts.installationId,
      githubRepoId: opts.githubRepoId,
      githubFullName: repo.full_name,
      tangledRepoDid: repoDid,
      tangledFullName,
      knot,
      status: 'active',
    })
  }

  return { status: 'enrolled' }
}
238
/** Outcome reported by `syncRepoMetadata`. */
export interface SyncMetadataResult {
  /** `synced` when the PDS record was rewritten, `skipped` otherwise. */
  status:
    | 'synced'
    | 'skipped'
  /** Why the sync was skipped. */
  reason?:
    | 'no-mapping'
    | 'disabled'
    | 'private'
    | 'fork'
    | 'no-pds-record'
}
243
/**
 * Refresh the `sh.tangled.repo` record on the user's PDS to match GitHub's
 * current description, topics, and homepage. Triggered on `repository.edited`.
 *
 * We don't store the rkey locally, so we discover it by listing the user's
 * `sh.tangled.repo` records and matching on `repoDid` (which we do store).
 * `swapRecord` is passed for optimistic concurrency in case two webhook
 * deliveries race.
 *
 * @returns `synced` once the record has been rewritten, or `skipped` with a
 *   `reason`: `no-mapping` (no row, or the row lacks `tangledRepoDid`/`knot`),
 *   `disabled`, `private`, `fork`, or `no-pds-record` (no record in the
 *   collection carries this repo's `repoDid`).
 * @throws Error when GitHub's `full_name` has no name segment or the record's
 *   at-uri has no rkey. Failures from the GitHub, PDS and database calls —
 *   including a rejected `swapRecord` — are not caught here.
 */
export async function syncRepoMetadata(opts: {
  oauthSession: OAuthSession
  installationId: number
  githubRepoId: number
}): Promise<SyncMetadataResult> {
  const db = useDb()

  const rows = await db.select().from(repoMapping).where(
    and(
      eq(repoMapping.installationId, opts.installationId),
      eq(repoMapping.githubRepoId, opts.githubRepoId),
    ),
  ).limit(1)
  const row = rows[0]
  if (!row) return { status: 'skipped', reason: 'no-mapping' }

  if (row.disabledAt) return { status: 'skipped', reason: 'disabled' }
  if (!row.tangledRepoDid || !row.knot) return { status: 'skipped', reason: 'no-mapping' }

  // Refetch GitHub state rather than trusting the webhook body.
  const octokit = await installationOctokit(opts.installationId)
  const { data: repo } = await octokit.request('GET /repositories/{repository_id}', {
    repository_id: opts.githubRepoId,
  })
  if (repo.private) return { status: 'skipped', reason: 'private' }
  if (repo.fork) return { status: 'skipped', reason: 'fork' }

  const [, name] = repo.full_name.split('/')
  if (!name) throw new Error(`unexpected github full_name shape: ${repo.full_name}`)

  const agent = new Agent(opts.oauthSession)

  // Discover the rkey by walking the collection until we find the record
  // matching this repo's `repoDid`. Typical installs have <100 records so
  // pagination is mostly defensive.
  let cursor: string | undefined
  let found: { uri: string, cid: string, value: Record<string, unknown> } | undefined
  for (;;) {
    // eslint-disable-next-line no-await-in-loop -- sequential pagination
    const page = await agent.com.atproto.repo.listRecords({
      repo: opts.oauthSession.did,
      collection: REPO_LEXICON,
      limit: LIST_RECORDS_PAGE_SIZE,
      cursor,
    })
    for (const rec of page.data.records) {
      const value = rec.value as Record<string, unknown>
      if (value.repoDid === row.tangledRepoDid) {
        found = { uri: rec.uri, cid: rec.cid, value }
        break
      }
    }
    // Stop on a match or on the last page. Also stop if the server hands back
    // the cursor we just sent, which would otherwise loop forever.
    const nextCursor = page.data.cursor
    if (found || !nextCursor || nextCursor === cursor) break
    cursor = nextCursor
  }

  if (!found) return { status: 'skipped', reason: 'no-pds-record' }

  const rkey = found.uri.split('/').pop()
  if (!rkey) throw new Error(`could not parse rkey from at-uri: ${found.uri}`)

  // Keep the record's own timestamp when it has a usable one. An empty string
  // counts as missing, so a malformed record is repaired rather than having
  // its blank value written back.
  const existingCreatedAt = found.value.createdAt
  const createdAt = typeof existingCreatedAt === 'string' && existingCreatedAt.length > 0
    ? existingCreatedAt
    : new Date().toISOString()

  // NOTE(review): `name` is taken from GitHub's *current* name, so a GitHub
  // rename changes `name` on the record while `tangledFullName` in the
  // mapping row is left alone — confirm that is intended.
  const record = mergeRepoRecord(
    found.value,
    { name, knot: row.knot, repoDid: row.tangledRepoDid, createdAt },
    {
      full_name: repo.full_name,
      description: repo.description,
      homepage: repo.homepage,
      topics: repo.topics,
    },
  )

  await agent.com.atproto.repo.putRecord({
    repo: opts.oauthSession.did,
    collection: REPO_LEXICON,
    rkey,
    record,
    swapRecord: found.cid,
  })

  // Refresh the cached display name. githubFullName is display-only (joins go
  // through githubRepoId), but the dashboard reads it.
  if (row.githubFullName !== repo.full_name) {
    await db.update(repoMapping)
      .set({ githubFullName: repo.full_name, updatedAt: new Date() })
      .where(eq(repoMapping.id, row.id))
  }

  return { status: 'synced' }
}