mirror your GitHub repos to tangled.org automatically
1import { Agent } from '@atproto/api'
2import type { OAuthSession } from '@atproto/oauth-client-node'
3import { now as tidNow } from '@atcute/tid'
4import { and, eq, sql } from 'drizzle-orm'
5import { repoMapping } from '../db/schema'
6import { useDb } from './db'
7import { installationOctokit } from './github-app'
8
/**
 * Collection NSID of the tangled repo record. Used in this module both as the
 * record's `$type` and as the `collection` for PDS reads and writes.
 */
const REPO_LEXICON = 'sh.tangled.repo'
/**
 * NSID of the knot procedure that creates a repo. Used as the service-auth
 * `lxm` and as the XRPC path segment on the knot.
 */
const REPO_CREATE_NSID = 'sh.tangled.repo.create'
/** `limit` passed to each `listRecords` call when paging the repo collection. */
const LIST_RECORDS_PAGE_SIZE = 100
12
/**
 * GitHub repo fields we mirror into the `sh.tangled.repo` record. Kept narrow
 * so the merge helper is easy to reason about and to test without pulling in
 * the full Octokit type.
 */
export interface GithubRepoMetadata {
  /** `owner/name`; interpolated into the read-only marker's GitHub URL. */
  full_name: string
  /** GitHub description; `null` yields a marker-only tangled description. */
  description: string | null
  /** Written to the record as `website` when non-empty, otherwise `website` is removed. */
  homepage: string | null
  /** Written to the record as `topics` only when this is an array. */
  topics?: string[]
}
24
/**
 * Strip our `[READ-ONLY] Mirror of ...` prefix from a description, if present.
 * Idempotent: returns the original string when there's no prefix to remove.
 * Guards against accumulating prefixes if a GitHub description ever round-trips
 * back through our marker (e.g. a user copy-pasted the tangled description
 * into GitHub).
 *
 * @param value - Description text; `null`, `undefined` and `''` all yield `''`.
 * @returns The description with every leading marker removed.
 */
export function stripReadOnlyMarker(value: string | null | undefined): string {
  if (!value) return ''
  // GitHub repo names may contain dots (`next.js`, `user.github.io`), so the
  // repo segment cannot simply exclude `.`. Match it lazily up to the first
  // `.` that is followed by whitespace or end-of-string, which is exactly how
  // the marker is terminated when we build it.
  const marker = /^\[READ-ONLY\]\s*Mirror of https:\/\/github\.com\/[^\s/]+\/[^\s/]+?\.(?:\s+|$)/
  let rest = value
  // Strip repeatedly so any accidental doubling is collapsed.
  for (;;) {
    const next = rest.replace(marker, '')
    if (next === rest) return rest
    rest = next
  }
}
42
/**
 * Compose the tangled-side `description` for a mirrored repo: the read-only
 * marker pointing at the GitHub repo, followed by GitHub's own description
 * when there is one. Any marker already present in the GitHub description is
 * stripped first, so the result never carries more than one marker.
 *
 * @param githubFullName - `owner/name` of the GitHub repo.
 * @param githubDescription - GitHub's current description, if any.
 * @returns The marker alone, or the marker, a space, and the trimmed description.
 */
export function buildReadOnlyDescription(githubFullName: string, githubDescription: string | null | undefined): string {
  const marker = `[READ-ONLY] Mirror of https://github.com/${githubFullName}.`
  const body = stripReadOnlyMarker(githubDescription).trim()
  if (body.length === 0) return marker
  return `${marker} ${body}`
}
52
/**
 * Produce the `sh.tangled.repo` record value to write to the PDS.
 *
 * Fields on `existing` that this module does not manage are carried over
 * untouched. The identity fields (`$type`, `name`, `knot`, `repoDid`) are
 * always overwritten from `base`, which also repairs a malformed existing
 * record. `createdAt` keeps the existing value when it is a non-empty string
 * and otherwise falls back to `base.createdAt`.
 *
 * Managed metadata:
 * - `description` is always rebuilt from GitHub's current description.
 * - `topics` is replaced only when `gh.topics` is an array; otherwise any
 *   existing value is left as is.
 * - `website` is set from a non-empty `gh.homepage` and removed otherwise.
 *
 * @param existing - Current record value, or `undefined` for the initial
 *   enrolment write.
 * @param base - Identity fields for the record.
 * @param gh - Current GitHub metadata.
 * @returns A new object; `existing` is not mutated.
 */
export function mergeRepoRecord(
  existing: Record<string, unknown> | undefined,
  base: { name: string, knot: string, repoDid: string, createdAt: string },
  gh: GithubRepoMetadata,
): Record<string, unknown> {
  const priorCreatedAt = existing?.createdAt
  const createdAt = typeof priorCreatedAt === 'string' && priorCreatedAt !== ''
    ? priorCreatedAt
    : base.createdAt

  // Spread first so unknown fields survive; the explicit keys then win.
  const record: Record<string, unknown> = {
    ...existing,
    $type: REPO_LEXICON,
    name: base.name,
    knot: base.knot,
    repoDid: base.repoDid,
    createdAt,
    description: buildReadOnlyDescription(gh.full_name, gh.description),
  }

  if (Array.isArray(gh.topics)) {
    record.topics = gh.topics
  }

  if (gh.homepage) {
    record.website = gh.homepage
  }
  else {
    // An empty or missing homepage on GitHub clears the tangled-side website.
    delete record.website
  }

  return record
}
83
/**
 * Default knot for users with no `sh.tangled.knot` records. PLAN.md "Open
 * questions" #1: confirm with the tangled team that this is the right
 * appview-hosted default.
 *
 * `enrollRepo` currently uses this host for every enrolment: as the
 * service-auth audience (`did:web:<knot>`), as the XRPC host, and as the
 * `knot` value stored on the record and mapping row.
 */
const DEFAULT_KNOT = 'knot1.tangled.sh'
90
/** Outcome of `enrollRepo`. */
export interface EnrolResult {
  /**
   * - `enrolled`: the repo was created on the knot, the PDS record written,
   *   and the mapping row inserted.
   * - `already`: a mapping row for this installation/repo already existed.
   * - `skipped`: nothing was created; see `reason`.
   */
  status: 'enrolled' | 'already' | 'skipped'
  /**
   * Why the repo was skipped. `enrollRepo` returns `private` and `fork`;
   * `no-identity` is not produced by it — presumably set by callers elsewhere
   * (TODO confirm).
   */
  reason?: 'private' | 'fork' | 'no-identity'
}
95
/**
 * Enroll a single GitHub repo on tangled.
 *
 * Flow:
 *  1. Return `already` if a `repo_mapping` row exists for this
 *     installation/repo pair.
 *  2. Fetch GitHub repo metadata via the install token. Skip private/fork.
 *  3. Pick a knot (v1 always uses `DEFAULT_KNOT`).
 *  4. Get a service-auth JWT for `(aud=did:web:<knot>, lxm=sh.tangled.repo.create)`.
 *  5. POST to `https://<knot>/xrpc/sh.tangled.repo.create` with
 *     `{ rkey, name, source, defaultBranch }`. The knot clones the repo from
 *     `source` and mints a `repoDid`.
 *  6. Write a `sh.tangled.repo` record on the user's PDS.
 *  7. Insert the `repo_mapping` row.
 *
 * Steps 5–7 run sequentially with no rollback: an error thrown by a later
 * step propagates to the caller without undoing the earlier ones.
 *
 * @param opts.oauthSession - OAuth session of the user whose PDS receives the record.
 * @param opts.installationId - GitHub App installation that can read the repo.
 * @param opts.githubRepoId - Numeric GitHub repository id.
 * @returns `enrolled`, `already`, or `skipped` with a `reason`.
 * @throws Error if GitHub's `full_name` is not `owner/name`, if the knot
 *   responds with a non-2xx status, or if its response carries no `repoDid`.
 *   Failures from GitHub, the PDS, or the database propagate unchanged.
 */
export async function enrollRepo(opts: {
  oauthSession: OAuthSession
  installationId: number
  githubRepoId: number
}): Promise<EnrolResult> {
  const db = useDb()

  // 1. Already enrolled? Same typed predicate as `syncRepoMetadata`; one row
  //    is enough to answer the question.
  const existing = await db.select({ id: repoMapping.id })
    .from(repoMapping)
    .where(
      and(
        eq(repoMapping.installationId, opts.installationId),
        eq(repoMapping.githubRepoId, opts.githubRepoId),
      ),
    )
    .limit(1)
  if (existing.length > 0) {
    return { status: 'already' }
  }

  // 2. GitHub repo metadata.
  const octokit = await installationOctokit(opts.installationId)
  const { data: repo } = await octokit.request('GET /repositories/{repository_id}', {
    repository_id: opts.githubRepoId,
  })

  if (repo.private) return { status: 'skipped', reason: 'private' }
  if (repo.fork) return { status: 'skipped', reason: 'fork' }

  const [owner, name] = repo.full_name.split('/')
  if (!owner || !name) {
    throw new Error(`unexpected github full_name shape: ${repo.full_name}`)
  }

  // 3. Pick a knot. Users *can* configure additional knots; v1 always uses
  //    the default. Wiring user choice through is dashboard work.
  const knot = DEFAULT_KNOT

  // 4. Service-auth JWT for the knot procedure, valid for 60 seconds.
  const agent = new Agent(opts.oauthSession)
  const aud = `did:web:${knot}`
  const exp = Math.floor(Date.now() / 1000) + 60
  const { data: { token } } = await agent.com.atproto.server.getServiceAuth({
    aud,
    lxm: REPO_CREATE_NSID,
    exp,
  })

  // 5. Knot procedure call. Tangled mints a repoDid here and starts cloning
  //    from `source`.
  const rkey = tidNow()
  const sourceUrl = `https://github.com/${owner}/${name}`
  const knotResponse = await fetch(`https://${knot}/xrpc/${REPO_CREATE_NSID}`, {
    method: 'POST',
    headers: {
      'authorization': `Bearer ${token}`,
      'content-type': 'application/json',
    },
    body: JSON.stringify({
      rkey,
      name,
      source: sourceUrl,
      defaultBranch: repo.default_branch,
    }),
  })
  if (!knotResponse.ok) {
    const body = await knotResponse.text()
    throw new Error(`knot ${knot} returned ${knotResponse.status}: ${body}`)
  }
  // The response body is external input: narrow it instead of trusting a type
  // annotation, so `null` or a non-string `repoDid` fails with a clear error.
  const knotJson: unknown = await knotResponse.json()
  const repoDid = (knotJson as { repoDid?: unknown } | null)?.repoDid
  if (typeof repoDid !== 'string' || repoDid.length === 0) {
    throw new Error(`knot ${knot} returned no repoDid`)
  }

  // 6. PDS record so the appview firehose discovers the repo. Includes the
  //    read-only marker and current GitHub metadata from the off — no
  //    follow-up metadata sync needed at enrolment time.
  const record = mergeRepoRecord(undefined,
    { name, knot, repoDid, createdAt: new Date().toISOString() },
    {
      full_name: repo.full_name,
      description: repo.description,
      homepage: repo.homepage,
      topics: repo.topics,
    },
  )
  await agent.com.atproto.repo.putRecord({
    repo: opts.oauthSession.did,
    collection: REPO_LEXICON,
    rkey,
    record,
  })

  // 7. Persist mapping.
  await db.insert(repoMapping).values({
    installationId: opts.installationId,
    githubRepoId: opts.githubRepoId,
    githubFullName: repo.full_name,
    tangledRepoDid: repoDid,
    tangledFullName: `${opts.oauthSession.did}/${name}`,
    knot,
    status: 'active',
  })

  return { status: 'enrolled' }
}
211
/** Outcome of `syncRepoMetadata`. */
export interface SyncMetadataResult {
  /**
   * - `synced`: the PDS record was rewritten from GitHub's current metadata.
   * - `skipped`: nothing was written; see `reason`.
   */
  status: 'synced' | 'skipped'
  /**
   * Why the sync was skipped:
   * - `no-mapping`: no mapping row, or the row lacks `tangledRepoDid`/`knot`.
   * - `disabled`: the mapping row has `disabledAt` set.
   * - `private` / `fork`: GitHub now reports the repo as private or a fork.
   * - `no-pds-record`: no `sh.tangled.repo` record with a matching `repoDid`
   *   was found on the user's PDS.
   */
  reason?: 'no-mapping' | 'disabled' | 'private' | 'fork' | 'no-pds-record'
}
216
/**
 * Bring the user's `sh.tangled.repo` PDS record back in line with GitHub's
 * current description, topics, and homepage. Triggered on `repository.edited`.
 *
 * The record's rkey is not stored locally. It is recovered by paging through
 * the user's `sh.tangled.repo` collection and picking the record whose
 * `repoDid` equals the one on the mapping row. The write passes the found
 * record's CID as `swapRecord` for optimistic concurrency in case two webhook
 * deliveries race.
 *
 * @param opts.oauthSession - OAuth session of the user who owns the record.
 * @param opts.installationId - GitHub App installation that can read the repo.
 * @param opts.githubRepoId - Numeric GitHub repository id.
 * @returns `synced`, or `skipped` with the reason nothing was written.
 * @throws Error if GitHub's `full_name` has no name segment or the record's
 *   at-uri yields no rkey. Failures from GitHub, the PDS, or the database
 *   propagate unchanged.
 */
export async function syncRepoMetadata(opts: {
  oauthSession: OAuthSession
  installationId: number
  githubRepoId: number
}): Promise<SyncMetadataResult> {
  const db = useDb()

  const [mapping] = await db.select().from(repoMapping).where(
    and(
      eq(repoMapping.installationId, opts.installationId),
      eq(repoMapping.githubRepoId, opts.githubRepoId),
    ),
  ).limit(1)
  if (!mapping) return { status: 'skipped', reason: 'no-mapping' }
  if (mapping.disabledAt) return { status: 'skipped', reason: 'disabled' }

  const { tangledRepoDid, knot } = mapping
  if (!tangledRepoDid || !knot) return { status: 'skipped', reason: 'no-mapping' }

  // The webhook payload is not trusted; ask GitHub for the current state.
  const octokit = await installationOctokit(opts.installationId)
  const ghResponse = await octokit.request('GET /repositories/{repository_id}', {
    repository_id: opts.githubRepoId,
  })
  const ghRepo = ghResponse.data
  if (ghRepo.private) return { status: 'skipped', reason: 'private' }
  if (ghRepo.fork) return { status: 'skipped', reason: 'fork' }

  const repoName = ghRepo.full_name.split('/')[1]
  if (!repoName) throw new Error(`unexpected github full_name shape: ${ghRepo.full_name}`)

  const agent = new Agent(opts.oauthSession)

  // Page through the collection until the record for this `repoDid` turns up
  // or the PDS stops handing back a cursor. Typical installs have <100
  // records, so a second page is rare.
  let match: { uri: string, cid: string, value: Record<string, unknown> } | undefined
  let pageCursor: string | undefined
  for (;;) {
    // eslint-disable-next-line no-await-in-loop -- sequential pagination
    const { data } = await agent.com.atproto.repo.listRecords({
      repo: opts.oauthSession.did,
      collection: REPO_LEXICON,
      limit: LIST_RECORDS_PAGE_SIZE,
      cursor: pageCursor,
    })
    const hit = data.records.find(
      rec => (rec.value as Record<string, unknown>).repoDid === tangledRepoDid,
    )
    if (hit) {
      match = { uri: hit.uri, cid: hit.cid, value: hit.value as Record<string, unknown> }
      break
    }
    if (!data.cursor) break
    pageCursor = data.cursor
  }

  if (!match) return { status: 'skipped', reason: 'no-pds-record' }

  // The rkey is the last path segment of the at-uri.
  const rkey = match.uri.slice(match.uri.lastIndexOf('/') + 1)
  if (!rkey) throw new Error(`could not parse rkey from at-uri: ${match.uri}`)

  let createdAt: string
  if (typeof match.value.createdAt === 'string') {
    createdAt = match.value.createdAt
  }
  else {
    createdAt = new Date().toISOString()
  }

  const record = mergeRepoRecord(
    match.value,
    { name: repoName, knot, repoDid: tangledRepoDid, createdAt },
    {
      full_name: ghRepo.full_name,
      description: ghRepo.description,
      homepage: ghRepo.homepage,
      topics: ghRepo.topics,
    },
  )

  await agent.com.atproto.repo.putRecord({
    repo: opts.oauthSession.did,
    collection: REPO_LEXICON,
    rkey,
    record,
    swapRecord: match.cid,
  })

  // Keep the cached display name current. Joins go through githubRepoId, so
  // githubFullName is display-only, but the dashboard reads it.
  const fullNameChanged = mapping.githubFullName !== ghRepo.full_name
  if (fullNameChanged) {
    await db.update(repoMapping)
      .set({ githubFullName: ghRepo.full_name, updatedAt: new Date() })
      .where(eq(repoMapping.id, mapping.id))
  }

  return { status: 'synced' }
}