mirror your GitHub repos to tangled.org automatically
1

Configure Feed

Select the types of activity you want to include in your feed.

at main 12 kB View raw
import { Agent } from '@atproto/api'
import type { OAuthSession } from '@atproto/oauth-client-node'
import { now as tidNow } from '@atcute/tid'
import { and, eq, sql } from 'drizzle-orm'
import { repoMapping } from '../db/schema'
import { useDb } from './db'
import { installationOctokit } from './github-app'

/** Collection NSID of the repo record written to the user's PDS. */
const REPO_LEXICON = 'sh.tangled.repo'
/** NSID of the knot procedure called to create the tangled-side repo. */
const REPO_CREATE_NSID = 'sh.tangled.repo.create'
/** Page size used when walking the user's `sh.tangled.repo` records. */
const LIST_RECORDS_PAGE_SIZE = 100

/**
 * Matches one leading `[READ-ONLY] Mirror of https://github.com/<owner>/<repo>.`
 * marker plus the whitespace that follows it.
 *
 * The repo segment is matched lazily and may itself contain dots (`next.js`,
 * `user.github.io`); the marker's terminating period is the first `.` that is
 * followed by whitespace or the end of the string. Hoisted to module scope so
 * the strip loop reuses a single RegExp instance.
 */
const READ_ONLY_MARKER_RE = /^\[READ-ONLY\]\s*Mirror of https:\/\/github\.com\/[^\s/]+\/[^\s/]+?\.(?:\s+|$)/

/**
 * GitHub repo fields we mirror into the `sh.tangled.repo` record. Kept narrow
 * so the merge helper is easy to reason about and to test without pulling in
 * the full Octokit type.
 */
export interface GithubRepoMetadata {
  full_name: string
  description: string | null
  homepage: string | null
  topics?: string[]
}

/**
 * Strip our `[READ-ONLY] Mirror of ...` prefix from a description, if present.
 * Idempotent: returns the original string when there's no prefix to remove.
 * Guards against accumulating prefixes if a GitHub description ever round-trips
 * back through our marker (e.g. a user copy-pasted the tangled description
 * into GitHub).
 *
 * @param value - Description text; `null`, `undefined` and `''` all yield `''`.
 * @returns The description with every leading marker removed.
 */
export function stripReadOnlyMarker(value: string | null | undefined): string {
  if (!value) return ''
  let s = value
  // Strip repeatedly so any accidental doubling is collapsed.
  for (;;) {
    const next = s.replace(READ_ONLY_MARKER_RE, '')
    if (next === s) return s
    s = next
  }
}

/**
 * Build the `description` we want on the tangled-side record from GitHub's
 * current state. Always rebuilt from scratch so we never compound the marker.
 *
 * @param githubFullName - `owner/repo` as reported by GitHub.
 * @param githubDescription - GitHub's description; may be empty or missing.
 * @returns The marker alone, or the marker followed by a space and the
 *   trimmed, marker-free description.
 */
export function buildReadOnlyDescription(githubFullName: string, githubDescription: string | null | undefined): string {
  const stripped = stripReadOnlyMarker(githubDescription).trim()
  const prefix = `[READ-ONLY] Mirror of https://github.com/${githubFullName}.`
  return stripped ? `${prefix} ${stripped}` : prefix
}

/**
 * Merge GitHub metadata into an existing PDS record value, preserving fields
 * we don't manage (any keys other than the ones assigned below). Pass
 * `existing = undefined` for the initial enrolment write.
 *
 * @param existing - Current record value from the PDS, if any. Not mutated.
 * @param base - Identity fields that always overwrite whatever `existing`
 *   holds, except `createdAt`, which only fills in when `existing` has no
 *   non-empty string `createdAt`.
 * @param gh - GitHub metadata to overlay.
 * @returns A new record object ready to be written with `putRecord`.
 */
export function mergeRepoRecord(
  existing: Record<string, unknown> | undefined,
  base: { name: string, knot: string, repoDid: string, createdAt: string },
  gh: GithubRepoMetadata,
): Record<string, unknown> {
  const description = buildReadOnlyDescription(gh.full_name, gh.description)
  // `null` and `''` both mean "no homepage".
  const website = gh.homepage || undefined
  const topics = Array.isArray(gh.topics) ? gh.topics : undefined

  // Start from existing so unknown fields survive a round-trip. Then overlay
  // the immutable base (in case the existing record is malformed) and the
  // managed metadata.
  const merged: Record<string, unknown> = { ...existing }
  merged.$type = REPO_LEXICON
  merged.name = base.name
  merged.knot = base.knot
  merged.repoDid = base.repoDid

  const existingCreatedAt = existing?.createdAt
  merged.createdAt = typeof existingCreatedAt === 'string' && existingCreatedAt.length > 0
    ? existingCreatedAt
    : base.createdAt

  merged.description = description
  // When the caller supplies no topics array, whatever `existing` carried is
  // left in place; an explicit empty array overwrites it.
  if (topics !== undefined) merged.topics = topics
  // A cleared homepage removes the field rather than leaving a stale URL.
  if (website !== undefined) merged.website = website
  else delete merged.website
  return merged
}

/**
 * Default knot for users with no `sh.tangled.knot` records. PLAN.md "Open
 * questions" #1: confirm with the tangled team that this is the right
 * appview-hosted default.
 */
const DEFAULT_KNOT = 'knot1.tangled.sh'

export interface EnrolResult {
  status: 'enrolled' | 'already' | 'skipped'
  reason?: 'private' | 'fork' | 'no-identity'
}

/**
 * Enroll a single GitHub repo on tangled.
 *
 * Flow:
 * 1. Skip if a `repo_mapping` row already exists.
 * 2. Fetch GitHub repo metadata via the install token. Skip private/fork.
 * 3. Pick a knot (user default → `DEFAULT_KNOT`).
 * 4. Get a service-auth JWT for `(aud=did:web:<knot>, lxm=sh.tangled.repo.create)`.
 * 5. POST to `https://<knot>/xrpc/sh.tangled.repo.create` with
 *    `{ rkey, name, source, defaultBranch }`. The knot clones the repo from
 *    `source` and mints a `repoDid`.
 * 6. Write a `sh.tangled.repo` record on the user's PDS.
 * 7. Insert the `repo_mapping` row.
 *
 * @returns `already` when a mapping exists and `force` is not set, `skipped`
 *   for private repos and forks, otherwise `enrolled`.
 * @throws Error when GitHub's `full_name` is not `owner/name`, when the knot
 *   responds with a non-2xx status, or when its response carries no
 *   `repoDid`. Errors from the GitHub, PDS and database calls propagate.
 */
export async function enrollRepo(opts: {
  oauthSession: OAuthSession
  installationId: number
  githubRepoId: number
  /**
   * Used by the dashboard "Resync now" action. When true, ignore an existing
   * `repo_mapping` row in `active` state and re-run the enrolment flow. Note
   * this still performs the knot procedure call, which mints a *new*
   * `repoDid`; v1 then overwrites the mapping with the new identity. A
   * more surgical "poke the knot to re-sync from source" path is a future
   * improvement.
   */
  force?: boolean
}): Promise<EnrolResult> {
  const db = useDb()

  // 1. An existing mapping short-circuits unless the caller forces a resync.
  const existing = await db.select({ id: repoMapping.id, status: repoMapping.status })
    .from(repoMapping)
    .where(sql`${repoMapping.installationId} = ${opts.installationId} AND ${repoMapping.githubRepoId} = ${opts.githubRepoId}`)
  const existingRow = existing[0]
  if (existingRow && !opts.force) {
    return { status: 'already' }
  }

  // 2. GitHub repo metadata.
  const octokit = await installationOctokit(opts.installationId)
  const { data: repo } = await octokit.request('GET /repositories/{repository_id}', {
    repository_id: opts.githubRepoId,
  })

  if (repo.private) return { status: 'skipped', reason: 'private' }
  if (repo.fork) return { status: 'skipped', reason: 'fork' }

  const [owner, name] = repo.full_name.split('/')
  if (!owner || !name) {
    throw new Error(`unexpected github full_name shape: ${repo.full_name}`)
  }

  // 3. Pick a knot. Users *can* configure additional knots; v1 always uses
  // the default. Wiring user choice through is dashboard work.
  const knot = DEFAULT_KNOT

  // 4. Service-auth JWT for the knot procedure, expiring 60 seconds from now.
  const agent = new Agent(opts.oauthSession)
  const aud = `did:web:${knot}`
  const exp = Math.floor(Date.now() / 1000) + 60
  const { data: { token } } = await agent.com.atproto.server.getServiceAuth({
    aud,
    lxm: REPO_CREATE_NSID,
    exp,
  })

  // 5. Knot procedure call. Tangled mints a repoDid here and starts cloning
  // from `source`.
  const rkey = tidNow()
  const sourceUrl = `https://github.com/${owner}/${name}`
  const knotResponse = await fetch(`https://${knot}/xrpc/${REPO_CREATE_NSID}`, {
    method: 'POST',
    headers: {
      'authorization': `Bearer ${token}`,
      'content-type': 'application/json',
    },
    body: JSON.stringify({
      rkey,
      name,
      source: sourceUrl,
      defaultBranch: repo.default_branch,
    }),
  })
  if (!knotResponse.ok) {
    const body = await knotResponse.text()
    throw new Error(`knot ${knot} returned ${knotResponse.status}: ${body}`)
  }
  // The response body is external input: tolerate a `null` body and refuse
  // anything that is not a non-empty string before persisting it.
  const knotJson = await knotResponse.json() as { repoDid?: unknown } | null
  const repoDid = knotJson?.repoDid
  if (typeof repoDid !== 'string' || repoDid.length === 0) {
    throw new Error(`knot ${knot} returned no repoDid`)
  }

  // 6. PDS record so the appview firehose discovers the repo. Includes the
  // read-only marker and current GitHub metadata from the start — no
  // follow-up metadata sync needed at enrolment time.
  const record = mergeRepoRecord(undefined,
    { name, knot, repoDid, createdAt: new Date().toISOString() },
    {
      full_name: repo.full_name,
      description: repo.description,
      homepage: repo.homepage,
      topics: repo.topics,
    },
  )
  await agent.com.atproto.repo.putRecord({
    repo: opts.oauthSession.did,
    collection: REPO_LEXICON,
    rkey,
    record,
  })

  // 7. Persist mapping. On a forced resync the row already exists; update
  // in place so we retain `lastSyncedRefs` (the worker uses it for ref-tip
  // dedupe) but refresh the tangled-side identifiers and clear any prior
  // error.
  if (existingRow) {
    await db.update(repoMapping)
      .set({
        githubFullName: repo.full_name,
        tangledRepoDid: repoDid,
        tangledFullName: `${opts.oauthSession.did}/${name}`,
        knot,
        status: 'active',
        lastError: null,
        updatedAt: new Date(),
      })
      .where(sql`${repoMapping.id} = ${existingRow.id}`)
  }
  else {
    await db.insert(repoMapping).values({
      installationId: opts.installationId,
      githubRepoId: opts.githubRepoId,
      githubFullName: repo.full_name,
      tangledRepoDid: repoDid,
      tangledFullName: `${opts.oauthSession.did}/${name}`,
      knot,
      status: 'active',
    })
  }

  return { status: 'enrolled' }
}

export interface SyncMetadataResult {
  status: 'synced' | 'skipped'
  reason?: 'no-mapping' | 'disabled' | 'private' | 'fork' | 'no-pds-record'
}

/**
 * Refresh the `sh.tangled.repo` record on the user's PDS to match GitHub's
 * current description, topics, and homepage. Triggered on `repository.edited`.
 *
 * We don't store the rkey locally, so we discover it by listing the user's
 * `sh.tangled.repo` records and matching on `repoDid` (which we do store).
 * `swapRecord` is passed for optimistic concurrency in case two webhook
 * deliveries race.
 *
 * @returns `skipped` with a reason when there is no usable mapping, the
 *   mapping is disabled, the repo is private or a fork, or no PDS record
 *   matches the stored `repoDid`; otherwise `synced`.
 * @throws Error when GitHub's `full_name` has no repo segment or the matched
 *   record's at-uri has no rkey. Errors from the GitHub, PDS and database
 *   calls propagate.
 */
export async function syncRepoMetadata(opts: {
  oauthSession: OAuthSession
  installationId: number
  githubRepoId: number
}): Promise<SyncMetadataResult> {
  const db = useDb()

  const rows = await db.select().from(repoMapping).where(
    and(
      eq(repoMapping.installationId, opts.installationId),
      eq(repoMapping.githubRepoId, opts.githubRepoId),
    ),
  ).limit(1)
  const row = rows[0]
  if (!row) return { status: 'skipped', reason: 'no-mapping' }

  if (row.disabledAt) return { status: 'skipped', reason: 'disabled' }
  if (!row.tangledRepoDid || !row.knot) return { status: 'skipped', reason: 'no-mapping' }

  // Refetch GitHub state rather than trusting the webhook body.
  const octokit = await installationOctokit(opts.installationId)
  const { data: repo } = await octokit.request('GET /repositories/{repository_id}', {
    repository_id: opts.githubRepoId,
  })
  if (repo.private) return { status: 'skipped', reason: 'private' }
  if (repo.fork) return { status: 'skipped', reason: 'fork' }

  const [, name] = repo.full_name.split('/')
  if (!name) throw new Error(`unexpected github full_name shape: ${repo.full_name}`)

  const agent = new Agent(opts.oauthSession)

  // Discover the rkey by walking the collection until we find the record
  // matching this repo's `repoDid`. Typical installs have <100 records so
  // pagination is mostly defensive.
  let cursor: string | undefined
  let found: { uri: string, cid: string, value: Record<string, unknown> } | undefined
  do {
    // eslint-disable-next-line no-await-in-loop -- sequential pagination
    const page = await agent.com.atproto.repo.listRecords({
      repo: opts.oauthSession.did,
      collection: REPO_LEXICON,
      limit: LIST_RECORDS_PAGE_SIZE,
      cursor,
    })
    for (const rec of page.data.records) {
      const value = rec.value as Record<string, unknown>
      if (value.repoDid === row.tangledRepoDid) {
        found = { uri: rec.uri, cid: rec.cid, value }
        break
      }
    }
    // Stop paging as soon as we have a match; otherwise follow the cursor.
    cursor = found ? undefined : page.data.cursor
  } while (cursor)

  if (!found) return { status: 'skipped', reason: 'no-pds-record' }

  // The rkey is the final path segment of the record's at-uri.
  const rkey = found.uri.split('/').pop()
  if (!rkey) throw new Error(`could not parse rkey from at-uri: ${found.uri}`)

  // `mergeRepoRecord` keeps a non-empty string `createdAt` from the existing
  // record, so the base value is only a fallback for a missing or malformed
  // timestamp.
  const record = mergeRepoRecord(found.value,
    { name, knot: row.knot, repoDid: row.tangledRepoDid, createdAt: new Date().toISOString() },
    {
      full_name: repo.full_name,
      description: repo.description,
      homepage: repo.homepage,
      topics: repo.topics,
    },
  )

  await agent.com.atproto.repo.putRecord({
    repo: opts.oauthSession.did,
    collection: REPO_LEXICON,
    rkey,
    record,
    swapRecord: found.cid,
  })

  // Refresh the cached display name. githubFullName is display-only (joins go
  // through githubRepoId), but the dashboard reads it.
  if (row.githubFullName !== repo.full_name) {
    await db.update(repoMapping)
      .set({ githubFullName: repo.full_name, updatedAt: new Date() })
      .where(eq(repoMapping.id, row.id))
  }

  return { status: 'synced' }
}