···689689func indexCatfileBlobs(cr *catfileReader, keys []fileKey, repos map[fileKey]BlobLocation, opts Options, builder *index.Builder) error {
690690 defer cr.Close()
691691692692+ slab := newContentSlab(16 << 20) // 16 MB per slab
693693+692694 for idx, key := range keys {
693695 size, missing, excluded, err := cr.Next()
694696 if err != nil {
···711713 // Skip without reading content into memory.
712714 doc = skippedDoc(key, branches, index.SkipReasonTooLarge)
713715 } else {
714714- // Pre-allocate and read the full blob content in one call.
715715- // io.ReadFull is preferred over io.LimitedReader here as it
716716- // avoids the intermediate allocation and the size is known.
717717- content := make([]byte, size)
716716+ content := slab.alloc(size)
718717 if _, err := io.ReadFull(cr, content); err != nil {
719718 return fmt.Errorf("read blob %s: %w", keyFullPath, err)
720719 }
+32
gitindex/slab.go
···11+package gitindex
// contentSlab is a bump allocator for file contents. Rather than paying
// for one heap allocation per file, it carves consecutive regions out of
// a large shared buffer. Every slice it hands out is a full (3-index)
// slice expression whose capacity equals its length, so an append on one
// file's content reallocates instead of spilling into its neighbour.
// Requests bigger than a whole slab bypass the buffer and are allocated
// individually. Note that a returned slice keeps the slab it was carved
// from reachable for as long as the slice itself is referenced.
type contentSlab struct {
	buf []byte // current slab; len(buf) is the number of bytes handed out so far
	cap int    // capacity used for every slab, and the largest request served from one
}

// newContentSlab creates a slab allocator whose buffers hold slabCap
// bytes each. The first buffer is allocated eagerly.
func newContentSlab(slabCap int) contentSlab {
	var s contentSlab
	s.cap = slabCap
	s.buf = make([]byte, 0, slabCap)
	return s
}

// alloc hands out a byte slice with len == cap == n. Callers are expected
// to fill the slice right away. When the current slab cannot fit n more
// bytes, a fresh slab is started and the unused tail of the old one is
// abandoned.
func (s *contentSlab) alloc(n int) []byte {
	used := len(s.buf)
	switch {
	case n > s.cap:
		// Too big for any slab: give the request its own backing array.
		return make([]byte, n)
	case used+n > cap(s.buf):
		// Not enough room left: begin a new slab with this request at its head.
		s.buf = make([]byte, n, s.cap)
		return s.buf[:n:n]
	default:
		// Extend the handed-out region and return the newly claimed window.
		end := used + n
		s.buf = s.buf[:end]
		return s.buf[used:end:end]
	}
}
+72
gitindex/slab_test.go
···11+package gitindex
22+33+import "testing"
44+55+func TestContentSlab(t *testing.T) {
66+ t.Run("fits in slab", func(t *testing.T) {
77+ s := newContentSlab(1024)
88+ b := s.alloc(100)
99+ if len(b) != 100 {
1010+ t.Fatalf("len = %d, want 100", len(b))
1111+ }
1212+ if cap(b) != 100 {
1313+ t.Fatalf("cap = %d, want 100 (3-index slice)", cap(b))
1414+ }
1515+ })
1616+1717+ t.Run("cap is capped so append cannot corrupt adjacent data", func(t *testing.T) {
1818+ s := newContentSlab(1024)
1919+ a := s.alloc(10)
2020+ copy(a, []byte("aaaaaaaaaa"))
2121+2222+ b := s.alloc(10)
2323+ copy(b, []byte("bbbbbbbbbb"))
2424+2525+ // Appending to a must not overwrite b.
2626+ a = append(a, 'X') // triggers new backing array since cap==len
2727+ if string(b) != "bbbbbbbbbb" {
2828+ t.Fatalf("adjacent data corrupted: got %q", b)
2929+ }
3030+ _ = a
3131+ })
3232+3333+ t.Run("slab rollover", func(t *testing.T) {
3434+ s := newContentSlab(64)
3535+ a := s.alloc(60)
3636+ if len(a) != 60 || cap(a) != 60 {
3737+ t.Fatalf("a: len=%d cap=%d", len(a), cap(a))
3838+ }
3939+ // Next alloc doesn't fit in remaining 4 bytes → new slab.
4040+ b := s.alloc(10)
4141+ if len(b) != 10 || cap(b) != 10 {
4242+ t.Fatalf("b: len=%d cap=%d", len(b), cap(b))
4343+ }
4444+ // a and b should not share backing arrays.
4545+ copy(a, make([]byte, 60))
4646+ copy(b, []byte("0123456789"))
4747+ if string(b) != "0123456789" {
4848+ t.Fatal("rollover corrupted data")
4949+ }
5050+ })
5151+5252+ t.Run("oversized allocation", func(t *testing.T) {
5353+ s := newContentSlab(64)
5454+ b := s.alloc(128)
5555+ if len(b) != 128 {
5656+ t.Fatalf("len = %d, want 128", len(b))
5757+ }
5858+ // Oversized alloc should not consume slab space.
5959+ c := s.alloc(32)
6060+ if len(c) != 32 || cap(c) != 32 {
6161+ t.Fatalf("c: len=%d cap=%d", len(c), cap(c))
6262+ }
6363+ })
6464+6565+ t.Run("zero size", func(t *testing.T) {
6666+ s := newContentSlab(64)
6767+ b := s.alloc(0)
6868+ if len(b) != 0 {
6969+ t.Fatalf("len = %d, want 0", len(b))
7070+ }
7171+ })
7272+}
+9-4
index/builder.go
···37373838 "github.com/bmatcuk/doublestar"
3939 "github.com/dustin/go-humanize"
4040- "github.com/go-enry/go-enry/v2"
4140 "github.com/rs/xid"
4241 "golang.org/x/sys/unix"
4342···625624 doc.SkipReason = skip
626625 }
627626627627+ // Pre-compute file category and language while content is still
628628+ // available, before content is dropped for skipped documents.
629629+ DetermineFileCategory(&doc)
630630+ DetermineLanguageIfUnknown(&doc)
631631+628632 b.todo = append(b.todo, &doc)
629633630634 if doc.SkipReason == SkipReasonNone {
···888892 skipped = 1.0
889893 }
890894895895+ // Use pre-computed Category from DetermineFileCategory.
891896 generated := 0.0
892892- if enry.IsGenerated(d.Name, d.Content) {
897897+ if d.Category == FileCategoryGenerated {
893898 generated = 1.0
894899 }
895900896901 vendor := 0.0
897897- if enry.IsVendor(d.Name) {
902902+ if d.Category == FileCategoryVendored {
898903 vendor = 1.0
899904 }
900905901906 test := 0.0
902902- if enry.IsTest(d.Name) {
907907+ if d.Category == FileCategoryTest {
903908 test = 1.0
904909 }
905910
+3
index/builder_test.go
···1084108410851085 got := make([]*Document, len(c.docs))
10861086 copy(got, c.docs)
10871087+ for _, d := range got {
10881088+ DetermineFileCategory(d)
10891089+ }
10871090 sortDocuments(got)
1088109110891092 print := func(ds []*Document) string {