fork of https://github.com/sourcegraph/zoekt
1package gitindex
2
3import (
4 "bytes"
5 "fmt"
6 "io"
7 "os"
8 "os/exec"
9 "path/filepath"
10 "sync"
11 "testing"
12 "time"
13
14 "github.com/go-git/go-git/v5/plumbing"
15)
16
17// --- Close lifecycle tests ---
18
19// TestCatfileReader_DoubleClose verifies that Close is idempotent.
20// Calling Close twice must not deadlock or panic.
21func TestCatfileReader_DoubleClose(t *testing.T) {
22 t.Parallel()
23
24 repoDir, blobs := createTestRepo(t)
25 ids := []plumbing.Hash{blobs["hello.txt"]}
26
27 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
28 if err != nil {
29 t.Fatal(err)
30 }
31
32 // Consume the entry so the process can exit cleanly.
33 if _, _, _, err := cr.Next(); err != nil {
34 t.Fatal(err)
35 }
36
37 if err := cr.Close(); err != nil {
38 t.Fatalf("first Close: %v", err)
39 }
40
41 // Second Close must not deadlock or panic.
42 done := make(chan error, 1)
43 go func() {
44 done <- cr.Close()
45 }()
46
47 select {
48 case <-done:
49 // Success — whether err is nil or not, it didn't block.
50 case <-time.After(5 * time.Second):
51 t.Fatal("second Close() deadlocked — writeErr channel was never closed")
52 }
53}
54
55// TestCatfileReader_ConcurrentClose verifies that calling Close from
56// multiple goroutines simultaneously does not panic, deadlock, or
57// corrupt state.
58func TestCatfileReader_ConcurrentClose(t *testing.T) {
59 t.Parallel()
60
61 repoDir, blobs := createTestRepo(t)
62 ids := []plumbing.Hash{
63 blobs["hello.txt"],
64 blobs["large.bin"],
65 blobs["binary.bin"],
66 }
67
68 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
69 if err != nil {
70 t.Fatal(err)
71 }
72
73 // Read one entry, leave two unconsumed.
74 if _, _, _, err := cr.Next(); err != nil {
75 t.Fatal(err)
76 }
77
78 const goroutines = 5
79 var wg sync.WaitGroup
80 wg.Add(goroutines)
81 barrier := make(chan struct{})
82
83 for i := 0; i < goroutines; i++ {
84 go func() {
85 defer wg.Done()
86 <-barrier // all start at once
87 cr.Close()
88 }()
89 }
90
91 done := make(chan struct{})
92 go func() {
93 close(barrier)
94 wg.Wait()
95 close(done)
96 }()
97
98 select {
99 case <-done:
100 // All goroutines returned.
101 case <-time.After(10 * time.Second):
102 t.Fatal("concurrent Close() deadlocked")
103 }
104}
105
106// TestCatfileReader_CloseWithoutReading verifies that closing
107// immediately after creation (without reading any entries) completes
108// without hanging.
109func TestCatfileReader_CloseWithoutReading(t *testing.T) {
110 t.Parallel()
111
112 repoDir, blobs := createTestRepo(t)
113 ids := []plumbing.Hash{
114 blobs["hello.txt"],
115 blobs["large.bin"],
116 blobs["binary.bin"],
117 blobs["empty.txt"],
118 }
119
120 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
121 if err != nil {
122 t.Fatal(err)
123 }
124
125 done := make(chan error, 1)
126 go func() {
127 done <- cr.Close()
128 }()
129
130 select {
131 case err := <-done:
132 if err != nil {
133 t.Fatalf("Close: %v", err)
134 }
135 case <-time.After(10 * time.Second):
136 t.Fatal("Close() without reading any entries hung")
137 }
138}
139
140// TestCatfileReader_CloseBeforeExhausted_ManyBlobs simulates early
141// termination (e.g., builder.Add error) with many unconsumed blobs.
142// Close should complete promptly — not drain the entire git output.
143func TestCatfileReader_CloseBeforeExhausted_ManyBlobs(t *testing.T) {
144 t.Parallel()
145
146 // Create enough blobs to make a draining Close noticeable without spending
147 // most of the test runtime on shelling out for fixture setup.
148 repoDir, ids := createManyBlobRepo(t, 128, 4<<10)
149
150 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
151 if err != nil {
152 t.Fatal(err)
153 }
154
155 // Read only 1 of 200 entries.
156 if _, _, _, err := cr.Next(); err != nil {
157 t.Fatal(err)
158 }
159
160 // Close should be fast (kill, not drain). With drain it still works but
161 // is slow — we enforce a generous bound.
162 start := time.Now()
163 done := make(chan error, 1)
164 go func() {
165 done <- cr.Close()
166 }()
167
168 select {
169 case <-done:
170 elapsed := time.Since(start)
171 // With Kill: sub-millisecond. Draining 200×10KB is fast too, so we
172 // use a generous 3s bound that still catches pathological stalls.
173 if elapsed > 3*time.Second {
174 t.Errorf("Close took %v after reading 1 of 200 entries — consider killing instead of draining", elapsed)
175 }
176 case <-time.After(30 * time.Second):
177 t.Fatal("Close() deadlocked with many unconsumed blobs")
178 }
179}
180
181func createManyBlobRepo(t *testing.T, fileCount, fileSize int) (string, []plumbing.Hash) {
182 t.Helper()
183
184 dir := t.TempDir()
185 repoDir := filepath.Join(dir, "repo")
186
187 runGit(t, dir, "init", "-b", "main", "repo")
188
189 for i := 0; i < fileCount; i++ {
190 content := bytes.Repeat([]byte{byte(i)}, fileSize)
191 name := filepath.Join(repoDir, fmt.Sprintf("file_%03d.bin", i))
192 if err := os.WriteFile(name, content, 0o644); err != nil {
193 t.Fatalf("WriteFile(%q): %v", name, err)
194 }
195 }
196
197 runGit(t, repoDir, "add", ".")
198 runGit(t, repoDir, "commit", "-m", "many files")
199
200 out, err := exec.Command("git", "-C", repoDir, "ls-tree", "-r", "-z", "HEAD").Output()
201 if err != nil {
202 t.Fatalf("git ls-tree: %v", err)
203 }
204
205 ids := make([]plumbing.Hash, 0, fileCount)
206 for _, entry := range bytes.Split(out, []byte{0}) {
207 if len(entry) == 0 {
208 continue
209 }
210
211 fields := bytes.Fields(entry)
212 if len(fields) < 3 {
213 t.Fatalf("unexpected ls-tree entry %q", entry)
214 }
215
216 ids = append(ids, plumbing.NewHash(string(fields[2])))
217 }
218
219 if len(ids) != fileCount {
220 t.Fatalf("got %d blob IDs, want %d", len(ids), fileCount)
221 }
222
223 return repoDir, ids
224}
225
226// --- Read edge-case tests ---
227
228// TestCatfileReader_ReadWithoutNext verifies that calling Read
229// before calling Next returns io.EOF, not a panic or garbage data.
230func TestCatfileReader_ReadWithoutNext(t *testing.T) {
231 t.Parallel()
232
233 repoDir, blobs := createTestRepo(t)
234 ids := []plumbing.Hash{blobs["hello.txt"]}
235
236 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
237 if err != nil {
238 t.Fatal(err)
239 }
240 defer cr.Close()
241
242 buf := make([]byte, 10)
243 n, err := cr.Read(buf)
244 if n != 0 || err != io.EOF {
245 t.Fatalf("Read without Next: n=%d err=%v, want n=0 err=io.EOF", n, err)
246 }
247}
248
249// TestCatfileReader_ReadAfterFullConsumption verifies that extra Read
250// calls after a blob is fully consumed return io.EOF, not duplicate
251// data or trailing LF bytes.
252func TestCatfileReader_ReadAfterFullConsumption(t *testing.T) {
253 t.Parallel()
254
255 repoDir, blobs := createTestRepo(t)
256 ids := []plumbing.Hash{blobs["hello.txt"]}
257
258 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
259 if err != nil {
260 t.Fatal(err)
261 }
262 defer cr.Close()
263
264 size, _, _, _ := cr.Next()
265 content := make([]byte, size)
266 if _, err := io.ReadFull(cr, content); err != nil {
267 t.Fatal(err)
268 }
269
270 // Blob is fully read — additional Reads must return EOF.
271 for i := 0; i < 3; i++ {
272 buf := make([]byte, 10)
273 n, err := cr.Read(buf)
274 if n != 0 || err != io.EOF {
275 t.Fatalf("Read #%d after full consumption: n=%d err=%v, want n=0 err=io.EOF", i, n, err)
276 }
277 }
278}
279
280// TestCatfileReader_SmallBufferReads reads a blob one byte at a time
281// and verifies the entire content is reconstructed correctly without
282// any trailing LF leaking into user content.
283func TestCatfileReader_SmallBufferReads(t *testing.T) {
284 t.Parallel()
285
286 repoDir, blobs := createTestRepo(t)
287 ids := []plumbing.Hash{blobs["hello.txt"]}
288
289 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
290 if err != nil {
291 t.Fatal(err)
292 }
293 defer cr.Close()
294
295 size, _, _, _ := cr.Next()
296
297 var result []byte
298 buf := make([]byte, 1)
299 for {
300 n, err := cr.Read(buf)
301 if n > 0 {
302 result = append(result, buf[:n]...)
303 }
304 if err == io.EOF {
305 break
306 }
307 if err != nil {
308 t.Fatal(err)
309 }
310 }
311
312 if len(result) != size {
313 t.Fatalf("read %d bytes, want %d", len(result), size)
314 }
315 if string(result) != "hello world\n" {
316 t.Errorf("content = %q, want %q", result, "hello world\n")
317 }
318}
319
320// TestCatfileReader_PartialReadThenNext reads only part of a blob's
321// content, then advances to the next entry. Verifies that the discard
322// of pending bytes doesn't corrupt the stream.
323func TestCatfileReader_PartialReadThenNext(t *testing.T) {
324 t.Parallel()
325
326 repoDir, blobs := createTestRepo(t)
327 ids := []plumbing.Hash{
328 blobs["hello.txt"], // 12 bytes: "hello world\n"
329 blobs["binary.bin"], // variable, starts with 0x00
330 }
331
332 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
333 if err != nil {
334 t.Fatal(err)
335 }
336 defer cr.Close()
337
338 // Read only 5 of 12 bytes from hello.txt.
339 size, _, _, _ := cr.Next()
340 if size != 12 {
341 t.Fatalf("hello.txt size = %d, want 12", size)
342 }
343 partial := make([]byte, 5)
344 if _, err := io.ReadFull(cr, partial); err != nil {
345 t.Fatal(err)
346 }
347 if string(partial) != "hello" {
348 t.Fatalf("partial = %q, want %q", partial, "hello")
349 }
350
351 // Advance — must discard remaining 7 content bytes + trailing LF.
352 size, _, _, err = cr.Next()
353 if err != nil {
354 t.Fatalf("Next binary.bin after partial read: %v", err)
355 }
356
357 // Verify binary.bin content is intact.
358 content := make([]byte, size)
359 if _, err := io.ReadFull(cr, content); err != nil {
360 t.Fatal(err)
361 }
362 if content[0] != 0x00 {
363 t.Errorf("binary.bin first byte = 0x%02x after partial-read skip, want 0x00", content[0])
364 }
365}
366
367// TestCatfileReader_PartialReadExactlyOneByteShort reads size-1 bytes
368// from a blob. The pending field should be exactly 2 (1 content byte +
369// 1 trailing LF). This stresses the boundary between content and LF
370// in the discard path.
371func TestCatfileReader_PartialReadExactlyOneByteShort(t *testing.T) {
372 t.Parallel()
373
374 repoDir, blobs := createTestRepo(t)
375 ids := []plumbing.Hash{
376 blobs["hello.txt"], // 12 bytes
377 blobs["binary.bin"], // starts with 0x00
378 }
379
380 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
381 if err != nil {
382 t.Fatal(err)
383 }
384 defer cr.Close()
385
386 size, _, _, _ := cr.Next()
387 // Read exactly size-1 bytes — leaves 1 content byte + trailing LF.
388 buf := make([]byte, size-1)
389 if _, err := io.ReadFull(cr, buf); err != nil {
390 t.Fatal(err)
391 }
392 if string(buf) != "hello world" { // missing final \n
393 t.Fatalf("partial = %q", buf)
394 }
395
396 // Advance — pending should be 2 (1 content byte + 1 LF). The
397 // Discard call must handle this exact boundary correctly.
398 size, missing, excluded, err := cr.Next()
399 if err != nil {
400 t.Fatalf("Next after size-1 partial read: %v", err)
401 }
402 if missing || excluded {
403 t.Fatal("binary.bin unexpectedly missing")
404 }
405
406 // Read binary.bin to verify stream integrity.
407 content := make([]byte, size)
408 if _, err := io.ReadFull(cr, content); err != nil {
409 t.Fatal(err)
410 }
411 if content[0] != 0x00 {
412 t.Errorf("binary.bin[0] = 0x%02x after boundary skip, want 0x00", content[0])
413 }
414}
415
416// --- Empty / degenerate input tests ---
417
418// TestCatfileReader_EmptyIds verifies that an empty id slice produces
419// immediate EOF without errors.
420func TestCatfileReader_EmptyIds(t *testing.T) {
421 t.Parallel()
422
423 repoDir, _ := createTestRepo(t)
424
425 cr, err := newCatfileReader(repoDir, nil, catfileReaderOptions{})
426 if err != nil {
427 t.Fatal(err)
428 }
429 defer cr.Close()
430
431 _, _, _, err = cr.Next()
432 if err != io.EOF {
433 t.Fatalf("expected io.EOF for empty ids, got %v", err)
434 }
435}
436
437// TestCatfileReader_MultipleEmptyBlobs stresses the trailing-LF
438// handling for size-0 blobs. Git still outputs a LF after a 0-byte
439// blob body. Repeated empty blobs test the pending=1 discard path.
440func TestCatfileReader_MultipleEmptyBlobs(t *testing.T) {
441 t.Parallel()
442
443 repoDir, blobs := createTestRepo(t)
444
445 // Send the empty blob SHA 5 times — git outputs each independently.
446 emptyID := blobs["empty.txt"]
447 ids := []plumbing.Hash{emptyID, emptyID, emptyID, emptyID, emptyID}
448
449 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
450 if err != nil {
451 t.Fatal(err)
452 }
453 defer cr.Close()
454
455 for i := range ids {
456 size, missing, excluded, err := cr.Next()
457 if err != nil {
458 t.Fatalf("Next #%d: %v", i, err)
459 }
460 if missing || excluded {
461 t.Fatalf("#%d unexpectedly missing", i)
462 }
463 if size != 0 {
464 t.Fatalf("#%d size = %d, want 0", i, size)
465 }
466 // Don't read — Next should discard the trailing LF for us.
467 }
468
469 _, _, _, err = cr.Next()
470 if err != io.EOF {
471 t.Fatalf("expected EOF after %d empty blobs, got %v", len(ids), err)
472 }
473}
474
475// TestCatfileReader_EmptyBlobRead verifies that reading a 0-byte blob
476// through the io.Reader interface returns 0 bytes and io.EOF, and that
477// the trailing LF is consumed transparently.
478func TestCatfileReader_EmptyBlobRead(t *testing.T) {
479 t.Parallel()
480
481 repoDir, blobs := createTestRepo(t)
482 ids := []plumbing.Hash{
483 blobs["empty.txt"], // 0 bytes
484 blobs["hello.txt"], // 12 bytes — sentinel
485 }
486
487 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
488 if err != nil {
489 t.Fatal(err)
490 }
491 defer cr.Close()
492
493 size, _, _, _ := cr.Next()
494 if size != 0 {
495 t.Fatalf("empty.txt size = %d", size)
496 }
497
498 // Explicitly Read on the 0-byte blob.
499 buf := make([]byte, 10)
500 n, err := cr.Read(buf)
501 if n != 0 || err != io.EOF {
502 t.Fatalf("Read empty blob: n=%d err=%v, want n=0 err=io.EOF", n, err)
503 }
504
505 // The trailing LF must have been consumed. Verify by reading the
506 // next entry — if the LF leaked, the header parse would fail.
507 size, _, _, err = cr.Next()
508 if err != nil {
509 t.Fatalf("Next hello.txt after empty blob Read: %v", err)
510 }
511 if size != 12 {
512 t.Fatalf("hello.txt size = %d, want 12", size)
513 }
514 content := make([]byte, size)
515 if _, err := io.ReadFull(cr, content); err != nil {
516 t.Fatal(err)
517 }
518 if string(content) != "hello world\n" {
519 t.Errorf("hello.txt = %q", content)
520 }
521}
522
523// --- Missing object edge cases ---
524
525// TestCatfileReader_AllMissing verifies that a sequence of entirely
526// missing objects is handled gracefully — no errors, no panics, just
527// missing=true for each followed by EOF.
528func TestCatfileReader_AllMissing(t *testing.T) {
529 t.Parallel()
530
531 repoDir, _ := createTestRepo(t)
532
533 ids := []plumbing.Hash{
534 plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"),
535 plumbing.NewHash("1111111111111111111111111111111111111111"),
536 plumbing.NewHash("2222222222222222222222222222222222222222"),
537 }
538
539 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
540 if err != nil {
541 t.Fatal(err)
542 }
543 defer cr.Close()
544
545 for i, id := range ids {
546 _, missing, excluded, err := cr.Next()
547 if err != nil {
548 t.Fatalf("Next #%d (%s): %v", i, id, err)
549 }
550 if excluded {
551 t.Errorf("expected #%d (%s) to be missing, not excluded", i, id)
552 }
553 if !missing {
554 t.Errorf("expected #%d (%s) to be missing", i, id)
555 }
556 }
557
558 _, _, _, err = cr.Next()
559 if err != io.EOF {
560 t.Fatalf("expected EOF after all missing, got %v", err)
561 }
562}
563
564// TestCatfileReader_AlternatingMissingPresent interleaves missing and
565// present objects, verifying that stream alignment is maintained.
566func TestCatfileReader_AlternatingMissingPresent(t *testing.T) {
567 t.Parallel()
568
569 repoDir, blobs := createTestRepo(t)
570
571 fake1 := plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
572 fake2 := plumbing.NewHash("1111111111111111111111111111111111111111")
573
574 ids := []plumbing.Hash{
575 fake1,
576 blobs["hello.txt"],
577 fake2,
578 blobs["empty.txt"],
579 blobs["binary.bin"],
580 }
581
582 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
583 if err != nil {
584 t.Fatal(err)
585 }
586 defer cr.Close()
587
588 // fake1 — missing
589 _, missing, excluded, err := cr.Next()
590 if err != nil || !missing || excluded {
591 t.Fatalf("fake1: err=%v missing=%v excluded=%v", err, missing, excluded)
592 }
593
594 // hello.txt — present, read it
595 size, missing, excluded, err := cr.Next()
596 if err != nil || missing || excluded {
597 t.Fatalf("hello.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
598 }
599 content := make([]byte, size)
600 if _, err := io.ReadFull(cr, content); err != nil {
601 t.Fatal(err)
602 }
603 if string(content) != "hello world\n" {
604 t.Errorf("hello.txt = %q", content)
605 }
606
607 // fake2 — missing
608 _, missing, excluded, err = cr.Next()
609 if err != nil || !missing || excluded {
610 t.Fatalf("fake2: err=%v missing=%v excluded=%v", err, missing, excluded)
611 }
612
613 // empty.txt — present, skip it
614 size, missing, excluded, err = cr.Next()
615 if err != nil || missing || excluded {
616 t.Fatalf("empty.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
617 }
618 if size != 0 {
619 t.Errorf("empty.txt size = %d", size)
620 }
621
622 // binary.bin — present, read it
623 size, missing, excluded, err = cr.Next()
624 if err != nil || missing || excluded {
625 t.Fatalf("binary.bin: err=%v missing=%v excluded=%v", err, missing, excluded)
626 }
627 binContent := make([]byte, size)
628 if _, err := io.ReadFull(cr, binContent); err != nil {
629 t.Fatal(err)
630 }
631 if binContent[0] != 0x00 {
632 t.Errorf("binary.bin[0] = 0x%02x, want 0x00", binContent[0])
633 }
634
635 _, _, _, err = cr.Next()
636 if err != io.EOF {
637 t.Fatalf("expected EOF, got %v", err)
638 }
639}
640
641// TestCatfileReader_MissingThenSkip verifies that a missing object
642// followed by a present but skipped (unread) object doesn't corrupt
643// the stream. Missing objects have no content body, so there must be
644// no stale pending bytes interfering with the next header read.
645func TestCatfileReader_MissingThenSkip(t *testing.T) {
646 t.Parallel()
647
648 repoDir, blobs := createTestRepo(t)
649
650 fake := plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
651 ids := []plumbing.Hash{
652 fake,
653 blobs["large.bin"], // 64KB — skip without reading
654 blobs["hello.txt"], // sentinel — read to verify integrity
655 }
656
657 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
658 if err != nil {
659 t.Fatal(err)
660 }
661 defer cr.Close()
662
663 // missing
664 _, missing, excluded, _ := cr.Next()
665 if !missing || excluded {
666 t.Fatal("expected missing")
667 }
668
669 // large.bin — skip
670 size, missing, excluded, err := cr.Next()
671 if err != nil || missing || excluded {
672 t.Fatalf("large.bin: err=%v missing=%v excluded=%v", err, missing, excluded)
673 }
674 if size != 64*1024 {
675 t.Fatalf("large.bin size = %d", size)
676 }
677 // deliberately don't read
678
679 // hello.txt — read after missing+skip
680 size, missing, excluded, err = cr.Next()
681 if err != nil || missing || excluded {
682 t.Fatalf("hello.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
683 }
684 content := make([]byte, size)
685 if _, err := io.ReadFull(cr, content); err != nil {
686 t.Fatal(err)
687 }
688 if string(content) != "hello world\n" {
689 t.Errorf("hello.txt = %q", content)
690 }
691}
692
693// --- Next() edge cases ---
694
695// TestCatfileReader_RepeatedNextAfterEOF verifies that calling Next
696// after EOF keeps returning EOF — not a panic, not a different error.
697func TestCatfileReader_RepeatedNextAfterEOF(t *testing.T) {
698 t.Parallel()
699
700 repoDir, blobs := createTestRepo(t)
701 ids := []plumbing.Hash{blobs["hello.txt"]}
702
703 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
704 if err != nil {
705 t.Fatal(err)
706 }
707 defer cr.Close()
708
709 // Consume and skip the only entry.
710 if _, _, _, err := cr.Next(); err != nil {
711 t.Fatal(err)
712 }
713
714 // First EOF.
715 _, _, _, err = cr.Next()
716 if err != io.EOF {
717 t.Fatalf("first post-exhaust Next: %v, want io.EOF", err)
718 }
719
720 // Second and third EOF — must be stable.
721 for i := 0; i < 2; i++ {
722 _, _, _, err = cr.Next()
723 if err != io.EOF {
724 t.Fatalf("Next #%d after EOF: %v, want io.EOF", i+2, err)
725 }
726 }
727}
728
729// --- Large blob precision tests ---
730
731// TestCatfileReader_LargeBlobBytePrecision verifies that a 64KB blob
732// is read with byte-exact precision — no off-by-one from trailing LF
733// handling, no truncation, no extra bytes.
734func TestCatfileReader_LargeBlobBytePrecision(t *testing.T) {
735 t.Parallel()
736
737 repoDir, blobs := createTestRepo(t)
738 ids := []plumbing.Hash{blobs["large.bin"]}
739
740 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
741 if err != nil {
742 t.Fatal(err)
743 }
744 defer cr.Close()
745
746 size, _, _, err := cr.Next()
747 if err != nil {
748 t.Fatal(err)
749 }
750 if size != 64*1024 {
751 t.Fatalf("size = %d, want %d", size, 64*1024)
752 }
753
754 // Read the full blob content.
755 content := make([]byte, size)
756 n, err := io.ReadFull(cr, content)
757 if err != nil {
758 t.Fatalf("ReadFull: %v (read %d of %d)", err, n, size)
759 }
760 if n != size {
761 t.Fatalf("read %d bytes, want %d", n, size)
762 }
763
764 // Verify git agrees on the content via cat-file -p.
765 expected, err := exec.Command("git", "-C", repoDir, "cat-file", "-p", blobs["large.bin"].String()).Output()
766 if err != nil {
767 t.Fatalf("git cat-file -p: %v", err)
768 }
769 if !bytes.Equal(content, expected) {
770 t.Errorf("content mismatch: got %d bytes, git says %d bytes", len(content), len(expected))
771 // Find first divergence.
772 for i := range content {
773 if i >= len(expected) || content[i] != expected[i] {
774 t.Errorf("first diff at byte %d: got 0x%02x, want 0x%02x", i, content[i], expected[i])
775 break
776 }
777 }
778 }
779}
780
781// TestCatfileReader_LargeBlobChunkedRead reads a 64KB blob in 997-byte
782// chunks (a prime number that doesn't align with any power-of-2 buffer)
783// to verify no byte is lost or duplicated across read boundaries.
784func TestCatfileReader_LargeBlobChunkedRead(t *testing.T) {
785 t.Parallel()
786
787 repoDir, blobs := createTestRepo(t)
788 ids := []plumbing.Hash{blobs["large.bin"]}
789
790 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
791 if err != nil {
792 t.Fatal(err)
793 }
794 defer cr.Close()
795
796 size, _, _, _ := cr.Next()
797 if size != 64*1024 {
798 t.Fatalf("size = %d", size)
799 }
800
801 var result bytes.Buffer
802 buf := make([]byte, 997) // prime-sized chunks
803 for {
804 n, err := cr.Read(buf)
805 if n > 0 {
806 result.Write(buf[:n])
807 }
808 if err == io.EOF {
809 break
810 }
811 if err != nil {
812 t.Fatal(err)
813 }
814 }
815
816 if result.Len() != size {
817 t.Fatalf("total read = %d, want %d", result.Len(), size)
818 }
819
820 // Cross-check with git.
821 expected, _ := exec.Command("git", "-C", repoDir, "cat-file", "-p", blobs["large.bin"].String()).Output()
822 if !bytes.Equal(result.Bytes(), expected) {
823 t.Error("chunked read content differs from git cat-file -p output")
824 }
825}
826
827// --- Duplicate SHA test ---
828
829// TestCatfileReader_DuplicateSHAs verifies that requesting the same
830// SHA multiple times works — git cat-file --batch outputs the object
831// for each request independently.
832func TestCatfileReader_DuplicateSHAs(t *testing.T) {
833 t.Parallel()
834
835 repoDir, blobs := createTestRepo(t)
836
837 sha := blobs["hello.txt"]
838 ids := []plumbing.Hash{sha, sha, sha}
839
840 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
841 if err != nil {
842 t.Fatal(err)
843 }
844 defer cr.Close()
845
846 for i := 0; i < 3; i++ {
847 size, missing, excluded, err := cr.Next()
848 if err != nil {
849 t.Fatalf("Next #%d: %v", i, err)
850 }
851 if missing || excluded {
852 t.Fatalf("#%d unexpectedly missing", i)
853 }
854 if size != 12 {
855 t.Fatalf("#%d size = %d, want 12", i, size)
856 }
857 content := make([]byte, size)
858 if _, err := io.ReadFull(cr, content); err != nil {
859 t.Fatal(err)
860 }
861 if string(content) != "hello world\n" {
862 t.Errorf("#%d content = %q", i, content)
863 }
864 }
865
866 _, _, _, err = cr.Next()
867 if err != io.EOF {
868 t.Fatalf("expected EOF, got %v", err)
869 }
870}