fork of https://github.com/sourcegraph/zoekt
1package gitindex
2
3import (
4 "bytes"
5 "fmt"
6 "io"
7 "os"
8 "os/exec"
9 "path/filepath"
10 "sync"
11 "testing"
12 "time"
13
14 "github.com/go-git/go-git/v5/plumbing"
15)
16
17// --- Close lifecycle tests ---
18
19// TestCatfileReader_DoubleClose verifies that Close is idempotent.
20// Calling Close twice must not deadlock or panic.
21func TestCatfileReader_DoubleClose(t *testing.T) {
22 repoDir, blobs := createTestRepo(t)
23 ids := []plumbing.Hash{blobs["hello.txt"]}
24
25 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
26 if err != nil {
27 t.Fatal(err)
28 }
29
30 // Consume the entry so the process can exit cleanly.
31 if _, _, _, err := cr.Next(); err != nil {
32 t.Fatal(err)
33 }
34
35 if err := cr.Close(); err != nil {
36 t.Fatalf("first Close: %v", err)
37 }
38
39 // Second Close must not deadlock or panic.
40 done := make(chan error, 1)
41 go func() {
42 done <- cr.Close()
43 }()
44
45 select {
46 case <-done:
47 // Success — whether err is nil or not, it didn't block.
48 case <-time.After(5 * time.Second):
49 t.Fatal("second Close() deadlocked — writeErr channel was never closed")
50 }
51}
52
53// TestCatfileReader_ConcurrentClose verifies that calling Close from
54// multiple goroutines simultaneously does not panic, deadlock, or
55// corrupt state.
56func TestCatfileReader_ConcurrentClose(t *testing.T) {
57 repoDir, blobs := createTestRepo(t)
58 ids := []plumbing.Hash{
59 blobs["hello.txt"],
60 blobs["large.bin"],
61 blobs["binary.bin"],
62 }
63
64 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
65 if err != nil {
66 t.Fatal(err)
67 }
68
69 // Read one entry, leave two unconsumed.
70 if _, _, _, err := cr.Next(); err != nil {
71 t.Fatal(err)
72 }
73
74 const goroutines = 5
75 var wg sync.WaitGroup
76 wg.Add(goroutines)
77 barrier := make(chan struct{})
78
79 for i := 0; i < goroutines; i++ {
80 go func() {
81 defer wg.Done()
82 <-barrier // all start at once
83 cr.Close()
84 }()
85 }
86
87 done := make(chan struct{})
88 go func() {
89 close(barrier)
90 wg.Wait()
91 close(done)
92 }()
93
94 select {
95 case <-done:
96 // All goroutines returned.
97 case <-time.After(10 * time.Second):
98 t.Fatal("concurrent Close() deadlocked")
99 }
100}
101
102// TestCatfileReader_CloseWithoutReading verifies that closing
103// immediately after creation (without reading any entries) completes
104// without hanging.
105func TestCatfileReader_CloseWithoutReading(t *testing.T) {
106 repoDir, blobs := createTestRepo(t)
107 ids := []plumbing.Hash{
108 blobs["hello.txt"],
109 blobs["large.bin"],
110 blobs["binary.bin"],
111 blobs["empty.txt"],
112 }
113
114 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
115 if err != nil {
116 t.Fatal(err)
117 }
118
119 done := make(chan error, 1)
120 go func() {
121 done <- cr.Close()
122 }()
123
124 select {
125 case err := <-done:
126 if err != nil {
127 t.Fatalf("Close: %v", err)
128 }
129 case <-time.After(10 * time.Second):
130 t.Fatal("Close() without reading any entries hung")
131 }
132}
133
134// TestCatfileReader_CloseBeforeExhausted_ManyBlobs simulates early
135// termination (e.g., builder.Add error) with many unconsumed blobs.
136// Close should complete promptly — not drain the entire git output.
137func TestCatfileReader_CloseBeforeExhausted_ManyBlobs(t *testing.T) {
138 // Create a repo with many non-trivial files.
139 dir := t.TempDir()
140 repoDir := filepath.Join(dir, "repo")
141
142 script := `
143set -e
144git init -b main repo
145cd repo
146git config user.email "test@test.com"
147git config user.name "Test"
148for i in $(seq 1 200); do
149 dd if=/dev/urandom bs=1024 count=10 of="file_$i.bin" 2>/dev/null
150done
151git add -A
152git commit -m "many files"
153`
154 cmd := exec.Command("/bin/sh", "-c", script)
155 cmd.Dir = dir
156 cmd.Stderr = os.Stderr
157 if err := cmd.Run(); err != nil {
158 t.Fatalf("create test repo: %v", err)
159 }
160
161 var ids []plumbing.Hash
162 for i := 1; i <= 200; i++ {
163 name := fmt.Sprintf("file_%d.bin", i)
164 out, err := exec.Command("git", "-C", repoDir, "rev-parse", "HEAD:"+name).Output()
165 if err != nil {
166 t.Fatalf("rev-parse %s: %v", name, err)
167 }
168 ids = append(ids, plumbing.NewHash(string(out[:len(out)-1])))
169 }
170
171 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
172 if err != nil {
173 t.Fatal(err)
174 }
175
176 // Read only 1 of 200 entries.
177 if _, _, _, err := cr.Next(); err != nil {
178 t.Fatal(err)
179 }
180
181 // Close should be fast (kill, not drain). With drain it still works but
182 // is slow — we enforce a generous bound.
183 start := time.Now()
184 done := make(chan error, 1)
185 go func() {
186 done <- cr.Close()
187 }()
188
189 select {
190 case <-done:
191 elapsed := time.Since(start)
192 // With Kill: sub-millisecond. Draining 200×10KB is fast too, so we
193 // use a generous 3s bound that still catches pathological stalls.
194 if elapsed > 3*time.Second {
195 t.Errorf("Close took %v after reading 1 of 200 entries — consider killing instead of draining", elapsed)
196 }
197 case <-time.After(30 * time.Second):
198 t.Fatal("Close() deadlocked with many unconsumed blobs")
199 }
200}
201
202// --- Read edge-case tests ---
203
204// TestCatfileReader_ReadWithoutNext verifies that calling Read
205// before calling Next returns io.EOF, not a panic or garbage data.
206func TestCatfileReader_ReadWithoutNext(t *testing.T) {
207 repoDir, blobs := createTestRepo(t)
208 ids := []plumbing.Hash{blobs["hello.txt"]}
209
210 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
211 if err != nil {
212 t.Fatal(err)
213 }
214 defer cr.Close()
215
216 buf := make([]byte, 10)
217 n, err := cr.Read(buf)
218 if n != 0 || err != io.EOF {
219 t.Fatalf("Read without Next: n=%d err=%v, want n=0 err=io.EOF", n, err)
220 }
221}
222
223// TestCatfileReader_ReadAfterFullConsumption verifies that extra Read
224// calls after a blob is fully consumed return io.EOF, not duplicate
225// data or trailing LF bytes.
226func TestCatfileReader_ReadAfterFullConsumption(t *testing.T) {
227 repoDir, blobs := createTestRepo(t)
228 ids := []plumbing.Hash{blobs["hello.txt"]}
229
230 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
231 if err != nil {
232 t.Fatal(err)
233 }
234 defer cr.Close()
235
236 size, _, _, _ := cr.Next()
237 content := make([]byte, size)
238 if _, err := io.ReadFull(cr, content); err != nil {
239 t.Fatal(err)
240 }
241
242 // Blob is fully read — additional Reads must return EOF.
243 for i := 0; i < 3; i++ {
244 buf := make([]byte, 10)
245 n, err := cr.Read(buf)
246 if n != 0 || err != io.EOF {
247 t.Fatalf("Read #%d after full consumption: n=%d err=%v, want n=0 err=io.EOF", i, n, err)
248 }
249 }
250}
251
252// TestCatfileReader_SmallBufferReads reads a blob one byte at a time
253// and verifies the entire content is reconstructed correctly without
254// any trailing LF leaking into user content.
255func TestCatfileReader_SmallBufferReads(t *testing.T) {
256 repoDir, blobs := createTestRepo(t)
257 ids := []plumbing.Hash{blobs["hello.txt"]}
258
259 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
260 if err != nil {
261 t.Fatal(err)
262 }
263 defer cr.Close()
264
265 size, _, _, _ := cr.Next()
266
267 var result []byte
268 buf := make([]byte, 1)
269 for {
270 n, err := cr.Read(buf)
271 if n > 0 {
272 result = append(result, buf[:n]...)
273 }
274 if err == io.EOF {
275 break
276 }
277 if err != nil {
278 t.Fatal(err)
279 }
280 }
281
282 if len(result) != size {
283 t.Fatalf("read %d bytes, want %d", len(result), size)
284 }
285 if string(result) != "hello world\n" {
286 t.Errorf("content = %q, want %q", result, "hello world\n")
287 }
288}
289
290// TestCatfileReader_PartialReadThenNext reads only part of a blob's
291// content, then advances to the next entry. Verifies that the discard
292// of pending bytes doesn't corrupt the stream.
293func TestCatfileReader_PartialReadThenNext(t *testing.T) {
294 repoDir, blobs := createTestRepo(t)
295 ids := []plumbing.Hash{
296 blobs["hello.txt"], // 12 bytes: "hello world\n"
297 blobs["binary.bin"], // variable, starts with 0x00
298 }
299
300 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
301 if err != nil {
302 t.Fatal(err)
303 }
304 defer cr.Close()
305
306 // Read only 5 of 12 bytes from hello.txt.
307 size, _, _, _ := cr.Next()
308 if size != 12 {
309 t.Fatalf("hello.txt size = %d, want 12", size)
310 }
311 partial := make([]byte, 5)
312 if _, err := io.ReadFull(cr, partial); err != nil {
313 t.Fatal(err)
314 }
315 if string(partial) != "hello" {
316 t.Fatalf("partial = %q, want %q", partial, "hello")
317 }
318
319 // Advance — must discard remaining 7 content bytes + trailing LF.
320 size, _, _, err = cr.Next()
321 if err != nil {
322 t.Fatalf("Next binary.bin after partial read: %v", err)
323 }
324
325 // Verify binary.bin content is intact.
326 content := make([]byte, size)
327 if _, err := io.ReadFull(cr, content); err != nil {
328 t.Fatal(err)
329 }
330 if content[0] != 0x00 {
331 t.Errorf("binary.bin first byte = 0x%02x after partial-read skip, want 0x00", content[0])
332 }
333}
334
335// TestCatfileReader_PartialReadExactlyOneByteShort reads size-1 bytes
336// from a blob. The pending field should be exactly 2 (1 content byte +
337// 1 trailing LF). This stresses the boundary between content and LF
338// in the discard path.
339func TestCatfileReader_PartialReadExactlyOneByteShort(t *testing.T) {
340 repoDir, blobs := createTestRepo(t)
341 ids := []plumbing.Hash{
342 blobs["hello.txt"], // 12 bytes
343 blobs["binary.bin"], // starts with 0x00
344 }
345
346 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
347 if err != nil {
348 t.Fatal(err)
349 }
350 defer cr.Close()
351
352 size, _, _, _ := cr.Next()
353 // Read exactly size-1 bytes — leaves 1 content byte + trailing LF.
354 buf := make([]byte, size-1)
355 if _, err := io.ReadFull(cr, buf); err != nil {
356 t.Fatal(err)
357 }
358 if string(buf) != "hello world" { // missing final \n
359 t.Fatalf("partial = %q", buf)
360 }
361
362 // Advance — pending should be 2 (1 content byte + 1 LF). The
363 // Discard call must handle this exact boundary correctly.
364 size, missing, excluded, err := cr.Next()
365 if err != nil {
366 t.Fatalf("Next after size-1 partial read: %v", err)
367 }
368 if missing || excluded {
369 t.Fatal("binary.bin unexpectedly missing")
370 }
371
372 // Read binary.bin to verify stream integrity.
373 content := make([]byte, size)
374 if _, err := io.ReadFull(cr, content); err != nil {
375 t.Fatal(err)
376 }
377 if content[0] != 0x00 {
378 t.Errorf("binary.bin[0] = 0x%02x after boundary skip, want 0x00", content[0])
379 }
380}
381
382// --- Empty / degenerate input tests ---
383
384// TestCatfileReader_EmptyIds verifies that an empty id slice produces
385// immediate EOF without errors.
386func TestCatfileReader_EmptyIds(t *testing.T) {
387 repoDir, _ := createTestRepo(t)
388
389 cr, err := newCatfileReader(repoDir, nil, catfileReaderOptions{})
390 if err != nil {
391 t.Fatal(err)
392 }
393 defer cr.Close()
394
395 _, _, _, err = cr.Next()
396 if err != io.EOF {
397 t.Fatalf("expected io.EOF for empty ids, got %v", err)
398 }
399}
400
401// TestCatfileReader_MultipleEmptyBlobs stresses the trailing-LF
402// handling for size-0 blobs. Git still outputs a LF after a 0-byte
403// blob body. Repeated empty blobs test the pending=1 discard path.
404func TestCatfileReader_MultipleEmptyBlobs(t *testing.T) {
405 repoDir, blobs := createTestRepo(t)
406
407 // Send the empty blob SHA 5 times — git outputs each independently.
408 emptyID := blobs["empty.txt"]
409 ids := []plumbing.Hash{emptyID, emptyID, emptyID, emptyID, emptyID}
410
411 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
412 if err != nil {
413 t.Fatal(err)
414 }
415 defer cr.Close()
416
417 for i := range ids {
418 size, missing, excluded, err := cr.Next()
419 if err != nil {
420 t.Fatalf("Next #%d: %v", i, err)
421 }
422 if missing || excluded {
423 t.Fatalf("#%d unexpectedly missing", i)
424 }
425 if size != 0 {
426 t.Fatalf("#%d size = %d, want 0", i, size)
427 }
428 // Don't read — Next should discard the trailing LF for us.
429 }
430
431 _, _, _, err = cr.Next()
432 if err != io.EOF {
433 t.Fatalf("expected EOF after %d empty blobs, got %v", len(ids), err)
434 }
435}
436
437// TestCatfileReader_EmptyBlobRead verifies that reading a 0-byte blob
438// through the io.Reader interface returns 0 bytes and io.EOF, and that
439// the trailing LF is consumed transparently.
440func TestCatfileReader_EmptyBlobRead(t *testing.T) {
441 repoDir, blobs := createTestRepo(t)
442 ids := []plumbing.Hash{
443 blobs["empty.txt"], // 0 bytes
444 blobs["hello.txt"], // 12 bytes — sentinel
445 }
446
447 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
448 if err != nil {
449 t.Fatal(err)
450 }
451 defer cr.Close()
452
453 size, _, _, _ := cr.Next()
454 if size != 0 {
455 t.Fatalf("empty.txt size = %d", size)
456 }
457
458 // Explicitly Read on the 0-byte blob.
459 buf := make([]byte, 10)
460 n, err := cr.Read(buf)
461 if n != 0 || err != io.EOF {
462 t.Fatalf("Read empty blob: n=%d err=%v, want n=0 err=io.EOF", n, err)
463 }
464
465 // The trailing LF must have been consumed. Verify by reading the
466 // next entry — if the LF leaked, the header parse would fail.
467 size, _, _, err = cr.Next()
468 if err != nil {
469 t.Fatalf("Next hello.txt after empty blob Read: %v", err)
470 }
471 if size != 12 {
472 t.Fatalf("hello.txt size = %d, want 12", size)
473 }
474 content := make([]byte, size)
475 if _, err := io.ReadFull(cr, content); err != nil {
476 t.Fatal(err)
477 }
478 if string(content) != "hello world\n" {
479 t.Errorf("hello.txt = %q", content)
480 }
481}
482
483// --- Missing object edge cases ---
484
485// TestCatfileReader_AllMissing verifies that a sequence of entirely
486// missing objects is handled gracefully — no errors, no panics, just
487// missing=true for each followed by EOF.
488func TestCatfileReader_AllMissing(t *testing.T) {
489 repoDir, _ := createTestRepo(t)
490
491 ids := []plumbing.Hash{
492 plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"),
493 plumbing.NewHash("1111111111111111111111111111111111111111"),
494 plumbing.NewHash("2222222222222222222222222222222222222222"),
495 }
496
497 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
498 if err != nil {
499 t.Fatal(err)
500 }
501 defer cr.Close()
502
503 for i, id := range ids {
504 _, missing, excluded, err := cr.Next()
505 if err != nil {
506 t.Fatalf("Next #%d (%s): %v", i, id, err)
507 }
508 if excluded {
509 t.Errorf("expected #%d (%s) to be missing, not excluded", i, id)
510 }
511 if !missing {
512 t.Errorf("expected #%d (%s) to be missing", i, id)
513 }
514 }
515
516 _, _, _, err = cr.Next()
517 if err != io.EOF {
518 t.Fatalf("expected EOF after all missing, got %v", err)
519 }
520}
521
522// TestCatfileReader_AlternatingMissingPresent interleaves missing and
523// present objects, verifying that stream alignment is maintained.
524func TestCatfileReader_AlternatingMissingPresent(t *testing.T) {
525 repoDir, blobs := createTestRepo(t)
526
527 fake1 := plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
528 fake2 := plumbing.NewHash("1111111111111111111111111111111111111111")
529
530 ids := []plumbing.Hash{
531 fake1,
532 blobs["hello.txt"],
533 fake2,
534 blobs["empty.txt"],
535 blobs["binary.bin"],
536 }
537
538 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
539 if err != nil {
540 t.Fatal(err)
541 }
542 defer cr.Close()
543
544 // fake1 — missing
545 _, missing, excluded, err := cr.Next()
546 if err != nil || !missing || excluded {
547 t.Fatalf("fake1: err=%v missing=%v excluded=%v", err, missing, excluded)
548 }
549
550 // hello.txt — present, read it
551 size, missing, excluded, err := cr.Next()
552 if err != nil || missing || excluded {
553 t.Fatalf("hello.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
554 }
555 content := make([]byte, size)
556 if _, err := io.ReadFull(cr, content); err != nil {
557 t.Fatal(err)
558 }
559 if string(content) != "hello world\n" {
560 t.Errorf("hello.txt = %q", content)
561 }
562
563 // fake2 — missing
564 _, missing, excluded, err = cr.Next()
565 if err != nil || !missing || excluded {
566 t.Fatalf("fake2: err=%v missing=%v excluded=%v", err, missing, excluded)
567 }
568
569 // empty.txt — present, skip it
570 size, missing, excluded, err = cr.Next()
571 if err != nil || missing || excluded {
572 t.Fatalf("empty.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
573 }
574 if size != 0 {
575 t.Errorf("empty.txt size = %d", size)
576 }
577
578 // binary.bin — present, read it
579 size, missing, excluded, err = cr.Next()
580 if err != nil || missing || excluded {
581 t.Fatalf("binary.bin: err=%v missing=%v excluded=%v", err, missing, excluded)
582 }
583 binContent := make([]byte, size)
584 if _, err := io.ReadFull(cr, binContent); err != nil {
585 t.Fatal(err)
586 }
587 if binContent[0] != 0x00 {
588 t.Errorf("binary.bin[0] = 0x%02x, want 0x00", binContent[0])
589 }
590
591 _, _, _, err = cr.Next()
592 if err != io.EOF {
593 t.Fatalf("expected EOF, got %v", err)
594 }
595}
596
597// TestCatfileReader_MissingThenSkip verifies that a missing object
598// followed by a present but skipped (unread) object doesn't corrupt
599// the stream. Missing objects have no content body, so there must be
600// no stale pending bytes interfering with the next header read.
601func TestCatfileReader_MissingThenSkip(t *testing.T) {
602 repoDir, blobs := createTestRepo(t)
603
604 fake := plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
605 ids := []plumbing.Hash{
606 fake,
607 blobs["large.bin"], // 64KB — skip without reading
608 blobs["hello.txt"], // sentinel — read to verify integrity
609 }
610
611 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
612 if err != nil {
613 t.Fatal(err)
614 }
615 defer cr.Close()
616
617 // missing
618 _, missing, excluded, _ := cr.Next()
619 if !missing || excluded {
620 t.Fatal("expected missing")
621 }
622
623 // large.bin — skip
624 size, missing, excluded, err := cr.Next()
625 if err != nil || missing || excluded {
626 t.Fatalf("large.bin: err=%v missing=%v excluded=%v", err, missing, excluded)
627 }
628 if size != 64*1024 {
629 t.Fatalf("large.bin size = %d", size)
630 }
631 // deliberately don't read
632
633 // hello.txt — read after missing+skip
634 size, missing, excluded, err = cr.Next()
635 if err != nil || missing || excluded {
636 t.Fatalf("hello.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
637 }
638 content := make([]byte, size)
639 if _, err := io.ReadFull(cr, content); err != nil {
640 t.Fatal(err)
641 }
642 if string(content) != "hello world\n" {
643 t.Errorf("hello.txt = %q", content)
644 }
645}
646
647// --- Next() edge cases ---
648
649// TestCatfileReader_RepeatedNextAfterEOF verifies that calling Next
650// after EOF keeps returning EOF — not a panic, not a different error.
651func TestCatfileReader_RepeatedNextAfterEOF(t *testing.T) {
652 repoDir, blobs := createTestRepo(t)
653 ids := []plumbing.Hash{blobs["hello.txt"]}
654
655 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
656 if err != nil {
657 t.Fatal(err)
658 }
659 defer cr.Close()
660
661 // Consume and skip the only entry.
662 if _, _, _, err := cr.Next(); err != nil {
663 t.Fatal(err)
664 }
665
666 // First EOF.
667 _, _, _, err = cr.Next()
668 if err != io.EOF {
669 t.Fatalf("first post-exhaust Next: %v, want io.EOF", err)
670 }
671
672 // Second and third EOF — must be stable.
673 for i := 0; i < 2; i++ {
674 _, _, _, err = cr.Next()
675 if err != io.EOF {
676 t.Fatalf("Next #%d after EOF: %v, want io.EOF", i+2, err)
677 }
678 }
679}
680
681// --- Large blob precision tests ---
682
683// TestCatfileReader_LargeBlobBytePrecision verifies that a 64KB blob
684// is read with byte-exact precision — no off-by-one from trailing LF
685// handling, no truncation, no extra bytes.
686func TestCatfileReader_LargeBlobBytePrecision(t *testing.T) {
687 repoDir, blobs := createTestRepo(t)
688 ids := []plumbing.Hash{blobs["large.bin"]}
689
690 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
691 if err != nil {
692 t.Fatal(err)
693 }
694 defer cr.Close()
695
696 size, _, _, err := cr.Next()
697 if err != nil {
698 t.Fatal(err)
699 }
700 if size != 64*1024 {
701 t.Fatalf("size = %d, want %d", size, 64*1024)
702 }
703
704 // Read the full blob content.
705 content := make([]byte, size)
706 n, err := io.ReadFull(cr, content)
707 if err != nil {
708 t.Fatalf("ReadFull: %v (read %d of %d)", err, n, size)
709 }
710 if n != size {
711 t.Fatalf("read %d bytes, want %d", n, size)
712 }
713
714 // Verify git agrees on the content via cat-file -p.
715 expected, err := exec.Command("git", "-C", repoDir, "cat-file", "-p", blobs["large.bin"].String()).Output()
716 if err != nil {
717 t.Fatalf("git cat-file -p: %v", err)
718 }
719 if !bytes.Equal(content, expected) {
720 t.Errorf("content mismatch: got %d bytes, git says %d bytes", len(content), len(expected))
721 // Find first divergence.
722 for i := range content {
723 if i >= len(expected) || content[i] != expected[i] {
724 t.Errorf("first diff at byte %d: got 0x%02x, want 0x%02x", i, content[i], expected[i])
725 break
726 }
727 }
728 }
729}
730
731// TestCatfileReader_LargeBlobChunkedRead reads a 64KB blob in 997-byte
732// chunks (a prime number that doesn't align with any power-of-2 buffer)
733// to verify no byte is lost or duplicated across read boundaries.
734func TestCatfileReader_LargeBlobChunkedRead(t *testing.T) {
735 repoDir, blobs := createTestRepo(t)
736 ids := []plumbing.Hash{blobs["large.bin"]}
737
738 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
739 if err != nil {
740 t.Fatal(err)
741 }
742 defer cr.Close()
743
744 size, _, _, _ := cr.Next()
745 if size != 64*1024 {
746 t.Fatalf("size = %d", size)
747 }
748
749 var result bytes.Buffer
750 buf := make([]byte, 997) // prime-sized chunks
751 for {
752 n, err := cr.Read(buf)
753 if n > 0 {
754 result.Write(buf[:n])
755 }
756 if err == io.EOF {
757 break
758 }
759 if err != nil {
760 t.Fatal(err)
761 }
762 }
763
764 if result.Len() != size {
765 t.Fatalf("total read = %d, want %d", result.Len(), size)
766 }
767
768 // Cross-check with git.
769 expected, _ := exec.Command("git", "-C", repoDir, "cat-file", "-p", blobs["large.bin"].String()).Output()
770 if !bytes.Equal(result.Bytes(), expected) {
771 t.Error("chunked read content differs from git cat-file -p output")
772 }
773}
774
775// --- Duplicate SHA test ---
776
777// TestCatfileReader_DuplicateSHAs verifies that requesting the same
778// SHA multiple times works — git cat-file --batch outputs the object
779// for each request independently.
780func TestCatfileReader_DuplicateSHAs(t *testing.T) {
781 repoDir, blobs := createTestRepo(t)
782
783 sha := blobs["hello.txt"]
784 ids := []plumbing.Hash{sha, sha, sha}
785
786 cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
787 if err != nil {
788 t.Fatal(err)
789 }
790 defer cr.Close()
791
792 for i := 0; i < 3; i++ {
793 size, missing, excluded, err := cr.Next()
794 if err != nil {
795 t.Fatalf("Next #%d: %v", i, err)
796 }
797 if missing || excluded {
798 t.Fatalf("#%d unexpectedly missing", i)
799 }
800 if size != 12 {
801 t.Fatalf("#%d size = %d, want 12", i, size)
802 }
803 content := make([]byte, size)
804 if _, err := io.ReadFull(cr, content); err != nil {
805 t.Fatal(err)
806 }
807 if string(content) != "hello world\n" {
808 t.Errorf("#%d content = %q", i, content)
809 }
810 }
811
812 _, _, _, err = cr.Next()
813 if err != io.EOF {
814 t.Fatalf("expected EOF, got %v", err)
815 }
816}