contentprovider_test.go at 2011bba55730f4a3b4e2ae06443b6353dda9009e · boltless.me/zoekt

fork of https://github.com/sourcegraph/zoekt
zoekt / contentprovider_test.go
at 2011bba55730f4a3b4e2ae06443b6353dda9009e 7.8 kB View raw
Julie Tibshirani Ranking: simplify score combination strategy (#523) 3y ago
  1package zoekt
  2
  3import (
  4	"bytes"
  5	"fmt"
  6	"testing"
  7
  8	"github.com/google/go-cmp/cmp"
  9)
 10
 11func getNewlines(data []byte) newlines {
 12	var locs []uint32
 13	for i, c := range data {
 14		if c == '\n' {
 15			locs = append(locs, uint32(i))
 16		}
 17	}
 18	return newlines{
 19		locs:     locs,
 20		fileSize: uint32(len(data)),
 21	}
 22}
 23
 24func TestGetLines(t *testing.T) {
 25	contents := [][]byte{
 26		[]byte("one\ntwo\nthree\nfour"),
 27		[]byte("one\ntwo\nthree\nfour\n"),
 28		[]byte("one"),
 29		[]byte(""),
 30	}
 31
 32	for _, content := range contents {
 33		t.Run("", func(t *testing.T) {
 34			newLines := getNewlines(content)
 35			lines := bytes.Split(content, []byte{'\n'}) // TODO does split group consecutive sep?
 36			wantGetLines := func(low, high int) []byte {
 37				low--
 38				high--
 39				if low < 0 {
 40					low = 0
 41				}
 42				if low >= len(lines) {
 43					return nil
 44				}
 45				if high <= 0 {
 46					return nil
 47				}
 48				if high > len(lines) {
 49					high = len(lines)
 50				}
 51				return bytes.Join(lines[low:high], []byte{'\n'})
 52			}
 53
 54			for low := -1; low <= len(lines)+2; low++ {
 55				for high := low; high <= len(lines)+2; high++ {
 56					want := wantGetLines(low, high)
 57					got := newLines.getLines(content, low, high)
 58					if d := cmp.Diff(string(want), string(got)); d != "" {
 59						t.Fatal(d)
 60					}
 61				}
 62			}
 63		})
 64	}
 65}
 66
 67func TestAtOffset(t *testing.T) {
 68	cases := []struct {
 69		data       []byte
 70		offset     uint32
 71		lineNumber int
 72		lineStart  int
 73		lineEnd    int
 74	}{{
 75		data:       []byte("0.2.4.\n7.9.11.\n"),
 76		offset:     0,
 77		lineNumber: 1, lineStart: 0, lineEnd: 6,
 78	}, {
 79		data:       []byte("0.2.4.\n7.9.11.\n"),
 80		offset:     6,
 81		lineNumber: 1, lineStart: 0, lineEnd: 6,
 82	}, {
 83		data:       []byte("0.2.4.\n7.9.11.\n"),
 84		offset:     2,
 85		lineNumber: 1, lineStart: 0, lineEnd: 6,
 86	}, {
 87		data:       []byte("0.2.4.\n7.9.11.\n"),
 88		offset:     2,
 89		lineNumber: 1, lineStart: 0, lineEnd: 6,
 90	}, {
 91		data:       []byte("0.2.4.\n7.9.11.\n"),
 92		offset:     7,
 93		lineNumber: 2, lineStart: 7, lineEnd: 14,
 94	}, {
 95		data:       []byte("0.2.4.\n7.9.11.\n"),
 96		offset:     11,
 97		lineNumber: 2, lineStart: 7, lineEnd: 14,
 98	}, {
 99		data:       []byte("0.2.4.\n7.9.11.\n"),
100		offset:     15,
101		lineNumber: 3, lineStart: 15, lineEnd: 15,
102	}, {
103		data:       []byte("0.2.4.\n7.9.11."),
104		offset:     7,
105		lineNumber: 2, lineStart: 7, lineEnd: 14,
106	}, {
107		data:       []byte("\n\n"),
108		offset:     0,
109		lineNumber: 1, lineStart: 0, lineEnd: 0,
110	}, {
111		data:       []byte("\n\n"),
112		offset:     1,
113		lineNumber: 2, lineStart: 1, lineEnd: 1,
114	}, {
115		data:       []byte("\n\n"),
116		offset:     3,
117		lineNumber: 3, lineStart: 2, lineEnd: 2,
118	}, {
119		data:       []byte("line with no newlines"),
120		offset:     3,
121		lineNumber: 1, lineStart: 0, lineEnd: 21,
122	}}
123
124	for _, tt := range cases {
125		t.Run("", func(t *testing.T) {
126			nls := getNewlines(tt.data)
127			gotLineNumber, gotLineStart, gotLineEnd := nls.atOffset(tt.offset)
128			if gotLineNumber != tt.lineNumber {
129				t.Fatalf("expected line number %d, got %d", tt.lineNumber, gotLineNumber)
130			}
131			if gotLineStart != tt.lineStart {
132				t.Fatalf("expected line start %d, got %d", tt.lineStart, gotLineStart)
133			}
134			if gotLineEnd != tt.lineEnd {
135				t.Fatalf("expected line end %d, got %d", tt.lineEnd, gotLineEnd)
136			}
137		})
138	}
139}
140
141func TestLineBounds(t *testing.T) {
142	cases := []struct {
143		data       []byte
144		lineNumber int
145		start      uint32
146		end        uint32
147	}{{
148		data:       []byte("0.2.4.\n7.9.11.\n"),
149		lineNumber: 1,
150		start:      0, end: 6,
151	}, {
152		data:       []byte("0.2.4.\n7.9.11.\n"),
153		lineNumber: 2,
154		start:      7, end: 14,
155	}, {
156		data:       []byte("0.2.4.\n7.9.11.\n"),
157		lineNumber: 0,
158		start:      0, end: 0,
159	}, {
160		data:       []byte("0.2.4.\n7.9.11.\n"),
161		lineNumber: -1,
162		start:      0, end: 0,
163	}, {
164		data:       []byte("0.2.4.\n7.9.11.\n"),
165		lineNumber: 202002,
166		start:      15, end: 15,
167	}, {
168		data:       []byte("\n\n"),
169		lineNumber: 1,
170		start:      0, end: 0,
171	}, {
172		data:       []byte("\n\n"),
173		lineNumber: 2,
174		start:      1, end: 1,
175	}, {
176		data:       []byte("\n\n"),
177		lineNumber: 3,
178		start:      2, end: 2,
179	}}
180
181	for _, tt := range cases {
182		t.Run("", func(t *testing.T) {
183			nls := getNewlines(tt.data)
184			gotStart, gotEnd := nls.lineBounds(tt.lineNumber)
185			if gotStart != tt.start {
186				t.Fatalf("expected line start %d, got %d", tt.start, gotStart)
187			}
188			if gotEnd != tt.end {
189				t.Fatalf("expected line end %d, got %d", tt.end, gotEnd)
190			}
191		})
192	}
193}
194
195func TestChunkMatches(t *testing.T) {
196	content := []byte(`0.2.4.6.8.10.
19713.16.19.22.
19826.29.32.35.
19939.42.45.48.
20052.55.58.61.
20165.68.71.74.
20278.81.84.87.
203`)
204	match_0_2 := &candidateMatch{byteOffset: 0, byteMatchSz: 2}
205	match_6_10 := &candidateMatch{byteOffset: 6, byteMatchSz: 4}
206	match_10_16 := &candidateMatch{byteOffset: 10, byteMatchSz: 6}
207	match_19_42 := &candidateMatch{byteOffset: 19, byteMatchSz: 23}
208	match_45_48 := &candidateMatch{byteOffset: 45, byteMatchSz: 3}
209	match_71_72 := &candidateMatch{byteOffset: 71, byteMatchSz: 1}
210
211	cases := []struct {
212		candidateMatches []*candidateMatch
213		numContextLines  int
214		want             []candidateChunk
215	}{{
216		candidateMatches: []*candidateMatch{match_0_2},
217		numContextLines:  0,
218		want: []candidateChunk{{
219			firstLine:  1,
220			minOffset:  0,
221			lastLine:   1,
222			maxOffset:  2,
223			candidates: []*candidateMatch{match_0_2},
224		}},
225	}, {
226		candidateMatches: []*candidateMatch{match_0_2},
227		numContextLines:  5,
228		want: []candidateChunk{{
229			firstLine:  1,
230			minOffset:  0,
231			lastLine:   1,
232			maxOffset:  2,
233			candidates: []*candidateMatch{match_0_2},
234		}},
235	}, {
236		candidateMatches: []*candidateMatch{match_0_2, match_6_10},
237		numContextLines:  0,
238		want: []candidateChunk{{
239			firstLine:  1,
240			minOffset:  0,
241			lastLine:   1,
242			maxOffset:  10,
243			candidates: []*candidateMatch{match_0_2, match_6_10},
244		}},
245	}, {
246		candidateMatches: []*candidateMatch{match_0_2, match_10_16},
247		numContextLines:  0,
248		want: []candidateChunk{{
249			firstLine:  1,
250			minOffset:  0,
251			lastLine:   2,
252			maxOffset:  16,
253			candidates: []*candidateMatch{match_0_2, match_10_16},
254		}},
255	}, {
256		candidateMatches: []*candidateMatch{match_0_2, match_19_42},
257		numContextLines:  0,
258		want: []candidateChunk{{
259			firstLine:  1,
260			minOffset:  0,
261			lastLine:   1,
262			maxOffset:  2,
263			candidates: []*candidateMatch{match_0_2},
264		}, {
265			firstLine:  2,
266			minOffset:  19,
267			lastLine:   4,
268			maxOffset:  42,
269			candidates: []*candidateMatch{match_19_42},
270		}},
271	}, {
272		candidateMatches: []*candidateMatch{match_0_2, match_19_42},
273		numContextLines:  1,
274		want: []candidateChunk{{
275			firstLine:  1,
276			minOffset:  0,
277			lastLine:   4,
278			maxOffset:  42,
279			candidates: []*candidateMatch{match_0_2, match_19_42},
280		}},
281	}, {
282		candidateMatches: []*candidateMatch{
283			match_0_2, match_19_42, match_45_48, match_71_72,
284		},
285		numContextLines: 0,
286		want: []candidateChunk{{
287			firstLine:  1,
288			minOffset:  0,
289			lastLine:   1,
290			maxOffset:  2,
291			candidates: []*candidateMatch{match_0_2},
292		}, {
293			firstLine:  2,
294			minOffset:  19,
295			lastLine:   4,
296			maxOffset:  48,
297			candidates: []*candidateMatch{match_19_42, match_45_48},
298		}, {
299			firstLine:  6,
300			minOffset:  71,
301			lastLine:   6,
302			maxOffset:  72,
303			candidates: []*candidateMatch{match_71_72},
304		}},
305	}, {
306		candidateMatches: []*candidateMatch{
307			match_0_2, match_19_42, match_45_48, match_71_72,
308		},
309		numContextLines: 100,
310		want: []candidateChunk{{
311			firstLine:  1,
312			minOffset:  0,
313			lastLine:   6,
314			maxOffset:  72,
315			candidates: []*candidateMatch{match_0_2, match_19_42, match_45_48, match_71_72},
316		}},
317	}}
318
319	newlines := getNewlines(content)
320	for _, tt := range cases {
321		t.Run("", func(t *testing.T) {
322			got := chunkCandidates(tt.candidateMatches, newlines, tt.numContextLines)
323			if diff := cmp.Diff(fmt.Sprintf("%#v\n", tt.want), fmt.Sprintf("%#v\n", got)); diff != "" {
324				t.Fatal(diff)
325			}
326		})
327	}
328}
Configure Feed

Configure Feed