fork of https://github.com/sourcegraph/zoekt
1package zoekt
2
3import (
4 "bytes"
5 "fmt"
6 "testing"
7
8 "github.com/google/go-cmp/cmp"
9)
10
11func getNewlines(data []byte) newlines {
12 var locs []uint32
13 for i, c := range data {
14 if c == '\n' {
15 locs = append(locs, uint32(i))
16 }
17 }
18 return newlines{
19 locs: locs,
20 fileSize: uint32(len(data)),
21 }
22}
23
24func TestGetLines(t *testing.T) {
25 contents := [][]byte{
26 []byte("one\ntwo\nthree\nfour"),
27 []byte("one\ntwo\nthree\nfour\n"),
28 []byte("one"),
29 []byte(""),
30 }
31
32 for _, content := range contents {
33 t.Run("", func(t *testing.T) {
34 newLines := getNewlines(content)
35 lines := bytes.Split(content, []byte{'\n'}) // TODO does split group consecutive sep?
36 wantGetLines := func(low, high int) []byte {
37 low--
38 high--
39 if low < 0 {
40 low = 0
41 }
42 if low >= len(lines) {
43 return nil
44 }
45 if high <= 0 {
46 return nil
47 }
48 if high > len(lines) {
49 high = len(lines)
50 }
51 return bytes.Join(lines[low:high], []byte{'\n'})
52 }
53
54 for low := -1; low <= len(lines)+2; low++ {
55 for high := low; high <= len(lines)+2; high++ {
56 want := wantGetLines(low, high)
57 got := newLines.getLines(content, low, high)
58 if d := cmp.Diff(string(want), string(got)); d != "" {
59 t.Fatal(d)
60 }
61 }
62 }
63 })
64 }
65}
66
67func TestAtOffset(t *testing.T) {
68 cases := []struct {
69 data []byte
70 offset uint32
71 lineNumber int
72 lineStart int
73 lineEnd int
74 }{{
75 data: []byte("0.2.4.\n7.9.11.\n"),
76 offset: 0,
77 lineNumber: 1, lineStart: 0, lineEnd: 6,
78 }, {
79 data: []byte("0.2.4.\n7.9.11.\n"),
80 offset: 6,
81 lineNumber: 1, lineStart: 0, lineEnd: 6,
82 }, {
83 data: []byte("0.2.4.\n7.9.11.\n"),
84 offset: 2,
85 lineNumber: 1, lineStart: 0, lineEnd: 6,
86 }, {
87 data: []byte("0.2.4.\n7.9.11.\n"),
88 offset: 2,
89 lineNumber: 1, lineStart: 0, lineEnd: 6,
90 }, {
91 data: []byte("0.2.4.\n7.9.11.\n"),
92 offset: 7,
93 lineNumber: 2, lineStart: 7, lineEnd: 14,
94 }, {
95 data: []byte("0.2.4.\n7.9.11.\n"),
96 offset: 11,
97 lineNumber: 2, lineStart: 7, lineEnd: 14,
98 }, {
99 data: []byte("0.2.4.\n7.9.11.\n"),
100 offset: 15,
101 lineNumber: 3, lineStart: 15, lineEnd: 15,
102 }, {
103 data: []byte("0.2.4.\n7.9.11."),
104 offset: 7,
105 lineNumber: 2, lineStart: 7, lineEnd: 14,
106 }, {
107 data: []byte("\n\n"),
108 offset: 0,
109 lineNumber: 1, lineStart: 0, lineEnd: 0,
110 }, {
111 data: []byte("\n\n"),
112 offset: 1,
113 lineNumber: 2, lineStart: 1, lineEnd: 1,
114 }, {
115 data: []byte("\n\n"),
116 offset: 3,
117 lineNumber: 3, lineStart: 2, lineEnd: 2,
118 }, {
119 data: []byte("line with no newlines"),
120 offset: 3,
121 lineNumber: 1, lineStart: 0, lineEnd: 21,
122 }}
123
124 for _, tt := range cases {
125 t.Run("", func(t *testing.T) {
126 nls := getNewlines(tt.data)
127 gotLineNumber, gotLineStart, gotLineEnd := nls.atOffset(tt.offset)
128 if gotLineNumber != tt.lineNumber {
129 t.Fatalf("expected line number %d, got %d", tt.lineNumber, gotLineNumber)
130 }
131 if gotLineStart != tt.lineStart {
132 t.Fatalf("expected line start %d, got %d", tt.lineStart, gotLineStart)
133 }
134 if gotLineEnd != tt.lineEnd {
135 t.Fatalf("expected line end %d, got %d", tt.lineEnd, gotLineEnd)
136 }
137 })
138 }
139}
140
141func TestLineBounds(t *testing.T) {
142 cases := []struct {
143 data []byte
144 lineNumber int
145 start uint32
146 end uint32
147 }{{
148 data: []byte("0.2.4.\n7.9.11.\n"),
149 lineNumber: 1,
150 start: 0, end: 6,
151 }, {
152 data: []byte("0.2.4.\n7.9.11.\n"),
153 lineNumber: 2,
154 start: 7, end: 14,
155 }, {
156 data: []byte("0.2.4.\n7.9.11.\n"),
157 lineNumber: 0,
158 start: 0, end: 0,
159 }, {
160 data: []byte("0.2.4.\n7.9.11.\n"),
161 lineNumber: -1,
162 start: 0, end: 0,
163 }, {
164 data: []byte("0.2.4.\n7.9.11.\n"),
165 lineNumber: 202002,
166 start: 15, end: 15,
167 }, {
168 data: []byte("\n\n"),
169 lineNumber: 1,
170 start: 0, end: 0,
171 }, {
172 data: []byte("\n\n"),
173 lineNumber: 2,
174 start: 1, end: 1,
175 }, {
176 data: []byte("\n\n"),
177 lineNumber: 3,
178 start: 2, end: 2,
179 }}
180
181 for _, tt := range cases {
182 t.Run("", func(t *testing.T) {
183 nls := getNewlines(tt.data)
184 gotStart, gotEnd := nls.lineBounds(tt.lineNumber)
185 if gotStart != tt.start {
186 t.Fatalf("expected line start %d, got %d", tt.start, gotStart)
187 }
188 if gotEnd != tt.end {
189 t.Fatalf("expected line end %d, got %d", tt.end, gotEnd)
190 }
191 })
192 }
193}
194
195func TestChunkMatches(t *testing.T) {
196 content := []byte(`0.2.4.6.8.10.
19713.16.19.22.
19826.29.32.35.
19939.42.45.48.
20052.55.58.61.
20165.68.71.74.
20278.81.84.87.
203`)
204 match_0_2 := &candidateMatch{byteOffset: 0, byteMatchSz: 2}
205 match_6_10 := &candidateMatch{byteOffset: 6, byteMatchSz: 4}
206 match_10_16 := &candidateMatch{byteOffset: 10, byteMatchSz: 6}
207 match_19_42 := &candidateMatch{byteOffset: 19, byteMatchSz: 23}
208 match_45_48 := &candidateMatch{byteOffset: 45, byteMatchSz: 3}
209 match_71_72 := &candidateMatch{byteOffset: 71, byteMatchSz: 1}
210
211 cases := []struct {
212 candidateMatches []*candidateMatch
213 numContextLines int
214 want []candidateChunk
215 }{{
216 candidateMatches: []*candidateMatch{match_0_2},
217 numContextLines: 0,
218 want: []candidateChunk{{
219 firstLine: 1,
220 minOffset: 0,
221 lastLine: 1,
222 maxOffset: 2,
223 candidates: []*candidateMatch{match_0_2},
224 }},
225 }, {
226 candidateMatches: []*candidateMatch{match_0_2},
227 numContextLines: 5,
228 want: []candidateChunk{{
229 firstLine: 1,
230 minOffset: 0,
231 lastLine: 1,
232 maxOffset: 2,
233 candidates: []*candidateMatch{match_0_2},
234 }},
235 }, {
236 candidateMatches: []*candidateMatch{match_0_2, match_6_10},
237 numContextLines: 0,
238 want: []candidateChunk{{
239 firstLine: 1,
240 minOffset: 0,
241 lastLine: 1,
242 maxOffset: 10,
243 candidates: []*candidateMatch{match_0_2, match_6_10},
244 }},
245 }, {
246 candidateMatches: []*candidateMatch{match_0_2, match_10_16},
247 numContextLines: 0,
248 want: []candidateChunk{{
249 firstLine: 1,
250 minOffset: 0,
251 lastLine: 2,
252 maxOffset: 16,
253 candidates: []*candidateMatch{match_0_2, match_10_16},
254 }},
255 }, {
256 candidateMatches: []*candidateMatch{match_0_2, match_19_42},
257 numContextLines: 0,
258 want: []candidateChunk{{
259 firstLine: 1,
260 minOffset: 0,
261 lastLine: 1,
262 maxOffset: 2,
263 candidates: []*candidateMatch{match_0_2},
264 }, {
265 firstLine: 2,
266 minOffset: 19,
267 lastLine: 4,
268 maxOffset: 42,
269 candidates: []*candidateMatch{match_19_42},
270 }},
271 }, {
272 candidateMatches: []*candidateMatch{match_0_2, match_19_42},
273 numContextLines: 1,
274 want: []candidateChunk{{
275 firstLine: 1,
276 minOffset: 0,
277 lastLine: 4,
278 maxOffset: 42,
279 candidates: []*candidateMatch{match_0_2, match_19_42},
280 }},
281 }, {
282 candidateMatches: []*candidateMatch{
283 match_0_2, match_19_42, match_45_48, match_71_72,
284 },
285 numContextLines: 0,
286 want: []candidateChunk{{
287 firstLine: 1,
288 minOffset: 0,
289 lastLine: 1,
290 maxOffset: 2,
291 candidates: []*candidateMatch{match_0_2},
292 }, {
293 firstLine: 2,
294 minOffset: 19,
295 lastLine: 4,
296 maxOffset: 48,
297 candidates: []*candidateMatch{match_19_42, match_45_48},
298 }, {
299 firstLine: 6,
300 minOffset: 71,
301 lastLine: 6,
302 maxOffset: 72,
303 candidates: []*candidateMatch{match_71_72},
304 }},
305 }, {
306 candidateMatches: []*candidateMatch{
307 match_0_2, match_19_42, match_45_48, match_71_72,
308 },
309 numContextLines: 100,
310 want: []candidateChunk{{
311 firstLine: 1,
312 minOffset: 0,
313 lastLine: 6,
314 maxOffset: 72,
315 candidates: []*candidateMatch{match_0_2, match_19_42, match_45_48, match_71_72},
316 }},
317 }}
318
319 newlines := getNewlines(content)
320 for _, tt := range cases {
321 t.Run("", func(t *testing.T) {
322 got := chunkCandidates(tt.candidateMatches, newlines, tt.numContextLines)
323 if diff := cmp.Diff(fmt.Sprintf("%#v\n", tt.want), fmt.Sprintf("%#v\n", got)); diff != "" {
324 t.Fatal(diff)
325 }
326 })
327 }
328}