fork of https://github.com/sourcegraph/zoekt
1// Copyright 2018 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package index
16
17import (
18 "math"
19 "reflect"
20 "regexp/syntax"
21 "testing"
22
23 "github.com/RoaringBitmap/roaring"
24 "github.com/grafana/regexp"
25
26 "github.com/sourcegraph/zoekt"
27 "github.com/sourcegraph/zoekt/query"
28)
29
30func Test_breakOnNewlines(t *testing.T) {
31 type args struct {
32 cm *candidateMatch
33 text []byte
34 }
35 tests := []struct {
36 name string
37 args args
38 want []*candidateMatch
39 }{
40 {
41 name: "trivial case",
42 args: args{
43 cm: &candidateMatch{
44 byteOffset: 0,
45 byteMatchSz: 0,
46 },
47 text: nil,
48 },
49 want: nil,
50 },
51 {
52 name: "no newlines",
53 args: args{
54 cm: &candidateMatch{
55 byteOffset: 0,
56 byteMatchSz: 1,
57 },
58 text: []byte("a"),
59 },
60 want: []*candidateMatch{
61 {
62 byteOffset: 0,
63 byteMatchSz: 1,
64 },
65 },
66 },
67 {
68 name: "newline at start",
69 args: args{
70 cm: &candidateMatch{
71 byteOffset: 0,
72 byteMatchSz: 2,
73 },
74 text: []byte("\na"),
75 },
76 want: []*candidateMatch{
77 {
78 byteOffset: 1,
79 byteMatchSz: 1,
80 },
81 },
82 },
83 {
84 name: "newline at end",
85 args: args{
86 cm: &candidateMatch{
87 byteOffset: 0,
88 byteMatchSz: 2,
89 },
90 text: []byte("a\n"),
91 },
92 want: []*candidateMatch{
93 {
94 byteOffset: 0,
95 byteMatchSz: 1,
96 },
97 },
98 },
99 {
100 name: "newline in middle",
101 args: args{
102 cm: &candidateMatch{
103 byteOffset: 0,
104 byteMatchSz: 3,
105 },
106 text: []byte("a\nb"),
107 },
108 want: []*candidateMatch{
109 {
110 byteOffset: 0,
111 byteMatchSz: 1,
112 },
113 {
114 byteOffset: 2,
115 byteMatchSz: 1,
116 },
117 },
118 },
119 {
120 name: "two newlines",
121 args: args{
122 cm: &candidateMatch{
123 byteOffset: 0,
124 byteMatchSz: 5,
125 },
126 text: []byte("a\nb\nc"),
127 },
128 want: []*candidateMatch{
129 {
130 byteOffset: 0,
131 byteMatchSz: 1,
132 },
133 {
134 byteOffset: 2,
135 byteMatchSz: 1,
136 },
137 {
138 byteOffset: 4,
139 byteMatchSz: 1,
140 },
141 },
142 },
143 }
144 for _, tt := range tests {
145 t.Run(tt.name, func(t *testing.T) {
146 if got := breakOnNewlines(tt.args.cm, tt.args.text); !reflect.DeepEqual(got, tt.want) {
147 type PrintableCm struct {
148 byteOffset uint32
149 byteMatchSz uint32
150 }
151 var got2, want2 []PrintableCm
152 for _, g := range got {
153 got2 = append(got2, PrintableCm{byteOffset: g.byteOffset, byteMatchSz: g.byteMatchSz})
154 }
155 for _, w := range tt.want {
156 want2 = append(want2, PrintableCm{byteOffset: w.byteOffset, byteMatchSz: w.byteMatchSz})
157 }
158 t.Errorf("breakMatchOnNewlines() = %+v, want %+v", got2, want2)
159 }
160 })
161 }
162}
163
164func TestEquivalentQuerySkipRegexpTree(t *testing.T) {
165 tests := []struct {
166 query string
167 skip bool
168 }{
169 {query: "^foo", skip: false},
170 {query: "foo", skip: true},
171 {query: "thread|needle|haystack", skip: true},
172 {query: "contain(er|ing)", skip: false},
173 {query: "thread (needle|haystack)", skip: true},
174 {query: "thread (needle|)", skip: false},
175 {query: `\bthread\b case:yes`, skip: true}, // word search
176 {query: `\bthread\b case:no`, skip: false},
177 }
178
179 for _, tt := range tests {
180 q, err := query.Parse(tt.query)
181 if err != nil {
182 t.Errorf("Error parsing query: %s", "sym:"+tt.query)
183 continue
184 }
185
186 d := &indexData{}
187 mt, err := d.newMatchTree(q, matchTreeOpt{})
188 if err != nil {
189 t.Errorf("Error creating match tree from query: %s", q)
190 continue
191 }
192
193 visitMatchTree(mt, func(m matchTree) {
194 if _, ok := m.(*regexpMatchTree); ok && tt.skip {
195 t.Log(mt)
196 t.Errorf("Expected regexpMatchTree to be skipped for query: %s", q)
197 }
198 })
199 }
200}
201
202// Test whether we skip the regexp engine for queries like "\bLITERAL\b
203// case:yes"
204func TestWordSearchSkipRegexpTree(t *testing.T) {
205 qStr := "\\bfoo\\b case:yes"
206 q, err := query.Parse(qStr)
207 if err != nil {
208 t.Fatalf("Error parsing query: %s", "sym:"+qStr)
209 }
210
211 d := &indexData{}
212 mt, err := d.newMatchTree(q, matchTreeOpt{})
213 if err != nil {
214 t.Fatalf("Error creating match tree from query: %s", q)
215 }
216
217 countRegexMatchTree, countWordMatchTree := 0, 0
218 visitMatchTree(mt, func(m matchTree) {
219 switch m.(type) {
220 case *regexpMatchTree:
221 countRegexMatchTree++
222 case *wordMatchTree:
223 countWordMatchTree++
224 }
225 })
226
227 if countRegexMatchTree != 0 {
228 t.Fatalf("expected to find 0 regexMatchTree, found %d", countRegexMatchTree)
229 }
230
231 if countWordMatchTree != 1 {
232 t.Fatalf("expected to find 1 wordMatchTree, found %d", countWordMatchTree)
233 }
234}
235
236func TestSymbolMatchTree(t *testing.T) {
237 tests := []struct {
238 query string
239 substr string
240 regex string
241 regexAll bool
242 }{
243 {query: "sym:.*", regex: "(?i)(?-s:.)*", regexAll: true},
244 {query: "sym:(ab|cd)", regex: "(?i)ab|cd"},
245 {query: "sym:b.r", regex: "(?i)b(?-s:.)r"},
246 {query: "sym:horse", substr: "horse"},
247 {query: `sym:\bthread\b case:yes`, regex: `\bthread\b`}, // check we disable word search opt
248 {query: `sym:\bthread\b case:no`, regex: `(?i)\bthread\b`},
249 }
250
251 for _, tt := range tests {
252 q, err := query.Parse(tt.query)
253 if err != nil {
254 t.Errorf("Error parsing query: %s", tt.query)
255 continue
256 }
257
258 d := &indexData{}
259 mt, err := d.newMatchTree(q, matchTreeOpt{})
260 if err != nil {
261 t.Errorf("Error creating match tree from query: %s", q)
262 continue
263 }
264
265 var (
266 substr string
267 regex string
268 regexAll bool
269 )
270 if substrMT, ok := mt.(*symbolSubstrMatchTree); ok {
271 substr = substrMT.query.Pattern
272 }
273 if regexMT, ok := mt.(*symbolRegexpMatchTree); ok {
274 regex = regexMT.regexp.String()
275 regexAll = regexMT.all
276 }
277
278 if substr != tt.substr {
279 t.Errorf("%s has unexpected substring:\nwant: %q\ngot: %q", tt.query, tt.substr, substr)
280 }
281 if regex != tt.regex {
282 t.Errorf("%s has unexpected regex:\nwant: %q\ngot: %q", tt.query, tt.regex, regex)
283 }
284 if regexAll != tt.regexAll {
285 t.Errorf("%s has unexpected regexAll: want=%t got=%t", tt.query, tt.regexAll, regexAll)
286 }
287 }
288}
289
290func TestRepoSet(t *testing.T) {
291 d := &indexData{
292 repoMetaData: []zoekt.Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}},
293 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
294 repos: []uint16{0, 0, 1, 2, 3, 3},
295 }
296 mt, err := d.newMatchTree(&query.RepoSet{Set: map[string]bool{"r1": true, "r3": true, "r99": true}}, matchTreeOpt{})
297 if err != nil {
298 t.Fatal(err)
299 }
300 want := []uint32{2, 4, 5}
301 for i := range want {
302 nextDoc := mt.nextDoc()
303 if nextDoc != want[i] {
304 t.Fatalf("want %d, got %d", want[i], nextDoc)
305 }
306 mt.prepare(nextDoc)
307 }
308 if mt.nextDoc() != math.MaxUint32 {
309 t.Fatalf("expected %d document, but got at least 1 more", len(want))
310 }
311}
312
313func TestRepo(t *testing.T) {
314 d := &indexData{
315 repoMetaData: []zoekt.Repository{{Name: "foo"}, {Name: "bar"}},
316 fileBranchMasks: []uint64{1, 1, 1, 1, 1},
317 repos: []uint16{0, 0, 1, 0, 1},
318 }
319 mt, err := d.newMatchTree(&query.Repo{Regexp: regexp.MustCompile("ar")}, matchTreeOpt{})
320 if err != nil {
321 t.Fatal(err)
322 }
323 want := []uint32{2, 4}
324 for i := range want {
325 nextDoc := mt.nextDoc()
326 if nextDoc != want[i] {
327 t.Fatalf("want %d, got %d", want[i], nextDoc)
328 }
329 mt.prepare(nextDoc)
330 }
331 if mt.nextDoc() != math.MaxUint32 {
332 t.Fatalf("expect %d documents, but got at least 1 more", len(want))
333 }
334}
335
336func TestBranchesRepos(t *testing.T) {
337 d := &indexData{
338 repoMetaData: []zoekt.Repository{
339 {ID: hash("foo"), Name: "foo"},
340 {ID: hash("bar"), Name: "bar"},
341 },
342 fileBranchMasks: []uint64{1, 1, 1, 2, 1, 2, 1},
343 repos: []uint16{0, 0, 1, 1, 1, 1, 1},
344 branchIDs: []map[string]uint{{"HEAD": 1}, {"HEAD": 1, "b1": 2}},
345 }
346
347 mt, err := d.newMatchTree(&query.BranchesRepos{List: []query.BranchRepos{
348 {Branch: "b1", Repos: roaring.BitmapOf(hash("bar"))},
349 {Branch: "b2", Repos: roaring.BitmapOf(hash("bar"))},
350 }}, matchTreeOpt{})
351 if err != nil {
352 t.Fatal(err)
353 }
354
355 want := []uint32{3, 5}
356 for i := range want {
357 nextDoc := mt.nextDoc()
358 if nextDoc != want[i] {
359 t.Fatalf("want %d, got %d", want[i], nextDoc)
360 }
361 mt.prepare(nextDoc)
362 }
363
364 if mt.nextDoc() != math.MaxUint32 {
365 t.Fatalf("expect %d documents, but got at least 1 more", len(want))
366 }
367}
368
369func TestRepoIDs(t *testing.T) {
370 d := &indexData{
371 repoMetaData: []zoekt.Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}},
372 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
373 repos: []uint16{0, 0, 1, 2, 3, 3},
374 }
375 mt, err := d.newMatchTree(&query.RepoIDs{Repos: roaring.BitmapOf(1, 3, 99)}, matchTreeOpt{})
376 if err != nil {
377 t.Fatal(err)
378 }
379
380 want := []uint32{2, 4, 5}
381 for i := range want {
382 nextDoc := mt.nextDoc()
383 if nextDoc != want[i] {
384 t.Fatalf("want %d, got %d", want[i], nextDoc)
385 }
386 mt.prepare(nextDoc)
387 }
388 if mt.nextDoc() != math.MaxUint32 {
389 t.Fatalf("expected %d document, but got at least 1 more", len(want))
390 }
391}
392
393func TestIsRegexpAll(t *testing.T) {
394 valid := []string{
395 ".*",
396 "(.*)",
397 "(?-s:.*)",
398 "(?s:.*)",
399 "(?i)(?-s:.*)",
400 }
401 invalid := []string{
402 ".",
403 "foo",
404 "(foo.*)",
405 }
406
407 for _, s := range valid {
408 r, err := syntax.Parse(s, syntax.Perl)
409 if err != nil {
410 t.Fatal(err)
411 }
412 if !isRegexpAll(r) {
413 t.Errorf("expected %q to match all", s)
414 }
415 }
416
417 for _, s := range invalid {
418 r, err := syntax.Parse(s, syntax.Perl)
419 if err != nil {
420 t.Fatal(err)
421 }
422 if isRegexpAll(r) {
423 t.Errorf("expected %q to not match all", s)
424 }
425 }
426}
427
428func TestMetaQueryMatchTree(t *testing.T) {
429 d := &indexData{
430 repoMetaData: []zoekt.Repository{
431 {Name: "r0", Metadata: map[string]string{"license": "Apache-2.0"}},
432 {Name: "r1", Metadata: map[string]string{"license": "MIT"}},
433 {Name: "r2"}, // no metadata
434 {Name: "r3", Metadata: map[string]string{"haystack": "needle"}},
435 {Name: "r4", Metadata: map[string]string{"note": "test"}},
436 },
437 fileBranchMasks: []uint64{1, 1, 1, 1, 1}, // 5 docs
438 repos: []uint16{0, 1, 2, 3, 4}, // map docIDs to repos
439 docMatchTreeCache: newDocMatchTreeCache(1), // small cache to test eviction
440 }
441
442 q := &query.Meta{
443 Field: "license",
444 Value: regexp.MustCompile("M.T"),
445 }
446
447 mt, err := d.newMatchTree(q, matchTreeOpt{})
448 if err != nil {
449 t.Fatalf("failed to build matchTree: %v", err)
450 }
451
452 // Check that the docMatchTree cache is populated correctly
453 checksum := queryMetaChecksum("license", regexp.MustCompile("M.T"))
454 cacheKeyField := "Meta"
455 if _, ok := d.docMatchTreeCache.Get(cacheKeyField, checksum); !ok {
456 t.Errorf("expected docMatchTreeCache to be populated for key (%q, %q)", cacheKeyField, checksum)
457 }
458
459 var matched []uint32
460 for {
461 doc := mt.nextDoc()
462 if doc == math.MaxUint32 {
463 break
464 }
465 matched = append(matched, doc)
466 mt.prepare(doc)
467 }
468
469 want := []uint32{1} // only doc from r1 should match
470 if !reflect.DeepEqual(matched, want) {
471 t.Errorf("meta match failed: got %v, want %v", matched, want)
472 }
473}
474
475func Test_queryMetaCacheKey(t *testing.T) {
476 cases := []struct {
477 field string
478 pattern string
479 wantKey string
480 }{
481 {"metaField", "foo.*bar", "24e88a5ffec04af0"},
482 {"metaField", "foo.*baz", "d8d6f6a7f0725b61"},
483 {"otherField", "foo.*bar", "c9d07e17c028364"},
484 }
485 for _, tc := range cases {
486 re := regexp.MustCompile(tc.pattern)
487 key := queryMetaChecksum(tc.field, re)
488 if key != tc.wantKey {
489 t.Errorf("unexpected key for field=%q pattern=%q: got %q, want %q", tc.field, tc.pattern, key, tc.wantKey)
490 }
491 }
492}