fork of https://github.com/sourcegraph/zoekt
1// Copyright 2018 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package index
16
17import (
18 "math"
19 "reflect"
20 "regexp/syntax"
21 "testing"
22
23 "github.com/RoaringBitmap/roaring"
24 "github.com/grafana/regexp"
25
26 "github.com/sourcegraph/zoekt"
27 "github.com/sourcegraph/zoekt/query"
28)
29
30func Test_breakOnNewlines(t *testing.T) {
31 type args struct {
32 cm *candidateMatch
33 text []byte
34 }
35 tests := []struct {
36 name string
37 args args
38 want []*candidateMatch
39 }{
40 {
41 name: "trivial case",
42 args: args{
43 cm: &candidateMatch{
44 byteOffset: 0,
45 byteMatchSz: 0,
46 },
47 text: nil,
48 },
49 want: nil,
50 },
51 {
52 name: "no newlines",
53 args: args{
54 cm: &candidateMatch{
55 byteOffset: 0,
56 byteMatchSz: 1,
57 },
58 text: []byte("a"),
59 },
60 want: []*candidateMatch{
61 {
62 byteOffset: 0,
63 byteMatchSz: 1,
64 },
65 },
66 },
67 {
68 name: "newline at start",
69 args: args{
70 cm: &candidateMatch{
71 byteOffset: 0,
72 byteMatchSz: 2,
73 },
74 text: []byte("\na"),
75 },
76 want: []*candidateMatch{
77 {
78 byteOffset: 1,
79 byteMatchSz: 1,
80 },
81 },
82 },
83 {
84 name: "newline at end",
85 args: args{
86 cm: &candidateMatch{
87 byteOffset: 0,
88 byteMatchSz: 2,
89 },
90 text: []byte("a\n"),
91 },
92 want: []*candidateMatch{
93 {
94 byteOffset: 0,
95 byteMatchSz: 1,
96 },
97 },
98 },
99 {
100 name: "newline in middle",
101 args: args{
102 cm: &candidateMatch{
103 byteOffset: 0,
104 byteMatchSz: 3,
105 },
106 text: []byte("a\nb"),
107 },
108 want: []*candidateMatch{
109 {
110 byteOffset: 0,
111 byteMatchSz: 1,
112 },
113 {
114 byteOffset: 2,
115 byteMatchSz: 1,
116 },
117 },
118 },
119 {
120 name: "two newlines",
121 args: args{
122 cm: &candidateMatch{
123 byteOffset: 0,
124 byteMatchSz: 5,
125 },
126 text: []byte("a\nb\nc"),
127 },
128 want: []*candidateMatch{
129 {
130 byteOffset: 0,
131 byteMatchSz: 1,
132 },
133 {
134 byteOffset: 2,
135 byteMatchSz: 1,
136 },
137 {
138 byteOffset: 4,
139 byteMatchSz: 1,
140 },
141 },
142 },
143 }
144 for _, tt := range tests {
145 t.Run(tt.name, func(t *testing.T) {
146 if got := breakOnNewlines(tt.args.cm, tt.args.text); !reflect.DeepEqual(got, tt.want) {
147 type PrintableCm struct {
148 byteOffset uint32
149 byteMatchSz uint32
150 }
151 var got2, want2 []PrintableCm
152 for _, g := range got {
153 got2 = append(got2, PrintableCm{byteOffset: g.byteOffset, byteMatchSz: g.byteMatchSz})
154 }
155 for _, w := range tt.want {
156 want2 = append(want2, PrintableCm{byteOffset: w.byteOffset, byteMatchSz: w.byteMatchSz})
157 }
158 t.Errorf("breakMatchOnNewlines() = %+v, want %+v", got2, want2)
159 }
160 })
161 }
162}
163
164func TestEquivalentQuerySkipRegexpTree(t *testing.T) {
165 tests := []struct {
166 query string
167 skip bool
168 }{
169 {query: "^foo", skip: false},
170 {query: "foo", skip: true},
171 {query: "thread|needle|haystack", skip: true},
172 {query: "contain(er|ing)", skip: false},
173 {query: "thread (needle|haystack)", skip: true},
174 {query: "thread (needle|)", skip: false},
175 {query: `\bthread\b case:yes`, skip: true}, // word search
176 {query: `\bthread\b case:no`, skip: false},
177 }
178
179 for _, tt := range tests {
180 q, err := query.Parse(tt.query)
181 if err != nil {
182 t.Errorf("Error parsing query: %s", "sym:"+tt.query)
183 continue
184 }
185
186 d := &indexData{}
187 mt, err := d.newMatchTree(q, matchTreeOpt{})
188 if err != nil {
189 t.Errorf("Error creating match tree from query: %s", q)
190 continue
191 }
192
193 visitMatchTree(mt, func(m matchTree) {
194 if _, ok := m.(*regexpMatchTree); ok && tt.skip {
195 t.Log(mt)
196 t.Errorf("Expected regexpMatchTree to be skipped for query: %s", q)
197 }
198 })
199 }
200}
201
202// Test whether we skip the regexp engine for queries like "\bLITERAL\b
203// case:yes"
204func TestWordSearchSkipRegexpTree(t *testing.T) {
205 qStr := "\\bfoo\\b case:yes"
206 q, err := query.Parse(qStr)
207 if err != nil {
208 t.Fatalf("Error parsing query: %s", "sym:"+qStr)
209 }
210
211 d := &indexData{}
212 mt, err := d.newMatchTree(q, matchTreeOpt{})
213 if err != nil {
214 t.Fatalf("Error creating match tree from query: %s", q)
215 }
216
217 countRegexMatchTree, countWordMatchTree := 0, 0
218 visitMatchTree(mt, func(m matchTree) {
219 switch m.(type) {
220 case *regexpMatchTree:
221 countRegexMatchTree++
222 case *wordMatchTree:
223 countWordMatchTree++
224 }
225 })
226
227 if countRegexMatchTree != 0 {
228 t.Fatalf("expected to find 0 regexMatchTree, found %d", countRegexMatchTree)
229 }
230
231 if countWordMatchTree != 1 {
232 t.Fatalf("expected to find 1 wordMatchTree, found %d", countWordMatchTree)
233 }
234}
235
236func TestSymbolMatchTree(t *testing.T) {
237 tests := []struct {
238 query string
239 substr string
240 regex string
241 regexAll bool
242 }{
243 {query: "sym:.*", regex: "(?i)(?-s:.)*", regexAll: true},
244 {query: "sym:(ab|cd)", regex: "(?i)ab|cd"},
245 {query: "sym:b.r", regex: "(?i)b(?-s:.)r"},
246 {query: "sym:horse", substr: "horse"},
247 {query: `sym:\bthread\b case:yes`, regex: `\bthread\b`}, // check we disable word search opt
248 {query: `sym:\bthread\b case:no`, regex: `(?i)\bthread\b`},
249 }
250
251 for _, tt := range tests {
252 q, err := query.Parse(tt.query)
253 if err != nil {
254 t.Errorf("Error parsing query: %s", tt.query)
255 continue
256 }
257
258 d := &indexData{}
259 mt, err := d.newMatchTree(q, matchTreeOpt{})
260 if err != nil {
261 t.Errorf("Error creating match tree from query: %s", q)
262 continue
263 }
264
265 var (
266 substr string
267 regex string
268 regexAll bool
269 )
270 if substrMT, ok := mt.(*symbolSubstrMatchTree); ok {
271 substr = substrMT.query.Pattern
272 }
273 if regexMT, ok := mt.(*symbolRegexpMatchTree); ok {
274 regex = regexMT.regexp.String()
275 regexAll = regexMT.all
276 }
277
278 if substr != tt.substr {
279 t.Errorf("%s has unexpected substring:\nwant: %q\ngot: %q", tt.query, tt.substr, substr)
280 }
281 if regex != tt.regex {
282 t.Errorf("%s has unexpected regex:\nwant: %q\ngot: %q", tt.query, tt.regex, regex)
283 }
284 if regexAll != tt.regexAll {
285 t.Errorf("%s has unexpected regexAll: want=%t got=%t", tt.query, tt.regexAll, regexAll)
286 }
287 }
288}
289
290func TestRepoSet(t *testing.T) {
291 d := &indexData{
292 repoMetaData: []zoekt.Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}},
293 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
294 repos: []uint16{0, 0, 1, 2, 3, 3},
295 }
296 mt, err := d.newMatchTree(&query.RepoSet{Set: map[string]bool{"r1": true, "r3": true, "r99": true}}, matchTreeOpt{})
297 if err != nil {
298 t.Fatal(err)
299 }
300 want := []uint32{2, 4, 5}
301 for i := range want {
302 nextDoc := mt.nextDoc()
303 if nextDoc != want[i] {
304 t.Fatalf("want %d, got %d", want[i], nextDoc)
305 }
306 mt.prepare(nextDoc)
307 }
308 if mt.nextDoc() != math.MaxUint32 {
309 t.Fatalf("expected %d document, but got at least 1 more", len(want))
310 }
311}
312
313func TestRepo(t *testing.T) {
314 d := &indexData{
315 repoMetaData: []zoekt.Repository{{Name: "foo"}, {Name: "bar"}},
316 fileBranchMasks: []uint64{1, 1, 1, 1, 1},
317 repos: []uint16{0, 0, 1, 0, 1},
318 }
319 mt, err := d.newMatchTree(&query.Repo{Regexp: regexp.MustCompile("ar")}, matchTreeOpt{})
320 if err != nil {
321 t.Fatal(err)
322 }
323 want := []uint32{2, 4}
324 for i := range want {
325 nextDoc := mt.nextDoc()
326 if nextDoc != want[i] {
327 t.Fatalf("want %d, got %d", want[i], nextDoc)
328 }
329 mt.prepare(nextDoc)
330 }
331 if mt.nextDoc() != math.MaxUint32 {
332 t.Fatalf("expect %d documents, but got at least 1 more", len(want))
333 }
334}
335
336func TestBranchesRepos(t *testing.T) {
337 d := &indexData{
338 repoMetaData: []zoekt.Repository{
339 {ID: hash("foo"), Name: "foo"},
340 {ID: hash("bar"), Name: "bar"},
341 },
342 fileBranchMasks: []uint64{1, 1, 1, 2, 1, 2, 1},
343 repos: []uint16{0, 0, 1, 1, 1, 1, 1},
344 branchIDs: []map[string]uint{{"HEAD": 1}, {"HEAD": 1, "b1": 2}},
345 }
346
347 mt, err := d.newMatchTree(&query.BranchesRepos{List: []query.BranchRepos{
348 {Branch: "b1", Repos: roaring.BitmapOf(hash("bar"))},
349 {Branch: "b2", Repos: roaring.BitmapOf(hash("bar"))},
350 }}, matchTreeOpt{})
351 if err != nil {
352 t.Fatal(err)
353 }
354
355 want := []uint32{3, 5}
356 for i := range want {
357 nextDoc := mt.nextDoc()
358 if nextDoc != want[i] {
359 t.Fatalf("want %d, got %d", want[i], nextDoc)
360 }
361 mt.prepare(nextDoc)
362 }
363
364 if mt.nextDoc() != math.MaxUint32 {
365 t.Fatalf("expect %d documents, but got at least 1 more", len(want))
366 }
367}
368
369func TestRepoIDs(t *testing.T) {
370 d := &indexData{
371 repoMetaData: []zoekt.Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}},
372 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
373 repos: []uint16{0, 0, 1, 2, 3, 3},
374 }
375 mt, err := d.newMatchTree(&query.RepoIDs{Repos: roaring.BitmapOf(1, 3, 99)}, matchTreeOpt{})
376 if err != nil {
377 t.Fatal(err)
378 }
379 want := []uint32{2, 4, 5}
380 for i := range want {
381 nextDoc := mt.nextDoc()
382 if nextDoc != want[i] {
383 t.Fatalf("want %d, got %d", want[i], nextDoc)
384 }
385 mt.prepare(nextDoc)
386 }
387 if mt.nextDoc() != math.MaxUint32 {
388 t.Fatalf("expected %d document, but got at least 1 more", len(want))
389 }
390}
391
392func TestIsRegexpAll(t *testing.T) {
393 valid := []string{
394 ".*",
395 "(.*)",
396 "(?-s:.*)",
397 "(?s:.*)",
398 "(?i)(?-s:.*)",
399 }
400 invalid := []string{
401 ".",
402 "foo",
403 "(foo.*)",
404 }
405
406 for _, s := range valid {
407 r, err := syntax.Parse(s, syntax.Perl)
408 if err != nil {
409 t.Fatal(err)
410 }
411 if !isRegexpAll(r) {
412 t.Errorf("expected %q to match all", s)
413 }
414 }
415
416 for _, s := range invalid {
417 r, err := syntax.Parse(s, syntax.Perl)
418 if err != nil {
419 t.Fatal(err)
420 }
421 if isRegexpAll(r) {
422 t.Errorf("expected %q to not match all", s)
423 }
424 }
425}
426
427func TestMetaQueryMatchTree(t *testing.T) {
428 d := &indexData{
429 repoMetaData: []zoekt.Repository{
430 {Name: "r0", Metadata: map[string]string{"license": "Apache-2.0"}},
431 {Name: "r1", Metadata: map[string]string{"license": "MIT"}},
432 {Name: "r2"}, // no metadata
433 {Name: "r3", Metadata: map[string]string{"haystack": "needle"}},
434 {Name: "r4", Metadata: map[string]string{"note": "test"}},
435 },
436 fileBranchMasks: []uint64{1, 1, 1, 1, 1}, // 5 docs
437 repos: []uint16{0, 1, 2, 3, 4}, // map docIDs to repos
438 }
439
440 q := &query.Meta{
441 Field: "license",
442 Value: regexp.MustCompile("M.T"),
443 }
444
445 mt, err := d.newMatchTree(q, matchTreeOpt{})
446 if err != nil {
447 t.Fatalf("failed to build matchTree: %v", err)
448 }
449
450 var matched []uint32
451 for {
452 doc := mt.nextDoc()
453 if doc == math.MaxUint32 {
454 break
455 }
456 matched = append(matched, doc)
457 mt.prepare(doc)
458 }
459
460 want := []uint32{1} // only doc from r1 should match
461 if !reflect.DeepEqual(matched, want) {
462 t.Errorf("meta match failed: got %v, want %v", matched, want)
463 }
464}