fork of https://github.com/sourcegraph/zoekt
1// Copyright 2018 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package index
16
17import (
18 "reflect"
19 "regexp/syntax"
20 "testing"
21
22 "github.com/RoaringBitmap/roaring"
23 "github.com/grafana/regexp"
24 "github.com/sourcegraph/zoekt"
25
26 "github.com/sourcegraph/zoekt/query"
27)
28
29func Test_breakOnNewlines(t *testing.T) {
30 type args struct {
31 cm *candidateMatch
32 text []byte
33 }
34 tests := []struct {
35 name string
36 args args
37 want []*candidateMatch
38 }{
39 {
40 name: "trivial case",
41 args: args{
42 cm: &candidateMatch{
43 byteOffset: 0,
44 byteMatchSz: 0,
45 },
46 text: nil,
47 },
48 want: nil,
49 },
50 {
51 name: "no newlines",
52 args: args{
53 cm: &candidateMatch{
54 byteOffset: 0,
55 byteMatchSz: 1,
56 },
57 text: []byte("a"),
58 },
59 want: []*candidateMatch{
60 {
61 byteOffset: 0,
62 byteMatchSz: 1,
63 },
64 },
65 },
66 {
67 name: "newline at start",
68 args: args{
69 cm: &candidateMatch{
70 byteOffset: 0,
71 byteMatchSz: 2,
72 },
73 text: []byte("\na"),
74 },
75 want: []*candidateMatch{
76 {
77 byteOffset: 1,
78 byteMatchSz: 1,
79 },
80 },
81 },
82 {
83 name: "newline at end",
84 args: args{
85 cm: &candidateMatch{
86 byteOffset: 0,
87 byteMatchSz: 2,
88 },
89 text: []byte("a\n"),
90 },
91 want: []*candidateMatch{
92 {
93 byteOffset: 0,
94 byteMatchSz: 1,
95 },
96 },
97 },
98 {
99 name: "newline in middle",
100 args: args{
101 cm: &candidateMatch{
102 byteOffset: 0,
103 byteMatchSz: 3,
104 },
105 text: []byte("a\nb"),
106 },
107 want: []*candidateMatch{
108 {
109 byteOffset: 0,
110 byteMatchSz: 1,
111 },
112 {
113 byteOffset: 2,
114 byteMatchSz: 1,
115 },
116 },
117 },
118 {
119 name: "two newlines",
120 args: args{
121 cm: &candidateMatch{
122 byteOffset: 0,
123 byteMatchSz: 5,
124 },
125 text: []byte("a\nb\nc"),
126 },
127 want: []*candidateMatch{
128 {
129 byteOffset: 0,
130 byteMatchSz: 1,
131 },
132 {
133 byteOffset: 2,
134 byteMatchSz: 1,
135 },
136 {
137 byteOffset: 4,
138 byteMatchSz: 1,
139 },
140 },
141 },
142 }
143 for _, tt := range tests {
144 t.Run(tt.name, func(t *testing.T) {
145 if got := breakOnNewlines(tt.args.cm, tt.args.text); !reflect.DeepEqual(got, tt.want) {
146 type PrintableCm struct {
147 byteOffset uint32
148 byteMatchSz uint32
149 }
150 var got2, want2 []PrintableCm
151 for _, g := range got {
152 got2 = append(got2, PrintableCm{byteOffset: g.byteOffset, byteMatchSz: g.byteMatchSz})
153 }
154 for _, w := range tt.want {
155 want2 = append(want2, PrintableCm{byteOffset: w.byteOffset, byteMatchSz: w.byteMatchSz})
156 }
157 t.Errorf("breakMatchOnNewlines() = %+v, want %+v", got2, want2)
158 }
159 })
160 }
161}
162
163func TestEquivalentQuerySkipRegexpTree(t *testing.T) {
164 tests := []struct {
165 query string
166 skip bool
167 }{
168 {query: "^foo", skip: false},
169 {query: "foo", skip: true},
170 {query: "thread|needle|haystack", skip: true},
171 {query: "contain(er|ing)", skip: false},
172 {query: "thread (needle|haystack)", skip: true},
173 {query: "thread (needle|)", skip: false},
174 {query: `\bthread\b case:yes`, skip: true}, // word search
175 {query: `\bthread\b case:no`, skip: false},
176 }
177
178 for _, tt := range tests {
179 q, err := query.Parse(tt.query)
180 if err != nil {
181 t.Errorf("Error parsing query: %s", "sym:"+tt.query)
182 continue
183 }
184
185 d := &indexData{}
186 mt, err := d.newMatchTree(q, matchTreeOpt{})
187 if err != nil {
188 t.Errorf("Error creating match tree from query: %s", q)
189 continue
190 }
191
192 visitMatchTree(mt, func(m matchTree) {
193 if _, ok := m.(*regexpMatchTree); ok && tt.skip {
194 t.Log(mt)
195 t.Errorf("Expected regexpMatchTree to be skipped for query: %s", q)
196 }
197 })
198 }
199}
200
201// Test whether we skip the regexp engine for queries like "\bLITERAL\b
202// case:yes"
203func TestWordSearchSkipRegexpTree(t *testing.T) {
204 qStr := "\\bfoo\\b case:yes"
205 q, err := query.Parse(qStr)
206 if err != nil {
207 t.Fatalf("Error parsing query: %s", "sym:"+qStr)
208 }
209
210 d := &indexData{}
211 mt, err := d.newMatchTree(q, matchTreeOpt{})
212 if err != nil {
213 t.Fatalf("Error creating match tree from query: %s", q)
214 }
215
216 countRegexMatchTree, countWordMatchTree := 0, 0
217 visitMatchTree(mt, func(m matchTree) {
218 switch m.(type) {
219 case *regexpMatchTree:
220 countRegexMatchTree++
221 case *wordMatchTree:
222 countWordMatchTree++
223 }
224 })
225
226 if countRegexMatchTree != 0 {
227 t.Fatalf("expected to find 0 regexMatchTree, found %d", countRegexMatchTree)
228 }
229
230 if countWordMatchTree != 1 {
231 t.Fatalf("expected to find 1 wordMatchTree, found %d", countWordMatchTree)
232 }
233}
234
235func TestSymbolMatchTree(t *testing.T) {
236 tests := []struct {
237 query string
238 substr string
239 regex string
240 regexAll bool
241 }{
242 {query: "sym:.*", regex: "(?i)(?-s:.)*", regexAll: true},
243 {query: "sym:(ab|cd)", regex: "(?i)ab|cd"},
244 {query: "sym:b.r", regex: "(?i)b(?-s:.)r"},
245 {query: "sym:horse", substr: "horse"},
246 {query: `sym:\bthread\b case:yes`, regex: `\bthread\b`}, // check we disable word search opt
247 {query: `sym:\bthread\b case:no`, regex: `(?i)\bthread\b`},
248 }
249
250 for _, tt := range tests {
251 q, err := query.Parse(tt.query)
252 if err != nil {
253 t.Errorf("Error parsing query: %s", tt.query)
254 continue
255 }
256
257 d := &indexData{}
258 mt, err := d.newMatchTree(q, matchTreeOpt{})
259 if err != nil {
260 t.Errorf("Error creating match tree from query: %s", q)
261 continue
262 }
263
264 var (
265 substr string
266 regex string
267 regexAll bool
268 )
269 if substrMT, ok := mt.(*symbolSubstrMatchTree); ok {
270 substr = substrMT.query.Pattern
271 }
272 if regexMT, ok := mt.(*symbolRegexpMatchTree); ok {
273 regex = regexMT.regexp.String()
274 regexAll = regexMT.all
275 }
276
277 if substr != tt.substr {
278 t.Errorf("%s has unexpected substring:\nwant: %q\ngot: %q", tt.query, tt.substr, substr)
279 }
280 if regex != tt.regex {
281 t.Errorf("%s has unexpected regex:\nwant: %q\ngot: %q", tt.query, tt.regex, regex)
282 }
283 if regexAll != tt.regexAll {
284 t.Errorf("%s has unexpected regexAll: want=%t got=%t", tt.query, tt.regexAll, regexAll)
285 }
286 }
287}
288
289func TestRepoSet(t *testing.T) {
290 d := &indexData{
291 repoMetaData: []zoekt.Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}},
292 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
293 repos: []uint16{0, 0, 1, 2, 3, 3},
294 }
295 mt, err := d.newMatchTree(&query.RepoSet{Set: map[string]bool{"r1": true, "r3": true, "r99": true}}, matchTreeOpt{})
296 if err != nil {
297 t.Fatal(err)
298 }
299 want := []uint32{2, 4, 5}
300 for i := range want {
301 nextDoc := mt.nextDoc()
302 if nextDoc != want[i] {
303 t.Fatalf("want %d, got %d", want[i], nextDoc)
304 }
305 mt.prepare(nextDoc)
306 }
307 if mt.nextDoc() != maxUInt32 {
308 t.Fatalf("expected %d document, but got at least 1 more", len(want))
309 }
310}
311
312func TestRepo(t *testing.T) {
313 d := &indexData{
314 repoMetaData: []zoekt.Repository{{Name: "foo"}, {Name: "bar"}},
315 fileBranchMasks: []uint64{1, 1, 1, 1, 1},
316 repos: []uint16{0, 0, 1, 0, 1},
317 }
318 mt, err := d.newMatchTree(&query.Repo{Regexp: regexp.MustCompile("ar")}, matchTreeOpt{})
319 if err != nil {
320 t.Fatal(err)
321 }
322 want := []uint32{2, 4}
323 for i := range want {
324 nextDoc := mt.nextDoc()
325 if nextDoc != want[i] {
326 t.Fatalf("want %d, got %d", want[i], nextDoc)
327 }
328 mt.prepare(nextDoc)
329 }
330 if mt.nextDoc() != maxUInt32 {
331 t.Fatalf("expect %d documents, but got at least 1 more", len(want))
332 }
333}
334
335func TestBranchesRepos(t *testing.T) {
336 d := &indexData{
337 repoMetaData: []zoekt.Repository{
338 {ID: hash("foo"), Name: "foo"},
339 {ID: hash("bar"), Name: "bar"},
340 },
341 fileBranchMasks: []uint64{1, 1, 1, 2, 1, 2, 1},
342 repos: []uint16{0, 0, 1, 1, 1, 1, 1},
343 branchIDs: []map[string]uint{{"HEAD": 1}, {"HEAD": 1, "b1": 2}},
344 }
345
346 mt, err := d.newMatchTree(&query.BranchesRepos{List: []query.BranchRepos{
347 {Branch: "b1", Repos: roaring.BitmapOf(hash("bar"))},
348 {Branch: "b2", Repos: roaring.BitmapOf(hash("bar"))},
349 }}, matchTreeOpt{})
350 if err != nil {
351 t.Fatal(err)
352 }
353
354 want := []uint32{3, 5}
355 for i := range want {
356 nextDoc := mt.nextDoc()
357 if nextDoc != want[i] {
358 t.Fatalf("want %d, got %d", want[i], nextDoc)
359 }
360 mt.prepare(nextDoc)
361 }
362
363 if mt.nextDoc() != maxUInt32 {
364 t.Fatalf("expect %d documents, but got at least 1 more", len(want))
365 }
366}
367
368func TestRepoIDs(t *testing.T) {
369 d := &indexData{
370 repoMetaData: []zoekt.Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}},
371 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
372 repos: []uint16{0, 0, 1, 2, 3, 3},
373 }
374 mt, err := d.newMatchTree(&query.RepoIDs{Repos: roaring.BitmapOf(1, 3, 99)}, matchTreeOpt{})
375 if err != nil {
376 t.Fatal(err)
377 }
378 want := []uint32{2, 4, 5}
379 for i := range want {
380 nextDoc := mt.nextDoc()
381 if nextDoc != want[i] {
382 t.Fatalf("want %d, got %d", want[i], nextDoc)
383 }
384 mt.prepare(nextDoc)
385 }
386 if mt.nextDoc() != maxUInt32 {
387 t.Fatalf("expected %d document, but got at least 1 more", len(want))
388 }
389}
390
391func TestIsRegexpAll(t *testing.T) {
392 valid := []string{
393 ".*",
394 "(.*)",
395 "(?-s:.*)",
396 "(?s:.*)",
397 "(?i)(?-s:.*)",
398 }
399 invalid := []string{
400 ".",
401 "foo",
402 "(foo.*)",
403 }
404
405 for _, s := range valid {
406 r, err := syntax.Parse(s, syntax.Perl)
407 if err != nil {
408 t.Fatal(err)
409 }
410 if !isRegexpAll(r) {
411 t.Errorf("expected %q to match all", s)
412 }
413 }
414
415 for _, s := range invalid {
416 r, err := syntax.Parse(s, syntax.Perl)
417 if err != nil {
418 t.Fatal(err)
419 }
420 if isRegexpAll(r) {
421 t.Errorf("expected %q to not match all", s)
422 }
423 }
424}