fork of https://github.com/sourcegraph/zoekt
1// Copyright 2018 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "reflect"
19 "regexp/syntax"
20 "testing"
21
22 "github.com/RoaringBitmap/roaring"
23 "github.com/grafana/regexp"
24
25 "github.com/sourcegraph/zoekt/query"
26)
27
28func Test_breakOnNewlines(t *testing.T) {
29 type args struct {
30 cm *candidateMatch
31 text []byte
32 }
33 tests := []struct {
34 name string
35 args args
36 want []*candidateMatch
37 }{
38 {
39 name: "trivial case",
40 args: args{
41 cm: &candidateMatch{
42 byteOffset: 0,
43 byteMatchSz: 0,
44 },
45 text: nil,
46 },
47 want: nil,
48 },
49 {
50 name: "no newlines",
51 args: args{
52 cm: &candidateMatch{
53 byteOffset: 0,
54 byteMatchSz: 1,
55 },
56 text: []byte("a"),
57 },
58 want: []*candidateMatch{
59 {
60 byteOffset: 0,
61 byteMatchSz: 1,
62 },
63 },
64 },
65 {
66 name: "newline at start",
67 args: args{
68 cm: &candidateMatch{
69 byteOffset: 0,
70 byteMatchSz: 2,
71 },
72 text: []byte("\na"),
73 },
74 want: []*candidateMatch{
75 {
76 byteOffset: 1,
77 byteMatchSz: 1,
78 },
79 },
80 },
81 {
82 name: "newline at end",
83 args: args{
84 cm: &candidateMatch{
85 byteOffset: 0,
86 byteMatchSz: 2,
87 },
88 text: []byte("a\n"),
89 },
90 want: []*candidateMatch{
91 {
92 byteOffset: 0,
93 byteMatchSz: 1,
94 },
95 },
96 },
97 {
98 name: "newline in middle",
99 args: args{
100 cm: &candidateMatch{
101 byteOffset: 0,
102 byteMatchSz: 3,
103 },
104 text: []byte("a\nb"),
105 },
106 want: []*candidateMatch{
107 {
108 byteOffset: 0,
109 byteMatchSz: 1,
110 },
111 {
112 byteOffset: 2,
113 byteMatchSz: 1,
114 },
115 },
116 },
117 {
118 name: "two newlines",
119 args: args{
120 cm: &candidateMatch{
121 byteOffset: 0,
122 byteMatchSz: 5,
123 },
124 text: []byte("a\nb\nc"),
125 },
126 want: []*candidateMatch{
127 {
128 byteOffset: 0,
129 byteMatchSz: 1,
130 },
131 {
132 byteOffset: 2,
133 byteMatchSz: 1,
134 },
135 {
136 byteOffset: 4,
137 byteMatchSz: 1,
138 },
139 },
140 },
141 }
142 for _, tt := range tests {
143 t.Run(tt.name, func(t *testing.T) {
144 if got := breakOnNewlines(tt.args.cm, tt.args.text); !reflect.DeepEqual(got, tt.want) {
145 type PrintableCm struct {
146 byteOffset uint32
147 byteMatchSz uint32
148 }
149 var got2, want2 []PrintableCm
150 for _, g := range got {
151 got2 = append(got2, PrintableCm{byteOffset: g.byteOffset, byteMatchSz: g.byteMatchSz})
152 }
153 for _, w := range tt.want {
154 want2 = append(want2, PrintableCm{byteOffset: w.byteOffset, byteMatchSz: w.byteMatchSz})
155 }
156 t.Errorf("breakMatchOnNewlines() = %+v, want %+v", got2, want2)
157 }
158 })
159 }
160}
161
162func TestEquivalentQuerySkipRegexpTree(t *testing.T) {
163 tests := []struct {
164 query string
165 skip bool
166 }{
167 {query: "^foo", skip: false},
168 {query: "foo", skip: true},
169 {query: "thread|needle|haystack", skip: true},
170 {query: "contain(er|ing)", skip: false},
171 {query: "thread (needle|haystack)", skip: true},
172 {query: "thread (needle|)", skip: false},
173 {query: `\bthread\b case:yes`, skip: true}, // word search
174 {query: `\bthread\b case:no`, skip: false},
175 }
176
177 for _, tt := range tests {
178 q, err := query.Parse(tt.query)
179 if err != nil {
180 t.Errorf("Error parsing query: %s", "sym:"+tt.query)
181 continue
182 }
183
184 d := &indexData{}
185 mt, err := d.newMatchTree(q, matchTreeOpt{})
186 if err != nil {
187 t.Errorf("Error creating match tree from query: %s", q)
188 continue
189 }
190
191 visitMatchTree(mt, func(m matchTree) {
192 if _, ok := m.(*regexpMatchTree); ok && tt.skip {
193 t.Log(mt)
194 t.Errorf("Expected regexpMatchTree to be skipped for query: %s", q)
195 }
196 })
197 }
198}
199
200// Test whether we skip the regexp engine for queries like "\bLITERAL\b
201// case:yes"
202func TestWordSearchSkipRegexpTree(t *testing.T) {
203 qStr := "\\bfoo\\b case:yes"
204 q, err := query.Parse(qStr)
205 if err != nil {
206 t.Fatalf("Error parsing query: %s", "sym:"+qStr)
207 }
208
209 d := &indexData{}
210 mt, err := d.newMatchTree(q, matchTreeOpt{})
211 if err != nil {
212 t.Fatalf("Error creating match tree from query: %s", q)
213 }
214
215 countRegexMatchTree, countWordMatchTree := 0, 0
216 visitMatchTree(mt, func(m matchTree) {
217 switch m.(type) {
218 case *regexpMatchTree:
219 countRegexMatchTree++
220 case *wordMatchTree:
221 countWordMatchTree++
222 }
223 })
224
225 if countRegexMatchTree != 0 {
226 t.Fatalf("expected to find 0 regexMatchTree, found %d", countRegexMatchTree)
227 }
228
229 if countWordMatchTree != 1 {
230 t.Fatalf("expected to find 1 wordMatchTree, found %d", countWordMatchTree)
231 }
232}
233
234func TestSymbolMatchTree(t *testing.T) {
235 tests := []struct {
236 query string
237 substr string
238 regex string
239 regexAll bool
240 }{
241 {query: "sym:.*", regex: "(?i)(?-s:.*)", regexAll: true},
242 {query: "sym:(ab|cd)", regex: "(?i)ab|cd"},
243 {query: "sym:b.r", regex: "(?i)(?-s:b.r)"},
244 {query: "sym:horse", substr: "horse"},
245 {query: `sym:\bthread\b case:yes`, regex: `\bthread\b`}, // check we disable word search opt
246 {query: `sym:\bthread\b case:no`, regex: `(?i)\bthread\b`},
247 }
248
249 for _, tt := range tests {
250 q, err := query.Parse(tt.query)
251 if err != nil {
252 t.Errorf("Error parsing query: %s", tt.query)
253 continue
254 }
255
256 d := &indexData{}
257 mt, err := d.newMatchTree(q, matchTreeOpt{})
258 if err != nil {
259 t.Errorf("Error creating match tree from query: %s", q)
260 continue
261 }
262
263 var (
264 substr string
265 regex string
266 regexAll bool
267 )
268 if substrMT, ok := mt.(*symbolSubstrMatchTree); ok {
269 substr = substrMT.query.Pattern
270 }
271 if regexMT, ok := mt.(*symbolRegexpMatchTree); ok {
272 regex = regexMT.regexp.String()
273 regexAll = regexMT.all
274 }
275
276 if substr != tt.substr {
277 t.Errorf("%s has unexpected substring:\nwant: %q\ngot: %q", tt.query, tt.substr, substr)
278 }
279 if regex != tt.regex {
280 t.Errorf("%s has unexpected regex:\nwant: %q\ngot: %q", tt.query, tt.regex, regex)
281 }
282 if regexAll != tt.regexAll {
283 t.Errorf("%s has unexpected regexAll: want=%t got=%t", tt.query, tt.regexAll, regexAll)
284 }
285 }
286}
287
288func TestRepoSet(t *testing.T) {
289 d := &indexData{
290 repoMetaData: []Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}},
291 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
292 repos: []uint16{0, 0, 1, 2, 3, 3},
293 }
294 mt, err := d.newMatchTree(&query.RepoSet{Set: map[string]bool{"r1": true, "r3": true, "r99": true}}, matchTreeOpt{})
295 if err != nil {
296 t.Fatal(err)
297 }
298 want := []uint32{2, 4, 5}
299 for i := 0; i < len(want); i++ {
300 nextDoc := mt.nextDoc()
301 if nextDoc != want[i] {
302 t.Fatalf("want %d, got %d", want[i], nextDoc)
303 }
304 mt.prepare(nextDoc)
305 }
306 if mt.nextDoc() != maxUInt32 {
307 t.Fatalf("expected %d document, but got at least 1 more", len(want))
308 }
309}
310
311func TestRepo(t *testing.T) {
312 d := &indexData{
313 repoMetaData: []Repository{{Name: "foo"}, {Name: "bar"}},
314 fileBranchMasks: []uint64{1, 1, 1, 1, 1},
315 repos: []uint16{0, 0, 1, 0, 1},
316 }
317 mt, err := d.newMatchTree(&query.Repo{Regexp: regexp.MustCompile("ar")}, matchTreeOpt{})
318 if err != nil {
319 t.Fatal(err)
320 }
321 want := []uint32{2, 4}
322 for i := 0; i < len(want); i++ {
323 nextDoc := mt.nextDoc()
324 if nextDoc != want[i] {
325 t.Fatalf("want %d, got %d", want[i], nextDoc)
326 }
327 mt.prepare(nextDoc)
328 }
329 if mt.nextDoc() != maxUInt32 {
330 t.Fatalf("expect %d documents, but got at least 1 more", len(want))
331 }
332}
333
334func TestBranchesRepos(t *testing.T) {
335 d := &indexData{
336 repoMetaData: []Repository{
337 {ID: hash("foo"), Name: "foo"},
338 {ID: hash("bar"), Name: "bar"},
339 },
340 fileBranchMasks: []uint64{1, 1, 1, 2, 1, 2, 1},
341 repos: []uint16{0, 0, 1, 1, 1, 1, 1},
342 branchIDs: []map[string]uint{{"HEAD": 1}, {"HEAD": 1, "b1": 2}},
343 }
344
345 mt, err := d.newMatchTree(&query.BranchesRepos{List: []query.BranchRepos{
346 {Branch: "b1", Repos: roaring.BitmapOf(hash("bar"))},
347 {Branch: "b2", Repos: roaring.BitmapOf(hash("bar"))},
348 }}, matchTreeOpt{})
349 if err != nil {
350 t.Fatal(err)
351 }
352
353 want := []uint32{3, 5}
354 for i := 0; i < len(want); i++ {
355 nextDoc := mt.nextDoc()
356 if nextDoc != want[i] {
357 t.Fatalf("want %d, got %d", want[i], nextDoc)
358 }
359 mt.prepare(nextDoc)
360 }
361
362 if mt.nextDoc() != maxUInt32 {
363 t.Fatalf("expect %d documents, but got at least 1 more", len(want))
364 }
365}
366
367func TestRepoIDs(t *testing.T) {
368 d := &indexData{
369 repoMetaData: []Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}},
370 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
371 repos: []uint16{0, 0, 1, 2, 3, 3},
372 }
373 mt, err := d.newMatchTree(&query.RepoIDs{Repos: roaring.BitmapOf(1, 3, 99)}, matchTreeOpt{})
374 if err != nil {
375 t.Fatal(err)
376 }
377 want := []uint32{2, 4, 5}
378 for i := 0; i < len(want); i++ {
379 nextDoc := mt.nextDoc()
380 if nextDoc != want[i] {
381 t.Fatalf("want %d, got %d", want[i], nextDoc)
382 }
383 mt.prepare(nextDoc)
384 }
385 if mt.nextDoc() != maxUInt32 {
386 t.Fatalf("expected %d document, but got at least 1 more", len(want))
387 }
388}
389
390func TestIsRegexpAll(t *testing.T) {
391 valid := []string{
392 ".*",
393 "(.*)",
394 "(?-s:.*)",
395 "(?s:.*)",
396 "(?i)(?-s:.*)",
397 }
398 invalid := []string{
399 ".",
400 "foo",
401 "(foo.*)",
402 }
403
404 for _, s := range valid {
405 r, err := syntax.Parse(s, syntax.Perl)
406 if err != nil {
407 t.Fatal(err)
408 }
409 if !isRegexpAll(r) {
410 t.Errorf("expected %q to match all", s)
411 }
412 }
413
414 for _, s := range invalid {
415 r, err := syntax.Parse(s, syntax.Perl)
416 if err != nil {
417 t.Fatal(err)
418 }
419 if isRegexpAll(r) {
420 t.Errorf("expected %q to not match all", s)
421 }
422 }
423}