fork of https://github.com/sourcegraph/zoekt
1// Copyright 2018 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "reflect"
19 "testing"
20
21 "github.com/RoaringBitmap/roaring"
22 "github.com/grafana/regexp"
23
24 "github.com/sourcegraph/zoekt/query"
25)
26
27func Test_breakOnNewlines(t *testing.T) {
28 type args struct {
29 cm *candidateMatch
30 text []byte
31 }
32 tests := []struct {
33 name string
34 args args
35 want []*candidateMatch
36 }{
37 {
38 name: "trivial case",
39 args: args{
40 cm: &candidateMatch{
41 byteOffset: 0,
42 byteMatchSz: 0,
43 },
44 text: nil,
45 },
46 want: nil,
47 },
48 {
49 name: "no newlines",
50 args: args{
51 cm: &candidateMatch{
52 byteOffset: 0,
53 byteMatchSz: 1,
54 },
55 text: []byte("a"),
56 },
57 want: []*candidateMatch{
58 {
59 byteOffset: 0,
60 byteMatchSz: 1,
61 },
62 },
63 },
64 {
65 name: "newline at start",
66 args: args{
67 cm: &candidateMatch{
68 byteOffset: 0,
69 byteMatchSz: 2,
70 },
71 text: []byte("\na"),
72 },
73 want: []*candidateMatch{
74 {
75 byteOffset: 1,
76 byteMatchSz: 1,
77 },
78 },
79 },
80 {
81 name: "newline at end",
82 args: args{
83 cm: &candidateMatch{
84 byteOffset: 0,
85 byteMatchSz: 2,
86 },
87 text: []byte("a\n"),
88 },
89 want: []*candidateMatch{
90 {
91 byteOffset: 0,
92 byteMatchSz: 1,
93 },
94 },
95 },
96 {
97 name: "newline in middle",
98 args: args{
99 cm: &candidateMatch{
100 byteOffset: 0,
101 byteMatchSz: 3,
102 },
103 text: []byte("a\nb"),
104 },
105 want: []*candidateMatch{
106 {
107 byteOffset: 0,
108 byteMatchSz: 1,
109 },
110 {
111 byteOffset: 2,
112 byteMatchSz: 1,
113 },
114 },
115 },
116 {
117 name: "two newlines",
118 args: args{
119 cm: &candidateMatch{
120 byteOffset: 0,
121 byteMatchSz: 5,
122 },
123 text: []byte("a\nb\nc"),
124 },
125 want: []*candidateMatch{
126 {
127 byteOffset: 0,
128 byteMatchSz: 1,
129 },
130 {
131 byteOffset: 2,
132 byteMatchSz: 1,
133 },
134 {
135 byteOffset: 4,
136 byteMatchSz: 1,
137 },
138 },
139 },
140 }
141 for _, tt := range tests {
142 t.Run(tt.name, func(t *testing.T) {
143 if got := breakOnNewlines(tt.args.cm, tt.args.text); !reflect.DeepEqual(got, tt.want) {
144 type PrintableCm struct {
145 byteOffset uint32
146 byteMatchSz uint32
147 }
148 var got2, want2 []PrintableCm
149 for _, g := range got {
150 got2 = append(got2, PrintableCm{byteOffset: g.byteOffset, byteMatchSz: g.byteMatchSz})
151 }
152 for _, w := range tt.want {
153 want2 = append(want2, PrintableCm{byteOffset: w.byteOffset, byteMatchSz: w.byteMatchSz})
154 }
155 t.Errorf("breakMatchOnNewlines() = %+v, want %+v", got2, want2)
156 }
157 })
158 }
159}
160
161func TestEquivalentQuerySkipRegexpTree(t *testing.T) {
162 tests := []struct {
163 query string
164 skip bool
165 }{
166 {query: "^foo", skip: false},
167 {query: "foo", skip: true},
168 {query: "thread|needle|haystack", skip: true},
169 {query: "contain(er|ing)", skip: false},
170 {query: "thread (needle|haystack)", skip: true},
171 {query: "thread (needle|)", skip: false},
172 {query: `\bthread\b case:yes`, skip: true}, // word search
173 {query: `\bthread\b case:no`, skip: false},
174 }
175
176 for _, tt := range tests {
177 q, err := query.Parse(tt.query)
178 if err != nil {
179 t.Errorf("Error parsing query: %s", "sym:"+tt.query)
180 continue
181 }
182
183 d := &indexData{}
184 mt, err := d.newMatchTree(q, matchTreeOpt{})
185 if err != nil {
186 t.Errorf("Error creating match tree from query: %s", q)
187 continue
188 }
189
190 visitMatchTree(mt, func(m matchTree) {
191 if _, ok := m.(*regexpMatchTree); ok && tt.skip {
192 t.Log(mt)
193 t.Errorf("Expected regexpMatchTree to be skipped for query: %s", q)
194 }
195 })
196 }
197}
198
199// Test whether we skip the regexp engine for queries like "\bLITERAL\b
200// case:yes"
201func TestWordSearchSkipRegexpTree(t *testing.T) {
202 qStr := "\\bfoo\\b case:yes"
203 q, err := query.Parse(qStr)
204 if err != nil {
205 t.Fatalf("Error parsing query: %s", "sym:"+qStr)
206 }
207
208 d := &indexData{}
209 mt, err := d.newMatchTree(q, matchTreeOpt{})
210 if err != nil {
211 t.Fatalf("Error creating match tree from query: %s", q)
212 }
213
214 countRegexMatchTree, countWordMatchTree := 0, 0
215 visitMatchTree(mt, func(m matchTree) {
216 switch m.(type) {
217 case *regexpMatchTree:
218 countRegexMatchTree++
219 case *wordMatchTree:
220 countWordMatchTree++
221 }
222 })
223
224 if countRegexMatchTree != 0 {
225 t.Fatalf("expected to find 0 regexMatchTree, found %d", countRegexMatchTree)
226 }
227
228 if countWordMatchTree != 1 {
229 t.Fatalf("expected to find 1 wordMatchTree, found %d", countWordMatchTree)
230 }
231}
232
233func TestSymbolMatchTree(t *testing.T) {
234 tests := []struct {
235 query string
236 substr string
237 regex string
238 regexAll bool
239 }{
240 {query: "sym:.*", regex: "(?i)(?-s:.*)", regexAll: true},
241 {query: "sym:(ab|cd)", regex: "(?i)ab|cd"},
242 {query: "sym:b.r", regex: "(?i)(?-s:b.r)"},
243 {query: "sym:horse", substr: "horse"},
244 {query: `sym:\bthread\b case:yes`, regex: `\bthread\b`}, // check we disable word search opt
245 {query: `sym:\bthread\b case:no`, regex: `(?i)\bthread\b`},
246 }
247
248 for _, tt := range tests {
249 q, err := query.Parse(tt.query)
250 if err != nil {
251 t.Errorf("Error parsing query: %s", tt.query)
252 continue
253 }
254
255 d := &indexData{}
256 mt, err := d.newMatchTree(q, matchTreeOpt{})
257 if err != nil {
258 t.Errorf("Error creating match tree from query: %s", q)
259 continue
260 }
261
262 var (
263 substr string
264 regex string
265 regexAll bool
266 )
267 if substrMT, ok := mt.(*symbolSubstrMatchTree); ok {
268 substr = substrMT.query.Pattern
269 }
270 if regexMT, ok := mt.(*symbolRegexpMatchTree); ok {
271 regex = regexMT.regexp.String()
272 regexAll = regexMT.all
273 }
274
275 if substr != tt.substr {
276 t.Errorf("%s has unexpected substring:\nwant: %q\ngot: %q", tt.query, tt.substr, substr)
277 }
278 if regex != tt.regex {
279 t.Errorf("%s has unexpected regex:\nwant: %q\ngot: %q", tt.query, tt.regex, regex)
280 }
281 if regexAll != tt.regexAll {
282 t.Errorf("%s has unexpected regexAll: want=%t got=%t", tt.query, tt.regexAll, regexAll)
283 }
284 }
285}
286
287func TestRepoSet(t *testing.T) {
288 d := &indexData{
289 repoMetaData: []Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}},
290 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
291 repos: []uint16{0, 0, 1, 2, 3, 3},
292 }
293 mt, err := d.newMatchTree(&query.RepoSet{Set: map[string]bool{"r1": true, "r3": true, "r99": true}}, matchTreeOpt{})
294 if err != nil {
295 t.Fatal(err)
296 }
297 want := []uint32{2, 4, 5}
298 for i := 0; i < len(want); i++ {
299 nextDoc := mt.nextDoc()
300 if nextDoc != want[i] {
301 t.Fatalf("want %d, got %d", want[i], nextDoc)
302 }
303 mt.prepare(nextDoc)
304 }
305 if mt.nextDoc() != maxUInt32 {
306 t.Fatalf("expected %d document, but got at least 1 more", len(want))
307 }
308}
309
310func TestRepo(t *testing.T) {
311 d := &indexData{
312 repoMetaData: []Repository{{Name: "foo"}, {Name: "bar"}},
313 fileBranchMasks: []uint64{1, 1, 1, 1, 1},
314 repos: []uint16{0, 0, 1, 0, 1},
315 }
316 mt, err := d.newMatchTree(&query.Repo{Regexp: regexp.MustCompile("ar")}, matchTreeOpt{})
317 if err != nil {
318 t.Fatal(err)
319 }
320 want := []uint32{2, 4}
321 for i := 0; i < len(want); i++ {
322 nextDoc := mt.nextDoc()
323 if nextDoc != want[i] {
324 t.Fatalf("want %d, got %d", want[i], nextDoc)
325 }
326 mt.prepare(nextDoc)
327 }
328 if mt.nextDoc() != maxUInt32 {
329 t.Fatalf("expect %d documents, but got at least 1 more", len(want))
330 }
331}
332
333func TestBranchesRepos(t *testing.T) {
334 d := &indexData{
335 repoMetaData: []Repository{
336 {ID: hash("foo"), Name: "foo"},
337 {ID: hash("bar"), Name: "bar"},
338 },
339 fileBranchMasks: []uint64{1, 1, 1, 2, 1, 2, 1},
340 repos: []uint16{0, 0, 1, 1, 1, 1, 1},
341 branchIDs: []map[string]uint{{"HEAD": 1}, {"HEAD": 1, "b1": 2}},
342 }
343
344 mt, err := d.newMatchTree(&query.BranchesRepos{List: []query.BranchRepos{
345 {Branch: "b1", Repos: roaring.BitmapOf(hash("bar"))},
346 {Branch: "b2", Repos: roaring.BitmapOf(hash("bar"))},
347 }}, matchTreeOpt{})
348 if err != nil {
349 t.Fatal(err)
350 }
351
352 want := []uint32{3, 5}
353 for i := 0; i < len(want); i++ {
354 nextDoc := mt.nextDoc()
355 if nextDoc != want[i] {
356 t.Fatalf("want %d, got %d", want[i], nextDoc)
357 }
358 mt.prepare(nextDoc)
359 }
360
361 if mt.nextDoc() != maxUInt32 {
362 t.Fatalf("expect %d documents, but got at least 1 more", len(want))
363 }
364}
365
366func TestRepoIDs(t *testing.T) {
367 d := &indexData{
368 repoMetaData: []Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}},
369 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
370 repos: []uint16{0, 0, 1, 2, 3, 3},
371 }
372 mt, err := d.newMatchTree(&query.RepoIDs{Repos: roaring.BitmapOf(1, 3, 99)}, matchTreeOpt{})
373 if err != nil {
374 t.Fatal(err)
375 }
376 want := []uint32{2, 4, 5}
377 for i := 0; i < len(want); i++ {
378 nextDoc := mt.nextDoc()
379 if nextDoc != want[i] {
380 t.Fatalf("want %d, got %d", want[i], nextDoc)
381 }
382 mt.prepare(nextDoc)
383 }
384 if mt.nextDoc() != maxUInt32 {
385 t.Fatalf("expected %d document, but got at least 1 more", len(want))
386 }
387}