fork of https://github.com/sourcegraph/zoekt
1// Copyright 2018 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "reflect"
19 "testing"
20
21 "github.com/RoaringBitmap/roaring"
22 "github.com/grafana/regexp"
23
24 "github.com/sourcegraph/zoekt/query"
25)
26
27func Test_breakOnNewlines(t *testing.T) {
28 type args struct {
29 cm *candidateMatch
30 text []byte
31 }
32 tests := []struct {
33 name string
34 args args
35 want []*candidateMatch
36 }{
37 {
38 name: "trivial case",
39 args: args{
40 cm: &candidateMatch{
41 byteOffset: 0,
42 byteMatchSz: 0,
43 },
44 text: nil,
45 },
46 want: nil,
47 },
48 {
49 name: "no newlines",
50 args: args{
51 cm: &candidateMatch{
52 byteOffset: 0,
53 byteMatchSz: 1,
54 },
55 text: []byte("a"),
56 },
57 want: []*candidateMatch{
58 {
59 byteOffset: 0,
60 byteMatchSz: 1,
61 },
62 },
63 },
64 {
65 name: "newline at start",
66 args: args{
67 cm: &candidateMatch{
68 byteOffset: 0,
69 byteMatchSz: 2,
70 },
71 text: []byte("\na"),
72 },
73 want: []*candidateMatch{
74 {
75 byteOffset: 1,
76 byteMatchSz: 1,
77 },
78 },
79 },
80 {
81 name: "newline at end",
82 args: args{
83 cm: &candidateMatch{
84 byteOffset: 0,
85 byteMatchSz: 2,
86 },
87 text: []byte("a\n"),
88 },
89 want: []*candidateMatch{
90 {
91 byteOffset: 0,
92 byteMatchSz: 1,
93 },
94 },
95 },
96 {
97 name: "newline in middle",
98 args: args{
99 cm: &candidateMatch{
100 byteOffset: 0,
101 byteMatchSz: 3,
102 },
103 text: []byte("a\nb"),
104 },
105 want: []*candidateMatch{
106 {
107 byteOffset: 0,
108 byteMatchSz: 1,
109 },
110 {
111 byteOffset: 2,
112 byteMatchSz: 1,
113 },
114 },
115 },
116 {
117 name: "two newlines",
118 args: args{
119 cm: &candidateMatch{
120 byteOffset: 0,
121 byteMatchSz: 5,
122 },
123 text: []byte("a\nb\nc"),
124 },
125 want: []*candidateMatch{
126 {
127 byteOffset: 0,
128 byteMatchSz: 1,
129 },
130 {
131 byteOffset: 2,
132 byteMatchSz: 1,
133 },
134 {
135 byteOffset: 4,
136 byteMatchSz: 1,
137 },
138 },
139 },
140 }
141 for _, tt := range tests {
142 t.Run(tt.name, func(t *testing.T) {
143 if got := breakOnNewlines(tt.args.cm, tt.args.text); !reflect.DeepEqual(got, tt.want) {
144 type PrintableCm struct {
145 byteOffset uint32
146 byteMatchSz uint32
147 }
148 var got2, want2 []PrintableCm
149 for _, g := range got {
150 got2 = append(got2, PrintableCm{byteOffset: g.byteOffset, byteMatchSz: g.byteMatchSz})
151 }
152 for _, w := range tt.want {
153 want2 = append(want2, PrintableCm{byteOffset: w.byteOffset, byteMatchSz: w.byteMatchSz})
154 }
155 t.Errorf("breakMatchOnNewlines() = %+v, want %+v", got2, want2)
156 }
157 })
158 }
159}
160
161func TestEquivalentQuerySkipRegexpTree(t *testing.T) {
162 tests := []struct {
163 query string
164 skip bool
165 }{
166 {query: "^foo", skip: false},
167 {query: "foo", skip: true},
168 {query: "thread|needle|haystack", skip: true},
169 {query: "contain(er|ing)", skip: false},
170 {query: "thread (needle|haystack)", skip: true},
171 {query: "thread (needle|)", skip: false},
172 }
173
174 for _, tt := range tests {
175 q, err := query.Parse(tt.query)
176 if err != nil {
177 t.Errorf("Error parsing query: %s", "sym:"+tt.query)
178 continue
179 }
180
181 d := &indexData{}
182 mt, err := d.newMatchTree(q)
183 if err != nil {
184 t.Errorf("Error creating match tree from query: %s", q)
185 continue
186 }
187
188 visitMatchTree(mt, func(m matchTree) {
189 if _, ok := m.(*regexpMatchTree); ok && tt.skip {
190 t.Errorf("Expected regexpMatchTree to be skipped for query: %s", q)
191 }
192 })
193 }
194}
195
196// Test whether we skip the regexp engine for queries like "\bLITERAL\b
197// case:yes"
198func TestWordSearchSkipRegexpTree(t *testing.T) {
199 qStr := "\\bfoo\\b case:yes"
200 q, err := query.Parse(qStr)
201 if err != nil {
202 t.Fatalf("Error parsing query: %s", "sym:"+qStr)
203 }
204
205 d := &indexData{}
206 mt, err := d.newMatchTree(q)
207 if err != nil {
208 t.Fatalf("Error creating match tree from query: %s", q)
209 }
210
211 countRegexMatchTree, countWordMatchTree := 0, 0
212 visitMatchTree(mt, func(m matchTree) {
213 switch m.(type) {
214 case *regexpMatchTree:
215 countRegexMatchTree++
216 case *wordMatchTree:
217 countWordMatchTree++
218 }
219 })
220
221 if countRegexMatchTree != 0 {
222 t.Fatalf("expected to find 0 regexMatchTree, found %d", countRegexMatchTree)
223 }
224
225 if countWordMatchTree != 1 {
226 t.Fatalf("expected to find 1 wordMatchTree, found %d", countWordMatchTree)
227 }
228}
229
230func TestSymbolMatchRegexAll(t *testing.T) {
231 tests := []struct {
232 query string
233 all bool
234 }{
235 {query: ".*", all: true},
236 {query: "(a|b)", all: false},
237 {query: "b.r", all: false},
238 }
239
240 for _, tt := range tests {
241 q, err := query.Parse("sym:" + tt.query)
242 if err != nil {
243 t.Errorf("Error parsing query: %s", "sym:"+tt.query)
244 continue
245 }
246
247 d := &indexData{}
248 mt, err := d.newMatchTree(q)
249 if err != nil {
250 t.Errorf("Error creating match tree from query: %s", q)
251 continue
252 }
253
254 regexMT, ok := mt.(*symbolRegexpMatchTree)
255 if !ok {
256 t.Errorf("Expected symbol regex match tree from query: %s, got %v", q, mt)
257 continue
258 }
259
260 if regexMT.all != tt.all {
261 t.Errorf("Expected property all: %t from query: %s", tt.all, q)
262 }
263 }
264}
265
266func TestRepoSet(t *testing.T) {
267 d := &indexData{
268 repoMetaData: []Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}},
269 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
270 repos: []uint16{0, 0, 1, 2, 3, 3},
271 }
272 mt, err := d.newMatchTree(&query.RepoSet{Set: map[string]bool{"r1": true, "r3": true, "r99": true}})
273 if err != nil {
274 t.Fatal(err)
275 }
276 want := []uint32{2, 4, 5}
277 for i := 0; i < len(want); i++ {
278 nextDoc := mt.nextDoc()
279 if nextDoc != want[i] {
280 t.Fatalf("want %d, got %d", want[i], nextDoc)
281 }
282 mt.prepare(nextDoc)
283 }
284 if mt.nextDoc() != maxUInt32 {
285 t.Fatalf("expected %d document, but got at least 1 more", len(want))
286 }
287}
288
289func TestRepo(t *testing.T) {
290 d := &indexData{
291 repoMetaData: []Repository{{Name: "foo"}, {Name: "bar"}},
292 fileBranchMasks: []uint64{1, 1, 1, 1, 1},
293 repos: []uint16{0, 0, 1, 0, 1},
294 }
295 mt, err := d.newMatchTree(&query.Repo{Regexp: regexp.MustCompile("ar")})
296 if err != nil {
297 t.Fatal(err)
298 }
299 want := []uint32{2, 4}
300 for i := 0; i < len(want); i++ {
301 nextDoc := mt.nextDoc()
302 if nextDoc != want[i] {
303 t.Fatalf("want %d, got %d", want[i], nextDoc)
304 }
305 mt.prepare(nextDoc)
306 }
307 if mt.nextDoc() != maxUInt32 {
308 t.Fatalf("expect %d documents, but got at least 1 more", len(want))
309 }
310}
311
312func TestBranchesRepos(t *testing.T) {
313 d := &indexData{
314 repoMetaData: []Repository{
315 {ID: hash("foo"), Name: "foo"},
316 {ID: hash("bar"), Name: "bar"},
317 },
318 fileBranchMasks: []uint64{1, 1, 1, 2, 1, 2, 1},
319 repos: []uint16{0, 0, 1, 1, 1, 1, 1},
320 branchIDs: []map[string]uint{{"HEAD": 1}, {"HEAD": 1, "b1": 2}},
321 }
322
323 mt, err := d.newMatchTree(&query.BranchesRepos{List: []query.BranchRepos{
324 {Branch: "b1", Repos: roaring.BitmapOf(hash("bar"))},
325 {Branch: "b2", Repos: roaring.BitmapOf(hash("bar"))},
326 }})
327 if err != nil {
328 t.Fatal(err)
329 }
330
331 want := []uint32{3, 5}
332 for i := 0; i < len(want); i++ {
333 nextDoc := mt.nextDoc()
334 if nextDoc != want[i] {
335 t.Fatalf("want %d, got %d", want[i], nextDoc)
336 }
337 mt.prepare(nextDoc)
338 }
339
340 if mt.nextDoc() != maxUInt32 {
341 t.Fatalf("expect %d documents, but got at least 1 more", len(want))
342 }
343}
344
345func TestRepoIDs(t *testing.T) {
346 d := &indexData{
347 repoMetaData: []Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}},
348 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
349 repos: []uint16{0, 0, 1, 2, 3, 3},
350 }
351 mt, err := d.newMatchTree(&query.RepoIDs{Repos: roaring.BitmapOf(1, 3, 99)})
352 if err != nil {
353 t.Fatal(err)
354 }
355 want := []uint32{2, 4, 5}
356 for i := 0; i < len(want); i++ {
357 nextDoc := mt.nextDoc()
358 if nextDoc != want[i] {
359 t.Fatalf("want %d, got %d", want[i], nextDoc)
360 }
361 mt.prepare(nextDoc)
362 }
363 if mt.nextDoc() != maxUInt32 {
364 t.Fatalf("expected %d document, but got at least 1 more", len(want))
365 }
366}