fork of https://github.com/sourcegraph/zoekt
1// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "context"
19 "hash/fnv"
20 "reflect"
21 "regexp/syntax"
22 "strings"
23 "testing"
24
25 "github.com/RoaringBitmap/roaring"
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29
30 "github.com/sourcegraph/zoekt/query"
31)
32
33var opnames = map[syntax.Op]string{
34 syntax.OpNoMatch: "OpNoMatch",
35 syntax.OpEmptyMatch: "OpEmptyMatch",
36 syntax.OpLiteral: "OpLiteral",
37 syntax.OpCharClass: "OpCharClass",
38 syntax.OpAnyCharNotNL: "OpAnyCharNotNL",
39 syntax.OpAnyChar: "OpAnyChar",
40 syntax.OpBeginLine: "OpBeginLine",
41 syntax.OpEndLine: "OpEndLine",
42 syntax.OpBeginText: "OpBeginText",
43 syntax.OpEndText: "OpEndText",
44 syntax.OpWordBoundary: "OpWordBoundary",
45 syntax.OpNoWordBoundary: "OpNoWordBoundary",
46 syntax.OpCapture: "OpCapture",
47 syntax.OpStar: "OpStar",
48 syntax.OpPlus: "OpPlus",
49 syntax.OpQuest: "OpQuest",
50 syntax.OpRepeat: "OpRepeat",
51 syntax.OpConcat: "OpConcat",
52 syntax.OpAlternate: "OpAlternate",
53}
54
55func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) {
56 t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub))
57 for _, s := range r.Sub {
58 printRegexp(t, s, lvl+1)
59 }
60}
61
62func caseSensitiveSubstrMT(pattern string) matchTree {
63 d := &indexData{}
64 mt, _ := d.newSubstringMatchTree(&query.Substring{
65 Pattern: pattern,
66 CaseSensitive: true,
67 })
68 return mt
69}
70
71func substrMT(pattern string) matchTree {
72 d := &indexData{}
73 mt, _ := d.newSubstringMatchTree(&query.Substring{
74 Pattern: pattern,
75 CaseSensitive: false,
76 })
77 return mt
78}
79
80func TestRegexpParse(t *testing.T) {
81 type testcase struct {
82 in string
83 query matchTree
84 isEquivalent bool
85 caseSensitive bool
86 }
87
88 cases := []testcase{
89 {"(foo|)bar", substrMT("bar"), false, false},
90 {"(foo|)", &bruteForceMatchTree{}, false, false},
91 {"(foo|bar)baz.*bla", &andMatchTree{[]matchTree{
92 &orMatchTree{[]matchTree{
93 substrMT("foo"),
94 substrMT("bar"),
95 }},
96 substrMT("baz"),
97 substrMT("bla"),
98 }}, false, false},
99 {
100 "^[a-z](People)+barrabas$",
101 &andMatchTree{[]matchTree{
102 substrMT("People"),
103 substrMT("barrabas"),
104 }}, false, false,
105 },
106 {"foo", substrMT("foo"), true, false},
107 {"foo", caseSensitiveSubstrMT("foo"), true, true},
108 {"(?i)foo", substrMT("FOO"), true, false},
109 {"(?i)foo", substrMT("FOO"), true, true},
110 {"^foo", substrMT("foo"), false, false},
111 {"(foo) (bar)", &andMatchTree{[]matchTree{substrMT("foo"), substrMT("bar")}}, false, false},
112 {"(thread|needle|haystack)", &orMatchTree{[]matchTree{
113 substrMT("thread"),
114 substrMT("needle"),
115 substrMT("haystack"),
116 }}, true, false},
117 {"(foo)(?-s:.)*?(bar)", &andLineMatchTree{andMatchTree{[]matchTree{
118 substrMT("foo"),
119 substrMT("bar"),
120 }}}, false, false},
121 {"(foo)(?-s:.)*?[[:space:]](?-s:.)*?(bar)", &andMatchTree{[]matchTree{
122 substrMT("foo"),
123 substrMT("bar"),
124 }}, false, false},
125 {"(foo){2,}", substrMT("foo"), false, false},
126 {"(...)(...)", &bruteForceMatchTree{}, false, false},
127 }
128
129 for _, c := range cases {
130 r, err := syntax.Parse(c.in, syntax.Perl)
131 if err != nil {
132 t.Errorf("Parse(%q): %v", c.in, err)
133 continue
134 }
135 d := indexData{}
136 q := query.Regexp{
137 Regexp: r,
138 CaseSensitive: c.caseSensitive,
139 }
140 gotQuery, isEq, _, _ := d.regexpToMatchTreeRecursive(q.Regexp, 3, q.FileName, q.CaseSensitive)
141 if !reflect.DeepEqual(c.query, gotQuery) {
142 printRegexp(t, r, 0)
143 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, gotQuery, c.query)
144 }
145 if isEq != c.isEquivalent {
146 printRegexp(t, r, 0)
147 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, isEq, c.isEquivalent)
148 }
149 }
150}
151
152func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) {
153 cs := compoundReposShard(t, "foo", "bar")
154
155 ctx := context.Background()
156 q := &query.Const{Value: true}
157 opts := &SearchOptions{ShardRepoMaxMatchCount: 1}
158
159 sr, err := cs.Search(ctx, q, opts)
160 if err != nil {
161 t.Fatal(err)
162 }
163
164 t.Run("matches", func(t *testing.T) {
165 var filenames []string
166 for _, r := range sr.Files {
167 filenames = append(filenames, r.FileName)
168 }
169
170 got, want := filenames, []string{"foo.txt", "bar.txt"}
171 if diff := cmp.Diff(want, got); diff != "" {
172 t.Errorf("mismatch (-want, +got): %s", diff)
173 }
174 })
175
176 t.Run("stats", func(t *testing.T) {
177 got, want := sr.Stats, Stats{
178 ContentBytesLoaded: 0,
179 FileCount: 2,
180 FilesConsidered: 2,
181 FilesSkipped: 2,
182 ShardsScanned: 1,
183 MatchCount: 2,
184 }
185 if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
186 t.Errorf("mismatch (-want, +got): %s", diff)
187 }
188 })
189}
190
191func compoundReposShard(t *testing.T, names ...string) *indexData {
192 t.Helper()
193
194 repos := make([]*Repository, 0, len(names))
195 docs := make([][]Document, 0, len(names))
196 for _, name := range names {
197 repos = append(repos, &Repository{ID: hash(name), Name: name})
198 ds := []Document{
199 {Name: name + ".txt", Content: []byte(name + " content")},
200 {Name: name + ".2.txt", Content: []byte(name + " content 2")},
201 }
202 docs = append(docs, ds)
203 }
204
205 b := testIndexBuilderCompound(t, repos, docs)
206 s := searcherForTest(t, b)
207 return s.(*indexData)
208}
209
210func TestSimplifyRepoSet(t *testing.T) {
211 d := compoundReposShard(t, "foo", "bar")
212 all := &query.RepoSet{Set: map[string]bool{"foo": true, "bar": true}}
213 some := &query.RepoSet{Set: map[string]bool{"foo": true, "banana": true}}
214 none := &query.RepoSet{Set: map[string]bool{"banana": true}}
215
216 got := d.simplify(all)
217 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" {
218 t.Fatalf("-want, +got:\n%s", d)
219 }
220
221 got = d.simplify(some)
222 if d := cmp.Diff(some, got); d != "" {
223 t.Fatalf("-want, +got:\n%s", d)
224 }
225
226 got = d.simplify(none)
227 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
228 t.Fatalf("-want, +got:\n%s", d)
229 }
230}
231
232func TestSimplifyRepoIDs(t *testing.T) {
233 d := compoundReposShard(t, "foo", "bar")
234 all := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("bar"))}
235 some := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("banana"))}
236 none := &query.RepoIDs{Repos: roaring.BitmapOf(hash("banana"))}
237
238 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
239
240 got := d.simplify(all)
241 if d := cmp.Diff(&query.Const{Value: true}, got, tr); d != "" {
242 t.Fatalf("-want, +got:\n%s", d)
243 }
244
245 got = d.simplify(some)
246 if d := cmp.Diff(some, got, tr); d != "" {
247 t.Fatalf("-want, +got:\n%s", d)
248 }
249
250 got = d.simplify(none)
251 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
252 t.Fatalf("-want, +got:\n%s", d)
253 }
254}
255
256func TestSimplifyRepo(t *testing.T) {
257 re := func(pat string) *query.Repo {
258 t.Helper()
259 re, err := regexp.Compile(pat)
260 if err != nil {
261 t.Fatal(err)
262 }
263 return &query.Repo{
264 Regexp: re,
265 }
266 }
267 d := compoundReposShard(t, "foo", "fool")
268 cases := []struct {
269 name string
270 q query.Q
271 want query.Q
272 }{{
273 name: "all",
274 q: re("f.*"),
275 want: &query.Const{Value: true},
276 }, {
277 name: "some",
278 q: re("foo."),
279 want: re("foo."),
280 }, {
281 name: "none",
282 q: re("banana"),
283 want: &query.Const{Value: false},
284 }}
285
286 for _, tc := range cases {
287 got := d.simplify(tc.q)
288 if d := cmp.Diff(tc.want.String(), got.String()); d != "" {
289 t.Errorf("%s: -want, +got:\n%s", tc.name, d)
290 }
291 }
292}
293
294func TestSimplifyRepoRegexp(t *testing.T) {
295 re := func(pat string) *query.RepoRegexp {
296 t.Helper()
297 re, err := regexp.Compile(pat)
298 if err != nil {
299 t.Fatal(err)
300 }
301 return &query.RepoRegexp{
302 Regexp: re,
303 }
304 }
305 d := compoundReposShard(t, "foo", "fool")
306 cases := []struct {
307 name string
308 q query.Q
309 want query.Q
310 }{{
311 name: "all",
312 q: re("f.*"),
313 want: &query.Const{Value: true},
314 }, {
315 name: "some",
316 q: re("foo."),
317 want: re("foo."),
318 }, {
319 name: "none",
320 q: re("banana"),
321 want: &query.Const{Value: false},
322 }}
323
324 for _, tc := range cases {
325 got := d.simplify(tc.q)
326 if d := cmp.Diff(tc.want.String(), got.String()); d != "" {
327 t.Errorf("%s: -want, +got:\n%s", tc.name, d)
328 }
329 }
330}
331
332func TestSimplifyBranchesRepos(t *testing.T) {
333 d := compoundReposShard(t, "foo", "bar")
334
335 some := &query.BranchesRepos{
336 List: []query.BranchRepos{
337 {Branch: "branch1", Repos: roaring.BitmapOf(hash("bar"))},
338 },
339 }
340 none := &query.Repo{Regexp: regexp.MustCompile("banana")}
341
342 got := d.simplify(some)
343 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
344 if d := cmp.Diff(some, got, tr); d != "" {
345 t.Fatalf("-want, +got:\n%s", d)
346 }
347
348 got = d.simplify(none)
349 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
350 t.Fatalf("-want, +got:\n%s", d)
351 }
352}
353
354func hash(name string) uint32 {
355 h := fnv.New32()
356 h.Write([]byte(name))
357 return h.Sum32()
358}
359
360func TestGatherBranches(t *testing.T) {
361 content := []byte("dummy")
362 b := testIndexBuilder(t, &Repository{
363 Branches: []RepositoryBranch{
364 {"foo", "v1"},
365 {"foo-2", "v1"},
366 {"main", "v1"},
367 {"bar", "v1"},
368 {"quz", "v1"},
369 },
370 },
371 Document{Name: "f1", Content: content, Branches: []string{"foo", "bar", "quz"}},
372 Document{Name: "f2", Content: content, Branches: []string{"foo", "foo-2"}},
373 Document{Name: "f3", Content: content, Branches: []string{"main"}})
374
375 d := searcherForTest(t, b).(*indexData)
376
377 sr, err := d.Search(
378 context.Background(),
379 &query.Or{Children: []query.Q{
380 &query.Branch{Pattern: "foo"},
381 &query.Branch{Pattern: "quz"},
382 }},
383 &SearchOptions{},
384 )
385 if err != nil {
386 t.Fatal(err)
387 }
388
389 want := map[string][]string{
390 "f1": {"foo", "quz"},
391 "f2": {"foo", "foo-2"},
392 }
393
394 if len(sr.Files) != 2 {
395 t.Fatalf("len(sr.Files): want %d, got %d", 2, len(sr.Files))
396 }
397
398 for _, f := range sr.Files {
399 if d := cmp.Diff(want[f.FileName], f.Branches); d != "" {
400 t.Fatalf("-want,+got:\n%s", d)
401 }
402 }
403}