fork of https://github.com/sourcegraph/zoekt
1// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "context"
19 "hash/fnv"
20 "reflect"
21 "regexp/syntax"
22 "strings"
23 "testing"
24
25 "github.com/RoaringBitmap/roaring"
26 "github.com/google/go-cmp/cmp"
27 "github.com/grafana/regexp"
28
29 "github.com/sourcegraph/zoekt/query"
30)
31
32var opnames = map[syntax.Op]string{
33 syntax.OpNoMatch: "OpNoMatch",
34 syntax.OpEmptyMatch: "OpEmptyMatch",
35 syntax.OpLiteral: "OpLiteral",
36 syntax.OpCharClass: "OpCharClass",
37 syntax.OpAnyCharNotNL: "OpAnyCharNotNL",
38 syntax.OpAnyChar: "OpAnyChar",
39 syntax.OpBeginLine: "OpBeginLine",
40 syntax.OpEndLine: "OpEndLine",
41 syntax.OpBeginText: "OpBeginText",
42 syntax.OpEndText: "OpEndText",
43 syntax.OpWordBoundary: "OpWordBoundary",
44 syntax.OpNoWordBoundary: "OpNoWordBoundary",
45 syntax.OpCapture: "OpCapture",
46 syntax.OpStar: "OpStar",
47 syntax.OpPlus: "OpPlus",
48 syntax.OpQuest: "OpQuest",
49 syntax.OpRepeat: "OpRepeat",
50 syntax.OpConcat: "OpConcat",
51 syntax.OpAlternate: "OpAlternate",
52}
53
54func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) {
55 t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub))
56 for _, s := range r.Sub {
57 printRegexp(t, s, lvl+1)
58 }
59}
60
61func substrMT(pattern string) matchTree {
62 d := &indexData{}
63 mt, _ := d.newSubstringMatchTree(&query.Substring{
64 Pattern: pattern,
65 })
66 return mt
67}
68
69func TestRegexpParse(t *testing.T) {
70 type testcase struct {
71 in string
72 query matchTree
73 isEquivalent bool
74 }
75
76 cases := []testcase{
77 {"(foo|)bar", substrMT("bar"), false},
78 {"(foo|)", &bruteForceMatchTree{}, false},
79 {"(foo|bar)baz.*bla", &andMatchTree{[]matchTree{
80 &orMatchTree{[]matchTree{
81 substrMT("foo"),
82 substrMT("bar"),
83 }},
84 substrMT("baz"),
85 substrMT("bla"),
86 }}, false},
87 {
88 "^[a-z](People)+barrabas$",
89 &andMatchTree{[]matchTree{
90 substrMT("People"),
91 substrMT("barrabas"),
92 }}, false,
93 },
94 {"foo", substrMT("foo"), true},
95 {"^foo", substrMT("foo"), false},
96 {"(foo) (bar)", &andMatchTree{[]matchTree{substrMT("foo"), substrMT("bar")}}, false},
97 {"(thread|needle|haystack)", &orMatchTree{[]matchTree{
98 substrMT("thread"),
99 substrMT("needle"),
100 substrMT("haystack"),
101 }}, true},
102 {"(foo)(?-s:.)*?(bar)", &andLineMatchTree{andMatchTree{[]matchTree{
103 substrMT("foo"),
104 substrMT("bar"),
105 }}}, false},
106 {"(foo)(?-s:.)*?[[:space:]](?-s:.)*?(bar)", &andMatchTree{[]matchTree{
107 substrMT("foo"),
108 substrMT("bar"),
109 }}, false},
110 {"(foo){2,}", substrMT("foo"), false},
111 {"(...)(...)", &bruteForceMatchTree{}, false},
112 }
113
114 for _, c := range cases {
115 r, err := syntax.Parse(c.in, syntax.Perl)
116 if err != nil {
117 t.Errorf("Parse(%q): %v", c.in, err)
118 continue
119 }
120 d := indexData{}
121 q := query.Regexp{
122 Regexp: r,
123 }
124 gotQuery, isEq, _, _ := d.regexpToMatchTreeRecursive(q.Regexp, 3, q.FileName, q.CaseSensitive)
125 if !reflect.DeepEqual(c.query, gotQuery) {
126 printRegexp(t, r, 0)
127 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, gotQuery, c.query)
128 }
129 if isEq != c.isEquivalent {
130 printRegexp(t, r, 0)
131 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, isEq, c.isEquivalent)
132 }
133 }
134}
135
136func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) {
137 cs := compoundReposShard(t, "foo", "bar")
138
139 ctx := context.Background()
140 q := &query.Const{Value: true}
141 opts := &SearchOptions{ShardRepoMaxMatchCount: 1}
142
143 sr, err := cs.Search(ctx, q, opts)
144 if err != nil {
145 t.Fatal(err)
146 }
147
148 t.Run("matches", func(t *testing.T) {
149 var filenames []string
150 for _, r := range sr.Files {
151 filenames = append(filenames, r.FileName)
152 }
153
154 got, want := filenames, []string{"foo.txt", "bar.txt"}
155 if diff := cmp.Diff(want, got); diff != "" {
156 t.Errorf("mismatch (-want, +got): %s", diff)
157 }
158 })
159
160 t.Run("stats", func(t *testing.T) {
161 got, want := sr.Stats, Stats{
162 ContentBytesLoaded: 2,
163 FileCount: 2,
164 FilesConsidered: 2,
165 FilesSkipped: 2,
166 ShardsScanned: 1,
167 MatchCount: 2,
168 }
169 if diff := cmp.Diff(want, got); diff != "" {
170 t.Errorf("mismatch (-want, +got): %s", diff)
171 }
172 })
173}
174
175func compoundReposShard(t *testing.T, names ...string) *indexData {
176 t.Helper()
177
178 repos := make([]*Repository, 0, len(names))
179 docs := make([][]Document, 0, len(names))
180 for _, name := range names {
181 repos = append(repos, &Repository{ID: hash(name), Name: name})
182 ds := []Document{
183 {Name: name + ".txt", Content: []byte(name + " content")},
184 {Name: name + ".2.txt", Content: []byte(name + " content 2")},
185 }
186 docs = append(docs, ds)
187 }
188
189 b := testIndexBuilderCompound(t, repos, docs)
190 s := searcherForTest(t, b)
191 return s.(*indexData)
192}
193
194func TestSimplifyRepoSet(t *testing.T) {
195 d := compoundReposShard(t, "foo", "bar")
196 all := &query.RepoSet{Set: map[string]bool{"foo": true, "bar": true}}
197 some := &query.RepoSet{Set: map[string]bool{"foo": true, "banana": true}}
198 none := &query.RepoSet{Set: map[string]bool{"banana": true}}
199
200 got := d.simplify(all)
201 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" {
202 t.Fatalf("-want, +got:\n%s", d)
203 }
204
205 got = d.simplify(some)
206 if d := cmp.Diff(some, got); d != "" {
207 t.Fatalf("-want, +got:\n%s", d)
208 }
209
210 got = d.simplify(none)
211 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
212 t.Fatalf("-want, +got:\n%s", d)
213 }
214}
215
216func TestSimplifyRepoIDs(t *testing.T) {
217 d := compoundReposShard(t, "foo", "bar")
218 all := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("bar"))}
219 some := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("banana"))}
220 none := &query.RepoIDs{Repos: roaring.BitmapOf(hash("banana"))}
221
222 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
223
224 got := d.simplify(all)
225 if d := cmp.Diff(&query.Const{Value: true}, got, tr); d != "" {
226 t.Fatalf("-want, +got:\n%s", d)
227 }
228
229 got = d.simplify(some)
230 if d := cmp.Diff(some, got, tr); d != "" {
231 t.Fatalf("-want, +got:\n%s", d)
232 }
233
234 got = d.simplify(none)
235 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
236 t.Fatalf("-want, +got:\n%s", d)
237 }
238}
239
240func TestSimplifyRepo(t *testing.T) {
241 re := func(pat string) *query.Repo {
242 t.Helper()
243 re, err := regexp.Compile(pat)
244 if err != nil {
245 t.Fatal(err)
246 }
247 return &query.Repo{
248 Regexp: re,
249 }
250 }
251 d := compoundReposShard(t, "foo", "fool")
252 cases := []struct {
253 name string
254 q query.Q
255 want query.Q
256 }{{
257 name: "all",
258 q: re("f.*"),
259 want: &query.Const{Value: true},
260 }, {
261 name: "some",
262 q: re("foo."),
263 want: re("foo."),
264 }, {
265 name: "none",
266 q: re("banana"),
267 want: &query.Const{Value: false},
268 }}
269
270 for _, tc := range cases {
271 got := d.simplify(tc.q)
272 if d := cmp.Diff(tc.want.String(), got.String()); d != "" {
273 t.Errorf("%s: -want, +got:\n%s", tc.name, d)
274 }
275 }
276}
277
278func TestSimplifyRepoRegexp(t *testing.T) {
279 re := func(pat string) *query.RepoRegexp {
280 t.Helper()
281 re, err := regexp.Compile(pat)
282 if err != nil {
283 t.Fatal(err)
284 }
285 return &query.RepoRegexp{
286 Regexp: re,
287 }
288 }
289 d := compoundReposShard(t, "foo", "fool")
290 cases := []struct {
291 name string
292 q query.Q
293 want query.Q
294 }{{
295 name: "all",
296 q: re("f.*"),
297 want: &query.Const{Value: true},
298 }, {
299 name: "some",
300 q: re("foo."),
301 want: re("foo."),
302 }, {
303 name: "none",
304 q: re("banana"),
305 want: &query.Const{Value: false},
306 }}
307
308 for _, tc := range cases {
309 got := d.simplify(tc.q)
310 if d := cmp.Diff(tc.want.String(), got.String()); d != "" {
311 t.Errorf("%s: -want, +got:\n%s", tc.name, d)
312 }
313 }
314}
315
316func TestSimplifyBranchesRepos(t *testing.T) {
317 d := compoundReposShard(t, "foo", "bar")
318
319 some := &query.BranchesRepos{
320 List: []query.BranchRepos{
321 {Branch: "branch1", Repos: roaring.BitmapOf(hash("bar"))},
322 },
323 }
324 none := &query.Repo{Regexp: regexp.MustCompile("banana")}
325
326 got := d.simplify(some)
327 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
328 if d := cmp.Diff(some, got, tr); d != "" {
329 t.Fatalf("-want, +got:\n%s", d)
330 }
331
332 got = d.simplify(none)
333 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
334 t.Fatalf("-want, +got:\n%s", d)
335 }
336}
337
338func hash(name string) uint32 {
339 h := fnv.New32()
340 h.Write([]byte(name))
341 return h.Sum32()
342}
343
344func TestGatherBranches(t *testing.T) {
345 content := []byte("dummy")
346 b := testIndexBuilder(t, &Repository{
347 Branches: []RepositoryBranch{
348 {"foo", "v1"},
349 {"foo-2", "v1"},
350 {"main", "v1"},
351 {"bar", "v1"},
352 {"quz", "v1"},
353 }},
354 Document{Name: "f1", Content: content, Branches: []string{"foo", "bar", "quz"}},
355 Document{Name: "f2", Content: content, Branches: []string{"foo", "foo-2"}},
356 Document{Name: "f3", Content: content, Branches: []string{"main"}})
357
358 d := searcherForTest(t, b).(*indexData)
359
360 sr, err := d.Search(
361 context.Background(),
362 &query.Or{Children: []query.Q{
363 &query.Branch{Pattern: "foo"},
364 &query.Branch{Pattern: "quz"},
365 }},
366 &SearchOptions{},
367 )
368 if err != nil {
369 t.Fatal(err)
370 }
371
372 want := map[string][]string{
373 "f1": []string{"foo", "quz"},
374 "f2": []string{"foo", "foo-2"},
375 }
376
377 if len(sr.Files) != 2 {
378 t.Fatalf("len(sr.Files): want %d, got %d", 2, len(sr.Files))
379 }
380
381 for _, f := range sr.Files {
382 if d := cmp.Diff(want[f.FileName], f.Branches); d != "" {
383 t.Fatalf("-want,+got:\n%s", d)
384 }
385 }
386}