fork of https://github.com/sourcegraph/zoekt
1// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "context"
19 "hash/fnv"
20 "reflect"
21 "regexp/syntax"
22 "strings"
23 "testing"
24
25 "github.com/RoaringBitmap/roaring"
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29
30 "github.com/sourcegraph/zoekt/query"
31)
32
33var opnames = map[syntax.Op]string{
34 syntax.OpNoMatch: "OpNoMatch",
35 syntax.OpEmptyMatch: "OpEmptyMatch",
36 syntax.OpLiteral: "OpLiteral",
37 syntax.OpCharClass: "OpCharClass",
38 syntax.OpAnyCharNotNL: "OpAnyCharNotNL",
39 syntax.OpAnyChar: "OpAnyChar",
40 syntax.OpBeginLine: "OpBeginLine",
41 syntax.OpEndLine: "OpEndLine",
42 syntax.OpBeginText: "OpBeginText",
43 syntax.OpEndText: "OpEndText",
44 syntax.OpWordBoundary: "OpWordBoundary",
45 syntax.OpNoWordBoundary: "OpNoWordBoundary",
46 syntax.OpCapture: "OpCapture",
47 syntax.OpStar: "OpStar",
48 syntax.OpPlus: "OpPlus",
49 syntax.OpQuest: "OpQuest",
50 syntax.OpRepeat: "OpRepeat",
51 syntax.OpConcat: "OpConcat",
52 syntax.OpAlternate: "OpAlternate",
53}
54
55func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) {
56 t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub))
57 for _, s := range r.Sub {
58 printRegexp(t, s, lvl+1)
59 }
60}
61
62func substrMT(pattern string) matchTree {
63 d := &indexData{}
64 mt, _ := d.newSubstringMatchTree(&query.Substring{
65 Pattern: pattern,
66 })
67 return mt
68}
69
70func TestRegexpParse(t *testing.T) {
71 type testcase struct {
72 in string
73 query matchTree
74 isEquivalent bool
75 }
76
77 cases := []testcase{
78 {"(foo|)bar", substrMT("bar"), false},
79 {"(foo|)", &bruteForceMatchTree{}, false},
80 {"(foo|bar)baz.*bla", &andMatchTree{[]matchTree{
81 &orMatchTree{[]matchTree{
82 substrMT("foo"),
83 substrMT("bar"),
84 }},
85 substrMT("baz"),
86 substrMT("bla"),
87 }}, false},
88 {
89 "^[a-z](People)+barrabas$",
90 &andMatchTree{[]matchTree{
91 substrMT("People"),
92 substrMT("barrabas"),
93 }}, false,
94 },
95 {"foo", substrMT("foo"), true},
96 {"^foo", substrMT("foo"), false},
97 {"(foo) (bar)", &andMatchTree{[]matchTree{substrMT("foo"), substrMT("bar")}}, false},
98 {"(thread|needle|haystack)", &orMatchTree{[]matchTree{
99 substrMT("thread"),
100 substrMT("needle"),
101 substrMT("haystack"),
102 }}, true},
103 {"(foo)(?-s:.)*?(bar)", &andLineMatchTree{andMatchTree{[]matchTree{
104 substrMT("foo"),
105 substrMT("bar"),
106 }}}, false},
107 {"(foo)(?-s:.)*?[[:space:]](?-s:.)*?(bar)", &andMatchTree{[]matchTree{
108 substrMT("foo"),
109 substrMT("bar"),
110 }}, false},
111 {"(foo){2,}", substrMT("foo"), false},
112 {"(...)(...)", &bruteForceMatchTree{}, false},
113 }
114
115 for _, c := range cases {
116 r, err := syntax.Parse(c.in, syntax.Perl)
117 if err != nil {
118 t.Errorf("Parse(%q): %v", c.in, err)
119 continue
120 }
121 d := indexData{}
122 q := query.Regexp{
123 Regexp: r,
124 }
125 gotQuery, isEq, _, _ := d.regexpToMatchTreeRecursive(q.Regexp, 3, q.FileName, q.CaseSensitive)
126 if !reflect.DeepEqual(c.query, gotQuery) {
127 printRegexp(t, r, 0)
128 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, gotQuery, c.query)
129 }
130 if isEq != c.isEquivalent {
131 printRegexp(t, r, 0)
132 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, isEq, c.isEquivalent)
133 }
134 }
135}
136
137func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) {
138 cs := compoundReposShard(t, "foo", "bar")
139
140 ctx := context.Background()
141 q := &query.Const{Value: true}
142 opts := &SearchOptions{ShardRepoMaxMatchCount: 1}
143
144 sr, err := cs.Search(ctx, q, opts)
145 if err != nil {
146 t.Fatal(err)
147 }
148
149 t.Run("matches", func(t *testing.T) {
150 var filenames []string
151 for _, r := range sr.Files {
152 filenames = append(filenames, r.FileName)
153 }
154
155 got, want := filenames, []string{"foo.txt", "bar.txt"}
156 if diff := cmp.Diff(want, got); diff != "" {
157 t.Errorf("mismatch (-want, +got): %s", diff)
158 }
159 })
160
161 t.Run("stats", func(t *testing.T) {
162 got, want := sr.Stats, Stats{
163 ContentBytesLoaded: 0,
164 FileCount: 2,
165 FilesConsidered: 2,
166 FilesSkipped: 2,
167 ShardsScanned: 1,
168 MatchCount: 2,
169 }
170 if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
171 t.Errorf("mismatch (-want, +got): %s", diff)
172 }
173 })
174}
175
176func compoundReposShard(t *testing.T, names ...string) *indexData {
177 t.Helper()
178
179 repos := make([]*Repository, 0, len(names))
180 docs := make([][]Document, 0, len(names))
181 for _, name := range names {
182 repos = append(repos, &Repository{ID: hash(name), Name: name})
183 ds := []Document{
184 {Name: name + ".txt", Content: []byte(name + " content")},
185 {Name: name + ".2.txt", Content: []byte(name + " content 2")},
186 }
187 docs = append(docs, ds)
188 }
189
190 b := testIndexBuilderCompound(t, repos, docs)
191 s := searcherForTest(t, b)
192 return s.(*indexData)
193}
194
195func TestSimplifyRepoSet(t *testing.T) {
196 d := compoundReposShard(t, "foo", "bar")
197 all := &query.RepoSet{Set: map[string]bool{"foo": true, "bar": true}}
198 some := &query.RepoSet{Set: map[string]bool{"foo": true, "banana": true}}
199 none := &query.RepoSet{Set: map[string]bool{"banana": true}}
200
201 got := d.simplify(all)
202 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" {
203 t.Fatalf("-want, +got:\n%s", d)
204 }
205
206 got = d.simplify(some)
207 if d := cmp.Diff(some, got); d != "" {
208 t.Fatalf("-want, +got:\n%s", d)
209 }
210
211 got = d.simplify(none)
212 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
213 t.Fatalf("-want, +got:\n%s", d)
214 }
215}
216
217func TestSimplifyRepoIDs(t *testing.T) {
218 d := compoundReposShard(t, "foo", "bar")
219 all := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("bar"))}
220 some := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("banana"))}
221 none := &query.RepoIDs{Repos: roaring.BitmapOf(hash("banana"))}
222
223 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
224
225 got := d.simplify(all)
226 if d := cmp.Diff(&query.Const{Value: true}, got, tr); d != "" {
227 t.Fatalf("-want, +got:\n%s", d)
228 }
229
230 got = d.simplify(some)
231 if d := cmp.Diff(some, got, tr); d != "" {
232 t.Fatalf("-want, +got:\n%s", d)
233 }
234
235 got = d.simplify(none)
236 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
237 t.Fatalf("-want, +got:\n%s", d)
238 }
239}
240
241func TestSimplifyRepo(t *testing.T) {
242 re := func(pat string) *query.Repo {
243 t.Helper()
244 re, err := regexp.Compile(pat)
245 if err != nil {
246 t.Fatal(err)
247 }
248 return &query.Repo{
249 Regexp: re,
250 }
251 }
252 d := compoundReposShard(t, "foo", "fool")
253 cases := []struct {
254 name string
255 q query.Q
256 want query.Q
257 }{{
258 name: "all",
259 q: re("f.*"),
260 want: &query.Const{Value: true},
261 }, {
262 name: "some",
263 q: re("foo."),
264 want: re("foo."),
265 }, {
266 name: "none",
267 q: re("banana"),
268 want: &query.Const{Value: false},
269 }}
270
271 for _, tc := range cases {
272 got := d.simplify(tc.q)
273 if d := cmp.Diff(tc.want.String(), got.String()); d != "" {
274 t.Errorf("%s: -want, +got:\n%s", tc.name, d)
275 }
276 }
277}
278
279func TestSimplifyRepoRegexp(t *testing.T) {
280 re := func(pat string) *query.RepoRegexp {
281 t.Helper()
282 re, err := regexp.Compile(pat)
283 if err != nil {
284 t.Fatal(err)
285 }
286 return &query.RepoRegexp{
287 Regexp: re,
288 }
289 }
290 d := compoundReposShard(t, "foo", "fool")
291 cases := []struct {
292 name string
293 q query.Q
294 want query.Q
295 }{{
296 name: "all",
297 q: re("f.*"),
298 want: &query.Const{Value: true},
299 }, {
300 name: "some",
301 q: re("foo."),
302 want: re("foo."),
303 }, {
304 name: "none",
305 q: re("banana"),
306 want: &query.Const{Value: false},
307 }}
308
309 for _, tc := range cases {
310 got := d.simplify(tc.q)
311 if d := cmp.Diff(tc.want.String(), got.String()); d != "" {
312 t.Errorf("%s: -want, +got:\n%s", tc.name, d)
313 }
314 }
315}
316
317func TestSimplifyBranchesRepos(t *testing.T) {
318 d := compoundReposShard(t, "foo", "bar")
319
320 some := &query.BranchesRepos{
321 List: []query.BranchRepos{
322 {Branch: "branch1", Repos: roaring.BitmapOf(hash("bar"))},
323 },
324 }
325 none := &query.Repo{Regexp: regexp.MustCompile("banana")}
326
327 got := d.simplify(some)
328 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
329 if d := cmp.Diff(some, got, tr); d != "" {
330 t.Fatalf("-want, +got:\n%s", d)
331 }
332
333 got = d.simplify(none)
334 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
335 t.Fatalf("-want, +got:\n%s", d)
336 }
337}
338
339func hash(name string) uint32 {
340 h := fnv.New32()
341 h.Write([]byte(name))
342 return h.Sum32()
343}
344
345func TestGatherBranches(t *testing.T) {
346 content := []byte("dummy")
347 b := testIndexBuilder(t, &Repository{
348 Branches: []RepositoryBranch{
349 {"foo", "v1"},
350 {"foo-2", "v1"},
351 {"main", "v1"},
352 {"bar", "v1"},
353 {"quz", "v1"},
354 },
355 },
356 Document{Name: "f1", Content: content, Branches: []string{"foo", "bar", "quz"}},
357 Document{Name: "f2", Content: content, Branches: []string{"foo", "foo-2"}},
358 Document{Name: "f3", Content: content, Branches: []string{"main"}})
359
360 d := searcherForTest(t, b).(*indexData)
361
362 sr, err := d.Search(
363 context.Background(),
364 &query.Or{Children: []query.Q{
365 &query.Branch{Pattern: "foo"},
366 &query.Branch{Pattern: "quz"},
367 }},
368 &SearchOptions{},
369 )
370 if err != nil {
371 t.Fatal(err)
372 }
373
374 want := map[string][]string{
375 "f1": {"foo", "quz"},
376 "f2": {"foo", "foo-2"},
377 }
378
379 if len(sr.Files) != 2 {
380 t.Fatalf("len(sr.Files): want %d, got %d", 2, len(sr.Files))
381 }
382
383 for _, f := range sr.Files {
384 if d := cmp.Diff(want[f.FileName], f.Branches); d != "" {
385 t.Fatalf("-want,+got:\n%s", d)
386 }
387 }
388}