fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2020 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package index 16 17import ( 18 "context" 19 "hash/fnv" 20 "reflect" 21 "regexp/syntax" 22 "strconv" 23 "strings" 24 "testing" 25 26 "github.com/RoaringBitmap/roaring" 27 "github.com/google/go-cmp/cmp" 28 "github.com/google/go-cmp/cmp/cmpopts" 29 "github.com/grafana/regexp" 30 31 "github.com/sourcegraph/zoekt" 32 "github.com/sourcegraph/zoekt/query" 33) 34 35var opnames = map[syntax.Op]string{ 36 syntax.OpNoMatch: "OpNoMatch", 37 syntax.OpEmptyMatch: "OpEmptyMatch", 38 syntax.OpLiteral: "OpLiteral", 39 syntax.OpCharClass: "OpCharClass", 40 syntax.OpAnyCharNotNL: "OpAnyCharNotNL", 41 syntax.OpAnyChar: "OpAnyChar", 42 syntax.OpBeginLine: "OpBeginLine", 43 syntax.OpEndLine: "OpEndLine", 44 syntax.OpBeginText: "OpBeginText", 45 syntax.OpEndText: "OpEndText", 46 syntax.OpWordBoundary: "OpWordBoundary", 47 syntax.OpNoWordBoundary: "OpNoWordBoundary", 48 syntax.OpCapture: "OpCapture", 49 syntax.OpStar: "OpStar", 50 syntax.OpPlus: "OpPlus", 51 syntax.OpQuest: "OpQuest", 52 syntax.OpRepeat: "OpRepeat", 53 syntax.OpConcat: "OpConcat", 54 syntax.OpAlternate: "OpAlternate", 55} 56 57func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) { 58 t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub)) 59 for _, s := range r.Sub { 60 printRegexp(t, s, lvl+1) 61 } 62} 63 64func caseSensitiveSubstrMT(pattern string) matchTree { 65 d := &indexData{} 66 mt, _ := d.newSubstringMatchTree(&query.Substring{ 67 Pattern: pattern, 68 CaseSensitive: true, 69 }) 70 return mt 71} 72 73func substrMT(pattern string) matchTree { 74 d := &indexData{} 75 mt, _ := d.newSubstringMatchTree(&query.Substring{ 76 Pattern: pattern, 77 CaseSensitive: false, 78 }) 79 return mt 80} 81 82func TestRegexpParse(t *testing.T) { 83 type testcase struct { 84 in string 85 query matchTree 86 isEquivalent bool 87 caseSensitive bool 88 } 89 90 cases := []testcase{ 91 {"(foo|)bar", substrMT("bar"), false, false}, 92 {"(foo|)", &bruteForceMatchTree{}, false, false}, 93 {"(foo|bar)baz.*bla", &andMatchTree{[]matchTree{ 94 &orMatchTree{[]matchTree{ 95 substrMT("foo"), 96 substrMT("bar"), 97 }}, 98 substrMT("baz"), 99 substrMT("bla"), 100 }}, false, false}, 101 { 102 "^[a-z](People)+barrabas$", 103 &andMatchTree{[]matchTree{ 104 substrMT("People"), 105 substrMT("barrabas"), 106 }}, false, false, 107 }, 108 {"foo", substrMT("foo"), true, false}, 109 {"foo", caseSensitiveSubstrMT("foo"), true, true}, 110 {"(?i)foo", substrMT("FOO"), true, false}, 111 {"(?i)foo", substrMT("FOO"), true, true}, 112 {"^foo", substrMT("foo"), false, false}, 113 {"(foo) (bar)", &andMatchTree{[]matchTree{substrMT("foo"), substrMT("bar")}}, false, false}, 114 {"(thread|needle|haystack)", &orMatchTree{[]matchTree{ 115 substrMT("thread"), 116 substrMT("needle"), 117 substrMT("haystack"), 118 }}, true, false}, 119 {"(foo)(?-s:.)*?(bar)", &andLineMatchTree{andMatchTree{[]matchTree{ 120 substrMT("foo"), 121 substrMT("bar"), 122 }}}, false, false}, 123 {"(foo)(?-s:.)*?[[:space:]](?-s:.)*?(bar)", &andMatchTree{[]matchTree{ 124 substrMT("foo"), 125 substrMT("bar"), 126 }}, false, false}, 127 {"(foo){2,}", substrMT("foo"), false, false}, 128 {"(...)(...)", &bruteForceMatchTree{}, false, false}, 129 } 130 131 for _, c := range cases { 132 r, err := syntax.Parse(c.in, syntax.Perl) 133 if err != nil { 134 t.Errorf("Parse(%q): %v", c.in, err) 135 continue 136 } 137 d := indexData{} 138 q := query.Regexp{ 139 Regexp: r, 140 CaseSensitive: c.caseSensitive, 141 } 142 gotQuery, isEq, _, _ := d.regexpToMatchTreeRecursive(q.Regexp, 3, q.FileName, q.CaseSensitive) 143 if !reflect.DeepEqual(c.query, gotQuery) { 144 printRegexp(t, r, 0) 145 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, gotQuery, c.query) 146 } 147 if isEq != c.isEquivalent { 148 printRegexp(t, r, 0) 149 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, isEq, c.isEquivalent) 150 } 151 } 152} 153 154func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) { 155 cs := compoundReposShard(t, "foo", "bar") 156 157 ctx := context.Background() 158 q := &query.Const{Value: true} 159 opts := &zoekt.SearchOptions{ShardRepoMaxMatchCount: 1} 160 161 sr, err := cs.Search(ctx, q, opts) 162 if err != nil { 163 t.Fatal(err) 164 } 165 166 t.Run("matches", func(t *testing.T) { 167 var filenames []string 168 for _, r := range sr.Files { 169 filenames = append(filenames, r.FileName) 170 } 171 172 got, want := filenames, []string{"foo.txt", "bar.txt"} 173 if diff := cmp.Diff(want, got); diff != "" { 174 t.Errorf("mismatch (-want, +got): %s", diff) 175 } 176 }) 177 178 t.Run("stats", func(t *testing.T) { 179 got, want := sr.Stats, zoekt.Stats{ 180 ContentBytesLoaded: 0, 181 FileCount: 2, 182 FilesConsidered: 2, 183 FilesSkipped: 2, 184 ShardsScanned: 1, 185 MatchCount: 2, 186 } 187 if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 188 t.Errorf("mismatch (-want, +got): %s", diff) 189 } 190 }) 191} 192 193func compoundReposShard(t *testing.T, names ...string) *indexData { 194 t.Helper() 195 196 repos := make([]*zoekt.Repository, 0, len(names)) 197 docs := make([][]Document, 0, len(names)) 198 for _, name := range names { 199 repos = append(repos, &zoekt.Repository{ID: hash(name), Name: name}) 200 ds := []Document{ 201 {Name: name + ".txt", Content: []byte(name + " content")}, 202 {Name: name + ".2.txt", Content: []byte(name + " content 2")}, 203 } 204 docs = append(docs, ds) 205 } 206 207 b := testShardBuilderCompound(t, repos, docs) 208 s := searcherForTest(t, b) 209 return s.(*indexData) 210} 211 212func TestSimplifyRepoSet(t *testing.T) { 213 d := compoundReposShard(t, "foo", "bar") 214 all := &query.RepoSet{Set: map[string]bool{"foo": true, "bar": true}} 215 some := &query.RepoSet{Set: map[string]bool{"foo": true, "banana": true}} 216 none := &query.RepoSet{Set: map[string]bool{"banana": true}} 217 218 got := d.simplify(all) 219 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" { 220 t.Fatalf("-want, +got:\n%s", d) 221 } 222 223 got = d.simplify(some) 224 if d := cmp.Diff(some, got); d != "" { 225 t.Fatalf("-want, +got:\n%s", d) 226 } 227 228 got = d.simplify(none) 229 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" { 230 t.Fatalf("-want, +got:\n%s", d) 231 } 232} 233 234func TestSimplifyRepoIDs(t *testing.T) { 235 d := compoundReposShard(t, "foo", "bar") 236 all := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("bar"))} 237 some := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("banana"))} 238 none := &query.RepoIDs{Repos: roaring.BitmapOf(hash("banana"))} 239 240 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() }) 241 242 got := d.simplify(all) 243 if d := cmp.Diff(&query.Const{Value: true}, got, tr); d != "" { 244 t.Fatalf("-want, +got:\n%s", d) 245 } 246 247 got = d.simplify(some) 248 if d := cmp.Diff(some, got, tr); d != "" { 249 t.Fatalf("-want, +got:\n%s", d) 250 } 251 252 got = d.simplify(none) 253 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" { 254 t.Fatalf("-want, +got:\n%s", d) 255 } 256} 257 258func TestSimplifyRepo(t *testing.T) { 259 re := func(pat string) *query.Repo { 260 t.Helper() 261 re, err := regexp.Compile(pat) 262 if err != nil { 263 t.Fatal(err) 264 } 265 return &query.Repo{ 266 Regexp: re, 267 } 268 } 269 d := compoundReposShard(t, "foo", "fool") 270 cases := []struct { 271 name string 272 q query.Q 273 want query.Q 274 }{{ 275 name: "all", 276 q: re("f.*"), 277 want: &query.Const{Value: true}, 278 }, { 279 name: "some", 280 q: re("foo."), 281 want: re("foo."), 282 }, { 283 name: "none", 284 q: re("banana"), 285 want: &query.Const{Value: false}, 286 }} 287 288 for _, tc := range cases { 289 got := d.simplify(tc.q) 290 if d := cmp.Diff(tc.want.String(), got.String()); d != "" { 291 t.Errorf("%s: -want, +got:\n%s", tc.name, d) 292 } 293 } 294} 295 296func TestSimplifyRepoRegexp(t *testing.T) { 297 re := func(pat string) *query.RepoRegexp { 298 t.Helper() 299 re, err := regexp.Compile(pat) 300 if err != nil { 301 t.Fatal(err) 302 } 303 return &query.RepoRegexp{ 304 Regexp: re, 305 } 306 } 307 d := compoundReposShard(t, "foo", "fool") 308 cases := []struct { 309 name string 310 q query.Q 311 want query.Q 312 }{{ 313 name: "all", 314 q: re("f.*"), 315 want: &query.Const{Value: true}, 316 }, { 317 name: "some", 318 q: re("foo."), 319 want: re("foo."), 320 }, { 321 name: "none", 322 q: re("banana"), 323 want: &query.Const{Value: false}, 324 }} 325 326 for _, tc := range cases { 327 got := d.simplify(tc.q) 328 if d := cmp.Diff(tc.want.String(), got.String()); d != "" { 329 t.Errorf("%s: -want, +got:\n%s", tc.name, d) 330 } 331 } 332} 333 334func TestSimplifyRcRawConfig(t *testing.T) { 335 d := compoundReposShard(t, "foo", "bar") 336 var all = query.RcOnlyPrivate | query.RcNoForks | query.RcNoArchived 337 338 got := d.simplify(all) 339 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" { 340 t.Fatalf("-want, +got:\n%s", d) 341 } 342 343 var none = query.RcOnlyPublic | query.RcNoForks | query.RcNoArchived 344 got = d.simplify(none) 345 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" { 346 t.Fatalf("-want, +got:\n%s", d) 347 } 348} 349 350func TestSimplifyBranchesRepos(t *testing.T) { 351 d := compoundReposShard(t, "foo", "bar") 352 353 some := &query.BranchesRepos{ 354 List: []query.BranchRepos{ 355 {Branch: "branch1", Repos: roaring.BitmapOf(hash("bar"))}, 356 }, 357 } 358 none := &query.Repo{Regexp: regexp.MustCompile("banana")} 359 360 got := d.simplify(some) 361 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() }) 362 if d := cmp.Diff(some, got, tr); d != "" { 363 t.Fatalf("-want, +got:\n%s", d) 364 } 365 366 got = d.simplify(none) 367 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" { 368 t.Fatalf("-want, +got:\n%s", d) 369 } 370} 371 372func hash(name string) uint32 { 373 h := fnv.New32() 374 h.Write([]byte(name)) 375 return h.Sum32() 376} 377 378func TestGatherBranches(t *testing.T) { 379 content := []byte("dummy") 380 b := testShardBuilder(t, &zoekt.Repository{ 381 Branches: []zoekt.RepositoryBranch{ 382 {"foo", "v1"}, 383 {"foo-2", "v1"}, 384 {"main", "v1"}, 385 {"bar", "v1"}, 386 {"quz", "v1"}, 387 }, 388 }, 389 Document{Name: "f1", Content: content, Branches: []string{"foo", "bar", "quz"}}, 390 Document{Name: "f2", Content: content, Branches: []string{"foo", "foo-2"}}, 391 Document{Name: "f3", Content: content, Branches: []string{"main"}}) 392 393 d := searcherForTest(t, b).(*indexData) 394 395 sr, err := d.Search( 396 context.Background(), 397 &query.Or{Children: []query.Q{ 398 &query.Branch{Pattern: "foo"}, 399 &query.Branch{Pattern: "quz"}, 400 }}, 401 &zoekt.SearchOptions{}, 402 ) 403 if err != nil { 404 t.Fatal(err) 405 } 406 407 want := map[string][]string{ 408 "f1": {"foo", "quz"}, 409 "f2": {"foo", "foo-2"}, 410 } 411 412 if len(sr.Files) != 2 { 413 t.Fatalf("len(sr.Files): want %d, got %d", 2, len(sr.Files)) 414 } 415 416 for _, f := range sr.Files { 417 if d := cmp.Diff(want[f.FileName], f.Branches); d != "" { 418 t.Fatalf("-want,+got:\n%s", d) 419 } 420 } 421} 422 423func TestGatherBranchesMany(t *testing.T) { 424 content := []byte("dummy") 425 manyBranchNames := []string{} 426 manyBranches := []zoekt.RepositoryBranch{} 427 for i := range 64 { 428 branchName := "branch-" + strconv.Itoa(i) 429 manyBranchNames = append(manyBranchNames, branchName) 430 manyBranches = append(manyBranches, zoekt.RepositoryBranch{ 431 Name: branchName, 432 Version: "v1"}) 433 } 434 b := testShardBuilder(t, &zoekt.Repository{ 435 Branches: manyBranches, 436 }, Document{Name: "f1", Content: content, Branches: manyBranchNames}) 437 438 d := searcherForTest(t, b).(*indexData) 439 440 sr, err := d.Search( 441 context.Background(), 442 &query.Substring{ 443 Pattern: "dummy", 444 CaseSensitive: false, 445 }, 446 &zoekt.SearchOptions{}, 447 ) 448 if err != nil { 449 t.Fatal(err) 450 } 451 452 want := map[string][]string{ 453 "f1": manyBranchNames, 454 } 455 456 if len(sr.Files) != 1 { 457 t.Fatalf("len(sr.Files): want %d, got %d", 1, len(sr.Files)) 458 } 459 460 for _, f := range sr.Files { 461 if d := cmp.Diff(want[f.FileName], f.Branches); d != "" { 462 t.Fatalf("-want,+got:\n%s", d) 463 } 464 } 465}