fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2020 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package index 16 17import ( 18 "context" 19 "hash/fnv" 20 "reflect" 21 "regexp/syntax" 22 "strconv" 23 "strings" 24 "testing" 25 26 "github.com/RoaringBitmap/roaring" 27 "github.com/google/go-cmp/cmp" 28 "github.com/google/go-cmp/cmp/cmpopts" 29 "github.com/grafana/regexp" 30 "github.com/sourcegraph/zoekt" 31 "github.com/sourcegraph/zoekt/query" 32) 33 34var opnames = map[syntax.Op]string{ 35 syntax.OpNoMatch: "OpNoMatch", 36 syntax.OpEmptyMatch: "OpEmptyMatch", 37 syntax.OpLiteral: "OpLiteral", 38 syntax.OpCharClass: "OpCharClass", 39 syntax.OpAnyCharNotNL: "OpAnyCharNotNL", 40 syntax.OpAnyChar: "OpAnyChar", 41 syntax.OpBeginLine: "OpBeginLine", 42 syntax.OpEndLine: "OpEndLine", 43 syntax.OpBeginText: "OpBeginText", 44 syntax.OpEndText: "OpEndText", 45 syntax.OpWordBoundary: "OpWordBoundary", 46 syntax.OpNoWordBoundary: "OpNoWordBoundary", 47 syntax.OpCapture: "OpCapture", 48 syntax.OpStar: "OpStar", 49 syntax.OpPlus: "OpPlus", 50 syntax.OpQuest: "OpQuest", 51 syntax.OpRepeat: "OpRepeat", 52 syntax.OpConcat: "OpConcat", 53 syntax.OpAlternate: "OpAlternate", 54} 55 56func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) { 57 t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub)) 58 for _, s := range r.Sub { 59 printRegexp(t, s, lvl+1) 60 } 61} 62 63func caseSensitiveSubstrMT(pattern string) matchTree { 64 d := &indexData{} 65 mt, _ := d.newSubstringMatchTree(&query.Substring{ 66 Pattern: pattern, 67 CaseSensitive: true, 68 }) 69 return mt 70} 71 72func substrMT(pattern string) matchTree { 73 d := &indexData{} 74 mt, _ := d.newSubstringMatchTree(&query.Substring{ 75 Pattern: pattern, 76 CaseSensitive: false, 77 }) 78 return mt 79} 80 81func TestRegexpParse(t *testing.T) { 82 type testcase struct { 83 in string 84 query matchTree 85 isEquivalent bool 86 caseSensitive bool 87 } 88 89 cases := []testcase{ 90 {"(foo|)bar", substrMT("bar"), false, false}, 91 {"(foo|)", &bruteForceMatchTree{}, false, false}, 92 {"(foo|bar)baz.*bla", &andMatchTree{[]matchTree{ 93 &orMatchTree{[]matchTree{ 94 substrMT("foo"), 95 substrMT("bar"), 96 }}, 97 substrMT("baz"), 98 substrMT("bla"), 99 }}, false, false}, 100 { 101 "^[a-z](People)+barrabas$", 102 &andMatchTree{[]matchTree{ 103 substrMT("People"), 104 substrMT("barrabas"), 105 }}, false, false, 106 }, 107 {"foo", substrMT("foo"), true, false}, 108 {"foo", caseSensitiveSubstrMT("foo"), true, true}, 109 {"(?i)foo", substrMT("FOO"), true, false}, 110 {"(?i)foo", substrMT("FOO"), true, true}, 111 {"^foo", substrMT("foo"), false, false}, 112 {"(foo) (bar)", &andMatchTree{[]matchTree{substrMT("foo"), substrMT("bar")}}, false, false}, 113 {"(thread|needle|haystack)", &orMatchTree{[]matchTree{ 114 substrMT("thread"), 115 substrMT("needle"), 116 substrMT("haystack"), 117 }}, true, false}, 118 {"(foo)(?-s:.)*?(bar)", &andLineMatchTree{andMatchTree{[]matchTree{ 119 substrMT("foo"), 120 substrMT("bar"), 121 }}}, false, false}, 122 {"(foo)(?-s:.)*?[[:space:]](?-s:.)*?(bar)", &andMatchTree{[]matchTree{ 123 substrMT("foo"), 124 substrMT("bar"), 125 }}, false, false}, 126 {"(foo){2,}", substrMT("foo"), false, false}, 127 {"(...)(...)", &bruteForceMatchTree{}, false, false}, 128 } 129 130 for _, c := range cases { 131 r, err := syntax.Parse(c.in, syntax.Perl) 132 if err != nil { 133 t.Errorf("Parse(%q): %v", c.in, err) 134 continue 135 } 136 d := indexData{} 137 q := query.Regexp{ 138 Regexp: r, 139 CaseSensitive: c.caseSensitive, 140 } 141 gotQuery, isEq, _, _ := d.regexpToMatchTreeRecursive(q.Regexp, 3, q.FileName, q.CaseSensitive) 142 if !reflect.DeepEqual(c.query, gotQuery) { 143 printRegexp(t, r, 0) 144 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, gotQuery, c.query) 145 } 146 if isEq != c.isEquivalent { 147 printRegexp(t, r, 0) 148 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, isEq, c.isEquivalent) 149 } 150 } 151} 152 153func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) { 154 cs := compoundReposShard(t, "foo", "bar") 155 156 ctx := context.Background() 157 q := &query.Const{Value: true} 158 opts := &zoekt.SearchOptions{ShardRepoMaxMatchCount: 1} 159 160 sr, err := cs.Search(ctx, q, opts) 161 if err != nil { 162 t.Fatal(err) 163 } 164 165 t.Run("matches", func(t *testing.T) { 166 var filenames []string 167 for _, r := range sr.Files { 168 filenames = append(filenames, r.FileName) 169 } 170 171 got, want := filenames, []string{"foo.txt", "bar.txt"} 172 if diff := cmp.Diff(want, got); diff != "" { 173 t.Errorf("mismatch (-want, +got): %s", diff) 174 } 175 }) 176 177 t.Run("stats", func(t *testing.T) { 178 got, want := sr.Stats, zoekt.Stats{ 179 ContentBytesLoaded: 0, 180 FileCount: 2, 181 FilesConsidered: 2, 182 FilesSkipped: 2, 183 ShardsScanned: 1, 184 MatchCount: 2, 185 } 186 if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 187 t.Errorf("mismatch (-want, +got): %s", diff) 188 } 189 }) 190} 191 192func compoundReposShard(t *testing.T, names ...string) *indexData { 193 t.Helper() 194 195 repos := make([]*zoekt.Repository, 0, len(names)) 196 docs := make([][]Document, 0, len(names)) 197 for _, name := range names { 198 repos = append(repos, &zoekt.Repository{ID: hash(name), Name: name}) 199 ds := []Document{ 200 {Name: name + ".txt", Content: []byte(name + " content")}, 201 {Name: name + ".2.txt", Content: []byte(name + " content 2")}, 202 } 203 docs = append(docs, ds) 204 } 205 206 b := testShardBuilderCompound(t, repos, docs) 207 s := searcherForTest(t, b) 208 return s.(*indexData) 209} 210 211func TestSimplifyRepoSet(t *testing.T) { 212 d := compoundReposShard(t, "foo", "bar") 213 all := &query.RepoSet{Set: map[string]bool{"foo": true, "bar": true}} 214 some := &query.RepoSet{Set: map[string]bool{"foo": true, "banana": true}} 215 none := &query.RepoSet{Set: map[string]bool{"banana": true}} 216 217 got := d.simplify(all) 218 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" { 219 t.Fatalf("-want, +got:\n%s", d) 220 } 221 222 got = d.simplify(some) 223 if d := cmp.Diff(some, got); d != "" { 224 t.Fatalf("-want, +got:\n%s", d) 225 } 226 227 got = d.simplify(none) 228 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" { 229 t.Fatalf("-want, +got:\n%s", d) 230 } 231} 232 233func TestSimplifyRepoIDs(t *testing.T) { 234 d := compoundReposShard(t, "foo", "bar") 235 all := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("bar"))} 236 some := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("banana"))} 237 none := &query.RepoIDs{Repos: roaring.BitmapOf(hash("banana"))} 238 239 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() }) 240 241 got := d.simplify(all) 242 if d := cmp.Diff(&query.Const{Value: true}, got, tr); d != "" { 243 t.Fatalf("-want, +got:\n%s", d) 244 } 245 246 got = d.simplify(some) 247 if d := cmp.Diff(some, got, tr); d != "" { 248 t.Fatalf("-want, +got:\n%s", d) 249 } 250 251 got = d.simplify(none) 252 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" { 253 t.Fatalf("-want, +got:\n%s", d) 254 } 255} 256 257func TestSimplifyRepo(t *testing.T) { 258 re := func(pat string) *query.Repo { 259 t.Helper() 260 re, err := regexp.Compile(pat) 261 if err != nil { 262 t.Fatal(err) 263 } 264 return &query.Repo{ 265 Regexp: re, 266 } 267 } 268 d := compoundReposShard(t, "foo", "fool") 269 cases := []struct { 270 name string 271 q query.Q 272 want query.Q 273 }{{ 274 name: "all", 275 q: re("f.*"), 276 want: &query.Const{Value: true}, 277 }, { 278 name: "some", 279 q: re("foo."), 280 want: re("foo."), 281 }, { 282 name: "none", 283 q: re("banana"), 284 want: &query.Const{Value: false}, 285 }} 286 287 for _, tc := range cases { 288 got := d.simplify(tc.q) 289 if d := cmp.Diff(tc.want.String(), got.String()); d != "" { 290 t.Errorf("%s: -want, +got:\n%s", tc.name, d) 291 } 292 } 293} 294 295func TestSimplifyRepoRegexp(t *testing.T) { 296 re := func(pat string) *query.RepoRegexp { 297 t.Helper() 298 re, err := regexp.Compile(pat) 299 if err != nil { 300 t.Fatal(err) 301 } 302 return &query.RepoRegexp{ 303 Regexp: re, 304 } 305 } 306 d := compoundReposShard(t, "foo", "fool") 307 cases := []struct { 308 name string 309 q query.Q 310 want query.Q 311 }{{ 312 name: "all", 313 q: re("f.*"), 314 want: &query.Const{Value: true}, 315 }, { 316 name: "some", 317 q: re("foo."), 318 want: re("foo."), 319 }, { 320 name: "none", 321 q: re("banana"), 322 want: &query.Const{Value: false}, 323 }} 324 325 for _, tc := range cases { 326 got := d.simplify(tc.q) 327 if d := cmp.Diff(tc.want.String(), got.String()); d != "" { 328 t.Errorf("%s: -want, +got:\n%s", tc.name, d) 329 } 330 } 331} 332 333func TestSimplifyRcRawConfig(t *testing.T) { 334 d := compoundReposShard(t, "foo", "bar") 335 var all = query.RcOnlyPrivate | query.RcNoForks | query.RcNoArchived 336 337 got := d.simplify(all) 338 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" { 339 t.Fatalf("-want, +got:\n%s", d) 340 } 341 342 var none = query.RcOnlyPublic | query.RcNoForks | query.RcNoArchived 343 got = d.simplify(none) 344 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" { 345 t.Fatalf("-want, +got:\n%s", d) 346 } 347} 348 349func TestSimplifyBranchesRepos(t *testing.T) { 350 d := compoundReposShard(t, "foo", "bar") 351 352 some := &query.BranchesRepos{ 353 List: []query.BranchRepos{ 354 {Branch: "branch1", Repos: roaring.BitmapOf(hash("bar"))}, 355 }, 356 } 357 none := &query.Repo{Regexp: regexp.MustCompile("banana")} 358 359 got := d.simplify(some) 360 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() }) 361 if d := cmp.Diff(some, got, tr); d != "" { 362 t.Fatalf("-want, +got:\n%s", d) 363 } 364 365 got = d.simplify(none) 366 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" { 367 t.Fatalf("-want, +got:\n%s", d) 368 } 369} 370 371func hash(name string) uint32 { 372 h := fnv.New32() 373 h.Write([]byte(name)) 374 return h.Sum32() 375} 376 377func TestGatherBranches(t *testing.T) { 378 content := []byte("dummy") 379 b := testShardBuilder(t, &zoekt.Repository{ 380 Branches: []zoekt.RepositoryBranch{ 381 {"foo", "v1"}, 382 {"foo-2", "v1"}, 383 {"main", "v1"}, 384 {"bar", "v1"}, 385 {"quz", "v1"}, 386 }, 387 }, 388 Document{Name: "f1", Content: content, Branches: []string{"foo", "bar", "quz"}}, 389 Document{Name: "f2", Content: content, Branches: []string{"foo", "foo-2"}}, 390 Document{Name: "f3", Content: content, Branches: []string{"main"}}) 391 392 d := searcherForTest(t, b).(*indexData) 393 394 sr, err := d.Search( 395 context.Background(), 396 &query.Or{Children: []query.Q{ 397 &query.Branch{Pattern: "foo"}, 398 &query.Branch{Pattern: "quz"}, 399 }}, 400 &zoekt.SearchOptions{}, 401 ) 402 if err != nil { 403 t.Fatal(err) 404 } 405 406 want := map[string][]string{ 407 "f1": {"foo", "quz"}, 408 "f2": {"foo", "foo-2"}, 409 } 410 411 if len(sr.Files) != 2 { 412 t.Fatalf("len(sr.Files): want %d, got %d", 2, len(sr.Files)) 413 } 414 415 for _, f := range sr.Files { 416 if d := cmp.Diff(want[f.FileName], f.Branches); d != "" { 417 t.Fatalf("-want,+got:\n%s", d) 418 } 419 } 420} 421 422func TestGatherBranchesMany(t *testing.T) { 423 content := []byte("dummy") 424 manyBranchNames := []string{} 425 manyBranches := []zoekt.RepositoryBranch{} 426 for i := range 64 { 427 branchName := "branch-" + strconv.Itoa(i) 428 manyBranchNames = append(manyBranchNames, branchName) 429 manyBranches = append(manyBranches, zoekt.RepositoryBranch{ 430 Name: branchName, 431 Version: "v1"}) 432 } 433 b := testShardBuilder(t, &zoekt.Repository{ 434 Branches: manyBranches, 435 }, Document{Name: "f1", Content: content, Branches: manyBranchNames}) 436 437 d := searcherForTest(t, b).(*indexData) 438 439 sr, err := d.Search( 440 context.Background(), 441 &query.Substring{ 442 Pattern: "dummy", 443 CaseSensitive: false, 444 }, 445 &zoekt.SearchOptions{}, 446 ) 447 if err != nil { 448 t.Fatal(err) 449 } 450 451 want := map[string][]string{ 452 "f1": manyBranchNames, 453 } 454 455 if len(sr.Files) != 1 { 456 t.Fatalf("len(sr.Files): want %d, got %d", 1, len(sr.Files)) 457 } 458 459 for _, f := range sr.Files { 460 if d := cmp.Diff(want[f.FileName], f.Branches); d != "" { 461 t.Fatalf("-want,+got:\n%s", d) 462 } 463 } 464}