fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2020 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package index 16 17import ( 18 "context" 19 "hash/fnv" 20 "reflect" 21 "regexp/syntax" 22 "strings" 23 "testing" 24 25 "github.com/RoaringBitmap/roaring" 26 "github.com/google/go-cmp/cmp" 27 "github.com/google/go-cmp/cmp/cmpopts" 28 "github.com/grafana/regexp" 29 "github.com/sourcegraph/zoekt" 30 "github.com/sourcegraph/zoekt/query" 31) 32 33var opnames = map[syntax.Op]string{ 34 syntax.OpNoMatch: "OpNoMatch", 35 syntax.OpEmptyMatch: "OpEmptyMatch", 36 syntax.OpLiteral: "OpLiteral", 37 syntax.OpCharClass: "OpCharClass", 38 syntax.OpAnyCharNotNL: "OpAnyCharNotNL", 39 syntax.OpAnyChar: "OpAnyChar", 40 syntax.OpBeginLine: "OpBeginLine", 41 syntax.OpEndLine: "OpEndLine", 42 syntax.OpBeginText: "OpBeginText", 43 syntax.OpEndText: "OpEndText", 44 syntax.OpWordBoundary: "OpWordBoundary", 45 syntax.OpNoWordBoundary: "OpNoWordBoundary", 46 syntax.OpCapture: "OpCapture", 47 syntax.OpStar: "OpStar", 48 syntax.OpPlus: "OpPlus", 49 syntax.OpQuest: "OpQuest", 50 syntax.OpRepeat: "OpRepeat", 51 syntax.OpConcat: "OpConcat", 52 syntax.OpAlternate: "OpAlternate", 53} 54 55func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) { 56 t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub)) 57 for _, s := range r.Sub { 58 printRegexp(t, s, lvl+1) 59 } 60} 61 62func caseSensitiveSubstrMT(pattern string) matchTree { 63 d := &indexData{} 64 mt, _ := d.newSubstringMatchTree(&query.Substring{ 65 Pattern: pattern, 66 CaseSensitive: true, 67 }) 68 return mt 69} 70 71func substrMT(pattern string) matchTree { 72 d := &indexData{} 73 mt, _ := d.newSubstringMatchTree(&query.Substring{ 74 Pattern: pattern, 75 CaseSensitive: false, 76 }) 77 return mt 78} 79 80func TestRegexpParse(t *testing.T) { 81 type testcase struct { 82 in string 83 query matchTree 84 isEquivalent bool 85 caseSensitive bool 86 } 87 88 cases := []testcase{ 89 {"(foo|)bar", substrMT("bar"), false, false}, 90 {"(foo|)", &bruteForceMatchTree{}, false, false}, 91 {"(foo|bar)baz.*bla", &andMatchTree{[]matchTree{ 92 &orMatchTree{[]matchTree{ 93 substrMT("foo"), 94 substrMT("bar"), 95 }}, 96 substrMT("baz"), 97 substrMT("bla"), 98 }}, false, false}, 99 { 100 "^[a-z](People)+barrabas$", 101 &andMatchTree{[]matchTree{ 102 substrMT("People"), 103 substrMT("barrabas"), 104 }}, false, false, 105 }, 106 {"foo", substrMT("foo"), true, false}, 107 {"foo", caseSensitiveSubstrMT("foo"), true, true}, 108 {"(?i)foo", substrMT("FOO"), true, false}, 109 {"(?i)foo", substrMT("FOO"), true, true}, 110 {"^foo", substrMT("foo"), false, false}, 111 {"(foo) (bar)", &andMatchTree{[]matchTree{substrMT("foo"), substrMT("bar")}}, false, false}, 112 {"(thread|needle|haystack)", &orMatchTree{[]matchTree{ 113 substrMT("thread"), 114 substrMT("needle"), 115 substrMT("haystack"), 116 }}, true, false}, 117 {"(foo)(?-s:.)*?(bar)", &andLineMatchTree{andMatchTree{[]matchTree{ 118 substrMT("foo"), 119 substrMT("bar"), 120 }}}, false, false}, 121 {"(foo)(?-s:.)*?[[:space:]](?-s:.)*?(bar)", &andMatchTree{[]matchTree{ 122 substrMT("foo"), 123 substrMT("bar"), 124 }}, false, false}, 125 {"(foo){2,}", substrMT("foo"), false, false}, 126 {"(...)(...)", &bruteForceMatchTree{}, false, false}, 127 } 128 129 for _, c := range cases { 130 r, err := syntax.Parse(c.in, syntax.Perl) 131 if err != nil { 132 t.Errorf("Parse(%q): %v", c.in, err) 133 continue 134 } 135 d := indexData{} 136 q := query.Regexp{ 137 Regexp: r, 138 CaseSensitive: c.caseSensitive, 139 } 140 gotQuery, isEq, _, _ := d.regexpToMatchTreeRecursive(q.Regexp, 3, q.FileName, q.CaseSensitive) 141 if !reflect.DeepEqual(c.query, gotQuery) { 142 printRegexp(t, r, 0) 143 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, gotQuery, c.query) 144 } 145 if isEq != c.isEquivalent { 146 printRegexp(t, r, 0) 147 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, isEq, c.isEquivalent) 148 } 149 } 150} 151 152func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) { 153 cs := compoundReposShard(t, "foo", "bar") 154 155 ctx := context.Background() 156 q := &query.Const{Value: true} 157 opts := &zoekt.SearchOptions{ShardRepoMaxMatchCount: 1} 158 159 sr, err := cs.Search(ctx, q, opts) 160 if err != nil { 161 t.Fatal(err) 162 } 163 164 t.Run("matches", func(t *testing.T) { 165 var filenames []string 166 for _, r := range sr.Files { 167 filenames = append(filenames, r.FileName) 168 } 169 170 got, want := filenames, []string{"foo.txt", "bar.txt"} 171 if diff := cmp.Diff(want, got); diff != "" { 172 t.Errorf("mismatch (-want, +got): %s", diff) 173 } 174 }) 175 176 t.Run("stats", func(t *testing.T) { 177 got, want := sr.Stats, zoekt.Stats{ 178 ContentBytesLoaded: 0, 179 FileCount: 2, 180 FilesConsidered: 2, 181 FilesSkipped: 2, 182 ShardsScanned: 1, 183 MatchCount: 2, 184 } 185 if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 186 t.Errorf("mismatch (-want, +got): %s", diff) 187 } 188 }) 189} 190 191func compoundReposShard(t *testing.T, names ...string) *indexData { 192 t.Helper() 193 194 repos := make([]*zoekt.Repository, 0, len(names)) 195 docs := make([][]Document, 0, len(names)) 196 for _, name := range names { 197 repos = append(repos, &zoekt.Repository{ID: hash(name), Name: name}) 198 ds := []Document{ 199 {Name: name + ".txt", Content: []byte(name + " content")}, 200 {Name: name + ".2.txt", Content: []byte(name + " content 2")}, 201 } 202 docs = append(docs, ds) 203 } 204 205 b := testIndexBuilderCompound(t, repos, docs) 206 s := searcherForTest(t, b) 207 return s.(*indexData) 208} 209 210func TestSimplifyRepoSet(t *testing.T) { 211 d := compoundReposShard(t, "foo", "bar") 212 all := &query.RepoSet{Set: map[string]bool{"foo": true, "bar": true}} 213 some := &query.RepoSet{Set: map[string]bool{"foo": true, "banana": true}} 214 none := &query.RepoSet{Set: map[string]bool{"banana": true}} 215 216 got := d.simplify(all) 217 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" { 218 t.Fatalf("-want, +got:\n%s", d) 219 } 220 221 got = d.simplify(some) 222 if d := cmp.Diff(some, got); d != "" { 223 t.Fatalf("-want, +got:\n%s", d) 224 } 225 226 got = d.simplify(none) 227 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" { 228 t.Fatalf("-want, +got:\n%s", d) 229 } 230} 231 232func TestSimplifyRepoIDs(t *testing.T) { 233 d := compoundReposShard(t, "foo", "bar") 234 all := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("bar"))} 235 some := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("banana"))} 236 none := &query.RepoIDs{Repos: roaring.BitmapOf(hash("banana"))} 237 238 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() }) 239 240 got := d.simplify(all) 241 if d := cmp.Diff(&query.Const{Value: true}, got, tr); d != "" { 242 t.Fatalf("-want, +got:\n%s", d) 243 } 244 245 got = d.simplify(some) 246 if d := cmp.Diff(some, got, tr); d != "" { 247 t.Fatalf("-want, +got:\n%s", d) 248 } 249 250 got = d.simplify(none) 251 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" { 252 t.Fatalf("-want, +got:\n%s", d) 253 } 254} 255 256func TestSimplifyRepo(t *testing.T) { 257 re := func(pat string) *query.Repo { 258 t.Helper() 259 re, err := regexp.Compile(pat) 260 if err != nil { 261 t.Fatal(err) 262 } 263 return &query.Repo{ 264 Regexp: re, 265 } 266 } 267 d := compoundReposShard(t, "foo", "fool") 268 cases := []struct { 269 name string 270 q query.Q 271 want query.Q 272 }{{ 273 name: "all", 274 q: re("f.*"), 275 want: &query.Const{Value: true}, 276 }, { 277 name: "some", 278 q: re("foo."), 279 want: re("foo."), 280 }, { 281 name: "none", 282 q: re("banana"), 283 want: &query.Const{Value: false}, 284 }} 285 286 for _, tc := range cases { 287 got := d.simplify(tc.q) 288 if d := cmp.Diff(tc.want.String(), got.String()); d != "" { 289 t.Errorf("%s: -want, +got:\n%s", tc.name, d) 290 } 291 } 292} 293 294func TestSimplifyRepoRegexp(t *testing.T) { 295 re := func(pat string) *query.RepoRegexp { 296 t.Helper() 297 re, err := regexp.Compile(pat) 298 if err != nil { 299 t.Fatal(err) 300 } 301 return &query.RepoRegexp{ 302 Regexp: re, 303 } 304 } 305 d := compoundReposShard(t, "foo", "fool") 306 cases := []struct { 307 name string 308 q query.Q 309 want query.Q 310 }{{ 311 name: "all", 312 q: re("f.*"), 313 want: &query.Const{Value: true}, 314 }, { 315 name: "some", 316 q: re("foo."), 317 want: re("foo."), 318 }, { 319 name: "none", 320 q: re("banana"), 321 want: &query.Const{Value: false}, 322 }} 323 324 for _, tc := range cases { 325 got := d.simplify(tc.q) 326 if d := cmp.Diff(tc.want.String(), got.String()); d != "" { 327 t.Errorf("%s: -want, +got:\n%s", tc.name, d) 328 } 329 } 330} 331 332func TestSimplifyRcRawConfig(t *testing.T) { 333 d := compoundReposShard(t, "foo", "bar") 334 var all = query.RcOnlyPrivate | query.RcNoForks | query.RcNoArchived 335 336 got := d.simplify(all) 337 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" { 338 t.Fatalf("-want, +got:\n%s", d) 339 } 340 341 var none = query.RcOnlyPublic | query.RcNoForks | query.RcNoArchived 342 got = d.simplify(none) 343 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" { 344 t.Fatalf("-want, +got:\n%s", d) 345 } 346} 347 348func TestSimplifyBranchesRepos(t *testing.T) { 349 d := compoundReposShard(t, "foo", "bar") 350 351 some := &query.BranchesRepos{ 352 List: []query.BranchRepos{ 353 {Branch: "branch1", Repos: roaring.BitmapOf(hash("bar"))}, 354 }, 355 } 356 none := &query.Repo{Regexp: regexp.MustCompile("banana")} 357 358 got := d.simplify(some) 359 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() }) 360 if d := cmp.Diff(some, got, tr); d != "" { 361 t.Fatalf("-want, +got:\n%s", d) 362 } 363 364 got = d.simplify(none) 365 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" { 366 t.Fatalf("-want, +got:\n%s", d) 367 } 368} 369 370func hash(name string) uint32 { 371 h := fnv.New32() 372 h.Write([]byte(name)) 373 return h.Sum32() 374} 375 376func TestGatherBranches(t *testing.T) { 377 content := []byte("dummy") 378 b := testIndexBuilder(t, &zoekt.Repository{ 379 Branches: []zoekt.RepositoryBranch{ 380 {"foo", "v1"}, 381 {"foo-2", "v1"}, 382 {"main", "v1"}, 383 {"bar", "v1"}, 384 {"quz", "v1"}, 385 }, 386 }, 387 Document{Name: "f1", Content: content, Branches: []string{"foo", "bar", "quz"}}, 388 Document{Name: "f2", Content: content, Branches: []string{"foo", "foo-2"}}, 389 Document{Name: "f3", Content: content, Branches: []string{"main"}}) 390 391 d := searcherForTest(t, b).(*indexData) 392 393 sr, err := d.Search( 394 context.Background(), 395 &query.Or{Children: []query.Q{ 396 &query.Branch{Pattern: "foo"}, 397 &query.Branch{Pattern: "quz"}, 398 }}, 399 &zoekt.SearchOptions{}, 400 ) 401 if err != nil { 402 t.Fatal(err) 403 } 404 405 want := map[string][]string{ 406 "f1": {"foo", "quz"}, 407 "f2": {"foo", "foo-2"}, 408 } 409 410 if len(sr.Files) != 2 { 411 t.Fatalf("len(sr.Files): want %d, got %d", 2, len(sr.Files)) 412 } 413 414 for _, f := range sr.Files { 415 if d := cmp.Diff(want[f.FileName], f.Branches); d != "" { 416 t.Fatalf("-want,+got:\n%s", d) 417 } 418 } 419}