fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 12 kB View raw
1// Copyright 2018 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package index 16 17import ( 18 "math" 19 "reflect" 20 "regexp/syntax" 21 "testing" 22 23 "github.com/RoaringBitmap/roaring" 24 "github.com/grafana/regexp" 25 26 "github.com/sourcegraph/zoekt" 27 "github.com/sourcegraph/zoekt/query" 28) 29 30func Test_breakOnNewlines(t *testing.T) { 31 type args struct { 32 cm *candidateMatch 33 text []byte 34 } 35 tests := []struct { 36 name string 37 args args 38 want []*candidateMatch 39 }{ 40 { 41 name: "trivial case", 42 args: args{ 43 cm: &candidateMatch{ 44 byteOffset: 0, 45 byteMatchSz: 0, 46 }, 47 text: nil, 48 }, 49 want: nil, 50 }, 51 { 52 name: "no newlines", 53 args: args{ 54 cm: &candidateMatch{ 55 byteOffset: 0, 56 byteMatchSz: 1, 57 }, 58 text: []byte("a"), 59 }, 60 want: []*candidateMatch{ 61 { 62 byteOffset: 0, 63 byteMatchSz: 1, 64 }, 65 }, 66 }, 67 { 68 name: "newline at start", 69 args: args{ 70 cm: &candidateMatch{ 71 byteOffset: 0, 72 byteMatchSz: 2, 73 }, 74 text: []byte("\na"), 75 }, 76 want: []*candidateMatch{ 77 { 78 byteOffset: 1, 79 byteMatchSz: 1, 80 }, 81 }, 82 }, 83 { 84 name: "newline at end", 85 args: args{ 86 cm: &candidateMatch{ 87 byteOffset: 0, 88 byteMatchSz: 2, 89 }, 90 text: []byte("a\n"), 91 }, 92 want: []*candidateMatch{ 93 { 94 byteOffset: 0, 95 byteMatchSz: 1, 96 }, 97 }, 98 }, 99 { 100 name: "newline in middle", 101 args: args{ 102 cm: &candidateMatch{ 103 byteOffset: 0, 104 byteMatchSz: 3, 105 }, 106 text: []byte("a\nb"), 107 }, 108 want: []*candidateMatch{ 109 { 110 byteOffset: 0, 111 byteMatchSz: 1, 112 }, 113 { 114 byteOffset: 2, 115 byteMatchSz: 1, 116 }, 117 }, 118 }, 119 { 120 name: "two newlines", 121 args: args{ 122 cm: &candidateMatch{ 123 byteOffset: 0, 124 byteMatchSz: 5, 125 }, 126 text: []byte("a\nb\nc"), 127 }, 128 want: []*candidateMatch{ 129 { 130 byteOffset: 0, 131 byteMatchSz: 1, 132 }, 133 { 134 byteOffset: 2, 135 byteMatchSz: 1, 136 }, 137 { 138 byteOffset: 4, 139 byteMatchSz: 1, 140 }, 141 }, 142 }, 143 } 144 for _, tt := range tests { 145 t.Run(tt.name, func(t *testing.T) { 146 if got := breakOnNewlines(tt.args.cm, tt.args.text); !reflect.DeepEqual(got, tt.want) { 147 type PrintableCm struct { 148 byteOffset uint32 149 byteMatchSz uint32 150 } 151 var got2, want2 []PrintableCm 152 for _, g := range got { 153 got2 = append(got2, PrintableCm{byteOffset: g.byteOffset, byteMatchSz: g.byteMatchSz}) 154 } 155 for _, w := range tt.want { 156 want2 = append(want2, PrintableCm{byteOffset: w.byteOffset, byteMatchSz: w.byteMatchSz}) 157 } 158 t.Errorf("breakMatchOnNewlines() = %+v, want %+v", got2, want2) 159 } 160 }) 161 } 162} 163 164func TestEquivalentQuerySkipRegexpTree(t *testing.T) { 165 tests := []struct { 166 query string 167 skip bool 168 }{ 169 {query: "^foo", skip: false}, 170 {query: "foo", skip: true}, 171 {query: "thread|needle|haystack", skip: true}, 172 {query: "contain(er|ing)", skip: false}, 173 {query: "thread (needle|haystack)", skip: true}, 174 {query: "thread (needle|)", skip: false}, 175 {query: `\bthread\b case:yes`, skip: true}, // word search 176 {query: `\bthread\b case:no`, skip: false}, 177 } 178 179 for _, tt := range tests { 180 q, err := query.Parse(tt.query) 181 if err != nil { 182 t.Errorf("Error parsing query: %s", "sym:"+tt.query) 183 continue 184 } 185 186 d := &indexData{} 187 mt, err := d.newMatchTree(q, matchTreeOpt{}) 188 if err != nil { 189 t.Errorf("Error creating match tree from query: %s", q) 190 continue 191 } 192 193 visitMatchTree(mt, func(m matchTree) { 194 if _, ok := m.(*regexpMatchTree); ok && tt.skip { 195 t.Log(mt) 196 t.Errorf("Expected regexpMatchTree to be skipped for query: %s", q) 197 } 198 }) 199 } 200} 201 202// Test whether we skip the regexp engine for queries like "\bLITERAL\b 203// case:yes" 204func TestWordSearchSkipRegexpTree(t *testing.T) { 205 qStr := "\\bfoo\\b case:yes" 206 q, err := query.Parse(qStr) 207 if err != nil { 208 t.Fatalf("Error parsing query: %s", "sym:"+qStr) 209 } 210 211 d := &indexData{} 212 mt, err := d.newMatchTree(q, matchTreeOpt{}) 213 if err != nil { 214 t.Fatalf("Error creating match tree from query: %s", q) 215 } 216 217 countRegexMatchTree, countWordMatchTree := 0, 0 218 visitMatchTree(mt, func(m matchTree) { 219 switch m.(type) { 220 case *regexpMatchTree: 221 countRegexMatchTree++ 222 case *wordMatchTree: 223 countWordMatchTree++ 224 } 225 }) 226 227 if countRegexMatchTree != 0 { 228 t.Fatalf("expected to find 0 regexMatchTree, found %d", countRegexMatchTree) 229 } 230 231 if countWordMatchTree != 1 { 232 t.Fatalf("expected to find 1 wordMatchTree, found %d", countWordMatchTree) 233 } 234} 235 236func TestSymbolMatchTree(t *testing.T) { 237 tests := []struct { 238 query string 239 substr string 240 regex string 241 regexAll bool 242 }{ 243 {query: "sym:.*", regex: "(?i)(?-s:.)*", regexAll: true}, 244 {query: "sym:(ab|cd)", regex: "(?i)ab|cd"}, 245 {query: "sym:b.r", regex: "(?i)b(?-s:.)r"}, 246 {query: "sym:horse", substr: "horse"}, 247 {query: `sym:\bthread\b case:yes`, regex: `\bthread\b`}, // check we disable word search opt 248 {query: `sym:\bthread\b case:no`, regex: `(?i)\bthread\b`}, 249 } 250 251 for _, tt := range tests { 252 q, err := query.Parse(tt.query) 253 if err != nil { 254 t.Errorf("Error parsing query: %s", tt.query) 255 continue 256 } 257 258 d := &indexData{} 259 mt, err := d.newMatchTree(q, matchTreeOpt{}) 260 if err != nil { 261 t.Errorf("Error creating match tree from query: %s", q) 262 continue 263 } 264 265 var ( 266 substr string 267 regex string 268 regexAll bool 269 ) 270 if substrMT, ok := mt.(*symbolSubstrMatchTree); ok { 271 substr = substrMT.query.Pattern 272 } 273 if regexMT, ok := mt.(*symbolRegexpMatchTree); ok { 274 regex = regexMT.regexp.String() 275 regexAll = regexMT.all 276 } 277 278 if substr != tt.substr { 279 t.Errorf("%s has unexpected substring:\nwant: %q\ngot: %q", tt.query, tt.substr, substr) 280 } 281 if regex != tt.regex { 282 t.Errorf("%s has unexpected regex:\nwant: %q\ngot: %q", tt.query, tt.regex, regex) 283 } 284 if regexAll != tt.regexAll { 285 t.Errorf("%s has unexpected regexAll: want=%t got=%t", tt.query, tt.regexAll, regexAll) 286 } 287 } 288} 289 290func TestRepoSet(t *testing.T) { 291 d := &indexData{ 292 repoMetaData: []zoekt.Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}}, 293 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1}, 294 repos: []uint16{0, 0, 1, 2, 3, 3}, 295 } 296 mt, err := d.newMatchTree(&query.RepoSet{Set: map[string]bool{"r1": true, "r3": true, "r99": true}}, matchTreeOpt{}) 297 if err != nil { 298 t.Fatal(err) 299 } 300 want := []uint32{2, 4, 5} 301 for i := range want { 302 nextDoc := mt.nextDoc() 303 if nextDoc != want[i] { 304 t.Fatalf("want %d, got %d", want[i], nextDoc) 305 } 306 mt.prepare(nextDoc) 307 } 308 if mt.nextDoc() != math.MaxUint32 { 309 t.Fatalf("expected %d document, but got at least 1 more", len(want)) 310 } 311} 312 313func TestRepo(t *testing.T) { 314 d := &indexData{ 315 repoMetaData: []zoekt.Repository{{Name: "foo"}, {Name: "bar"}}, 316 fileBranchMasks: []uint64{1, 1, 1, 1, 1}, 317 repos: []uint16{0, 0, 1, 0, 1}, 318 } 319 mt, err := d.newMatchTree(&query.Repo{Regexp: regexp.MustCompile("ar")}, matchTreeOpt{}) 320 if err != nil { 321 t.Fatal(err) 322 } 323 want := []uint32{2, 4} 324 for i := range want { 325 nextDoc := mt.nextDoc() 326 if nextDoc != want[i] { 327 t.Fatalf("want %d, got %d", want[i], nextDoc) 328 } 329 mt.prepare(nextDoc) 330 } 331 if mt.nextDoc() != math.MaxUint32 { 332 t.Fatalf("expect %d documents, but got at least 1 more", len(want)) 333 } 334} 335 336func TestBranchesRepos(t *testing.T) { 337 d := &indexData{ 338 repoMetaData: []zoekt.Repository{ 339 {ID: hash("foo"), Name: "foo"}, 340 {ID: hash("bar"), Name: "bar"}, 341 }, 342 fileBranchMasks: []uint64{1, 1, 1, 2, 1, 2, 1}, 343 repos: []uint16{0, 0, 1, 1, 1, 1, 1}, 344 branchIDs: []map[string]uint{{"HEAD": 1}, {"HEAD": 1, "b1": 2}}, 345 } 346 347 mt, err := d.newMatchTree(&query.BranchesRepos{List: []query.BranchRepos{ 348 {Branch: "b1", Repos: roaring.BitmapOf(hash("bar"))}, 349 {Branch: "b2", Repos: roaring.BitmapOf(hash("bar"))}, 350 }}, matchTreeOpt{}) 351 if err != nil { 352 t.Fatal(err) 353 } 354 355 want := []uint32{3, 5} 356 for i := range want { 357 nextDoc := mt.nextDoc() 358 if nextDoc != want[i] { 359 t.Fatalf("want %d, got %d", want[i], nextDoc) 360 } 361 mt.prepare(nextDoc) 362 } 363 364 if mt.nextDoc() != math.MaxUint32 { 365 t.Fatalf("expect %d documents, but got at least 1 more", len(want)) 366 } 367} 368 369func TestRepoIDs(t *testing.T) { 370 d := &indexData{ 371 repoMetaData: []zoekt.Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}}, 372 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1}, 373 repos: []uint16{0, 0, 1, 2, 3, 3}, 374 } 375 mt, err := d.newMatchTree(&query.RepoIDs{Repos: roaring.BitmapOf(1, 3, 99)}, matchTreeOpt{}) 376 if err != nil { 377 t.Fatal(err) 378 } 379 380 want := []uint32{2, 4, 5} 381 for i := range want { 382 nextDoc := mt.nextDoc() 383 if nextDoc != want[i] { 384 t.Fatalf("want %d, got %d", want[i], nextDoc) 385 } 386 mt.prepare(nextDoc) 387 } 388 if mt.nextDoc() != math.MaxUint32 { 389 t.Fatalf("expected %d document, but got at least 1 more", len(want)) 390 } 391} 392 393func TestIsRegexpAll(t *testing.T) { 394 valid := []string{ 395 ".*", 396 "(.*)", 397 "(?-s:.*)", 398 "(?s:.*)", 399 "(?i)(?-s:.*)", 400 } 401 invalid := []string{ 402 ".", 403 "foo", 404 "(foo.*)", 405 } 406 407 for _, s := range valid { 408 r, err := syntax.Parse(s, syntax.Perl) 409 if err != nil { 410 t.Fatal(err) 411 } 412 if !isRegexpAll(r) { 413 t.Errorf("expected %q to match all", s) 414 } 415 } 416 417 for _, s := range invalid { 418 r, err := syntax.Parse(s, syntax.Perl) 419 if err != nil { 420 t.Fatal(err) 421 } 422 if isRegexpAll(r) { 423 t.Errorf("expected %q to not match all", s) 424 } 425 } 426} 427 428func TestMetaQueryMatchTree(t *testing.T) { 429 d := &indexData{ 430 repoMetaData: []zoekt.Repository{ 431 {Name: "r0", Metadata: map[string]string{"license": "Apache-2.0"}}, 432 {Name: "r1", Metadata: map[string]string{"license": "MIT"}}, 433 {Name: "r2"}, // no metadata 434 {Name: "r3", Metadata: map[string]string{"haystack": "needle"}}, 435 {Name: "r4", Metadata: map[string]string{"note": "test"}}, 436 }, 437 fileBranchMasks: []uint64{1, 1, 1, 1, 1}, // 5 docs 438 repos: []uint16{0, 1, 2, 3, 4}, // map docIDs to repos 439 docMatchTreeCache: newDocMatchTreeCache(1), // small cache to test eviction 440 } 441 442 q := &query.Meta{ 443 Field: "license", 444 Value: regexp.MustCompile("M.T"), 445 } 446 447 mt, err := d.newMatchTree(q, matchTreeOpt{}) 448 if err != nil { 449 t.Fatalf("failed to build matchTree: %v", err) 450 } 451 452 // Check that the docMatchTree cache is populated correctly 453 checksum := queryMetaChecksum("license", regexp.MustCompile("M.T")) 454 cacheKeyField := "Meta" 455 if _, ok := d.docMatchTreeCache.Get(cacheKeyField, checksum); !ok { 456 t.Errorf("expected docMatchTreeCache to be populated for key (%q, %q)", cacheKeyField, checksum) 457 } 458 459 var matched []uint32 460 for { 461 doc := mt.nextDoc() 462 if doc == math.MaxUint32 { 463 break 464 } 465 matched = append(matched, doc) 466 mt.prepare(doc) 467 } 468 469 want := []uint32{1} // only doc from r1 should match 470 if !reflect.DeepEqual(matched, want) { 471 t.Errorf("meta match failed: got %v, want %v", matched, want) 472 } 473} 474 475func Test_queryMetaCacheKey(t *testing.T) { 476 cases := []struct { 477 field string 478 pattern string 479 wantKey string 480 }{ 481 {"metaField", "foo.*bar", "24e88a5ffec04af0"}, 482 {"metaField", "foo.*baz", "d8d6f6a7f0725b61"}, 483 {"otherField", "foo.*bar", "c9d07e17c028364"}, 484 } 485 for _, tc := range cases { 486 re := regexp.MustCompile(tc.pattern) 487 key := queryMetaChecksum(tc.field, re) 488 if key != tc.wantKey { 489 t.Errorf("unexpected key for field=%q pattern=%q: got %q, want %q", tc.field, tc.pattern, key, tc.wantKey) 490 } 491 } 492}