fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package query 16 17import ( 18 "encoding/json" 19 "fmt" 20 "log" 21 "reflect" 22 "regexp/syntax" 23 "sort" 24 "strconv" 25 "strings" 26 27 "github.com/RoaringBitmap/roaring" 28 "github.com/grafana/regexp" 29 "github.com/sourcegraph/zoekt/internal/syntaxutil" 30) 31 32var _ = log.Println 33 34// Q is a representation for a possibly hierarchical search query. 35type Q interface { 36 String() string 37} 38 39// RawConfig filters repositories based on their encoded RawConfig map. 40type RawConfig uint64 41 42const ( 43 RcOnlyPublic RawConfig = 1 44 RcOnlyPrivate RawConfig = 2 45 RcOnlyForks RawConfig = 1 << 2 46 RcNoForks RawConfig = 2 << 2 47 RcOnlyArchived RawConfig = 1 << 4 48 RcNoArchived RawConfig = 2 << 4 49) 50 51var flagNames = []struct { 52 Mask RawConfig 53 Label string 54}{ 55 {RcOnlyPublic, "RcOnlyPublic"}, 56 {RcOnlyPrivate, "RcOnlyPrivate"}, 57 {RcOnlyForks, "RcOnlyForks"}, 58 {RcNoForks, "RcNoForks"}, 59 {RcOnlyArchived, "RcOnlyArchived"}, 60 {RcNoArchived, "RcNoArchived"}, 61} 62 63func (r RawConfig) String() string { 64 var s []string 65 for _, fn := range flagNames { 66 if r&fn.Mask != 0 { 67 s = append(s, fn.Label) 68 } 69 } 70 return fmt.Sprintf("rawConfig:%s", strings.Join(s, "|")) 71} 72 73// RegexpQuery is a query looking for regular expressions matches. 74type Regexp struct { 75 Regexp *syntax.Regexp 76 FileName bool 77 Content bool 78 CaseSensitive bool 79} 80 81func (q *Regexp) String() string { 82 pref := "" 83 if q.FileName { 84 pref = "file_" 85 } 86 if q.CaseSensitive { 87 pref = "case_" + pref 88 } 89 return fmt.Sprintf("%sregex:%q", pref, syntaxutil.RegexpString(q.Regexp)) 90} 91 92// gobRegexp wraps Regexp to make it gob-encodable/decodable. Regexp contains syntax.Regexp, which 93// contains slices/arrays with possibly nil elements, which gob doesn't support 94// (https://github.com/golang/go/issues/1501). 95type gobRegexp struct { 96 Regexp // Regexp.Regexp (*syntax.Regexp) is set to nil and its string is set in RegexpString 97 RegexpString string 98} 99 100// GobEncode implements gob.Encoder. 101func (q Regexp) GobEncode() ([]byte, error) { 102 gobq := gobRegexp{Regexp: q, RegexpString: syntaxutil.RegexpString(q.Regexp)} 103 gobq.Regexp.Regexp = nil // can't be gob-encoded/decoded 104 return json.Marshal(gobq) 105} 106 107// GobDecode implements gob.Decoder. 108func (q *Regexp) GobDecode(data []byte) error { 109 var gobq gobRegexp 110 err := json.Unmarshal(data, &gobq) 111 if err != nil { 112 return err 113 } 114 gobq.Regexp.Regexp, err = syntax.Parse(gobq.RegexpString, regexpFlags) 115 if err != nil { 116 return err 117 } 118 *q = gobq.Regexp 119 return nil 120} 121 122// Symbol finds a string that is a symbol. 123type Symbol struct { 124 Expr Q 125} 126 127func (s *Symbol) String() string { 128 return fmt.Sprintf("sym:%s", s.Expr) 129} 130 131type caseQ struct { 132 Flavor string 133} 134 135func (c *caseQ) String() string { 136 return "case:" + c.Flavor 137} 138 139type Language struct { 140 Language string 141} 142 143func (l *Language) String() string { 144 return "lang:" + l.Language 145} 146 147type Const struct { 148 Value bool 149} 150 151func (q *Const) String() string { 152 if q.Value { 153 return "TRUE" 154 } 155 return "FALSE" 156} 157 158type Repo struct { 159 Regexp *regexp.Regexp 160} 161 162func (q *Repo) String() string { 163 return fmt.Sprintf("repo:%s", q.Regexp.String()) 164} 165 166func (q Repo) GobEncode() ([]byte, error) { 167 return []byte(q.Regexp.String()), nil 168} 169 170func (q *Repo) GobDecode(data []byte) error { 171 var err error 172 q.Regexp, err = regexp.Compile(string(data)) 173 return err 174} 175 176// RepoRegexp is a Sourcegraph addition which searches documents where the 177// repository name matches Regexp. 178type RepoRegexp struct { 179 Regexp *regexp.Regexp 180} 181 182func (q *RepoRegexp) String() string { 183 return fmt.Sprintf("reporegex:%q", q.Regexp.String()) 184} 185 186// GobEncode implements gob.Encoder. 187func (q *RepoRegexp) GobEncode() ([]byte, error) { 188 // gob can't encode syntax.Regexp 189 return []byte(q.Regexp.String()), nil 190} 191 192// GobDecode implements gob.Decoder. 193func (q *RepoRegexp) GobDecode(data []byte) error { 194 var err error 195 q.Regexp, err = regexp.Compile(string(data)) 196 return err 197} 198 199// BranchesRepos is a slice of BranchRepos to match. It is a Sourcegraph 200// addition and only used in the RPC interface for efficient checking of large 201// repo lists. 202type BranchesRepos struct { 203 List []BranchRepos 204} 205 206// NewSingleBranchesRepos is a helper for creating a BranchesRepos which 207// searches a single branch. 208func NewSingleBranchesRepos(branch string, ids ...uint32) *BranchesRepos { 209 return &BranchesRepos{List: []BranchRepos{ 210 {branch, roaring.BitmapOf(ids...)}, 211 }} 212} 213 214func (q *BranchesRepos) String() string { 215 var sb strings.Builder 216 217 sb.WriteString("(branchesrepos") 218 219 for _, br := range q.List { 220 if size := br.Repos.GetCardinality(); size > 1 { 221 sb.WriteString(" " + br.Branch + ":" + strconv.FormatUint(size, 10)) 222 } else { 223 sb.WriteString(" " + br.Branch + "=" + br.Repos.String()) 224 } 225 } 226 227 sb.WriteString(")") 228 return sb.String() 229} 230 231// NewRepoIDs is a helper for creating a RepoIDs which 232// searches only the matched repos. 233func NewRepoIDs(ids ...uint32) *RepoIDs { 234 return &RepoIDs{Repos: roaring.BitmapOf(ids...)} 235} 236 237func (q *RepoIDs) String() string { 238 var sb strings.Builder 239 240 sb.WriteString("(repoids ") 241 242 if size := q.Repos.GetCardinality(); size > 1 { 243 sb.WriteString("count:" + strconv.FormatUint(size, 10)) 244 } else { 245 sb.WriteString("repoid=" + q.Repos.String()) 246 } 247 248 sb.WriteString(")") 249 return sb.String() 250} 251 252// MarshalBinary implements a specialized encoder for BranchesRepos. 253func (q BranchesRepos) MarshalBinary() ([]byte, error) { 254 return branchesReposEncode(q.List) 255} 256 257// UnmarshalBinary implements a specialized decoder for BranchesRepos. 258func (q *BranchesRepos) UnmarshalBinary(b []byte) (err error) { 259 q.List, err = branchesReposDecode(b) 260 return err 261} 262 263// BranchRepos is a (branch, sourcegraph repo ids bitmap) tuple. It is a 264// Sourcegraph addition. 265type BranchRepos struct { 266 Branch string 267 Repos *roaring.Bitmap 268} 269 270// Similar to BranchRepos but will be used to match only by repoid and 271// therefore matches all branches 272type RepoIDs struct { 273 Repos *roaring.Bitmap 274} 275 276// RepoSet is a list of repos to match. It is a Sourcegraph addition and only 277// used in the RPC interface for efficient checking of large repo lists. 278type RepoSet struct { 279 Set map[string]bool 280} 281 282func (q *RepoSet) String() string { 283 var detail string 284 if len(q.Set) > 5 { 285 // Large sets being output are not useful 286 detail = fmt.Sprintf("size=%d", len(q.Set)) 287 } else { 288 repos := make([]string, len(q.Set)) 289 i := 0 290 for repo := range q.Set { 291 repos[i] = repo 292 i++ 293 } 294 sort.Strings(repos) 295 detail = strings.Join(repos, " ") 296 } 297 return fmt.Sprintf("(reposet %s)", detail) 298} 299 300func NewRepoSet(repo ...string) *RepoSet { 301 s := &RepoSet{Set: make(map[string]bool)} 302 for _, r := range repo { 303 s.Set[r] = true 304 } 305 return s 306} 307 308// FileNameSet is a list of file names to match. It is a Sourcegraph addition 309// and only used in the RPC interface for efficient checking of large file 310// lists. 311type FileNameSet struct { 312 Set map[string]struct{} 313} 314 315// MarshalBinary implements a specialized encoder for FileNameSet. 316func (q *FileNameSet) MarshalBinary() ([]byte, error) { 317 return stringSetEncode(q.Set) 318} 319 320// UnmarshalBinary implements a specialized decoder for FileNameSet. 321func (q *FileNameSet) UnmarshalBinary(b []byte) error { 322 var err error 323 q.Set, err = stringSetDecode(b) 324 return err 325} 326 327func (q *FileNameSet) String() string { 328 var detail string 329 if len(q.Set) > 5 { 330 // Large sets being output are not useful 331 detail = fmt.Sprintf("size=%d", len(q.Set)) 332 } else { 333 values := make([]string, 0, len(q.Set)) 334 for v := range q.Set { 335 values = append(values, v) 336 } 337 sort.Strings(values) 338 detail = strings.Join(values, " ") 339 } 340 return fmt.Sprintf("(filenameset %s)", detail) 341} 342 343func NewFileNameSet(fileNames ...string) *FileNameSet { 344 s := &FileNameSet{Set: make(map[string]struct{})} 345 for _, r := range fileNames { 346 s.Set[r] = struct{}{} 347 } 348 return s 349} 350 351const ( 352 TypeFileMatch uint8 = iota 353 TypeFileName 354 TypeRepo 355) 356 357// Type changes the result type returned. 358type Type struct { 359 Child Q 360 Type uint8 361} 362 363func (q *Type) String() string { 364 switch q.Type { 365 case TypeFileMatch: 366 return fmt.Sprintf("(type:filematch %s)", q.Child) 367 case TypeFileName: 368 return fmt.Sprintf("(type:filename %s)", q.Child) 369 case TypeRepo: 370 return fmt.Sprintf("(type:repo %s)", q.Child) 371 default: 372 return fmt.Sprintf("(type:UNKNOWN %s)", q.Child) 373 } 374} 375 376// Boost scales the contribution to score of descendents. 377type Boost struct { 378 Child Q 379 // Boost will multiply the score of its descendents. Values less than 1 will 380 // give less importance while values greater than 1 will give more 381 // importance. 382 Boost float64 383} 384 385func (q *Boost) String() string { 386 return fmt.Sprintf("(boost %0.2f %s)", q.Boost, q.Child) 387} 388 389// Substring is the most basic query: a query for a substring. 390type Substring struct { 391 Pattern string 392 CaseSensitive bool 393 394 // Match only filename 395 FileName bool 396 397 // Match only content 398 Content bool 399} 400 401func (q *Substring) String() string { 402 s := "" 403 404 t := "" 405 if q.FileName { 406 t = "file_" 407 } else if q.Content { 408 t = "content_" 409 } 410 411 s += fmt.Sprintf("%ssubstr:%q", t, q.Pattern) 412 if q.CaseSensitive { 413 s = "case_" + s 414 } 415 return s 416} 417 418type setCaser interface { 419 setCase(string) 420} 421 422func (q *Substring) setCase(k string) { 423 switch k { 424 case "yes": 425 q.CaseSensitive = true 426 case "no": 427 q.CaseSensitive = false 428 case "auto": 429 // TODO - unicode 430 q.CaseSensitive = (q.Pattern != string(toLower([]byte(q.Pattern)))) 431 } 432} 433 434func (q *Symbol) setCase(k string) { 435 if sc, ok := q.Expr.(setCaser); ok { 436 sc.setCase(k) 437 } 438} 439 440func (q *Regexp) setCase(k string) { 441 switch k { 442 case "yes": 443 q.CaseSensitive = true 444 case "no": 445 q.CaseSensitive = false 446 case "auto": 447 q.CaseSensitive = !q.Regexp.Equal(LowerRegexp(q.Regexp)) 448 } 449} 450 451// Or is matched when any of its children is matched. 452type Or struct { 453 Children []Q 454} 455 456func (q *Or) String() string { 457 var sub []string 458 for _, ch := range q.Children { 459 sub = append(sub, ch.String()) 460 } 461 return fmt.Sprintf("(or %s)", strings.Join(sub, " ")) 462} 463 464// Not inverts the meaning of its child. 465type Not struct { 466 Child Q 467} 468 469func (q *Not) String() string { 470 return fmt.Sprintf("(not %s)", q.Child) 471} 472 473// And is matched when all its children are. 474type And struct { 475 Children []Q 476} 477 478func (q *And) String() string { 479 var sub []string 480 for _, ch := range q.Children { 481 sub = append(sub, ch.String()) 482 } 483 return fmt.Sprintf("(and %s)", strings.Join(sub, " ")) 484} 485 486// NewAnd is syntactic sugar for constructing And queries. 487func NewAnd(qs ...Q) Q { 488 return &And{Children: qs} 489} 490 491// NewOr is syntactic sugar for constructing Or queries. 492func NewOr(qs ...Q) Q { 493 return &Or{Children: qs} 494} 495 496// Branch limits search to a specific branch. 497type Branch struct { 498 Pattern string 499 500 // exact is true if we want to Pattern to equal branch. 501 Exact bool 502} 503 504func (q *Branch) String() string { 505 if q.Exact { 506 return fmt.Sprintf("branch=%q", q.Pattern) 507 } 508 return fmt.Sprintf("branch:%q", q.Pattern) 509} 510 511func queryChildren(q Q) []Q { 512 switch s := q.(type) { 513 case *And: 514 return s.Children 515 case *Or: 516 return s.Children 517 } 518 return nil 519} 520 521func flattenAndOr(children []Q, typ Q) ([]Q, bool) { 522 var flat []Q 523 changed := false 524 for _, ch := range children { 525 ch, subChanged := flatten(ch) 526 changed = changed || subChanged 527 if reflect.TypeOf(ch) == reflect.TypeOf(typ) { 528 changed = true 529 subChildren := queryChildren(ch) 530 if subChildren != nil { 531 flat = append(flat, subChildren...) 532 } 533 } else { 534 flat = append(flat, ch) 535 } 536 } 537 538 return flat, changed 539} 540 541// (and (and x y) z) => (and x y z) , the same for "or" 542func flatten(q Q) (Q, bool) { 543 switch s := q.(type) { 544 case *And: 545 if len(s.Children) == 1 { 546 return s.Children[0], true 547 } 548 flatChildren, changed := flattenAndOr(s.Children, s) 549 return &And{flatChildren}, changed 550 case *Or: 551 if len(s.Children) == 1 { 552 return s.Children[0], true 553 } 554 flatChildren, changed := flattenAndOr(s.Children, s) 555 return &Or{flatChildren}, changed 556 case *Not: 557 child, changed := flatten(s.Child) 558 return &Not{child}, changed 559 case *Type: 560 child, changed := flatten(s.Child) 561 return &Type{Child: child, Type: s.Type}, changed 562 case *Boost: 563 child, changed := flatten(s.Child) 564 return &Boost{Child: child, Boost: s.Boost}, changed 565 default: 566 return q, false 567 } 568} 569 570func mapQueryList(qs []Q, f func(Q) Q) []Q { 571 neg := make([]Q, len(qs)) 572 for i, sub := range qs { 573 neg[i] = Map(sub, f) 574 } 575 return neg 576} 577 578func invertConst(q Q) Q { 579 c, ok := q.(*Const) 580 if ok { 581 return &Const{!c.Value} 582 } 583 return q 584} 585 586func evalAndOrConstants(q Q, children []Q) Q { 587 _, isAnd := q.(*And) 588 589 children = mapQueryList(children, evalConstants) 590 591 newCH := children[:0] 592 for _, ch := range children { 593 c, ok := ch.(*Const) 594 if ok { 595 if c.Value == isAnd { 596 continue 597 } else { 598 return ch 599 } 600 } 601 newCH = append(newCH, ch) 602 } 603 if len(newCH) == 0 { 604 return &Const{isAnd} 605 } 606 if isAnd { 607 return &And{newCH} 608 } 609 return &Or{newCH} 610} 611 612func evalConstants(q Q) Q { 613 switch s := q.(type) { 614 case *And: 615 return evalAndOrConstants(q, s.Children) 616 case *Or: 617 return evalAndOrConstants(q, s.Children) 618 case *Not: 619 ch := evalConstants(s.Child) 620 if _, ok := ch.(*Const); ok { 621 return invertConst(ch) 622 } 623 return &Not{ch} 624 case *Type: 625 ch := evalConstants(s.Child) 626 if _, ok := ch.(*Const); ok { 627 // If q is the root query, then evaluating this to a const changes 628 // the type of result we will return. However, the only case this 629 // makes sense is `type:repo TRUE` to return all repos or 630 // `type:file TRUE` to return all filenames. For other cases we 631 // want to do this constant folding though, so we allow the 632 // unexpected behaviour mentioned previously. 633 return ch 634 } 635 return &Type{Child: ch, Type: s.Type} 636 case *Boost: 637 ch := evalConstants(s.Child) 638 if _, ok := ch.(*Const); ok { 639 return ch 640 } 641 return &Boost{Boost: s.Boost, Child: ch} 642 case *Substring: 643 if len(s.Pattern) == 0 { 644 return &Const{true} 645 } 646 case *Regexp: 647 if s.Regexp.Op == syntax.OpEmptyMatch { 648 return &Const{true} 649 } 650 case *Branch: 651 if s.Pattern == "" { 652 return &Const{true} 653 } 654 case *BranchesRepos: 655 for _, br := range s.List { 656 if !br.Repos.IsEmpty() { 657 return q 658 } 659 } 660 return &Const{false} 661 case *RepoIDs: 662 if s.Repos.IsEmpty() { 663 return &Const{false} 664 } 665 case *RepoSet: 666 if len(s.Set) == 0 { 667 return &Const{false} 668 } 669 case *FileNameSet: 670 if len(s.Set) == 0 { 671 return &Const{false} 672 } 673 } 674 return q 675} 676 677func Simplify(q Q) Q { 678 q = evalConstants(q) 679 for { 680 var changed bool 681 q, changed = flatten(q) 682 if !changed { 683 break 684 } 685 } 686 687 return q 688} 689 690// Map runs f over the q. 691func Map(q Q, f func(q Q) Q) Q { 692 switch s := q.(type) { 693 case *And: 694 q = &And{Children: mapQueryList(s.Children, f)} 695 case *Or: 696 q = &Or{Children: mapQueryList(s.Children, f)} 697 case *Not: 698 q = &Not{Child: Map(s.Child, f)} 699 case *Type: 700 q = &Type{Type: s.Type, Child: Map(s.Child, f)} 701 case *Boost: 702 q = &Boost{Boost: s.Boost, Child: Map(s.Child, f)} 703 } 704 return f(q) 705} 706 707// Expand expands Substr queries into (OR file_substr content_substr) 708// queries, and the same for Regexp queries.. 709func ExpandFileContent(q Q) Q { 710 switch s := q.(type) { 711 case *Substring: 712 if s.FileName == s.Content { 713 f := *s 714 f.FileName = true 715 f.Content = false 716 c := *s 717 c.FileName = false 718 c.Content = true 719 return NewOr(&f, &c) 720 } 721 case *Regexp: 722 if s.FileName == s.Content { 723 f := *s 724 f.FileName = true 725 f.Content = false 726 c := *s 727 c.FileName = false 728 c.Content = true 729 return NewOr(&f, &c) 730 } 731 } 732 return q 733} 734 735// VisitAtoms runs `v` on all atom queries within `q`. 736func VisitAtoms(q Q, v func(q Q)) { 737 Map(q, func(iQ Q) Q { 738 switch iQ.(type) { 739 case *And: 740 case *Or: 741 case *Not: 742 case *Type: 743 case *Boost: 744 default: 745 v(iQ) 746 } 747 return iQ 748 }) 749}