fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package query 16 17import ( 18 "bytes" 19 "encoding/gob" 20 "encoding/json" 21 "fmt" 22 "log" 23 "reflect" 24 "regexp/syntax" 25 "sort" 26 "strconv" 27 "strings" 28 "sync" 29 30 "github.com/RoaringBitmap/roaring" 31 "github.com/grafana/regexp" 32) 33 34var _ = log.Println 35 36// Q is a representation for a possibly hierarchical search query. 37type Q interface { 38 String() string 39} 40 41// RPCUnwrap processes q to remove RPC specific elements from q. This is 42// needed because gob isn't flexible enough for us. This should be called by 43// RPC servers at the client/server boundary so that q works with the rest of 44// zoekt. 45func RPCUnwrap(q Q) Q { 46 if cache, ok := q.(*GobCache); ok { 47 return cache.Q 48 } 49 return q 50} 51 52// RawConfig filters repositories based on their encoded RawConfig map. 53type RawConfig uint64 54 55const ( 56 RcOnlyPublic RawConfig = 1 57 RcOnlyPrivate RawConfig = 2 58 RcOnlyForks RawConfig = 1 << 2 59 RcNoForks RawConfig = 2 << 2 60 RcOnlyArchived RawConfig = 1 << 4 61 RcNoArchived RawConfig = 2 << 4 62) 63 64var flagNames = []struct { 65 Mask RawConfig 66 Label string 67}{ 68 {RcOnlyPublic, "RcOnlyPublic"}, 69 {RcOnlyPrivate, "RcOnlyPrivate"}, 70 {RcOnlyForks, "RcOnlyForks"}, 71 {RcNoForks, "RcNoForks"}, 72 {RcOnlyArchived, "RcOnlyArchived"}, 73 {RcNoArchived, "RcNoArchived"}, 74} 75 76func (r RawConfig) String() string { 77 var s []string 78 for _, fn := range flagNames { 79 if r&fn.Mask != 0 { 80 s = append(s, fn.Label) 81 } 82 } 83 return fmt.Sprintf("rawConfig:%s", strings.Join(s, "|")) 84} 85 86// RegexpQuery is a query looking for regular expressions matches. 87type Regexp struct { 88 Regexp *syntax.Regexp 89 FileName bool 90 Content bool 91 CaseSensitive bool 92} 93 94func (q *Regexp) String() string { 95 pref := "" 96 if q.FileName { 97 pref = "file_" 98 } 99 if q.CaseSensitive { 100 pref = "case_" + pref 101 } 102 return fmt.Sprintf("%sregex:%q", pref, q.Regexp.String()) 103} 104 105// gobRegexp wraps Regexp to make it gob-encodable/decodable. Regexp contains syntax.Regexp, which 106// contains slices/arrays with possibly nil elements, which gob doesn't support 107// (https://github.com/golang/go/issues/1501). 108type gobRegexp struct { 109 Regexp // Regexp.Regexp (*syntax.Regexp) is set to nil and its string is set in RegexpString 110 RegexpString string 111} 112 113// GobEncode implements gob.Encoder. 114func (q Regexp) GobEncode() ([]byte, error) { 115 gobq := gobRegexp{Regexp: q, RegexpString: q.Regexp.String()} 116 gobq.Regexp.Regexp = nil // can't be gob-encoded/decoded 117 return json.Marshal(gobq) 118} 119 120// GobDecode implements gob.Decoder. 121func (q *Regexp) GobDecode(data []byte) error { 122 var gobq gobRegexp 123 err := json.Unmarshal(data, &gobq) 124 if err != nil { 125 return err 126 } 127 gobq.Regexp.Regexp, err = syntax.Parse(gobq.RegexpString, regexpFlags) 128 if err != nil { 129 return err 130 } 131 *q = gobq.Regexp 132 return nil 133} 134 135// Symbol finds a string that is a symbol. 136type Symbol struct { 137 Expr Q 138} 139 140func (s *Symbol) String() string { 141 return fmt.Sprintf("sym:%s", s.Expr) 142} 143 144type caseQ struct { 145 Flavor string 146} 147 148func (c *caseQ) String() string { 149 return "case:" + c.Flavor 150} 151 152type Language struct { 153 Language string 154} 155 156func (l *Language) String() string { 157 return "lang:" + l.Language 158} 159 160type Const struct { 161 Value bool 162} 163 164func (q *Const) String() string { 165 if q.Value { 166 return "TRUE" 167 } 168 return "FALSE" 169} 170 171type Repo struct { 172 Regexp *regexp.Regexp 173} 174 175func (q *Repo) String() string { 176 return fmt.Sprintf("repo:%s", q.Regexp.String()) 177} 178 179func (q Repo) GobEncode() ([]byte, error) { 180 return []byte(q.Regexp.String()), nil 181} 182 183func (q *Repo) GobDecode(data []byte) error { 184 var err error 185 q.Regexp, err = regexp.Compile(string(data)) 186 return err 187} 188 189// RepoRegexp is a Sourcegraph addition which searches documents where the 190// repository name matches Regexp. 191type RepoRegexp struct { 192 Regexp *regexp.Regexp 193} 194 195func (q *RepoRegexp) String() string { 196 return fmt.Sprintf("reporegex:%q", q.Regexp.String()) 197} 198 199// GobEncode implements gob.Encoder. 200func (q *RepoRegexp) GobEncode() ([]byte, error) { 201 // gob can't encode syntax.Regexp 202 return []byte(q.Regexp.String()), nil 203} 204 205// GobDecode implements gob.Decoder. 206func (q *RepoRegexp) GobDecode(data []byte) error { 207 var err error 208 q.Regexp, err = regexp.Compile(string(data)) 209 return err 210} 211 212// BranchesRepos is a slice of BranchRepos to match. It is a Sourcegraph 213// addition and only used in the RPC interface for efficient checking of large 214// repo lists. 215type BranchesRepos struct { 216 List []BranchRepos 217} 218 219// NewSingleBranchesRepos is a helper for creating a BranchesRepos which 220// searches a single branch. 221func NewSingleBranchesRepos(branch string, ids ...uint32) *BranchesRepos { 222 return &BranchesRepos{List: []BranchRepos{ 223 {branch, roaring.BitmapOf(ids...)}, 224 }} 225} 226 227func (q *BranchesRepos) String() string { 228 var sb strings.Builder 229 230 sb.WriteString("(branchesrepos") 231 232 for _, br := range q.List { 233 if size := br.Repos.GetCardinality(); size > 1 { 234 sb.WriteString(" " + br.Branch + ":" + strconv.FormatUint(size, 10)) 235 } else { 236 sb.WriteString(" " + br.Branch + "=" + br.Repos.String()) 237 } 238 } 239 240 sb.WriteString(")") 241 return sb.String() 242} 243 244// NewRepoIDs is a helper for creating a RepoIDs which 245// searches only the matched repos. 246func NewRepoIDs(ids ...uint32) *RepoIDs { 247 return &RepoIDs{Repos: roaring.BitmapOf(ids...)} 248} 249 250func (q *RepoIDs) String() string { 251 var sb strings.Builder 252 253 sb.WriteString("(repoids ") 254 255 if size := q.Repos.GetCardinality(); size > 1 { 256 sb.WriteString("count:" + strconv.FormatUint(size, 10)) 257 } else { 258 sb.WriteString("repoid=" + q.Repos.String()) 259 } 260 261 sb.WriteString(")") 262 return sb.String() 263} 264 265// MarshalBinary implements a specialized encoder for BranchesRepos. 266func (q BranchesRepos) MarshalBinary() ([]byte, error) { 267 return branchesReposEncode(q.List) 268} 269 270// UnmarshalBinary implements a specialized decoder for BranchesRepos. 271func (q *BranchesRepos) UnmarshalBinary(b []byte) (err error) { 272 q.List, err = branchesReposDecode(b) 273 return err 274} 275 276// BranchRepos is a (branch, sourcegraph repo ids bitmap) tuple. It is a 277// Sourcegraph addition. 278type BranchRepos struct { 279 Branch string 280 Repos *roaring.Bitmap 281} 282 283// Similar to BranchRepos but will be used to match only by repoid and 284// therefore matches all branches 285type RepoIDs struct { 286 Repos *roaring.Bitmap 287} 288 289// RepoSet is a list of repos to match. It is a Sourcegraph addition and only 290// used in the RPC interface for efficient checking of large repo lists. 291type RepoSet struct { 292 Set map[string]bool 293} 294 295func (q *RepoSet) String() string { 296 var detail string 297 if len(q.Set) > 5 { 298 // Large sets being output are not useful 299 detail = fmt.Sprintf("size=%d", len(q.Set)) 300 } else { 301 repos := make([]string, len(q.Set)) 302 i := 0 303 for repo := range q.Set { 304 repos[i] = repo 305 i++ 306 } 307 sort.Strings(repos) 308 detail = strings.Join(repos, " ") 309 } 310 return fmt.Sprintf("(reposet %s)", detail) 311} 312 313func NewRepoSet(repo ...string) *RepoSet { 314 s := &RepoSet{Set: make(map[string]bool)} 315 for _, r := range repo { 316 s.Set[r] = true 317 } 318 return s 319} 320 321// FileNameSet is a list of file names to match. It is a Sourcegraph addition 322// and only used in the RPC interface for efficient checking of large file 323// lists. 324type FileNameSet struct { 325 Set map[string]struct{} 326} 327 328// MarshalBinary implements a specialized encoder for FileNameSet. 329func (q *FileNameSet) MarshalBinary() ([]byte, error) { 330 return stringSetEncode(q.Set) 331} 332 333// UnmarshalBinary implements a specialized decoder for FileNameSet. 334func (q *FileNameSet) UnmarshalBinary(b []byte) error { 335 var err error 336 q.Set, err = stringSetDecode(b) 337 return err 338} 339 340func (q *FileNameSet) String() string { 341 var detail string 342 if len(q.Set) > 5 { 343 // Large sets being output are not useful 344 detail = fmt.Sprintf("size=%d", len(q.Set)) 345 } else { 346 values := make([]string, 0, len(q.Set)) 347 for v := range q.Set { 348 values = append(values, v) 349 } 350 sort.Strings(values) 351 detail = strings.Join(values, " ") 352 } 353 return fmt.Sprintf("(filenameset %s)", detail) 354} 355 356func NewFileNameSet(fileNames ...string) *FileNameSet { 357 s := &FileNameSet{Set: make(map[string]struct{})} 358 for _, r := range fileNames { 359 s.Set[r] = struct{}{} 360 } 361 return s 362} 363 364const ( 365 TypeFileMatch uint8 = iota 366 TypeFileName 367 TypeRepo 368) 369 370// Type changes the result type returned. 371type Type struct { 372 Child Q 373 Type uint8 374} 375 376func (q *Type) String() string { 377 switch q.Type { 378 case TypeFileMatch: 379 return fmt.Sprintf("(type:filematch %s)", q.Child) 380 case TypeFileName: 381 return fmt.Sprintf("(type:filename %s)", q.Child) 382 case TypeRepo: 383 return fmt.Sprintf("(type:repo %s)", q.Child) 384 default: 385 return fmt.Sprintf("(type:UNKNOWN %s)", q.Child) 386 } 387} 388 389// Boost scales the contribution to score of descendents. 390type Boost struct { 391 Child Q 392 // Boost will multiply the score of its descendents. Values less than 1 will 393 // give less importance while values greater than 1 will give more 394 // importance. 395 Boost float64 396} 397 398func (q *Boost) String() string { 399 return fmt.Sprintf("(boost %0.2f %s)", q.Boost, q.Child) 400} 401 402// Substring is the most basic query: a query for a substring. 403type Substring struct { 404 Pattern string 405 CaseSensitive bool 406 407 // Match only filename 408 FileName bool 409 410 // Match only content 411 Content bool 412} 413 414func (q *Substring) String() string { 415 s := "" 416 417 t := "" 418 if q.FileName { 419 t = "file_" 420 } else if q.Content { 421 t = "content_" 422 } 423 424 s += fmt.Sprintf("%ssubstr:%q", t, q.Pattern) 425 if q.CaseSensitive { 426 s = "case_" + s 427 } 428 return s 429} 430 431type setCaser interface { 432 setCase(string) 433} 434 435func (q *Substring) setCase(k string) { 436 switch k { 437 case "yes": 438 q.CaseSensitive = true 439 case "no": 440 q.CaseSensitive = false 441 case "auto": 442 // TODO - unicode 443 q.CaseSensitive = (q.Pattern != string(toLower([]byte(q.Pattern)))) 444 } 445} 446 447func (q *Symbol) setCase(k string) { 448 if sc, ok := q.Expr.(setCaser); ok { 449 sc.setCase(k) 450 } 451} 452 453func (q *Regexp) setCase(k string) { 454 switch k { 455 case "yes": 456 q.CaseSensitive = true 457 case "no": 458 q.CaseSensitive = false 459 case "auto": 460 q.CaseSensitive = (q.Regexp.String() != LowerRegexp(q.Regexp).String()) 461 } 462} 463 464// GobCache exists so we only pay the cost of marshalling a query once when we 465// aggregate it out over all the replicas. 466// 467// Our query and eval layer do not support GobCache. Instead, at the gob 468// boundaries (RPC and Streaming) we check if the Q is a GobCache and unwrap 469// it. 470// 471// "I wish we could get rid of this code soon enough" - tomas 472type GobCache struct { 473 Q 474 475 once sync.Once 476 data []byte 477 err error 478} 479 480// GobEncode implements gob.Encoder. 481func (q *GobCache) GobEncode() ([]byte, error) { 482 q.once.Do(func() { 483 var buf bytes.Buffer 484 enc := gob.NewEncoder(&buf) 485 q.err = enc.Encode(&gobWrapper{ 486 WrappedQ: q.Q, 487 }) 488 q.data = buf.Bytes() 489 }) 490 return q.data, q.err 491} 492 493// GobDecode implements gob.Decoder. 494func (q *GobCache) GobDecode(data []byte) error { 495 dec := gob.NewDecoder(bytes.NewBuffer(data)) 496 var w gobWrapper 497 err := dec.Decode(&w) 498 if err != nil { 499 return err 500 } 501 q.Q = w.WrappedQ 502 return nil 503} 504 505// gobWrapper is needed so the gob decoder works. 506type gobWrapper struct { 507 WrappedQ Q 508} 509 510func (q *GobCache) String() string { 511 return fmt.Sprintf("GobCache(%s)", q.Q) 512} 513 514// Or is matched when any of its children is matched. 515type Or struct { 516 Children []Q 517} 518 519func (q *Or) String() string { 520 var sub []string 521 for _, ch := range q.Children { 522 sub = append(sub, ch.String()) 523 } 524 return fmt.Sprintf("(or %s)", strings.Join(sub, " ")) 525} 526 527// Not inverts the meaning of its child. 528type Not struct { 529 Child Q 530} 531 532func (q *Not) String() string { 533 return fmt.Sprintf("(not %s)", q.Child) 534} 535 536// And is matched when all its children are. 537type And struct { 538 Children []Q 539} 540 541func (q *And) String() string { 542 var sub []string 543 for _, ch := range q.Children { 544 sub = append(sub, ch.String()) 545 } 546 return fmt.Sprintf("(and %s)", strings.Join(sub, " ")) 547} 548 549// NewAnd is syntactic sugar for constructing And queries. 550func NewAnd(qs ...Q) Q { 551 return &And{Children: qs} 552} 553 554// NewOr is syntactic sugar for constructing Or queries. 555func NewOr(qs ...Q) Q { 556 return &Or{Children: qs} 557} 558 559// Branch limits search to a specific branch. 560type Branch struct { 561 Pattern string 562 563 // exact is true if we want to Pattern to equal branch. 564 Exact bool 565} 566 567func (q *Branch) String() string { 568 if q.Exact { 569 return fmt.Sprintf("branch=%q", q.Pattern) 570 } 571 return fmt.Sprintf("branch:%q", q.Pattern) 572} 573 574func queryChildren(q Q) []Q { 575 switch s := q.(type) { 576 case *And: 577 return s.Children 578 case *Or: 579 return s.Children 580 } 581 return nil 582} 583 584func flattenAndOr(children []Q, typ Q) ([]Q, bool) { 585 var flat []Q 586 changed := false 587 for _, ch := range children { 588 ch, subChanged := flatten(ch) 589 changed = changed || subChanged 590 if reflect.TypeOf(ch) == reflect.TypeOf(typ) { 591 changed = true 592 subChildren := queryChildren(ch) 593 if subChildren != nil { 594 flat = append(flat, subChildren...) 595 } 596 } else { 597 flat = append(flat, ch) 598 } 599 } 600 601 return flat, changed 602} 603 604// (and (and x y) z) => (and x y z) , the same for "or" 605func flatten(q Q) (Q, bool) { 606 switch s := q.(type) { 607 case *And: 608 if len(s.Children) == 1 { 609 return s.Children[0], true 610 } 611 flatChildren, changed := flattenAndOr(s.Children, s) 612 return &And{flatChildren}, changed 613 case *Or: 614 if len(s.Children) == 1 { 615 return s.Children[0], true 616 } 617 flatChildren, changed := flattenAndOr(s.Children, s) 618 return &Or{flatChildren}, changed 619 case *Not: 620 child, changed := flatten(s.Child) 621 return &Not{child}, changed 622 case *Type: 623 child, changed := flatten(s.Child) 624 return &Type{Child: child, Type: s.Type}, changed 625 case *Boost: 626 child, changed := flatten(s.Child) 627 return &Boost{Child: child, Boost: s.Boost}, changed 628 default: 629 return q, false 630 } 631} 632 633func mapQueryList(qs []Q, f func(Q) Q) []Q { 634 neg := make([]Q, len(qs)) 635 for i, sub := range qs { 636 neg[i] = Map(sub, f) 637 } 638 return neg 639} 640 641func invertConst(q Q) Q { 642 c, ok := q.(*Const) 643 if ok { 644 return &Const{!c.Value} 645 } 646 return q 647} 648 649func evalAndOrConstants(q Q, children []Q) Q { 650 _, isAnd := q.(*And) 651 652 children = mapQueryList(children, evalConstants) 653 654 newCH := children[:0] 655 for _, ch := range children { 656 c, ok := ch.(*Const) 657 if ok { 658 if c.Value == isAnd { 659 continue 660 } else { 661 return ch 662 } 663 } 664 newCH = append(newCH, ch) 665 } 666 if len(newCH) == 0 { 667 return &Const{isAnd} 668 } 669 if isAnd { 670 return &And{newCH} 671 } 672 return &Or{newCH} 673} 674 675func evalConstants(q Q) Q { 676 switch s := q.(type) { 677 case *And: 678 return evalAndOrConstants(q, s.Children) 679 case *Or: 680 return evalAndOrConstants(q, s.Children) 681 case *Not: 682 ch := evalConstants(s.Child) 683 if _, ok := ch.(*Const); ok { 684 return invertConst(ch) 685 } 686 return &Not{ch} 687 case *Type: 688 ch := evalConstants(s.Child) 689 if _, ok := ch.(*Const); ok { 690 // If q is the root query, then evaluating this to a const changes 691 // the type of result we will return. However, the only case this 692 // makes sense is `type:repo TRUE` to return all repos or 693 // `type:file TRUE` to return all filenames. For other cases we 694 // want to do this constant folding though, so we allow the 695 // unexpected behaviour mentioned previously. 696 return ch 697 } 698 return &Type{Child: ch, Type: s.Type} 699 case *Boost: 700 ch := evalConstants(s.Child) 701 if _, ok := ch.(*Const); ok { 702 return ch 703 } 704 return &Boost{Boost: s.Boost, Child: ch} 705 case *Substring: 706 if len(s.Pattern) == 0 { 707 return &Const{true} 708 } 709 case *Regexp: 710 if s.Regexp.Op == syntax.OpEmptyMatch { 711 return &Const{true} 712 } 713 case *Branch: 714 if s.Pattern == "" { 715 return &Const{true} 716 } 717 case *RepoSet: 718 if len(s.Set) == 0 { 719 return &Const{false} 720 } 721 case *FileNameSet: 722 if len(s.Set) == 0 { 723 return &Const{false} 724 } 725 } 726 return q 727} 728 729func Simplify(q Q) Q { 730 q = evalConstants(q) 731 for { 732 var changed bool 733 q, changed = flatten(q) 734 if !changed { 735 break 736 } 737 } 738 739 return q 740} 741 742// Map runs f over the q. 743func Map(q Q, f func(q Q) Q) Q { 744 switch s := q.(type) { 745 case *And: 746 q = &And{Children: mapQueryList(s.Children, f)} 747 case *Or: 748 q = &Or{Children: mapQueryList(s.Children, f)} 749 case *Not: 750 q = &Not{Child: Map(s.Child, f)} 751 case *Type: 752 q = &Type{Type: s.Type, Child: Map(s.Child, f)} 753 case *Boost: 754 q = &Boost{Boost: s.Boost, Child: Map(s.Child, f)} 755 } 756 return f(q) 757} 758 759// Expand expands Substr queries into (OR file_substr content_substr) 760// queries, and the same for Regexp queries.. 761func ExpandFileContent(q Q) Q { 762 switch s := q.(type) { 763 case *Substring: 764 if s.FileName == s.Content { 765 f := *s 766 f.FileName = true 767 f.Content = false 768 c := *s 769 c.FileName = false 770 c.Content = true 771 return NewOr(&f, &c) 772 } 773 case *Regexp: 774 if s.FileName == s.Content { 775 f := *s 776 f.FileName = true 777 f.Content = false 778 c := *s 779 c.FileName = false 780 c.Content = true 781 return NewOr(&f, &c) 782 } 783 } 784 return q 785} 786 787// VisitAtoms runs `v` on all atom queries within `q`. 788func VisitAtoms(q Q, v func(q Q)) { 789 Map(q, func(iQ Q) Q { 790 switch iQ.(type) { 791 case *And: 792 case *Or: 793 case *Not: 794 case *Type: 795 case *Boost: 796 default: 797 v(iQ) 798 } 799 return iQ 800 }) 801}