fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package query 16 17import ( 18 "bytes" 19 "encoding/gob" 20 "encoding/json" 21 "fmt" 22 "log" 23 "reflect" 24 "regexp/syntax" 25 "sort" 26 "strconv" 27 "strings" 28 "sync" 29 30 "github.com/RoaringBitmap/roaring" 31 "github.com/grafana/regexp" 32 "github.com/sourcegraph/zoekt/internal/syntaxutil" 33) 34 35var _ = log.Println 36 37// Q is a representation for a possibly hierarchical search query. 38type Q interface { 39 String() string 40} 41 42// RPCUnwrap processes q to remove RPC specific elements from q. This is 43// needed because gob isn't flexible enough for us. This should be called by 44// RPC servers at the client/server boundary so that q works with the rest of 45// zoekt. 46func RPCUnwrap(q Q) Q { 47 if cache, ok := q.(*GobCache); ok { 48 return cache.Q 49 } 50 return q 51} 52 53// RawConfig filters repositories based on their encoded RawConfig map. 54type RawConfig uint64 55 56const ( 57 RcOnlyPublic RawConfig = 1 58 RcOnlyPrivate RawConfig = 2 59 RcOnlyForks RawConfig = 1 << 2 60 RcNoForks RawConfig = 2 << 2 61 RcOnlyArchived RawConfig = 1 << 4 62 RcNoArchived RawConfig = 2 << 4 63) 64 65var flagNames = []struct { 66 Mask RawConfig 67 Label string 68}{ 69 {RcOnlyPublic, "RcOnlyPublic"}, 70 {RcOnlyPrivate, "RcOnlyPrivate"}, 71 {RcOnlyForks, "RcOnlyForks"}, 72 {RcNoForks, "RcNoForks"}, 73 {RcOnlyArchived, "RcOnlyArchived"}, 74 {RcNoArchived, "RcNoArchived"}, 75} 76 77func (r RawConfig) String() string { 78 var s []string 79 for _, fn := range flagNames { 80 if r&fn.Mask != 0 { 81 s = append(s, fn.Label) 82 } 83 } 84 return fmt.Sprintf("rawConfig:%s", strings.Join(s, "|")) 85} 86 87// RegexpQuery is a query looking for regular expressions matches. 88type Regexp struct { 89 Regexp *syntax.Regexp 90 FileName bool 91 Content bool 92 CaseSensitive bool 93} 94 95func (q *Regexp) String() string { 96 pref := "" 97 if q.FileName { 98 pref = "file_" 99 } 100 if q.CaseSensitive { 101 pref = "case_" + pref 102 } 103 return fmt.Sprintf("%sregex:%q", pref, syntaxutil.RegexpString(q.Regexp)) 104} 105 106// gobRegexp wraps Regexp to make it gob-encodable/decodable. Regexp contains syntax.Regexp, which 107// contains slices/arrays with possibly nil elements, which gob doesn't support 108// (https://github.com/golang/go/issues/1501). 109type gobRegexp struct { 110 Regexp // Regexp.Regexp (*syntax.Regexp) is set to nil and its string is set in RegexpString 111 RegexpString string 112} 113 114// GobEncode implements gob.Encoder. 115func (q Regexp) GobEncode() ([]byte, error) { 116 gobq := gobRegexp{Regexp: q, RegexpString: syntaxutil.RegexpString(q.Regexp)} 117 gobq.Regexp.Regexp = nil // can't be gob-encoded/decoded 118 return json.Marshal(gobq) 119} 120 121// GobDecode implements gob.Decoder. 122func (q *Regexp) GobDecode(data []byte) error { 123 var gobq gobRegexp 124 err := json.Unmarshal(data, &gobq) 125 if err != nil { 126 return err 127 } 128 gobq.Regexp.Regexp, err = syntax.Parse(gobq.RegexpString, regexpFlags) 129 if err != nil { 130 return err 131 } 132 *q = gobq.Regexp 133 return nil 134} 135 136// Symbol finds a string that is a symbol. 137type Symbol struct { 138 Expr Q 139} 140 141func (s *Symbol) String() string { 142 return fmt.Sprintf("sym:%s", s.Expr) 143} 144 145type caseQ struct { 146 Flavor string 147} 148 149func (c *caseQ) String() string { 150 return "case:" + c.Flavor 151} 152 153type Language struct { 154 Language string 155} 156 157func (l *Language) String() string { 158 return "lang:" + l.Language 159} 160 161type Const struct { 162 Value bool 163} 164 165func (q *Const) String() string { 166 if q.Value { 167 return "TRUE" 168 } 169 return "FALSE" 170} 171 172type Repo struct { 173 Regexp *regexp.Regexp 174} 175 176func (q *Repo) String() string { 177 return fmt.Sprintf("repo:%s", q.Regexp.String()) 178} 179 180func (q Repo) GobEncode() ([]byte, error) { 181 return []byte(q.Regexp.String()), nil 182} 183 184func (q *Repo) GobDecode(data []byte) error { 185 var err error 186 q.Regexp, err = regexp.Compile(string(data)) 187 return err 188} 189 190// RepoRegexp is a Sourcegraph addition which searches documents where the 191// repository name matches Regexp. 192type RepoRegexp struct { 193 Regexp *regexp.Regexp 194} 195 196func (q *RepoRegexp) String() string { 197 return fmt.Sprintf("reporegex:%q", q.Regexp.String()) 198} 199 200// GobEncode implements gob.Encoder. 201func (q *RepoRegexp) GobEncode() ([]byte, error) { 202 // gob can't encode syntax.Regexp 203 return []byte(q.Regexp.String()), nil 204} 205 206// GobDecode implements gob.Decoder. 207func (q *RepoRegexp) GobDecode(data []byte) error { 208 var err error 209 q.Regexp, err = regexp.Compile(string(data)) 210 return err 211} 212 213// BranchesRepos is a slice of BranchRepos to match. It is a Sourcegraph 214// addition and only used in the RPC interface for efficient checking of large 215// repo lists. 216type BranchesRepos struct { 217 List []BranchRepos 218} 219 220// NewSingleBranchesRepos is a helper for creating a BranchesRepos which 221// searches a single branch. 222func NewSingleBranchesRepos(branch string, ids ...uint32) *BranchesRepos { 223 return &BranchesRepos{List: []BranchRepos{ 224 {branch, roaring.BitmapOf(ids...)}, 225 }} 226} 227 228func (q *BranchesRepos) String() string { 229 var sb strings.Builder 230 231 sb.WriteString("(branchesrepos") 232 233 for _, br := range q.List { 234 if size := br.Repos.GetCardinality(); size > 1 { 235 sb.WriteString(" " + br.Branch + ":" + strconv.FormatUint(size, 10)) 236 } else { 237 sb.WriteString(" " + br.Branch + "=" + br.Repos.String()) 238 } 239 } 240 241 sb.WriteString(")") 242 return sb.String() 243} 244 245// NewRepoIDs is a helper for creating a RepoIDs which 246// searches only the matched repos. 247func NewRepoIDs(ids ...uint32) *RepoIDs { 248 return &RepoIDs{Repos: roaring.BitmapOf(ids...)} 249} 250 251func (q *RepoIDs) String() string { 252 var sb strings.Builder 253 254 sb.WriteString("(repoids ") 255 256 if size := q.Repos.GetCardinality(); size > 1 { 257 sb.WriteString("count:" + strconv.FormatUint(size, 10)) 258 } else { 259 sb.WriteString("repoid=" + q.Repos.String()) 260 } 261 262 sb.WriteString(")") 263 return sb.String() 264} 265 266// MarshalBinary implements a specialized encoder for BranchesRepos. 267func (q BranchesRepos) MarshalBinary() ([]byte, error) { 268 return branchesReposEncode(q.List) 269} 270 271// UnmarshalBinary implements a specialized decoder for BranchesRepos. 272func (q *BranchesRepos) UnmarshalBinary(b []byte) (err error) { 273 q.List, err = branchesReposDecode(b) 274 return err 275} 276 277// BranchRepos is a (branch, sourcegraph repo ids bitmap) tuple. It is a 278// Sourcegraph addition. 279type BranchRepos struct { 280 Branch string 281 Repos *roaring.Bitmap 282} 283 284// Similar to BranchRepos but will be used to match only by repoid and 285// therefore matches all branches 286type RepoIDs struct { 287 Repos *roaring.Bitmap 288} 289 290// RepoSet is a list of repos to match. It is a Sourcegraph addition and only 291// used in the RPC interface for efficient checking of large repo lists. 292type RepoSet struct { 293 Set map[string]bool 294} 295 296func (q *RepoSet) String() string { 297 var detail string 298 if len(q.Set) > 5 { 299 // Large sets being output are not useful 300 detail = fmt.Sprintf("size=%d", len(q.Set)) 301 } else { 302 repos := make([]string, len(q.Set)) 303 i := 0 304 for repo := range q.Set { 305 repos[i] = repo 306 i++ 307 } 308 sort.Strings(repos) 309 detail = strings.Join(repos, " ") 310 } 311 return fmt.Sprintf("(reposet %s)", detail) 312} 313 314func NewRepoSet(repo ...string) *RepoSet { 315 s := &RepoSet{Set: make(map[string]bool)} 316 for _, r := range repo { 317 s.Set[r] = true 318 } 319 return s 320} 321 322// FileNameSet is a list of file names to match. It is a Sourcegraph addition 323// and only used in the RPC interface for efficient checking of large file 324// lists. 325type FileNameSet struct { 326 Set map[string]struct{} 327} 328 329// MarshalBinary implements a specialized encoder for FileNameSet. 330func (q *FileNameSet) MarshalBinary() ([]byte, error) { 331 return stringSetEncode(q.Set) 332} 333 334// UnmarshalBinary implements a specialized decoder for FileNameSet. 335func (q *FileNameSet) UnmarshalBinary(b []byte) error { 336 var err error 337 q.Set, err = stringSetDecode(b) 338 return err 339} 340 341func (q *FileNameSet) String() string { 342 var detail string 343 if len(q.Set) > 5 { 344 // Large sets being output are not useful 345 detail = fmt.Sprintf("size=%d", len(q.Set)) 346 } else { 347 values := make([]string, 0, len(q.Set)) 348 for v := range q.Set { 349 values = append(values, v) 350 } 351 sort.Strings(values) 352 detail = strings.Join(values, " ") 353 } 354 return fmt.Sprintf("(filenameset %s)", detail) 355} 356 357func NewFileNameSet(fileNames ...string) *FileNameSet { 358 s := &FileNameSet{Set: make(map[string]struct{})} 359 for _, r := range fileNames { 360 s.Set[r] = struct{}{} 361 } 362 return s 363} 364 365const ( 366 TypeFileMatch uint8 = iota 367 TypeFileName 368 TypeRepo 369) 370 371// Type changes the result type returned. 372type Type struct { 373 Child Q 374 Type uint8 375} 376 377func (q *Type) String() string { 378 switch q.Type { 379 case TypeFileMatch: 380 return fmt.Sprintf("(type:filematch %s)", q.Child) 381 case TypeFileName: 382 return fmt.Sprintf("(type:filename %s)", q.Child) 383 case TypeRepo: 384 return fmt.Sprintf("(type:repo %s)", q.Child) 385 default: 386 return fmt.Sprintf("(type:UNKNOWN %s)", q.Child) 387 } 388} 389 390// Boost scales the contribution to score of descendents. 391type Boost struct { 392 Child Q 393 // Boost will multiply the score of its descendents. Values less than 1 will 394 // give less importance while values greater than 1 will give more 395 // importance. 396 Boost float64 397} 398 399func (q *Boost) String() string { 400 return fmt.Sprintf("(boost %0.2f %s)", q.Boost, q.Child) 401} 402 403// Substring is the most basic query: a query for a substring. 404type Substring struct { 405 Pattern string 406 CaseSensitive bool 407 408 // Match only filename 409 FileName bool 410 411 // Match only content 412 Content bool 413} 414 415func (q *Substring) String() string { 416 s := "" 417 418 t := "" 419 if q.FileName { 420 t = "file_" 421 } else if q.Content { 422 t = "content_" 423 } 424 425 s += fmt.Sprintf("%ssubstr:%q", t, q.Pattern) 426 if q.CaseSensitive { 427 s = "case_" + s 428 } 429 return s 430} 431 432type setCaser interface { 433 setCase(string) 434} 435 436func (q *Substring) setCase(k string) { 437 switch k { 438 case "yes": 439 q.CaseSensitive = true 440 case "no": 441 q.CaseSensitive = false 442 case "auto": 443 // TODO - unicode 444 q.CaseSensitive = (q.Pattern != string(toLower([]byte(q.Pattern)))) 445 } 446} 447 448func (q *Symbol) setCase(k string) { 449 if sc, ok := q.Expr.(setCaser); ok { 450 sc.setCase(k) 451 } 452} 453 454func (q *Regexp) setCase(k string) { 455 switch k { 456 case "yes": 457 q.CaseSensitive = true 458 case "no": 459 q.CaseSensitive = false 460 case "auto": 461 q.CaseSensitive = !q.Regexp.Equal(LowerRegexp(q.Regexp)) 462 } 463} 464 465// GobCache exists so we only pay the cost of marshalling a query once when we 466// aggregate it out over all the replicas. 467// 468// Our query and eval layer do not support GobCache. Instead, at the gob 469// boundaries (RPC and Streaming) we check if the Q is a GobCache and unwrap 470// it. 471// 472// "I wish we could get rid of this code soon enough" - tomas 473type GobCache struct { 474 Q 475 476 once sync.Once 477 data []byte 478 err error 479} 480 481// GobEncode implements gob.Encoder. 482func (q *GobCache) GobEncode() ([]byte, error) { 483 q.once.Do(func() { 484 var buf bytes.Buffer 485 enc := gob.NewEncoder(&buf) 486 q.err = enc.Encode(&gobWrapper{ 487 WrappedQ: q.Q, 488 }) 489 q.data = buf.Bytes() 490 }) 491 return q.data, q.err 492} 493 494// GobDecode implements gob.Decoder. 495func (q *GobCache) GobDecode(data []byte) error { 496 dec := gob.NewDecoder(bytes.NewBuffer(data)) 497 var w gobWrapper 498 err := dec.Decode(&w) 499 if err != nil { 500 return err 501 } 502 q.Q = w.WrappedQ 503 return nil 504} 505 506// gobWrapper is needed so the gob decoder works. 507type gobWrapper struct { 508 WrappedQ Q 509} 510 511func (q *GobCache) String() string { 512 return fmt.Sprintf("GobCache(%s)", q.Q) 513} 514 515// Or is matched when any of its children is matched. 516type Or struct { 517 Children []Q 518} 519 520func (q *Or) String() string { 521 var sub []string 522 for _, ch := range q.Children { 523 sub = append(sub, ch.String()) 524 } 525 return fmt.Sprintf("(or %s)", strings.Join(sub, " ")) 526} 527 528// Not inverts the meaning of its child. 529type Not struct { 530 Child Q 531} 532 533func (q *Not) String() string { 534 return fmt.Sprintf("(not %s)", q.Child) 535} 536 537// And is matched when all its children are. 538type And struct { 539 Children []Q 540} 541 542func (q *And) String() string { 543 var sub []string 544 for _, ch := range q.Children { 545 sub = append(sub, ch.String()) 546 } 547 return fmt.Sprintf("(and %s)", strings.Join(sub, " ")) 548} 549 550// NewAnd is syntactic sugar for constructing And queries. 551func NewAnd(qs ...Q) Q { 552 return &And{Children: qs} 553} 554 555// NewOr is syntactic sugar for constructing Or queries. 556func NewOr(qs ...Q) Q { 557 return &Or{Children: qs} 558} 559 560// Branch limits search to a specific branch. 561type Branch struct { 562 Pattern string 563 564 // exact is true if we want to Pattern to equal branch. 565 Exact bool 566} 567 568func (q *Branch) String() string { 569 if q.Exact { 570 return fmt.Sprintf("branch=%q", q.Pattern) 571 } 572 return fmt.Sprintf("branch:%q", q.Pattern) 573} 574 575func queryChildren(q Q) []Q { 576 switch s := q.(type) { 577 case *And: 578 return s.Children 579 case *Or: 580 return s.Children 581 } 582 return nil 583} 584 585func flattenAndOr(children []Q, typ Q) ([]Q, bool) { 586 var flat []Q 587 changed := false 588 for _, ch := range children { 589 ch, subChanged := flatten(ch) 590 changed = changed || subChanged 591 if reflect.TypeOf(ch) == reflect.TypeOf(typ) { 592 changed = true 593 subChildren := queryChildren(ch) 594 if subChildren != nil { 595 flat = append(flat, subChildren...) 596 } 597 } else { 598 flat = append(flat, ch) 599 } 600 } 601 602 return flat, changed 603} 604 605// (and (and x y) z) => (and x y z) , the same for "or" 606func flatten(q Q) (Q, bool) { 607 switch s := q.(type) { 608 case *And: 609 if len(s.Children) == 1 { 610 return s.Children[0], true 611 } 612 flatChildren, changed := flattenAndOr(s.Children, s) 613 return &And{flatChildren}, changed 614 case *Or: 615 if len(s.Children) == 1 { 616 return s.Children[0], true 617 } 618 flatChildren, changed := flattenAndOr(s.Children, s) 619 return &Or{flatChildren}, changed 620 case *Not: 621 child, changed := flatten(s.Child) 622 return &Not{child}, changed 623 case *Type: 624 child, changed := flatten(s.Child) 625 return &Type{Child: child, Type: s.Type}, changed 626 case *Boost: 627 child, changed := flatten(s.Child) 628 return &Boost{Child: child, Boost: s.Boost}, changed 629 default: 630 return q, false 631 } 632} 633 634func mapQueryList(qs []Q, f func(Q) Q) []Q { 635 neg := make([]Q, len(qs)) 636 for i, sub := range qs { 637 neg[i] = Map(sub, f) 638 } 639 return neg 640} 641 642func invertConst(q Q) Q { 643 c, ok := q.(*Const) 644 if ok { 645 return &Const{!c.Value} 646 } 647 return q 648} 649 650func evalAndOrConstants(q Q, children []Q) Q { 651 _, isAnd := q.(*And) 652 653 children = mapQueryList(children, evalConstants) 654 655 newCH := children[:0] 656 for _, ch := range children { 657 c, ok := ch.(*Const) 658 if ok { 659 if c.Value == isAnd { 660 continue 661 } else { 662 return ch 663 } 664 } 665 newCH = append(newCH, ch) 666 } 667 if len(newCH) == 0 { 668 return &Const{isAnd} 669 } 670 if isAnd { 671 return &And{newCH} 672 } 673 return &Or{newCH} 674} 675 676func evalConstants(q Q) Q { 677 switch s := q.(type) { 678 case *And: 679 return evalAndOrConstants(q, s.Children) 680 case *Or: 681 return evalAndOrConstants(q, s.Children) 682 case *Not: 683 ch := evalConstants(s.Child) 684 if _, ok := ch.(*Const); ok { 685 return invertConst(ch) 686 } 687 return &Not{ch} 688 case *Type: 689 ch := evalConstants(s.Child) 690 if _, ok := ch.(*Const); ok { 691 // If q is the root query, then evaluating this to a const changes 692 // the type of result we will return. However, the only case this 693 // makes sense is `type:repo TRUE` to return all repos or 694 // `type:file TRUE` to return all filenames. For other cases we 695 // want to do this constant folding though, so we allow the 696 // unexpected behaviour mentioned previously. 697 return ch 698 } 699 return &Type{Child: ch, Type: s.Type} 700 case *Boost: 701 ch := evalConstants(s.Child) 702 if _, ok := ch.(*Const); ok { 703 return ch 704 } 705 return &Boost{Boost: s.Boost, Child: ch} 706 case *Substring: 707 if len(s.Pattern) == 0 { 708 return &Const{true} 709 } 710 case *Regexp: 711 if s.Regexp.Op == syntax.OpEmptyMatch { 712 return &Const{true} 713 } 714 case *Branch: 715 if s.Pattern == "" { 716 return &Const{true} 717 } 718 case *RepoSet: 719 if len(s.Set) == 0 { 720 return &Const{false} 721 } 722 case *FileNameSet: 723 if len(s.Set) == 0 { 724 return &Const{false} 725 } 726 } 727 return q 728} 729 730func Simplify(q Q) Q { 731 q = evalConstants(q) 732 for { 733 var changed bool 734 q, changed = flatten(q) 735 if !changed { 736 break 737 } 738 } 739 740 return q 741} 742 743// Map runs f over the q. 744func Map(q Q, f func(q Q) Q) Q { 745 switch s := q.(type) { 746 case *And: 747 q = &And{Children: mapQueryList(s.Children, f)} 748 case *Or: 749 q = &Or{Children: mapQueryList(s.Children, f)} 750 case *Not: 751 q = &Not{Child: Map(s.Child, f)} 752 case *Type: 753 q = &Type{Type: s.Type, Child: Map(s.Child, f)} 754 case *Boost: 755 q = &Boost{Boost: s.Boost, Child: Map(s.Child, f)} 756 } 757 return f(q) 758} 759 760// Expand expands Substr queries into (OR file_substr content_substr) 761// queries, and the same for Regexp queries.. 762func ExpandFileContent(q Q) Q { 763 switch s := q.(type) { 764 case *Substring: 765 if s.FileName == s.Content { 766 f := *s 767 f.FileName = true 768 f.Content = false 769 c := *s 770 c.FileName = false 771 c.Content = true 772 return NewOr(&f, &c) 773 } 774 case *Regexp: 775 if s.FileName == s.Content { 776 f := *s 777 f.FileName = true 778 f.Content = false 779 c := *s 780 c.FileName = false 781 c.Content = true 782 return NewOr(&f, &c) 783 } 784 } 785 return q 786} 787 788// VisitAtoms runs `v` on all atom queries within `q`. 789func VisitAtoms(q Q, v func(q Q)) { 790 Map(q, func(iQ Q) Q { 791 switch iQ.(type) { 792 case *And: 793 case *Or: 794 case *Not: 795 case *Type: 796 case *Boost: 797 default: 798 v(iQ) 799 } 800 return iQ 801 }) 802}