fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package query 16 17import ( 18 "bytes" 19 "encoding/gob" 20 "encoding/json" 21 "fmt" 22 "log" 23 "reflect" 24 "regexp/syntax" 25 "sort" 26 "strconv" 27 "strings" 28 "sync" 29 30 "github.com/RoaringBitmap/roaring" 31 "github.com/grafana/regexp" 32) 33 34var _ = log.Println 35 36// Q is a representation for a possibly hierarchical search query. 37type Q interface { 38 String() string 39} 40 41// RPCUnwrap processes q to remove RPC specific elements from q. This is 42// needed because gob isn't flexible enough for us. This should be called by 43// RPC servers at the client/server boundary so that q works with the rest of 44// zoekt. 45func RPCUnwrap(q Q) Q { 46 if cache, ok := q.(*GobCache); ok { 47 return cache.Q 48 } 49 return q 50} 51 52// RawConfig filters repositories based on their encoded RawConfig map. 53type RawConfig uint64 54 55const ( 56 RcOnlyPublic RawConfig = 1 57 RcOnlyPrivate RawConfig = 2 58 RcOnlyForks RawConfig = 1 << 2 59 RcNoForks RawConfig = 2 << 2 60 RcOnlyArchived RawConfig = 1 << 4 61 RcNoArchived RawConfig = 2 << 4 62) 63 64var flagNames = []struct { 65 Mask RawConfig 66 Label string 67}{ 68 {RcOnlyPublic, "RcOnlyPublic"}, 69 {RcOnlyPrivate, "RcOnlyPrivate"}, 70 {RcOnlyForks, "RcOnlyForks"}, 71 {RcNoForks, "RcNoForks"}, 72 {RcOnlyArchived, "RcOnlyArchived"}, 73 {RcNoArchived, "RcNoArchived"}, 74} 75 76func (r RawConfig) String() string { 77 var s []string 78 for _, fn := range flagNames { 79 if r&fn.Mask != 0 { 80 s = append(s, fn.Label) 81 } 82 } 83 return fmt.Sprintf("rawConfig:%s", strings.Join(s, "|")) 84} 85 86// RegexpQuery is a query looking for regular expressions matches. 87type Regexp struct { 88 Regexp *syntax.Regexp 89 FileName bool 90 Content bool 91 CaseSensitive bool 92} 93 94func (q *Regexp) String() string { 95 pref := "" 96 if q.FileName { 97 pref = "file_" 98 } 99 if q.CaseSensitive { 100 pref = "case_" + pref 101 } 102 return fmt.Sprintf("%sregex:%q", pref, q.Regexp.String()) 103} 104 105// gobRegexp wraps Regexp to make it gob-encodable/decodable. Regexp contains syntax.Regexp, which 106// contains slices/arrays with possibly nil elements, which gob doesn't support 107// (https://github.com/golang/go/issues/1501). 108type gobRegexp struct { 109 Regexp // Regexp.Regexp (*syntax.Regexp) is set to nil and its string is set in RegexpString 110 RegexpString string 111} 112 113// GobEncode implements gob.Encoder. 114func (q Regexp) GobEncode() ([]byte, error) { 115 gobq := gobRegexp{Regexp: q, RegexpString: q.Regexp.String()} 116 gobq.Regexp.Regexp = nil // can't be gob-encoded/decoded 117 return json.Marshal(gobq) 118} 119 120// GobDecode implements gob.Decoder. 121func (q *Regexp) GobDecode(data []byte) error { 122 var gobq gobRegexp 123 err := json.Unmarshal(data, &gobq) 124 if err != nil { 125 return err 126 } 127 gobq.Regexp.Regexp, err = syntax.Parse(gobq.RegexpString, regexpFlags) 128 if err != nil { 129 return err 130 } 131 *q = gobq.Regexp 132 return nil 133} 134 135// Symbol finds a string that is a symbol. 136type Symbol struct { 137 Expr Q 138} 139 140func (s *Symbol) String() string { 141 return fmt.Sprintf("sym:%s", s.Expr) 142} 143 144type caseQ struct { 145 Flavor string 146} 147 148func (c *caseQ) String() string { 149 return "case:" + c.Flavor 150} 151 152type Language struct { 153 Language string 154} 155 156func (l *Language) String() string { 157 return "lang:" + l.Language 158} 159 160type Const struct { 161 Value bool 162} 163 164func (q *Const) String() string { 165 if q.Value { 166 return "TRUE" 167 } 168 return "FALSE" 169} 170 171type Repo struct { 172 Regexp *regexp.Regexp 173} 174 175func (q *Repo) String() string { 176 return fmt.Sprintf("repo:%s", q.Regexp.String()) 177} 178 179func (q Repo) GobEncode() ([]byte, error) { 180 return []byte(q.Regexp.String()), nil 181} 182 183func (q *Repo) GobDecode(data []byte) error { 184 var err error 185 q.Regexp, err = regexp.Compile(string(data)) 186 return err 187} 188 189// RepoRegexp is a Sourcegraph addition which searches documents where the 190// repository name matches Regexp. 191type RepoRegexp struct { 192 Regexp *regexp.Regexp 193} 194 195func (q *RepoRegexp) String() string { 196 return fmt.Sprintf("reporegex:%q", q.Regexp.String()) 197} 198 199// GobEncode implements gob.Encoder. 200func (q *RepoRegexp) GobEncode() ([]byte, error) { 201 // gob can't encode syntax.Regexp 202 return []byte(q.Regexp.String()), nil 203} 204 205// GobDecode implements gob.Decoder. 206func (q *RepoRegexp) GobDecode(data []byte) error { 207 var err error 208 q.Regexp, err = regexp.Compile(string(data)) 209 return err 210} 211 212// BranchesRepos is a slice of BranchRepos to match. It is a Sourcegraph 213// addition and only used in the RPC interface for efficient checking of large 214// repo lists. 215type BranchesRepos struct { 216 List []BranchRepos 217} 218 219// NewSingleBranchesRepos is a helper for creating a BranchesRepos which 220// searches a single branch. 221func NewSingleBranchesRepos(branch string, ids ...uint32) *BranchesRepos { 222 return &BranchesRepos{List: []BranchRepos{ 223 {branch, roaring.BitmapOf(ids...)}, 224 }} 225} 226 227func (q *BranchesRepos) String() string { 228 var sb strings.Builder 229 230 sb.WriteString("(branchesrepos") 231 232 for _, br := range q.List { 233 if size := br.Repos.GetCardinality(); size > 1 { 234 sb.WriteString(" " + br.Branch + ":" + strconv.FormatUint(size, 10)) 235 } else { 236 sb.WriteString(" " + br.Branch + "=" + br.Repos.String()) 237 } 238 } 239 240 sb.WriteString(")") 241 return sb.String() 242} 243 244// NewRepoIDs is a helper for creating a RepoIDs which 245// searches only the matched repos. 246func NewRepoIDs(ids ...uint32) *RepoIDs { 247 return &RepoIDs{Repos: roaring.BitmapOf(ids...)} 248} 249 250func (q *RepoIDs) String() string { 251 var sb strings.Builder 252 253 sb.WriteString("(repoids ") 254 255 if size := q.Repos.GetCardinality(); size > 1 { 256 sb.WriteString("count:" + strconv.FormatUint(size, 10)) 257 } else { 258 sb.WriteString("repoid=" + q.Repos.String()) 259 } 260 261 sb.WriteString(")") 262 return sb.String() 263} 264 265// MarshalBinary implements a specialized encoder for BranchesRepos. 266func (q BranchesRepos) MarshalBinary() ([]byte, error) { 267 return branchesReposEncode(q.List) 268} 269 270// UnmarshalBinary implements a specialized decoder for BranchesRepos. 271func (q *BranchesRepos) UnmarshalBinary(b []byte) (err error) { 272 q.List, err = branchesReposDecode(b) 273 return err 274} 275 276// BranchRepos is a (branch, sourcegraph repo ids bitmap) tuple. It is a 277// Sourcegraph addition. 278type BranchRepos struct { 279 Branch string 280 Repos *roaring.Bitmap 281} 282 283// Similar to BranchRepos but will be used to match only by repoid and 284// therefore matches all branches 285type RepoIDs struct { 286 Repos *roaring.Bitmap 287} 288 289// RepoSet is a list of repos to match. It is a Sourcegraph addition and only 290// used in the RPC interface for efficient checking of large repo lists. 291type RepoSet struct { 292 Set map[string]bool 293} 294 295func (q *RepoSet) String() string { 296 var detail string 297 if len(q.Set) > 5 { 298 // Large sets being output are not useful 299 detail = fmt.Sprintf("size=%d", len(q.Set)) 300 } else { 301 repos := make([]string, len(q.Set)) 302 i := 0 303 for repo := range q.Set { 304 repos[i] = repo 305 i++ 306 } 307 sort.Strings(repos) 308 detail = strings.Join(repos, " ") 309 } 310 return fmt.Sprintf("(reposet %s)", detail) 311} 312 313func NewRepoSet(repo ...string) *RepoSet { 314 s := &RepoSet{Set: make(map[string]bool)} 315 for _, r := range repo { 316 s.Set[r] = true 317 } 318 return s 319} 320 321// FileNameSet is a list of file names to match. It is a Sourcegraph addition 322// and only used in the RPC interface for efficient checking of large file 323// lists. 324type FileNameSet struct { 325 Set map[string]struct{} 326} 327 328// MarshalBinary implements a specialized encoder for FileNameSet. 329func (q *FileNameSet) MarshalBinary() ([]byte, error) { 330 return stringSetEncode(q.Set) 331} 332 333// UnmarshalBinary implements a specialized decoder for FileNameSet. 334func (q *FileNameSet) UnmarshalBinary(b []byte) error { 335 var err error 336 q.Set, err = stringSetDecode(b) 337 return err 338} 339 340func (q *FileNameSet) String() string { 341 var detail string 342 if len(q.Set) > 5 { 343 // Large sets being output are not useful 344 detail = fmt.Sprintf("size=%d", len(q.Set)) 345 } else { 346 values := make([]string, 0, len(q.Set)) 347 for v := range q.Set { 348 values = append(values, v) 349 } 350 sort.Strings(values) 351 detail = strings.Join(values, " ") 352 } 353 return fmt.Sprintf("(filenameset %s)", detail) 354} 355 356func NewFileNameSet(fileNames ...string) *FileNameSet { 357 s := &FileNameSet{Set: make(map[string]struct{})} 358 for _, r := range fileNames { 359 s.Set[r] = struct{}{} 360 } 361 return s 362} 363 364const ( 365 TypeFileMatch uint8 = iota 366 TypeFileName 367 TypeRepo 368) 369 370// Type changes the result type returned. 371type Type struct { 372 Child Q 373 Type uint8 374} 375 376func (q *Type) String() string { 377 switch q.Type { 378 case TypeFileMatch: 379 return fmt.Sprintf("(type:filematch %s)", q.Child) 380 case TypeFileName: 381 return fmt.Sprintf("(type:filename %s)", q.Child) 382 case TypeRepo: 383 return fmt.Sprintf("(type:repo %s)", q.Child) 384 default: 385 return fmt.Sprintf("(type:UNKNOWN %s)", q.Child) 386 } 387} 388 389// Substring is the most basic query: a query for a substring. 390type Substring struct { 391 Pattern string 392 CaseSensitive bool 393 394 // Match only filename 395 FileName bool 396 397 // Match only content 398 Content bool 399} 400 401func (q *Substring) String() string { 402 s := "" 403 404 t := "" 405 if q.FileName { 406 t = "file_" 407 } else if q.Content { 408 t = "content_" 409 } 410 411 s += fmt.Sprintf("%ssubstr:%q", t, q.Pattern) 412 if q.CaseSensitive { 413 s = "case_" + s 414 } 415 return s 416} 417 418type setCaser interface { 419 setCase(string) 420} 421 422func (q *Substring) setCase(k string) { 423 switch k { 424 case "yes": 425 q.CaseSensitive = true 426 case "no": 427 q.CaseSensitive = false 428 case "auto": 429 // TODO - unicode 430 q.CaseSensitive = (q.Pattern != string(toLower([]byte(q.Pattern)))) 431 } 432} 433 434func (q *Symbol) setCase(k string) { 435 if sc, ok := q.Expr.(setCaser); ok { 436 sc.setCase(k) 437 } 438} 439 440func (q *Regexp) setCase(k string) { 441 switch k { 442 case "yes": 443 q.CaseSensitive = true 444 case "no": 445 q.CaseSensitive = false 446 case "auto": 447 q.CaseSensitive = (q.Regexp.String() != LowerRegexp(q.Regexp).String()) 448 } 449} 450 451// GobCache exists so we only pay the cost of marshalling a query once when we 452// aggregate it out over all the replicas. 453// 454// Our query and eval layer do not support GobCache. Instead, at the gob 455// boundaries (RPC and Streaming) we check if the Q is a GobCache and unwrap 456// it. 457// 458// "I wish we could get rid of this code soon enough" - tomas 459type GobCache struct { 460 Q 461 462 once sync.Once 463 data []byte 464 err error 465} 466 467// GobEncode implements gob.Encoder. 468func (q *GobCache) GobEncode() ([]byte, error) { 469 q.once.Do(func() { 470 var buf bytes.Buffer 471 enc := gob.NewEncoder(&buf) 472 q.err = enc.Encode(&gobWrapper{ 473 WrappedQ: q.Q, 474 }) 475 q.data = buf.Bytes() 476 }) 477 return q.data, q.err 478} 479 480// GobDecode implements gob.Decoder. 481func (q *GobCache) GobDecode(data []byte) error { 482 dec := gob.NewDecoder(bytes.NewBuffer(data)) 483 var w gobWrapper 484 err := dec.Decode(&w) 485 if err != nil { 486 return err 487 } 488 q.Q = w.WrappedQ 489 return nil 490} 491 492// gobWrapper is needed so the gob decoder works. 493type gobWrapper struct { 494 WrappedQ Q 495} 496 497func (q *GobCache) String() string { 498 return fmt.Sprintf("GobCache(%s)", q.Q) 499} 500 501// Or is matched when any of its children is matched. 502type Or struct { 503 Children []Q 504} 505 506func (q *Or) String() string { 507 var sub []string 508 for _, ch := range q.Children { 509 sub = append(sub, ch.String()) 510 } 511 return fmt.Sprintf("(or %s)", strings.Join(sub, " ")) 512} 513 514// Not inverts the meaning of its child. 515type Not struct { 516 Child Q 517} 518 519func (q *Not) String() string { 520 return fmt.Sprintf("(not %s)", q.Child) 521} 522 523// And is matched when all its children are. 524type And struct { 525 Children []Q 526} 527 528func (q *And) String() string { 529 var sub []string 530 for _, ch := range q.Children { 531 sub = append(sub, ch.String()) 532 } 533 return fmt.Sprintf("(and %s)", strings.Join(sub, " ")) 534} 535 536// NewAnd is syntactic sugar for constructing And queries. 537func NewAnd(qs ...Q) Q { 538 return &And{Children: qs} 539} 540 541// NewOr is syntactic sugar for constructing Or queries. 542func NewOr(qs ...Q) Q { 543 return &Or{Children: qs} 544} 545 546// Branch limits search to a specific branch. 547type Branch struct { 548 Pattern string 549 550 // exact is true if we want to Pattern to equal branch. 551 Exact bool 552} 553 554func (q *Branch) String() string { 555 if q.Exact { 556 return fmt.Sprintf("branch=%q", q.Pattern) 557 } 558 return fmt.Sprintf("branch:%q", q.Pattern) 559} 560 561func queryChildren(q Q) []Q { 562 switch s := q.(type) { 563 case *And: 564 return s.Children 565 case *Or: 566 return s.Children 567 } 568 return nil 569} 570 571func flattenAndOr(children []Q, typ Q) ([]Q, bool) { 572 var flat []Q 573 changed := false 574 for _, ch := range children { 575 ch, subChanged := flatten(ch) 576 changed = changed || subChanged 577 if reflect.TypeOf(ch) == reflect.TypeOf(typ) { 578 changed = true 579 subChildren := queryChildren(ch) 580 if subChildren != nil { 581 flat = append(flat, subChildren...) 582 } 583 } else { 584 flat = append(flat, ch) 585 } 586 } 587 588 return flat, changed 589} 590 591// (and (and x y) z) => (and x y z) , the same for "or" 592func flatten(q Q) (Q, bool) { 593 switch s := q.(type) { 594 case *And: 595 if len(s.Children) == 1 { 596 return s.Children[0], true 597 } 598 flatChildren, changed := flattenAndOr(s.Children, s) 599 return &And{flatChildren}, changed 600 case *Or: 601 if len(s.Children) == 1 { 602 return s.Children[0], true 603 } 604 flatChildren, changed := flattenAndOr(s.Children, s) 605 return &Or{flatChildren}, changed 606 case *Not: 607 child, changed := flatten(s.Child) 608 return &Not{child}, changed 609 case *Type: 610 child, changed := flatten(s.Child) 611 return &Type{Child: child, Type: s.Type}, changed 612 default: 613 return q, false 614 } 615} 616 617func mapQueryList(qs []Q, f func(Q) Q) []Q { 618 neg := make([]Q, len(qs)) 619 for i, sub := range qs { 620 neg[i] = Map(sub, f) 621 } 622 return neg 623} 624 625func invertConst(q Q) Q { 626 c, ok := q.(*Const) 627 if ok { 628 return &Const{!c.Value} 629 } 630 return q 631} 632 633func evalAndOrConstants(q Q, children []Q) Q { 634 _, isAnd := q.(*And) 635 636 children = mapQueryList(children, evalConstants) 637 638 newCH := children[:0] 639 for _, ch := range children { 640 c, ok := ch.(*Const) 641 if ok { 642 if c.Value == isAnd { 643 continue 644 } else { 645 return ch 646 } 647 } 648 newCH = append(newCH, ch) 649 } 650 if len(newCH) == 0 { 651 return &Const{isAnd} 652 } 653 if isAnd { 654 return &And{newCH} 655 } 656 return &Or{newCH} 657} 658 659func evalConstants(q Q) Q { 660 switch s := q.(type) { 661 case *And: 662 return evalAndOrConstants(q, s.Children) 663 case *Or: 664 return evalAndOrConstants(q, s.Children) 665 case *Not: 666 ch := evalConstants(s.Child) 667 if _, ok := ch.(*Const); ok { 668 return invertConst(ch) 669 } 670 return &Not{ch} 671 case *Type: 672 ch := evalConstants(s.Child) 673 if _, ok := ch.(*Const); ok { 674 // If q is the root query, then evaluating this to a const changes 675 // the type of result we will return. However, the only case this 676 // makes sense is `type:repo TRUE` to return all repos or 677 // `type:file TRUE` to return all filenames. For other cases we 678 // want to do this constant folding though, so we allow the 679 // unexpected behaviour mentioned previously. 680 return ch 681 } 682 return &Type{Child: ch, Type: s.Type} 683 case *Substring: 684 if len(s.Pattern) == 0 { 685 return &Const{true} 686 } 687 case *Regexp: 688 if s.Regexp.Op == syntax.OpEmptyMatch { 689 return &Const{true} 690 } 691 case *Branch: 692 if s.Pattern == "" { 693 return &Const{true} 694 } 695 case *RepoSet: 696 if len(s.Set) == 0 { 697 return &Const{false} 698 } 699 case *FileNameSet: 700 if len(s.Set) == 0 { 701 return &Const{false} 702 } 703 } 704 return q 705} 706 707func Simplify(q Q) Q { 708 q = evalConstants(q) 709 for { 710 var changed bool 711 q, changed = flatten(q) 712 if !changed { 713 break 714 } 715 } 716 717 return q 718} 719 720// Map runs f over the q. 721func Map(q Q, f func(q Q) Q) Q { 722 switch s := q.(type) { 723 case *And: 724 q = &And{Children: mapQueryList(s.Children, f)} 725 case *Or: 726 q = &Or{Children: mapQueryList(s.Children, f)} 727 case *Not: 728 q = &Not{Child: Map(s.Child, f)} 729 case *Type: 730 q = &Type{Type: s.Type, Child: Map(s.Child, f)} 731 } 732 return f(q) 733} 734 735// Expand expands Substr queries into (OR file_substr content_substr) 736// queries, and the same for Regexp queries.. 737func ExpandFileContent(q Q) Q { 738 switch s := q.(type) { 739 case *Substring: 740 if s.FileName == s.Content { 741 f := *s 742 f.FileName = true 743 f.Content = false 744 c := *s 745 c.FileName = false 746 c.Content = true 747 return NewOr(&f, &c) 748 } 749 case *Regexp: 750 if s.FileName == s.Content { 751 f := *s 752 f.FileName = true 753 f.Content = false 754 c := *s 755 c.FileName = false 756 c.Content = true 757 return NewOr(&f, &c) 758 } 759 } 760 return q 761} 762 763// VisitAtoms runs `v` on all atom queries within `q`. 764func VisitAtoms(q Q, v func(q Q)) { 765 Map(q, func(iQ Q) Q { 766 switch iQ.(type) { 767 case *And: 768 case *Or: 769 case *Not: 770 case *Type: 771 default: 772 v(iQ) 773 } 774 return iQ 775 }) 776}