fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package query 16 17import ( 18 "encoding/json" 19 "fmt" 20 "log" 21 "reflect" 22 "regexp/syntax" 23 "sort" 24 "strconv" 25 "strings" 26 27 "github.com/RoaringBitmap/roaring" 28 "github.com/grafana/regexp" 29 30 "github.com/sourcegraph/zoekt/internal/syntaxutil" 31) 32 33var _ = log.Println 34 35// Q is a representation for a possibly hierarchical search query. 36type Q interface { 37 String() string 38} 39 40// RawConfig filters repositories based on their encoded RawConfig map. 41type RawConfig uint64 42 43const ( 44 RcOnlyPublic RawConfig = 1 45 RcOnlyPrivate RawConfig = 2 46 RcOnlyForks RawConfig = 1 << 2 47 RcNoForks RawConfig = 2 << 2 48 RcOnlyArchived RawConfig = 1 << 4 49 RcNoArchived RawConfig = 2 << 4 50) 51 52var flagNames = []struct { 53 Mask RawConfig 54 Label string 55}{ 56 {RcOnlyPublic, "RcOnlyPublic"}, 57 {RcOnlyPrivate, "RcOnlyPrivate"}, 58 {RcOnlyForks, "RcOnlyForks"}, 59 {RcNoForks, "RcNoForks"}, 60 {RcOnlyArchived, "RcOnlyArchived"}, 61 {RcNoArchived, "RcNoArchived"}, 62} 63 64func (r RawConfig) String() string { 65 var s []string 66 for _, fn := range flagNames { 67 if r&fn.Mask != 0 { 68 s = append(s, fn.Label) 69 } 70 } 71 return fmt.Sprintf("rawConfig:%s", strings.Join(s, "|")) 72} 73 74// RegexpQuery is a query looking for regular expressions matches. 75type Regexp struct { 76 Regexp *syntax.Regexp 77 FileName bool 78 Content bool 79 CaseSensitive bool 80} 81 82func (q *Regexp) String() string { 83 pref := "" 84 if q.FileName { 85 pref = "file_" 86 } 87 if q.CaseSensitive { 88 pref = "case_" + pref 89 } 90 return fmt.Sprintf("%sregex:%q", pref, syntaxutil.RegexpString(q.Regexp)) 91} 92 93// gobRegexp wraps Regexp to make it gob-encodable/decodable. Regexp contains syntax.Regexp, which 94// contains slices/arrays with possibly nil elements, which gob doesn't support 95// (https://github.com/golang/go/issues/1501). 96type gobRegexp struct { 97 Regexp // Regexp.Regexp (*syntax.Regexp) is set to nil and its string is set in RegexpString 98 RegexpString string 99} 100 101// GobEncode implements gob.Encoder. 102func (q Regexp) GobEncode() ([]byte, error) { 103 gobq := gobRegexp{Regexp: q, RegexpString: syntaxutil.RegexpString(q.Regexp)} 104 gobq.Regexp.Regexp = nil // can't be gob-encoded/decoded 105 return json.Marshal(gobq) 106} 107 108// GobDecode implements gob.Decoder. 109func (q *Regexp) GobDecode(data []byte) error { 110 var gobq gobRegexp 111 err := json.Unmarshal(data, &gobq) 112 if err != nil { 113 return err 114 } 115 gobq.Regexp.Regexp, err = syntax.Parse(gobq.RegexpString, regexpFlags) 116 if err != nil { 117 return err 118 } 119 *q = gobq.Regexp 120 return nil 121} 122 123// Symbol finds a string that is a symbol. 124type Symbol struct { 125 Expr Q 126} 127 128func (s *Symbol) String() string { 129 return fmt.Sprintf("sym:%s", s.Expr) 130} 131 132type caseQ struct { 133 Flavor string 134} 135 136func (c *caseQ) String() string { 137 return "case:" + c.Flavor 138} 139 140type Language struct { 141 Language string 142} 143 144func (l *Language) String() string { 145 return "lang:" + l.Language 146} 147 148type Const struct { 149 Value bool 150} 151 152func (q *Const) String() string { 153 if q.Value { 154 return "TRUE" 155 } 156 return "FALSE" 157} 158 159type Repo struct { 160 Regexp *regexp.Regexp 161} 162 163func (q *Repo) String() string { 164 return fmt.Sprintf("repo:%s", q.Regexp.String()) 165} 166 167func (q Repo) GobEncode() ([]byte, error) { 168 return []byte(q.Regexp.String()), nil 169} 170 171func (q *Repo) GobDecode(data []byte) error { 172 var err error 173 q.Regexp, err = regexp.Compile(string(data)) 174 return err 175} 176 177// RepoRegexp is a Sourcegraph addition which searches documents where the 178// repository name matches Regexp. 179type RepoRegexp struct { 180 Regexp *regexp.Regexp 181} 182 183func (q *RepoRegexp) String() string { 184 return fmt.Sprintf("reporegex:%q", q.Regexp.String()) 185} 186 187// GobEncode implements gob.Encoder. 188func (q *RepoRegexp) GobEncode() ([]byte, error) { 189 // gob can't encode syntax.Regexp 190 return []byte(q.Regexp.String()), nil 191} 192 193// GobDecode implements gob.Decoder. 194func (q *RepoRegexp) GobDecode(data []byte) error { 195 var err error 196 q.Regexp, err = regexp.Compile(string(data)) 197 return err 198} 199 200// BranchesRepos is a slice of BranchRepos to match. It is a Sourcegraph 201// addition and only used in the RPC interface for efficient checking of large 202// repo lists. 203type BranchesRepos struct { 204 List []BranchRepos 205} 206 207// NewSingleBranchesRepos is a helper for creating a BranchesRepos which 208// searches a single branch. 209func NewSingleBranchesRepos(branch string, ids ...uint32) *BranchesRepos { 210 return &BranchesRepos{List: []BranchRepos{ 211 {branch, roaring.BitmapOf(ids...)}, 212 }} 213} 214 215func (q *BranchesRepos) String() string { 216 var sb strings.Builder 217 218 sb.WriteString("(branchesrepos") 219 220 for _, br := range q.List { 221 if size := br.Repos.GetCardinality(); size > 1 { 222 sb.WriteString(" " + br.Branch + ":" + strconv.FormatUint(size, 10)) 223 } else { 224 sb.WriteString(" " + br.Branch + "=" + br.Repos.String()) 225 } 226 } 227 228 sb.WriteString(")") 229 return sb.String() 230} 231 232// NewRepoIDs is a helper for creating a RepoIDs which 233// searches only the matched repos. 234func NewRepoIDs(ids ...uint32) *RepoIDs { 235 return &RepoIDs{Repos: roaring.BitmapOf(ids...)} 236} 237 238func (q *RepoIDs) String() string { 239 var sb strings.Builder 240 241 sb.WriteString("(repoids ") 242 243 if size := q.Repos.GetCardinality(); size > 1 { 244 sb.WriteString("count:" + strconv.FormatUint(size, 10)) 245 } else { 246 sb.WriteString("repoid=" + q.Repos.String()) 247 } 248 249 sb.WriteString(")") 250 return sb.String() 251} 252 253// MarshalBinary implements a specialized encoder for BranchesRepos. 254func (q BranchesRepos) MarshalBinary() ([]byte, error) { 255 return branchesReposEncode(q.List) 256} 257 258// UnmarshalBinary implements a specialized decoder for BranchesRepos. 259func (q *BranchesRepos) UnmarshalBinary(b []byte) (err error) { 260 q.List, err = branchesReposDecode(b) 261 return err 262} 263 264// BranchRepos is a (branch, sourcegraph repo ids bitmap) tuple. It is a 265// Sourcegraph addition. 266type BranchRepos struct { 267 Branch string 268 Repos *roaring.Bitmap 269} 270 271// Similar to BranchRepos but will be used to match only by repoid and 272// therefore matches all branches 273type RepoIDs struct { 274 Repos *roaring.Bitmap 275} 276 277// RepoSet is a list of repos to match. It is a Sourcegraph addition and only 278// used in the RPC interface for efficient checking of large repo lists. 279type RepoSet struct { 280 Set map[string]bool 281} 282 283func (q *RepoSet) String() string { 284 var detail string 285 if len(q.Set) > 5 { 286 // Large sets being output are not useful 287 detail = fmt.Sprintf("size=%d", len(q.Set)) 288 } else { 289 repos := make([]string, len(q.Set)) 290 i := 0 291 for repo := range q.Set { 292 repos[i] = repo 293 i++ 294 } 295 sort.Strings(repos) 296 detail = strings.Join(repos, " ") 297 } 298 return fmt.Sprintf("(reposet %s)", detail) 299} 300 301func NewRepoSet(repo ...string) *RepoSet { 302 s := &RepoSet{Set: make(map[string]bool)} 303 for _, r := range repo { 304 s.Set[r] = true 305 } 306 return s 307} 308 309// FileNameSet is a list of file names to match. It is a Sourcegraph addition 310// and only used in the RPC interface for efficient checking of large file 311// lists. 312type FileNameSet struct { 313 Set map[string]struct{} 314} 315 316// MarshalBinary implements a specialized encoder for FileNameSet. 317func (q *FileNameSet) MarshalBinary() ([]byte, error) { 318 return stringSetEncode(q.Set) 319} 320 321// UnmarshalBinary implements a specialized decoder for FileNameSet. 322func (q *FileNameSet) UnmarshalBinary(b []byte) error { 323 var err error 324 q.Set, err = stringSetDecode(b) 325 return err 326} 327 328func (q *FileNameSet) String() string { 329 var detail string 330 if len(q.Set) > 5 { 331 // Large sets being output are not useful 332 detail = fmt.Sprintf("size=%d", len(q.Set)) 333 } else { 334 values := make([]string, 0, len(q.Set)) 335 for v := range q.Set { 336 values = append(values, v) 337 } 338 sort.Strings(values) 339 detail = strings.Join(values, " ") 340 } 341 return fmt.Sprintf("(filenameset %s)", detail) 342} 343 344func NewFileNameSet(fileNames ...string) *FileNameSet { 345 s := &FileNameSet{Set: make(map[string]struct{})} 346 for _, r := range fileNames { 347 s.Set[r] = struct{}{} 348 } 349 return s 350} 351 352const ( 353 TypeFileMatch uint8 = iota 354 TypeFileName 355 TypeRepo 356) 357 358// Type changes the result type returned. 359type Type struct { 360 Child Q 361 Type uint8 362} 363 364func (q *Type) String() string { 365 switch q.Type { 366 case TypeFileMatch: 367 return fmt.Sprintf("(type:filematch %s)", q.Child) 368 case TypeFileName: 369 return fmt.Sprintf("(type:filename %s)", q.Child) 370 case TypeRepo: 371 return fmt.Sprintf("(type:repo %s)", q.Child) 372 default: 373 return fmt.Sprintf("(type:UNKNOWN %s)", q.Child) 374 } 375} 376 377// Boost scales the contribution to score of descendents. 378type Boost struct { 379 Child Q 380 // Boost will multiply the score of its descendents. Values less than 1 will 381 // give less importance while values greater than 1 will give more 382 // importance. 383 Boost float64 384} 385 386func (q *Boost) String() string { 387 return fmt.Sprintf("(boost %0.2f %s)", q.Boost, q.Child) 388} 389 390// Substring is the most basic query: a query for a substring. 391type Substring struct { 392 Pattern string 393 CaseSensitive bool 394 395 // Match only filename 396 FileName bool 397 398 // Match only content 399 Content bool 400} 401 402func (q *Substring) String() string { 403 s := "" 404 405 t := "" 406 if q.FileName { 407 t = "file_" 408 } else if q.Content { 409 t = "content_" 410 } 411 412 s += fmt.Sprintf("%ssubstr:%q", t, q.Pattern) 413 if q.CaseSensitive { 414 s = "case_" + s 415 } 416 return s 417} 418 419type setCaser interface { 420 setCase(string) 421} 422 423func (q *Substring) setCase(k string) { 424 switch k { 425 case "yes": 426 q.CaseSensitive = true 427 case "no": 428 q.CaseSensitive = false 429 case "auto": 430 // TODO - unicode 431 q.CaseSensitive = (q.Pattern != string(toLower([]byte(q.Pattern)))) 432 } 433} 434 435func (q *Symbol) setCase(k string) { 436 if sc, ok := q.Expr.(setCaser); ok { 437 sc.setCase(k) 438 } 439} 440 441func (q *Regexp) setCase(k string) { 442 switch k { 443 case "yes": 444 q.CaseSensitive = true 445 case "no": 446 q.CaseSensitive = false 447 case "auto": 448 q.CaseSensitive = !q.Regexp.Equal(LowerRegexp(q.Regexp)) 449 } 450} 451 452// Or is matched when any of its children is matched. 453type Or struct { 454 Children []Q 455} 456 457func (q *Or) String() string { 458 var sub []string 459 for _, ch := range q.Children { 460 sub = append(sub, ch.String()) 461 } 462 return fmt.Sprintf("(or %s)", strings.Join(sub, " ")) 463} 464 465// Not inverts the meaning of its child. 466type Not struct { 467 Child Q 468} 469 470func (q *Not) String() string { 471 return fmt.Sprintf("(not %s)", q.Child) 472} 473 474// And is matched when all its children are. 475type And struct { 476 Children []Q 477} 478 479func (q *And) String() string { 480 var sub []string 481 for _, ch := range q.Children { 482 sub = append(sub, ch.String()) 483 } 484 return fmt.Sprintf("(and %s)", strings.Join(sub, " ")) 485} 486 487// NewAnd is syntactic sugar for constructing And queries. 488func NewAnd(qs ...Q) Q { 489 return &And{Children: qs} 490} 491 492// NewOr is syntactic sugar for constructing Or queries. 493func NewOr(qs ...Q) Q { 494 return &Or{Children: qs} 495} 496 497// Branch limits search to a specific branch. 498type Branch struct { 499 Pattern string 500 501 // exact is true if we want to Pattern to equal branch. 502 Exact bool 503} 504 505func (q *Branch) String() string { 506 if q.Exact { 507 return fmt.Sprintf("branch=%q", q.Pattern) 508 } 509 return fmt.Sprintf("branch:%q", q.Pattern) 510} 511 512// Meta represents a query for metadata fields. 513type Meta struct { 514 Field string // The metadata field name 515 Value *regexp.Regexp // The value to match 516} 517 518// String returns a string representation of the Meta query. 519func (m *Meta) String() string { 520 return fmt.Sprintf("meta.%s:%s", m.Field, m.Value) 521} 522 523func queryChildren(q Q) []Q { 524 switch s := q.(type) { 525 case *And: 526 return s.Children 527 case *Or: 528 return s.Children 529 } 530 return nil 531} 532 533func flattenAndOr(children []Q, typ Q) ([]Q, bool) { 534 var flat []Q 535 changed := false 536 for _, ch := range children { 537 ch, subChanged := flatten(ch) 538 changed = changed || subChanged 539 if reflect.TypeOf(ch) == reflect.TypeOf(typ) { 540 changed = true 541 subChildren := queryChildren(ch) 542 if subChildren != nil { 543 flat = append(flat, subChildren...) 544 } 545 } else { 546 flat = append(flat, ch) 547 } 548 } 549 550 return flat, changed 551} 552 553// (and (and x y) z) => (and x y z) , the same for "or" 554func flatten(q Q) (Q, bool) { 555 switch s := q.(type) { 556 case *And: 557 if len(s.Children) == 1 { 558 return s.Children[0], true 559 } 560 flatChildren, changed := flattenAndOr(s.Children, s) 561 return &And{flatChildren}, changed 562 case *Or: 563 if len(s.Children) == 1 { 564 return s.Children[0], true 565 } 566 flatChildren, changed := flattenAndOr(s.Children, s) 567 return &Or{flatChildren}, changed 568 case *Not: 569 child, changed := flatten(s.Child) 570 return &Not{child}, changed 571 case *Type: 572 child, changed := flatten(s.Child) 573 return &Type{Child: child, Type: s.Type}, changed 574 case *Boost: 575 child, changed := flatten(s.Child) 576 return &Boost{Child: child, Boost: s.Boost}, changed 577 default: 578 return q, false 579 } 580} 581 582func mapQueryList(qs []Q, f func(Q) Q) []Q { 583 neg := make([]Q, len(qs)) 584 for i, sub := range qs { 585 neg[i] = Map(sub, f) 586 } 587 return neg 588} 589 590func invertConst(q Q) Q { 591 c, ok := q.(*Const) 592 if ok { 593 return &Const{!c.Value} 594 } 595 return q 596} 597 598func evalAndOrConstants(q Q, children []Q) Q { 599 _, isAnd := q.(*And) 600 601 children = mapQueryList(children, evalConstants) 602 603 newCH := children[:0] 604 for _, ch := range children { 605 c, ok := ch.(*Const) 606 if ok { 607 if c.Value == isAnd { 608 continue 609 } else { 610 return ch 611 } 612 } 613 newCH = append(newCH, ch) 614 } 615 if len(newCH) == 0 { 616 return &Const{isAnd} 617 } 618 if isAnd { 619 return &And{newCH} 620 } 621 return &Or{newCH} 622} 623 624func evalConstants(q Q) Q { 625 switch s := q.(type) { 626 case *And: 627 return evalAndOrConstants(q, s.Children) 628 case *Or: 629 return evalAndOrConstants(q, s.Children) 630 case *Not: 631 ch := evalConstants(s.Child) 632 if _, ok := ch.(*Const); ok { 633 return invertConst(ch) 634 } 635 return &Not{ch} 636 case *Type: 637 ch := evalConstants(s.Child) 638 if _, ok := ch.(*Const); ok { 639 // If q is the root query, then evaluating this to a const changes 640 // the type of result we will return. However, the only case this 641 // makes sense is `type:repo TRUE` to return all repos or 642 // `type:file TRUE` to return all filenames. For other cases we 643 // want to do this constant folding though, so we allow the 644 // unexpected behaviour mentioned previously. 645 return ch 646 } 647 return &Type{Child: ch, Type: s.Type} 648 case *Boost: 649 ch := evalConstants(s.Child) 650 if _, ok := ch.(*Const); ok { 651 return ch 652 } 653 return &Boost{Boost: s.Boost, Child: ch} 654 case *Substring: 655 if len(s.Pattern) == 0 { 656 return &Const{true} 657 } 658 case *Regexp: 659 if s.Regexp.Op == syntax.OpEmptyMatch { 660 return &Const{true} 661 } 662 case *Branch: 663 if s.Pattern == "" { 664 return &Const{true} 665 } 666 case *BranchesRepos: 667 for _, br := range s.List { 668 if !br.Repos.IsEmpty() { 669 return q 670 } 671 } 672 return &Const{false} 673 case *RepoIDs: 674 if s.Repos.IsEmpty() { 675 return &Const{false} 676 } 677 case *RepoSet: 678 if len(s.Set) == 0 { 679 return &Const{false} 680 } 681 case *FileNameSet: 682 if len(s.Set) == 0 { 683 return &Const{false} 684 } 685 } 686 return q 687} 688 689func Simplify(q Q) Q { 690 q = evalConstants(q) 691 for { 692 var changed bool 693 q, changed = flatten(q) 694 if !changed { 695 break 696 } 697 } 698 699 return q 700} 701 702// Map runs f over the q. 703func Map(q Q, f func(q Q) Q) Q { 704 switch s := q.(type) { 705 case *And: 706 q = &And{Children: mapQueryList(s.Children, f)} 707 case *Or: 708 q = &Or{Children: mapQueryList(s.Children, f)} 709 case *Not: 710 q = &Not{Child: Map(s.Child, f)} 711 case *Type: 712 q = &Type{Type: s.Type, Child: Map(s.Child, f)} 713 case *Boost: 714 q = &Boost{Boost: s.Boost, Child: Map(s.Child, f)} 715 } 716 return f(q) 717} 718 719// Expand expands Substr queries into (OR file_substr content_substr) 720// queries, and the same for Regexp queries.. 721func ExpandFileContent(q Q) Q { 722 switch s := q.(type) { 723 case *Substring: 724 if s.FileName == s.Content { 725 f := *s 726 f.FileName = true 727 f.Content = false 728 c := *s 729 c.FileName = false 730 c.Content = true 731 return NewOr(&f, &c) 732 } 733 case *Regexp: 734 if s.FileName == s.Content { 735 f := *s 736 f.FileName = true 737 f.Content = false 738 c := *s 739 c.FileName = false 740 c.Content = true 741 return NewOr(&f, &c) 742 } 743 } 744 return q 745} 746 747// VisitAtoms runs `v` on all atom queries within `q`. 748func VisitAtoms(q Q, v func(q Q)) { 749 Map(q, func(iQ Q) Q { 750 switch iQ.(type) { 751 case *And: 752 case *Or: 753 case *Not: 754 case *Type: 755 case *Boost: 756 default: 757 v(iQ) 758 } 759 return iQ 760 }) 761}