fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 17 kB View raw
1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package query 16 17import ( 18 "encoding/json" 19 "fmt" 20 "log" 21 "reflect" 22 "regexp/syntax" 23 "sort" 24 "strconv" 25 "strings" 26 27 "github.com/RoaringBitmap/roaring" 28 "github.com/grafana/regexp" 29 30 "github.com/sourcegraph/zoekt/internal/syntaxutil" 31) 32 33var _ = log.Println 34 35// Q is a representation for a possibly hierarchical search query. 36type Q interface { 37 String() string 38} 39 40// RawConfig filters repositories based on their encoded RawConfig map. 41type RawConfig uint64 42 43const ( 44 RcOnlyPublic RawConfig = 1 45 RcOnlyPrivate RawConfig = 2 46 RcOnlyForks RawConfig = 1 << 2 47 RcNoForks RawConfig = 2 << 2 48 RcOnlyArchived RawConfig = 1 << 4 49 RcNoArchived RawConfig = 2 << 4 50) 51 52var flagNames = []struct { 53 Mask RawConfig 54 Label string 55}{ 56 {RcOnlyPublic, "RcOnlyPublic"}, 57 {RcOnlyPrivate, "RcOnlyPrivate"}, 58 {RcOnlyForks, "RcOnlyForks"}, 59 {RcNoForks, "RcNoForks"}, 60 {RcOnlyArchived, "RcOnlyArchived"}, 61 {RcNoArchived, "RcNoArchived"}, 62} 63 64func (r RawConfig) String() string { 65 var s []string 66 for _, fn := range flagNames { 67 if r&fn.Mask != 0 { 68 s = append(s, fn.Label) 69 } 70 } 71 return fmt.Sprintf("rawConfig:%s", strings.Join(s, "|")) 72} 73 74// RegexpQuery is a query looking for regular expressions matches. 75type Regexp struct { 76 Regexp *syntax.Regexp 77 FileName bool 78 Content bool 79 CaseSensitive bool 80} 81 82// RegexpString returns the marshaled raw regexp pattern for q. 83func (q *Regexp) RegexpString() string { 84 return syntaxutil.RegexpString(q.Regexp) 85} 86 87func (q *Regexp) String() string { 88 pref := "" 89 if q.FileName { 90 pref = "file_" 91 } 92 if q.CaseSensitive { 93 pref = "case_" + pref 94 } 95 return fmt.Sprintf("%sregex:%q", pref, q.RegexpString()) 96} 97 98// gobRegexp wraps Regexp to make it gob-encodable/decodable. Regexp contains syntax.Regexp, which 99// contains slices/arrays with possibly nil elements, which gob doesn't support 100// (https://github.com/golang/go/issues/1501). 101type gobRegexp struct { 102 Regexp // Regexp.Regexp (*syntax.Regexp) is set to nil and its string is set in RegexpString 103 RegexpString string 104} 105 106// GobEncode implements gob.Encoder. 107func (q Regexp) GobEncode() ([]byte, error) { 108 gobq := gobRegexp{Regexp: q, RegexpString: q.RegexpString()} 109 gobq.Regexp.Regexp = nil // can't be gob-encoded/decoded 110 return json.Marshal(gobq) 111} 112 113// GobDecode implements gob.Decoder. 114func (q *Regexp) GobDecode(data []byte) error { 115 var gobq gobRegexp 116 err := json.Unmarshal(data, &gobq) 117 if err != nil { 118 return err 119 } 120 gobq.Regexp.Regexp, err = syntax.Parse(gobq.RegexpString, regexpFlags) 121 if err != nil { 122 return err 123 } 124 *q = gobq.Regexp 125 return nil 126} 127 128// Symbol finds a string that is a symbol. 129type Symbol struct { 130 Expr Q 131} 132 133func (s *Symbol) String() string { 134 return fmt.Sprintf("sym:%s", s.Expr) 135} 136 137type caseQ struct { 138 Flavor string 139} 140 141func (c *caseQ) String() string { 142 return "case:" + c.Flavor 143} 144 145type Language struct { 146 Language string 147} 148 149func (l *Language) String() string { 150 return "lang:" + l.Language 151} 152 153type Const struct { 154 Value bool 155} 156 157func (q *Const) String() string { 158 if q.Value { 159 return "TRUE" 160 } 161 return "FALSE" 162} 163 164type Repo struct { 165 Regexp *regexp.Regexp 166} 167 168func (q *Repo) String() string { 169 return fmt.Sprintf("repo:%s", q.Regexp.String()) 170} 171 172func (q Repo) GobEncode() ([]byte, error) { 173 return []byte(q.Regexp.String()), nil 174} 175 176func (q *Repo) GobDecode(data []byte) error { 177 var err error 178 q.Regexp, err = regexp.Compile(string(data)) 179 return err 180} 181 182// RepoRegexp is a Sourcegraph addition which searches documents where the 183// repository name matches Regexp. 184type RepoRegexp struct { 185 Regexp *regexp.Regexp 186} 187 188func (q *RepoRegexp) String() string { 189 return fmt.Sprintf("reporegex:%q", q.Regexp.String()) 190} 191 192// GobEncode implements gob.Encoder. 193func (q *RepoRegexp) GobEncode() ([]byte, error) { 194 // gob can't encode syntax.Regexp 195 return []byte(q.Regexp.String()), nil 196} 197 198// GobDecode implements gob.Decoder. 199func (q *RepoRegexp) GobDecode(data []byte) error { 200 var err error 201 q.Regexp, err = regexp.Compile(string(data)) 202 return err 203} 204 205// BranchesRepos is a slice of BranchRepos to match. It is a Sourcegraph 206// addition and only used in the RPC interface for efficient checking of large 207// repo lists. 208type BranchesRepos struct { 209 List []BranchRepos 210} 211 212// NewSingleBranchesRepos is a helper for creating a BranchesRepos which 213// searches a single branch. 214func NewSingleBranchesRepos(branch string, ids ...uint32) *BranchesRepos { 215 return &BranchesRepos{List: []BranchRepos{ 216 {branch, roaring.BitmapOf(ids...)}, 217 }} 218} 219 220func (q *BranchesRepos) String() string { 221 var sb strings.Builder 222 223 sb.WriteString("(branchesrepos") 224 225 for _, br := range q.List { 226 if size := br.Repos.GetCardinality(); size > 1 { 227 sb.WriteString(" " + br.Branch + ":" + strconv.FormatUint(size, 10)) 228 } else { 229 sb.WriteString(" " + br.Branch + "=" + br.Repos.String()) 230 } 231 } 232 233 sb.WriteString(")") 234 return sb.String() 235} 236 237// NewRepoIDs is a helper for creating a RepoIDs which 238// searches only the matched repos. 239func NewRepoIDs(ids ...uint32) *RepoIDs { 240 return &RepoIDs{Repos: roaring.BitmapOf(ids...)} 241} 242 243func (q *RepoIDs) String() string { 244 var sb strings.Builder 245 246 sb.WriteString("(repoids ") 247 248 if size := q.Repos.GetCardinality(); size > 1 { 249 sb.WriteString("count:" + strconv.FormatUint(size, 10)) 250 } else { 251 sb.WriteString("repoid=" + q.Repos.String()) 252 } 253 254 sb.WriteString(")") 255 return sb.String() 256} 257 258// MarshalBinary implements a specialized encoder for BranchesRepos. 259func (q BranchesRepos) MarshalBinary() ([]byte, error) { 260 return branchesReposEncode(q.List) 261} 262 263// UnmarshalBinary implements a specialized decoder for BranchesRepos. 264func (q *BranchesRepos) UnmarshalBinary(b []byte) (err error) { 265 q.List, err = branchesReposDecode(b) 266 return err 267} 268 269// BranchRepos is a (branch, sourcegraph repo ids bitmap) tuple. It is a 270// Sourcegraph addition. 271type BranchRepos struct { 272 Branch string 273 Repos *roaring.Bitmap 274} 275 276// Similar to BranchRepos but will be used to match only by repoid and 277// therefore matches all branches 278type RepoIDs struct { 279 Repos *roaring.Bitmap 280} 281 282// RepoSet is a list of repos to match. It is a Sourcegraph addition and only 283// used in the RPC interface for efficient checking of large repo lists. 284type RepoSet struct { 285 Set map[string]bool 286} 287 288func (q *RepoSet) String() string { 289 var detail string 290 if len(q.Set) > 5 { 291 // Large sets being output are not useful 292 detail = fmt.Sprintf("size=%d", len(q.Set)) 293 } else { 294 repos := make([]string, len(q.Set)) 295 i := 0 296 for repo := range q.Set { 297 repos[i] = repo 298 i++ 299 } 300 sort.Strings(repos) 301 detail = strings.Join(repos, " ") 302 } 303 return fmt.Sprintf("(reposet %s)", detail) 304} 305 306func NewRepoSet(repo ...string) *RepoSet { 307 s := &RepoSet{Set: make(map[string]bool)} 308 for _, r := range repo { 309 s.Set[r] = true 310 } 311 return s 312} 313 314// FileNameSet is a list of file names to match. It is a Sourcegraph addition 315// and only used in the RPC interface for efficient checking of large file 316// lists. 317type FileNameSet struct { 318 Set map[string]struct{} 319} 320 321// MarshalBinary implements a specialized encoder for FileNameSet. 322func (q *FileNameSet) MarshalBinary() ([]byte, error) { 323 return stringSetEncode(q.Set) 324} 325 326// UnmarshalBinary implements a specialized decoder for FileNameSet. 327func (q *FileNameSet) UnmarshalBinary(b []byte) error { 328 var err error 329 q.Set, err = stringSetDecode(b) 330 return err 331} 332 333func (q *FileNameSet) String() string { 334 var detail string 335 if len(q.Set) > 5 { 336 // Large sets being output are not useful 337 detail = fmt.Sprintf("size=%d", len(q.Set)) 338 } else { 339 values := make([]string, 0, len(q.Set)) 340 for v := range q.Set { 341 values = append(values, v) 342 } 343 sort.Strings(values) 344 detail = strings.Join(values, " ") 345 } 346 return fmt.Sprintf("(filenameset %s)", detail) 347} 348 349func NewFileNameSet(fileNames ...string) *FileNameSet { 350 s := &FileNameSet{Set: make(map[string]struct{})} 351 for _, r := range fileNames { 352 s.Set[r] = struct{}{} 353 } 354 return s 355} 356 357const ( 358 TypeFileMatch uint8 = iota 359 TypeFileName 360 TypeRepo 361) 362 363// Type changes the result type returned. 364type Type struct { 365 Child Q 366 Type uint8 367} 368 369func (q *Type) String() string { 370 switch q.Type { 371 case TypeFileMatch: 372 return fmt.Sprintf("(type:filematch %s)", q.Child) 373 case TypeFileName: 374 return fmt.Sprintf("(type:filename %s)", q.Child) 375 case TypeRepo: 376 return fmt.Sprintf("(type:repo %s)", q.Child) 377 default: 378 return fmt.Sprintf("(type:UNKNOWN %s)", q.Child) 379 } 380} 381 382// Boost scales the contribution to score of descendents. 383type Boost struct { 384 Child Q 385 // Boost will multiply the score of its descendents. Values less than 1 will 386 // give less importance while values greater than 1 will give more 387 // importance. 388 Boost float64 389} 390 391func (q *Boost) String() string { 392 return fmt.Sprintf("(boost %0.2f %s)", q.Boost, q.Child) 393} 394 395// Substring is the most basic query: a query for a substring. 396type Substring struct { 397 Pattern string 398 CaseSensitive bool 399 400 // Match only filename 401 FileName bool 402 403 // Match only content 404 Content bool 405} 406 407func (q *Substring) String() string { 408 s := "" 409 410 t := "" 411 if q.FileName { 412 t = "file_" 413 } else if q.Content { 414 t = "content_" 415 } 416 417 s += fmt.Sprintf("%ssubstr:%q", t, q.Pattern) 418 if q.CaseSensitive { 419 s = "case_" + s 420 } 421 return s 422} 423 424type setCaser interface { 425 setCase(string) 426} 427 428func (q *Substring) setCase(k string) { 429 switch k { 430 case "yes": 431 q.CaseSensitive = true 432 case "no": 433 q.CaseSensitive = false 434 case "auto": 435 // TODO - unicode 436 q.CaseSensitive = (q.Pattern != string(toLower([]byte(q.Pattern)))) 437 } 438} 439 440func (q *Symbol) setCase(k string) { 441 if sc, ok := q.Expr.(setCaser); ok { 442 sc.setCase(k) 443 } 444} 445 446func (q *Regexp) setCase(k string) { 447 switch k { 448 case "yes": 449 q.CaseSensitive = true 450 case "no": 451 q.CaseSensitive = false 452 case "auto": 453 q.CaseSensitive = !q.Regexp.Equal(LowerRegexp(q.Regexp)) 454 } 455} 456 457// Or is matched when any of its children is matched. 458type Or struct { 459 Children []Q 460} 461 462func (q *Or) String() string { 463 var sub []string 464 for _, ch := range q.Children { 465 sub = append(sub, ch.String()) 466 } 467 return fmt.Sprintf("(or %s)", strings.Join(sub, " ")) 468} 469 470// Not inverts the meaning of its child. 471type Not struct { 472 Child Q 473} 474 475func (q *Not) String() string { 476 return fmt.Sprintf("(not %s)", q.Child) 477} 478 479// And is matched when all its children are. 480type And struct { 481 Children []Q 482} 483 484func (q *And) String() string { 485 var sub []string 486 for _, ch := range q.Children { 487 sub = append(sub, ch.String()) 488 } 489 return fmt.Sprintf("(and %s)", strings.Join(sub, " ")) 490} 491 492// NewAnd is syntactic sugar for constructing And queries. 493func NewAnd(qs ...Q) Q { 494 return &And{Children: qs} 495} 496 497// NewOr is syntactic sugar for constructing Or queries. 498func NewOr(qs ...Q) Q { 499 return &Or{Children: qs} 500} 501 502// Branch limits search to a specific branch. 503type Branch struct { 504 Pattern string 505 506 // exact is true if we want to Pattern to equal branch. 507 Exact bool 508} 509 510func (q *Branch) String() string { 511 if q.Exact { 512 return fmt.Sprintf("branch=%q", q.Pattern) 513 } 514 return fmt.Sprintf("branch:%q", q.Pattern) 515} 516 517// Meta represents a query for metadata fields. 518type Meta struct { 519 Field string // The metadata field name 520 Value *regexp.Regexp // The value to match 521} 522 523// String returns a string representation of the Meta query. 524func (m *Meta) String() string { 525 return fmt.Sprintf("meta.%s:%s", m.Field, m.Value) 526} 527 528func queryChildren(q Q) []Q { 529 switch s := q.(type) { 530 case *And: 531 return s.Children 532 case *Or: 533 return s.Children 534 } 535 return nil 536} 537 538func flattenAndOr(children []Q, typ Q) ([]Q, bool) { 539 var flat []Q 540 changed := false 541 for _, ch := range children { 542 ch, subChanged := flatten(ch) 543 changed = changed || subChanged 544 if reflect.TypeOf(ch) == reflect.TypeOf(typ) { 545 changed = true 546 subChildren := queryChildren(ch) 547 if subChildren != nil { 548 flat = append(flat, subChildren...) 549 } 550 } else { 551 flat = append(flat, ch) 552 } 553 } 554 555 return flat, changed 556} 557 558// (and (and x y) z) => (and x y z) , the same for "or" 559func flatten(q Q) (Q, bool) { 560 switch s := q.(type) { 561 case *And: 562 if len(s.Children) == 1 { 563 return s.Children[0], true 564 } 565 flatChildren, changed := flattenAndOr(s.Children, s) 566 return &And{flatChildren}, changed 567 case *Or: 568 if len(s.Children) == 1 { 569 return s.Children[0], true 570 } 571 flatChildren, changed := flattenAndOr(s.Children, s) 572 return &Or{flatChildren}, changed 573 case *Not: 574 child, changed := flatten(s.Child) 575 return &Not{child}, changed 576 case *Type: 577 child, changed := flatten(s.Child) 578 return &Type{Child: child, Type: s.Type}, changed 579 case *Boost: 580 child, changed := flatten(s.Child) 581 return &Boost{Child: child, Boost: s.Boost}, changed 582 default: 583 return q, false 584 } 585} 586 587func mapQueryList(qs []Q, f func(Q) Q) []Q { 588 neg := make([]Q, len(qs)) 589 for i, sub := range qs { 590 neg[i] = Map(sub, f) 591 } 592 return neg 593} 594 595func invertConst(q Q) Q { 596 c, ok := q.(*Const) 597 if ok { 598 return &Const{!c.Value} 599 } 600 return q 601} 602 603func evalAndOrConstants(q Q, children []Q) Q { 604 _, isAnd := q.(*And) 605 606 children = mapQueryList(children, evalConstants) 607 608 newCH := children[:0] 609 for _, ch := range children { 610 c, ok := ch.(*Const) 611 if ok { 612 if c.Value == isAnd { 613 continue 614 } else { 615 return ch 616 } 617 } 618 newCH = append(newCH, ch) 619 } 620 if len(newCH) == 0 { 621 return &Const{isAnd} 622 } 623 if isAnd { 624 return &And{newCH} 625 } 626 return &Or{newCH} 627} 628 629func evalConstants(q Q) Q { 630 switch s := q.(type) { 631 case *And: 632 return evalAndOrConstants(q, s.Children) 633 case *Or: 634 return evalAndOrConstants(q, s.Children) 635 case *Not: 636 ch := evalConstants(s.Child) 637 if _, ok := ch.(*Const); ok { 638 return invertConst(ch) 639 } 640 return &Not{ch} 641 case *Type: 642 ch := evalConstants(s.Child) 643 if _, ok := ch.(*Const); ok { 644 // If q is the root query, then evaluating this to a const changes 645 // the type of result we will return. However, the only case this 646 // makes sense is `type:repo TRUE` to return all repos or 647 // `type:file TRUE` to return all filenames. For other cases we 648 // want to do this constant folding though, so we allow the 649 // unexpected behaviour mentioned previously. 650 return ch 651 } 652 return &Type{Child: ch, Type: s.Type} 653 case *Boost: 654 ch := evalConstants(s.Child) 655 if _, ok := ch.(*Const); ok { 656 return ch 657 } 658 return &Boost{Boost: s.Boost, Child: ch} 659 case *Substring: 660 if len(s.Pattern) == 0 { 661 return &Const{true} 662 } 663 case *Regexp: 664 if s.Regexp.Op == syntax.OpEmptyMatch { 665 return &Const{true} 666 } 667 case *Branch: 668 if s.Pattern == "" { 669 return &Const{true} 670 } 671 case *BranchesRepos: 672 for _, br := range s.List { 673 if !br.Repos.IsEmpty() { 674 return q 675 } 676 } 677 return &Const{false} 678 case *RepoIDs: 679 if s.Repos.IsEmpty() { 680 return &Const{false} 681 } 682 case *RepoSet: 683 if len(s.Set) == 0 { 684 return &Const{false} 685 } 686 case *FileNameSet: 687 if len(s.Set) == 0 { 688 return &Const{false} 689 } 690 } 691 return q 692} 693 694func Simplify(q Q) Q { 695 q = evalConstants(q) 696 for { 697 var changed bool 698 q, changed = flatten(q) 699 if !changed { 700 break 701 } 702 } 703 704 return q 705} 706 707// Map runs f over the q. 708func Map(q Q, f func(q Q) Q) Q { 709 switch s := q.(type) { 710 case *And: 711 q = &And{Children: mapQueryList(s.Children, f)} 712 case *Or: 713 q = &Or{Children: mapQueryList(s.Children, f)} 714 case *Not: 715 q = &Not{Child: Map(s.Child, f)} 716 case *Type: 717 q = &Type{Type: s.Type, Child: Map(s.Child, f)} 718 case *Boost: 719 q = &Boost{Boost: s.Boost, Child: Map(s.Child, f)} 720 } 721 return f(q) 722} 723 724// Expand expands Substr queries into (OR file_substr content_substr) 725// queries, and the same for Regexp queries.. 726func ExpandFileContent(q Q) Q { 727 switch s := q.(type) { 728 case *Substring: 729 if s.FileName == s.Content { 730 f := *s 731 f.FileName = true 732 f.Content = false 733 c := *s 734 c.FileName = false 735 c.Content = true 736 return NewOr(&f, &c) 737 } 738 case *Regexp: 739 if s.FileName == s.Content { 740 f := *s 741 f.FileName = true 742 f.Content = false 743 c := *s 744 c.FileName = false 745 c.Content = true 746 return NewOr(&f, &c) 747 } 748 } 749 return q 750} 751 752// VisitAtoms runs `v` on all atom queries within `q`. 753func VisitAtoms(q Q, v func(q Q)) { 754 Map(q, func(iQ Q) Q { 755 switch iQ.(type) { 756 case *And: 757 case *Or: 758 case *Not: 759 case *Type: 760 case *Boost: 761 default: 762 v(iQ) 763 } 764 return iQ 765 }) 766}