fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 13 kB View raw
1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package query 16 17import ( 18 "bytes" 19 "fmt" 20 "log" 21 "regexp/syntax" 22 23 "github.com/grafana/regexp" 24 25 "github.com/sourcegraph/zoekt/languages" 26) 27 28var _ = log.Printf 29 30// parseStringLiteral parses a string literal, consumes the starting 31// quote too. 32func parseStringLiteral(in []byte) (lit []byte, n int, err error) { 33 left := in[1:] 34 found := false 35 36loop: 37 for len(left) > 0 { 38 c := left[0] 39 left = left[1:] 40 switch c { 41 case '"': 42 found = true 43 break loop 44 case '\\': 45 // TODO - other escape sequences. 46 if len(left) == 0 { 47 return nil, 0, fmt.Errorf("query: missing char after \\") 48 } 49 c = left[0] 50 left = left[1:] 51 52 lit = append(lit, c) 53 default: 54 lit = append(lit, c) 55 } 56 } 57 if !found { 58 return nil, 0, fmt.Errorf("query: unterminated quoted string") 59 } 60 return lit, len(in) - len(left), nil 61} 62 63// orOperator is a placeholder intermediate so we can represent [A, 64// or, B] before we convert it to Or{A, B} 65type orOperator struct{} 66 67func (o *orOperator) String() string { 68 return "orOp" 69} 70 71// caseScopeQ is a parse-time wrapper used to prevent case directives from an 72// outer expression list from overriding an explicitly scoped inner `case:`. 73type caseScopeQ struct { 74 Child Q 75} 76 77func (c *caseScopeQ) String() string { 78 return c.Child.String() 79} 80 81func stripCaseScopesList(qs []Q) []Q { 82 stripped := make([]Q, len(qs)) 83 for i, q := range qs { 84 stripped[i] = stripCaseScopes(q) 85 } 86 return stripped 87} 88 89func stripCaseScopes(q Q) Q { 90 switch s := q.(type) { 91 case *And: 92 return &And{Children: stripCaseScopesList(s.Children)} 93 case *Or: 94 return &Or{Children: stripCaseScopesList(s.Children)} 95 case *Not: 96 return &Not{Child: stripCaseScopes(s.Child)} 97 case *Type: 98 return &Type{Type: s.Type, Child: stripCaseScopes(s.Child)} 99 case *Boost: 100 return &Boost{Boost: s.Boost, Child: stripCaseScopes(s.Child)} 101 case *caseScopeQ: 102 return stripCaseScopes(s.Child) 103 default: 104 return q 105 } 106} 107 108func isSpace(c byte) bool { 109 return c == ' ' || c == '\t' 110} 111 112// Parse parses a string into a query. 113func Parse(qStr string) (Q, error) { 114 b := []byte(qStr) 115 116 qs, n, err := parseExprList(b) 117 if err != nil { 118 return nil, err 119 } 120 121 if n != len(b) { 122 return nil, fmt.Errorf("query: extra tokens found at end input: %q", b[n:]) 123 } 124 125 q, err := parseOperators(qs) 126 if err != nil { 127 return nil, err 128 } 129 130 q = stripCaseScopes(q) 131 132 return Simplify(q), nil 133} 134 135// parseExpr parses a single expression, returning the result, and the 136// number of bytes consumed. 137func parseExpr(in []byte) (Q, int, error) { 138 b := in[:] 139 var expr Q 140 for len(b) > 0 && isSpace(b[0]) { 141 b = b[1:] 142 } 143 144 tok, err := nextToken(b) 145 if err != nil { 146 return nil, 0, err 147 } 148 if tok == nil { 149 return nil, 0, nil 150 } 151 b = b[len(tok.Input):] 152 153 text := string(tok.Text) 154 switch tok.Type { 155 case tokCase: 156 switch text { 157 case "yes": 158 case "no": 159 case "auto": 160 default: 161 return nil, 0, fmt.Errorf("query: unknown case argument %q, want {yes,no,auto}", text) 162 } 163 expr = &caseQ{text} 164 case tokRepo: 165 r, err := regexp.Compile(text) 166 if err != nil { 167 return nil, 0, err 168 } 169 170 expr = &Repo{r} 171 case tokArchived: 172 switch text { 173 case "yes": 174 expr = RawConfig(RcOnlyArchived) 175 case "no": 176 expr = RawConfig(RcNoArchived) 177 default: 178 return nil, 0, fmt.Errorf("query: unknown archived argument %q, want {yes,no}", text) 179 } 180 case tokFork: 181 switch text { 182 case "yes": 183 expr = RawConfig(RcOnlyForks) 184 case "no": 185 expr = RawConfig(RcNoForks) 186 default: 187 return nil, 0, fmt.Errorf("query: unknown fork argument %q, want {yes,no}", text) 188 } 189 case tokPublic: 190 switch text { 191 case "yes": 192 expr = RawConfig(RcOnlyPublic) 193 case "no": 194 expr = RawConfig(RcOnlyPrivate) 195 default: 196 return nil, 0, fmt.Errorf("query: unknown public argument %q, want {yes,no}", text) 197 } 198 case tokBranch: 199 expr = &Branch{Pattern: text} 200 case tokText, tokRegex: 201 q, err := RegexpQuery(text, false, false) 202 if err != nil { 203 return nil, 0, err 204 } 205 expr = q 206 case tokFile: 207 q, err := RegexpQuery(text, false, true) 208 if err != nil { 209 return nil, 0, err 210 } 211 expr = q 212 case tokContent: 213 q, err := RegexpQuery(text, true, false) 214 if err != nil { 215 return nil, 0, err 216 } 217 expr = q 218 case tokLang: 219 canonical, ok := languages.GetLanguageByNameOrAlias(text) 220 if !ok { 221 expr = &Const{false} 222 } else { 223 expr = &Language{Language: canonical} 224 } 225 226 case tokSym: 227 if text == "" { 228 return nil, 0, fmt.Errorf("the sym: atom must have an argument") 229 } 230 231 q, err := RegexpQuery(text, false, false) 232 if err != nil { 233 return nil, 0, err 234 } 235 236 expr = &Symbol{q} 237 case tokParenClose: 238 // Caller must consume paren. 239 expr = nil 240 241 case tokParenOpen: 242 qs, n, err := parseExprList(b) 243 b = b[n:] 244 if err != nil { 245 return nil, 0, err 246 } 247 248 pTok, err := nextToken(b) 249 if err != nil { 250 return nil, 0, err 251 } 252 if pTok == nil || pTok.Type != tokParenClose { 253 return nil, 0, fmt.Errorf("query: missing close paren, got token %v", pTok) 254 } 255 256 b = b[len(pTok.Input):] 257 expr, err = parseOperators(qs) 258 if err != nil { 259 return nil, 0, err 260 } 261 case tokNegate: 262 subQ, n, err := parseExpr(b) 263 if err != nil { 264 return nil, 0, err 265 } 266 if subQ == nil { 267 return nil, 0, fmt.Errorf("query: '-' operator needs an argument") 268 } 269 b = b[n:] 270 expr = &Not{subQ} 271 272 case tokType: 273 var t uint8 274 switch text { 275 case "filematch": 276 t = TypeFileMatch 277 case "filename", "file": 278 t = TypeFileName 279 case "repo": 280 t = TypeRepo 281 default: 282 return nil, 0, fmt.Errorf("query: unknown type argument %q, want {filematch,filename,repo}", text) 283 } 284 // Later we will lift this into a root, like we do for caseQ 285 expr = &Type{Type: t, Child: nil} 286 case tokMeta: 287 // Split on ':' to separate field and value 288 parts := bytes.SplitN([]byte(text), []byte(":"), 2) 289 if len(parts) != 2 { 290 return nil, 0, fmt.Errorf("query: invalid meta field syntax %q", text) 291 } 292 field := string(parts[0]) 293 valuePattern := string(parts[1]) 294 re, err := regexp.Compile(valuePattern) 295 if err != nil { 296 return nil, 0, fmt.Errorf("query: invalid regexp in meta value: %v", err) 297 } 298 expr = &Meta{ 299 Field: field, 300 Value: re, 301 } 302 } 303 304 return expr, len(in) - len(b), nil 305} 306 307const regexpFlags syntax.Flags = syntax.ClassNL | syntax.PerlX | syntax.UnicodeGroups 308 309// RegexpQuery parses an atom into either a regular expression, or a 310// simple substring atom. 311func RegexpQuery(text string, content, file bool) (Q, error) { 312 var expr Q 313 314 r, err := syntax.Parse(text, regexpFlags) 315 if err != nil { 316 return nil, err 317 } 318 319 r = OptimizeRegexp(r, regexpFlags) 320 321 if r.Op == syntax.OpLiteral { 322 expr = &Substring{ 323 Pattern: string(r.Rune), 324 FileName: file, 325 Content: content, 326 } 327 } else { 328 expr = &Regexp{ 329 Regexp: r, 330 FileName: file, 331 Content: content, 332 } 333 } 334 335 return expr, nil 336} 337 338// parseOperators interprets the orOperator in a list of queries. 339func parseOperators(in []Q) (Q, error) { 340 top := &Or{} 341 cur := &And{} 342 343 seenOr := false 344 for _, q := range in { 345 if _, ok := q.(*orOperator); ok { 346 seenOr = true 347 if len(cur.Children) == 0 { 348 return nil, fmt.Errorf("query: OR operator should have operand") 349 } 350 top.Children = append(top.Children, cur) 351 cur = &And{} 352 } else { 353 cur.Children = append(cur.Children, q) 354 } 355 } 356 357 if seenOr && len(cur.Children) == 0 { 358 return nil, fmt.Errorf("query: OR operator should have operand") 359 } 360 top.Children = append(top.Children, cur) 361 return top, nil 362} 363 364// parseExprList parses a list of query expressions. It is the 365// workhorse of the Parse function. 366func parseExprList(in []byte) ([]Q, int, error) { 367 b := in[:] 368 var qs []Q 369 for len(b) > 0 { 370 for len(b) > 0 && isSpace(b[0]) { 371 b = b[1:] 372 } 373 tok, _ := nextToken(b) 374 if tok != nil && tok.Type == tokParenClose { 375 break 376 } else if tok != nil && tok.Type == tokOr { 377 qs = append(qs, &orOperator{}) 378 b = b[len(tok.Input):] 379 continue 380 } 381 382 q, n, err := parseExpr(b) 383 if err != nil { 384 return nil, 0, err 385 } 386 387 if q == nil { 388 // eof or a ')' 389 break 390 } 391 qs = append(qs, q) 392 b = b[n:] 393 } 394 395 setCase := "auto" 396 hasCaseScope := false 397 newQS := qs[:0] 398 typeT := uint8(100) 399 for _, q := range qs { 400 switch s := q.(type) { 401 case *caseQ: 402 setCase = s.Flavor 403 hasCaseScope = true 404 case *Type: 405 if s.Type < typeT { 406 typeT = s.Type 407 } 408 default: 409 newQS = append(newQS, q) 410 } 411 } 412 qs = mapQueryList(newQS, func(q Q) Q { 413 if sc, ok := q.(setCaser); ok { 414 sc.setCase(setCase) 415 } 416 return q 417 }) 418 if typeT != 100 { 419 typedQ, err := parseOperators(qs) 420 if err != nil { 421 return nil, 0, err 422 } 423 qs = []Q{&Type{Type: typeT, Child: typedQ}} 424 } 425 426 if hasCaseScope { 427 scoped := make([]Q, 0, len(qs)) 428 for _, q := range qs { 429 if _, isOrOperator := q.(*orOperator); isOrOperator { 430 scoped = append(scoped, q) 431 continue 432 } 433 scoped = append(scoped, &caseScopeQ{Child: q}) 434 } 435 qs = scoped 436 } 437 438 return qs, len(in) - len(b), nil 439} 440 441type token struct { 442 Type int 443 // The value of the token 444 Text []byte 445 446 // The input that we consumed to form the token. 447 Input []byte 448} 449 450func (t *token) String() string { 451 return fmt.Sprintf("%s:%q", tokNames[t.Type], t.Text) 452} 453 454// token types. 455const ( 456 tokText = 0 457 tokFile = 1 458 tokRepo = 2 459 tokCase = 3 460 tokBranch = 4 461 tokParenOpen = 5 462 tokParenClose = 6 463 tokError = 7 464 tokNegate = 8 465 tokRegex = 9 466 tokOr = 10 467 tokContent = 11 468 tokLang = 12 469 tokSym = 13 470 tokType = 14 471 tokArchived = 15 472 tokPublic = 16 473 tokFork = 17 474 tokMeta = 18 475) 476 477var tokNames = map[int]string{ 478 tokArchived: "Archived", 479 tokBranch: "Branch", 480 tokCase: "Case", 481 tokError: "Error", 482 tokFile: "File", 483 tokFork: "Fork", 484 tokNegate: "Negate", 485 tokOr: "Or", 486 tokParenClose: "ParenClose", 487 tokParenOpen: "ParenOpen", 488 tokPublic: "Public", 489 tokRegex: "Regex", 490 tokRepo: "Repo", 491 tokText: "Text", 492 tokLang: "Language", 493 tokSym: "Symbol", 494 tokType: "Type", 495 tokMeta: "Meta", 496} 497 498var prefixes = map[string]int{ 499 "archived:": tokArchived, 500 "b:": tokBranch, 501 "branch:": tokBranch, 502 "c:": tokContent, 503 "case:": tokCase, 504 "content:": tokContent, 505 "f:": tokFile, 506 "file:": tokFile, 507 "fork:": tokFork, 508 "public:": tokPublic, 509 "r:": tokRepo, 510 "regex:": tokRegex, 511 "repo:": tokRepo, 512 "lang:": tokLang, 513 "sym:": tokSym, 514 "t:": tokType, 515 "type:": tokType, 516 "meta.": tokMeta, 517} 518 519var reservedWords = map[string]int{ 520 "or": tokOr, 521} 522 523func (t *token) setType() { 524 // After we consumed the input, we have to interpret some of the text, 525 // eg. to distinguish between ")" the text and ) the query grouping 526 // parenthesis. 527 if len(t.Text) == 1 && t.Text[0] == '(' { 528 t.Type = tokParenOpen 529 } 530 if len(t.Text) == 1 && t.Text[0] == ')' { 531 t.Type = tokParenClose 532 } 533 534 for w, typ := range reservedWords { 535 if string(t.Text) == w && string(t.Input) == w { 536 t.Type = typ 537 break 538 } 539 } 540 541 for pref, typ := range prefixes { 542 if !bytes.HasPrefix(t.Input, []byte(pref)) { 543 continue 544 } 545 546 t.Text = t.Text[len(pref):] 547 t.Type = typ 548 break 549 } 550} 551 552// nextToken returns the next token from the given input. 553func nextToken(in []byte) (*token, error) { 554 left := in[:] 555 parenCount := 0 556 var cur token 557 if len(left) == 0 { 558 return nil, nil 559 } 560 561 if left[0] == '-' { 562 return &token{ 563 Type: tokNegate, 564 Text: []byte{'-'}, 565 Input: in[:1], 566 }, nil 567 } 568 569 foundSpace := false 570 571loop: 572 for len(left) > 0 { 573 c := left[0] 574 switch c { 575 case '(': 576 parenCount++ 577 cur.Text = append(cur.Text, c) 578 left = left[1:] 579 case ')': 580 if parenCount == 0 { 581 if len(cur.Text) == 0 { 582 cur.Text = []byte{')'} 583 left = left[1:] 584 } 585 break loop 586 } 587 588 cur.Text = append(cur.Text, c) 589 left = left[1:] 590 parenCount-- 591 592 case '"': 593 t, n, err := parseStringLiteral(left) 594 if err != nil { 595 return nil, err 596 } 597 cur.Text = append(cur.Text, t...) 598 left = left[n:] 599 case '\\': 600 left = left[1:] 601 if len(left) == 0 { 602 return nil, fmt.Errorf("query: lone \\ at end") 603 } 604 c := left[0] 605 cur.Text = append(cur.Text, '\\', c) 606 left = left[1:] 607 608 case ' ', '\n', '\t': 609 if parenCount > 0 { 610 foundSpace = true 611 } 612 break loop 613 default: 614 cur.Text = append(cur.Text, c) 615 left = left[1:] 616 } 617 } 618 619 if len(cur.Text) == 0 { 620 return nil, nil 621 } 622 623 if foundSpace && cur.Text[0] == '(' { 624 cur.Text = cur.Text[:1] 625 cur.Input = in[:1] 626 } else { 627 cur.Input = in[:len(in)-len(left)] 628 } 629 cur.setType() 630 return &cur, nil 631}