fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package query 16 17import ( 18 "bytes" 19 "fmt" 20 "log" 21 "regexp/syntax" 22 23 "github.com/grafana/regexp" 24 25 "github.com/sourcegraph/zoekt/internal/languages" 26) 27 28var _ = log.Printf 29 30// parseStringLiteral parses a string literal, consumes the starting 31// quote too. 32func parseStringLiteral(in []byte) (lit []byte, n int, err error) { 33 left := in[1:] 34 found := false 35 36loop: 37 for len(left) > 0 { 38 c := left[0] 39 left = left[1:] 40 switch c { 41 case '"': 42 found = true 43 break loop 44 case '\\': 45 // TODO - other escape sequences. 46 if len(left) == 0 { 47 return nil, 0, fmt.Errorf("query: missing char after \\") 48 } 49 c = left[0] 50 left = left[1:] 51 52 lit = append(lit, c) 53 default: 54 lit = append(lit, c) 55 } 56 } 57 if !found { 58 return nil, 0, fmt.Errorf("query: unterminated quoted string") 59 } 60 return lit, len(in) - len(left), nil 61} 62 63// orOperator is a placeholder intermediate so we can represent [A, 64// or, B] before we convert it to Or{A, B} 65type orOperator struct{} 66 67func (o *orOperator) String() string { 68 return "orOp" 69} 70 71func isSpace(c byte) bool { 72 return c == ' ' || c == '\t' 73} 74 75// Parse parses a string into a query. 76func Parse(qStr string) (Q, error) { 77 b := []byte(qStr) 78 79 qs, n, err := parseExprList(b) 80 if err != nil { 81 return nil, err 82 } 83 84 if n != len(b) { 85 return nil, fmt.Errorf("query: extra tokens found at end input: %q", b[n:]) 86 } 87 88 q, err := parseOperators(qs) 89 if err != nil { 90 return nil, err 91 } 92 93 return Simplify(q), nil 94} 95 96// parseExpr parses a single expression, returning the result, and the 97// number of bytes consumed. 98func parseExpr(in []byte) (Q, int, error) { 99 b := in[:] 100 var expr Q 101 for len(b) > 0 && isSpace(b[0]) { 102 b = b[1:] 103 } 104 105 tok, err := nextToken(b) 106 if err != nil { 107 return nil, 0, err 108 } 109 if tok == nil { 110 return nil, 0, nil 111 } 112 b = b[len(tok.Input):] 113 114 text := string(tok.Text) 115 switch tok.Type { 116 case tokCase: 117 switch text { 118 case "yes": 119 case "no": 120 case "auto": 121 default: 122 return nil, 0, fmt.Errorf("query: unknown case argument %q, want {yes,no,auto}", text) 123 } 124 expr = &caseQ{text} 125 case tokRepo: 126 r, err := regexp.Compile(text) 127 if err != nil { 128 return nil, 0, err 129 } 130 131 expr = &Repo{r} 132 case tokArchived: 133 switch text { 134 case "yes": 135 expr = RawConfig(RcOnlyArchived) 136 case "no": 137 expr = RawConfig(RcNoArchived) 138 default: 139 return nil, 0, fmt.Errorf("query: unknown archived argument %q, want {yes,no}", text) 140 } 141 case tokFork: 142 switch text { 143 case "yes": 144 expr = RawConfig(RcOnlyForks) 145 case "no": 146 expr = RawConfig(RcNoForks) 147 default: 148 return nil, 0, fmt.Errorf("query: unknown fork argument %q, want {yes,no}", text) 149 } 150 case tokPublic: 151 switch text { 152 case "yes": 153 expr = RawConfig(RcOnlyPublic) 154 case "no": 155 expr = RawConfig(RcOnlyPrivate) 156 default: 157 return nil, 0, fmt.Errorf("query: unknown public argument %q, want {yes,no}", text) 158 } 159 case tokBranch: 160 expr = &Branch{Pattern: text} 161 case tokText, tokRegex: 162 q, err := RegexpQuery(text, false, false) 163 if err != nil { 164 return nil, 0, err 165 } 166 expr = q 167 case tokFile: 168 q, err := RegexpQuery(text, false, true) 169 if err != nil { 170 return nil, 0, err 171 } 172 expr = q 173 case tokContent: 174 q, err := RegexpQuery(text, true, false) 175 if err != nil { 176 return nil, 0, err 177 } 178 expr = q 179 case tokLang: 180 canonical, ok := languages.GetLanguageByAlias(text) 181 if !ok { 182 expr = &Const{false} 183 } else { 184 expr = &Language{Language: canonical} 185 } 186 187 case tokSym: 188 if text == "" { 189 return nil, 0, fmt.Errorf("the sym: atom must have an argument") 190 } 191 192 q, err := RegexpQuery(text, false, false) 193 if err != nil { 194 return nil, 0, err 195 } 196 197 expr = &Symbol{q} 198 case tokParenClose: 199 // Caller must consume paren. 200 expr = nil 201 202 case tokParenOpen: 203 qs, n, err := parseExprList(b) 204 b = b[n:] 205 if err != nil { 206 return nil, 0, err 207 } 208 209 pTok, err := nextToken(b) 210 if err != nil { 211 return nil, 0, err 212 } 213 if pTok == nil || pTok.Type != tokParenClose { 214 return nil, 0, fmt.Errorf("query: missing close paren, got token %v", pTok) 215 } 216 217 b = b[len(pTok.Input):] 218 expr, err = parseOperators(qs) 219 if err != nil { 220 return nil, 0, err 221 } 222 case tokNegate: 223 subQ, n, err := parseExpr(b) 224 if err != nil { 225 return nil, 0, err 226 } 227 if subQ == nil { 228 return nil, 0, fmt.Errorf("query: '-' operator needs an argument") 229 } 230 b = b[n:] 231 expr = &Not{subQ} 232 233 case tokType: 234 var t uint8 235 switch text { 236 case "filematch": 237 t = TypeFileMatch 238 case "filename", "file": 239 t = TypeFileName 240 case "repo": 241 t = TypeRepo 242 default: 243 return nil, 0, fmt.Errorf("query: unknown type argument %q, want {filematch,filename,repo}", text) 244 } 245 // Later we will lift this into a root, like we do for caseQ 246 expr = &Type{Type: t, Child: nil} 247 case tokMeta: 248 // Split on ':' to separate field and value 249 parts := bytes.SplitN([]byte(text), []byte(":"), 2) 250 if len(parts) != 2 { 251 return nil, 0, fmt.Errorf("query: invalid meta field syntax %q", text) 252 } 253 field := string(parts[0]) 254 valuePattern := string(parts[1]) 255 re, err := regexp.Compile(valuePattern) 256 if err != nil { 257 return nil, 0, fmt.Errorf("query: invalid regexp in meta value: %v", err) 258 } 259 expr = &Meta{ 260 Field: field, 261 Value: re, 262 } 263 } 264 265 return expr, len(in) - len(b), nil 266} 267 268const regexpFlags syntax.Flags = syntax.ClassNL | syntax.PerlX | syntax.UnicodeGroups 269 270// RegexpQuery parses an atom into either a regular expression, or a 271// simple substring atom. 272func RegexpQuery(text string, content, file bool) (Q, error) { 273 var expr Q 274 275 r, err := syntax.Parse(text, regexpFlags) 276 if err != nil { 277 return nil, err 278 } 279 280 r = OptimizeRegexp(r, regexpFlags) 281 282 if r.Op == syntax.OpLiteral { 283 expr = &Substring{ 284 Pattern: string(r.Rune), 285 FileName: file, 286 Content: content, 287 } 288 } else { 289 expr = &Regexp{ 290 Regexp: r, 291 FileName: file, 292 Content: content, 293 } 294 } 295 296 return expr, nil 297} 298 299// parseOperators interprets the orOperator in a list of queries. 300func parseOperators(in []Q) (Q, error) { 301 top := &Or{} 302 cur := &And{} 303 304 seenOr := false 305 for _, q := range in { 306 if _, ok := q.(*orOperator); ok { 307 seenOr = true 308 if len(cur.Children) == 0 { 309 return nil, fmt.Errorf("query: OR operator should have operand") 310 } 311 top.Children = append(top.Children, cur) 312 cur = &And{} 313 } else { 314 cur.Children = append(cur.Children, q) 315 } 316 } 317 318 if seenOr && len(cur.Children) == 0 { 319 return nil, fmt.Errorf("query: OR operator should have operand") 320 } 321 top.Children = append(top.Children, cur) 322 return top, nil 323} 324 325// parseExprList parses a list of query expressions. It is the 326// workhorse of the Parse function. 327func parseExprList(in []byte) ([]Q, int, error) { 328 b := in[:] 329 var qs []Q 330 for len(b) > 0 { 331 for len(b) > 0 && isSpace(b[0]) { 332 b = b[1:] 333 } 334 tok, _ := nextToken(b) 335 if tok != nil && tok.Type == tokParenClose { 336 break 337 } else if tok != nil && tok.Type == tokOr { 338 qs = append(qs, &orOperator{}) 339 b = b[len(tok.Input):] 340 continue 341 } 342 343 q, n, err := parseExpr(b) 344 if err != nil { 345 return nil, 0, err 346 } 347 348 if q == nil { 349 // eof or a ')' 350 break 351 } 352 qs = append(qs, q) 353 b = b[n:] 354 } 355 356 setCase := "auto" 357 newQS := qs[:0] 358 typeT := uint8(100) 359 for _, q := range qs { 360 switch s := q.(type) { 361 case *caseQ: 362 setCase = s.Flavor 363 case *Type: 364 if s.Type < typeT { 365 typeT = s.Type 366 } 367 default: 368 newQS = append(newQS, q) 369 } 370 } 371 qs = mapQueryList(newQS, func(q Q) Q { 372 if sc, ok := q.(setCaser); ok { 373 sc.setCase(setCase) 374 } 375 return q 376 }) 377 if typeT != 100 { 378 qs = []Q{&Type{Type: typeT, Child: NewAnd(qs...)}} 379 } 380 return qs, len(in) - len(b), nil 381} 382 383type token struct { 384 Type int 385 // The value of the token 386 Text []byte 387 388 // The input that we consumed to form the token. 389 Input []byte 390} 391 392func (t *token) String() string { 393 return fmt.Sprintf("%s:%q", tokNames[t.Type], t.Text) 394} 395 396// token types. 397const ( 398 tokText = 0 399 tokFile = 1 400 tokRepo = 2 401 tokCase = 3 402 tokBranch = 4 403 tokParenOpen = 5 404 tokParenClose = 6 405 tokError = 7 406 tokNegate = 8 407 tokRegex = 9 408 tokOr = 10 409 tokContent = 11 410 tokLang = 12 411 tokSym = 13 412 tokType = 14 413 tokArchived = 15 414 tokPublic = 16 415 tokFork = 17 416 tokMeta = 18 417) 418 419var tokNames = map[int]string{ 420 tokArchived: "Archived", 421 tokBranch: "Branch", 422 tokCase: "Case", 423 tokError: "Error", 424 tokFile: "File", 425 tokFork: "Fork", 426 tokNegate: "Negate", 427 tokOr: "Or", 428 tokParenClose: "ParenClose", 429 tokParenOpen: "ParenOpen", 430 tokPublic: "Public", 431 tokRegex: "Regex", 432 tokRepo: "Repo", 433 tokText: "Text", 434 tokLang: "Language", 435 tokSym: "Symbol", 436 tokType: "Type", 437 tokMeta: "Meta", 438} 439 440var prefixes = map[string]int{ 441 "archived:": tokArchived, 442 "b:": tokBranch, 443 "branch:": tokBranch, 444 "c:": tokContent, 445 "case:": tokCase, 446 "content:": tokContent, 447 "f:": tokFile, 448 "file:": tokFile, 449 "fork:": tokFork, 450 "public:": tokPublic, 451 "r:": tokRepo, 452 "regex:": tokRegex, 453 "repo:": tokRepo, 454 "lang:": tokLang, 455 "sym:": tokSym, 456 "t:": tokType, 457 "type:": tokType, 458 "meta.": tokMeta, 459} 460 461var reservedWords = map[string]int{ 462 "or": tokOr, 463} 464 465func (t *token) setType() { 466 // After we consumed the input, we have to interpret some of the text, 467 // eg. to distinguish between ")" the text and ) the query grouping 468 // parenthesis. 469 if len(t.Text) == 1 && t.Text[0] == '(' { 470 t.Type = tokParenOpen 471 } 472 if len(t.Text) == 1 && t.Text[0] == ')' { 473 t.Type = tokParenClose 474 } 475 476 for w, typ := range reservedWords { 477 if string(t.Text) == w && string(t.Input) == w { 478 t.Type = typ 479 break 480 } 481 } 482 483 for pref, typ := range prefixes { 484 if !bytes.HasPrefix(t.Input, []byte(pref)) { 485 continue 486 } 487 488 t.Text = t.Text[len(pref):] 489 t.Type = typ 490 break 491 } 492} 493 494// nextToken returns the next token from the given input. 495func nextToken(in []byte) (*token, error) { 496 left := in[:] 497 parenCount := 0 498 var cur token 499 if len(left) == 0 { 500 return nil, nil 501 } 502 503 if left[0] == '-' { 504 return &token{ 505 Type: tokNegate, 506 Text: []byte{'-'}, 507 Input: in[:1], 508 }, nil 509 } 510 511 foundSpace := false 512 513loop: 514 for len(left) > 0 { 515 c := left[0] 516 switch c { 517 case '(': 518 parenCount++ 519 cur.Text = append(cur.Text, c) 520 left = left[1:] 521 case ')': 522 if parenCount == 0 { 523 if len(cur.Text) == 0 { 524 cur.Text = []byte{')'} 525 left = left[1:] 526 } 527 break loop 528 } 529 530 cur.Text = append(cur.Text, c) 531 left = left[1:] 532 parenCount-- 533 534 case '"': 535 t, n, err := parseStringLiteral(left) 536 if err != nil { 537 return nil, err 538 } 539 cur.Text = append(cur.Text, t...) 540 left = left[n:] 541 case '\\': 542 left = left[1:] 543 if len(left) == 0 { 544 return nil, fmt.Errorf("query: lone \\ at end") 545 } 546 c := left[0] 547 cur.Text = append(cur.Text, '\\', c) 548 left = left[1:] 549 550 case ' ', '\n', '\t': 551 if parenCount > 0 { 552 foundSpace = true 553 } 554 break loop 555 default: 556 cur.Text = append(cur.Text, c) 557 left = left[1:] 558 } 559 } 560 561 if len(cur.Text) == 0 { 562 return nil, nil 563 } 564 565 if foundSpace && cur.Text[0] == '(' { 566 cur.Text = cur.Text[:1] 567 cur.Input = in[:1] 568 } else { 569 cur.Input = in[:len(in)-len(left)] 570 } 571 cur.setType() 572 return &cur, nil 573}