fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package query 16 17import ( 18 "bytes" 19 "fmt" 20 "log" 21 "regexp/syntax" 22 23 "github.com/grafana/regexp" 24 "github.com/sourcegraph/zoekt/internal/languages" 25) 26 27var _ = log.Printf 28 29// parseStringLiteral parses a string literal, consumes the starting 30// quote too. 31func parseStringLiteral(in []byte) (lit []byte, n int, err error) { 32 left := in[1:] 33 found := false 34 35loop: 36 for len(left) > 0 { 37 c := left[0] 38 left = left[1:] 39 switch c { 40 case '"': 41 found = true 42 break loop 43 case '\\': 44 // TODO - other escape sequences. 45 if len(left) == 0 { 46 return nil, 0, fmt.Errorf("query: missing char after \\") 47 } 48 c = left[0] 49 left = left[1:] 50 51 lit = append(lit, c) 52 default: 53 lit = append(lit, c) 54 } 55 } 56 if !found { 57 return nil, 0, fmt.Errorf("query: unterminated quoted string") 58 } 59 return lit, len(in) - len(left), nil 60} 61 62// orOperator is a placeholder intermediate so we can represent [A, 63// or, B] before we convert it to Or{A, B} 64type orOperator struct{} 65 66func (o *orOperator) String() string { 67 return "orOp" 68} 69 70func isSpace(c byte) bool { 71 return c == ' ' || c == '\t' 72} 73 74// Parse parses a string into a query. 75func Parse(qStr string) (Q, error) { 76 b := []byte(qStr) 77 78 qs, _, err := parseExprList(b) 79 if err != nil { 80 return nil, err 81 } 82 83 q, err := parseOperators(qs) 84 if err != nil { 85 return nil, err 86 } 87 88 return Simplify(q), nil 89} 90 91// parseExpr parses a single expression, returning the result, and the 92// number of bytes consumed. 93func parseExpr(in []byte) (Q, int, error) { 94 b := in[:] 95 var expr Q 96 for len(b) > 0 && isSpace(b[0]) { 97 b = b[1:] 98 } 99 100 tok, err := nextToken(b) 101 if err != nil { 102 return nil, 0, err 103 } 104 if tok == nil { 105 return nil, 0, nil 106 } 107 b = b[len(tok.Input):] 108 109 text := string(tok.Text) 110 switch tok.Type { 111 case tokCase: 112 switch text { 113 case "yes": 114 case "no": 115 case "auto": 116 default: 117 return nil, 0, fmt.Errorf("query: unknown case argument %q, want {yes,no,auto}", text) 118 } 119 expr = &caseQ{text} 120 case tokRepo: 121 r, err := regexp.Compile(text) 122 if err != nil { 123 return nil, 0, err 124 } 125 126 expr = &Repo{r} 127 case tokArchived: 128 switch text { 129 case "yes": 130 expr = RawConfig(RcOnlyArchived) 131 case "no": 132 expr = RawConfig(RcNoArchived) 133 default: 134 return nil, 0, fmt.Errorf("query: unknown archived argument %q, want {yes,no}", text) 135 } 136 case tokFork: 137 switch text { 138 case "yes": 139 expr = RawConfig(RcOnlyForks) 140 case "no": 141 expr = RawConfig(RcNoForks) 142 default: 143 return nil, 0, fmt.Errorf("query: unknown fork argument %q, want {yes,no}", text) 144 } 145 case tokPublic: 146 switch text { 147 case "yes": 148 expr = RawConfig(RcOnlyPublic) 149 case "no": 150 expr = RawConfig(RcOnlyPrivate) 151 default: 152 return nil, 0, fmt.Errorf("query: unknown public argument %q, want {yes,no}", text) 153 } 154 case tokBranch: 155 expr = &Branch{Pattern: text} 156 case tokText, tokRegex: 157 q, err := RegexpQuery(text, false, false) 158 if err != nil { 159 return nil, 0, err 160 } 161 expr = q 162 case tokFile: 163 q, err := RegexpQuery(text, false, true) 164 if err != nil { 165 return nil, 0, err 166 } 167 expr = q 168 case tokContent: 169 q, err := RegexpQuery(text, true, false) 170 if err != nil { 171 return nil, 0, err 172 } 173 expr = q 174 case tokLang: 175 canonical, ok := languages.GetLanguageByAlias(text) 176 if !ok { 177 expr = &Const{false} 178 } else { 179 expr = &Language{Language: canonical} 180 } 181 182 case tokSym: 183 if text == "" { 184 return nil, 0, fmt.Errorf("the sym: atom must have an argument") 185 } 186 187 q, err := RegexpQuery(text, false, false) 188 if err != nil { 189 return nil, 0, err 190 } 191 192 expr = &Symbol{q} 193 case tokParenClose: 194 // Caller must consume paren. 195 expr = nil 196 197 case tokParenOpen: 198 qs, n, err := parseExprList(b) 199 b = b[n:] 200 if err != nil { 201 return nil, 0, err 202 } 203 204 pTok, err := nextToken(b) 205 if err != nil { 206 return nil, 0, err 207 } 208 if pTok == nil || pTok.Type != tokParenClose { 209 return nil, 0, fmt.Errorf("query: missing close paren, got token %v", pTok) 210 } 211 212 b = b[len(pTok.Input):] 213 expr, err = parseOperators(qs) 214 if err != nil { 215 return nil, 0, err 216 } 217 case tokNegate: 218 subQ, n, err := parseExpr(b) 219 if err != nil { 220 return nil, 0, err 221 } 222 if subQ == nil { 223 return nil, 0, fmt.Errorf("query: '-' operator needs an argument") 224 } 225 b = b[n:] 226 expr = &Not{subQ} 227 228 case tokType: 229 var t uint8 230 switch text { 231 case "filematch": 232 t = TypeFileMatch 233 case "filename", "file": 234 t = TypeFileName 235 case "repo": 236 t = TypeRepo 237 default: 238 return nil, 0, fmt.Errorf("query: unknown type argument %q, want {filematch,filename,repo}", text) 239 } 240 // Later we will lift this into a root, like we do for caseQ 241 expr = &Type{Type: t, Child: nil} 242 } 243 244 return expr, len(in) - len(b), nil 245} 246 247const regexpFlags syntax.Flags = syntax.ClassNL | syntax.PerlX | syntax.UnicodeGroups 248 249// RegexpQuery parses an atom into either a regular expression, or a 250// simple substring atom. 251func RegexpQuery(text string, content, file bool) (Q, error) { 252 var expr Q 253 254 r, err := syntax.Parse(text, regexpFlags) 255 if err != nil { 256 return nil, err 257 } 258 259 r = OptimizeRegexp(r, regexpFlags) 260 261 if r.Op == syntax.OpLiteral { 262 expr = &Substring{ 263 Pattern: string(r.Rune), 264 FileName: file, 265 Content: content, 266 } 267 } else { 268 expr = &Regexp{ 269 Regexp: r, 270 FileName: file, 271 Content: content, 272 } 273 } 274 275 return expr, nil 276} 277 278// parseOperators interprets the orOperator in a list of queries. 279func parseOperators(in []Q) (Q, error) { 280 top := &Or{} 281 cur := &And{} 282 283 seenOr := false 284 for _, q := range in { 285 if _, ok := q.(*orOperator); ok { 286 seenOr = true 287 if len(cur.Children) == 0 { 288 return nil, fmt.Errorf("query: OR operator should have operand") 289 } 290 top.Children = append(top.Children, cur) 291 cur = &And{} 292 } else { 293 cur.Children = append(cur.Children, q) 294 } 295 } 296 297 if seenOr && len(cur.Children) == 0 { 298 return nil, fmt.Errorf("query: OR operator should have operand") 299 } 300 top.Children = append(top.Children, cur) 301 return top, nil 302} 303 304// parseExprList parses a list of query expressions. It is the 305// workhorse of the Parse function. 306func parseExprList(in []byte) ([]Q, int, error) { 307 b := in[:] 308 var qs []Q 309 for len(b) > 0 { 310 for len(b) > 0 && isSpace(b[0]) { 311 b = b[1:] 312 } 313 tok, _ := nextToken(b) 314 if tok != nil && tok.Type == tokParenClose { 315 break 316 } else if tok != nil && tok.Type == tokOr { 317 qs = append(qs, &orOperator{}) 318 b = b[len(tok.Input):] 319 continue 320 } 321 322 q, n, err := parseExpr(b) 323 if err != nil { 324 return nil, 0, err 325 } 326 327 if q == nil { 328 // eof or a ')' 329 break 330 } 331 qs = append(qs, q) 332 b = b[n:] 333 } 334 335 setCase := "auto" 336 newQS := qs[:0] 337 typeT := uint8(100) 338 for _, q := range qs { 339 switch s := q.(type) { 340 case *caseQ: 341 setCase = s.Flavor 342 case *Type: 343 if s.Type < typeT { 344 typeT = s.Type 345 } 346 default: 347 newQS = append(newQS, q) 348 } 349 } 350 qs = mapQueryList(newQS, func(q Q) Q { 351 if sc, ok := q.(setCaser); ok { 352 sc.setCase(setCase) 353 } 354 return q 355 }) 356 if typeT != 100 { 357 qs = []Q{&Type{Type: typeT, Child: NewAnd(qs...)}} 358 } 359 return qs, len(in) - len(b), nil 360} 361 362type token struct { 363 Type int 364 // The value of the token 365 Text []byte 366 367 // The input that we consumed to form the token. 368 Input []byte 369} 370 371func (t *token) String() string { 372 return fmt.Sprintf("%s:%q", tokNames[t.Type], t.Text) 373} 374 375// token types. 376const ( 377 tokText = 0 378 tokFile = 1 379 tokRepo = 2 380 tokCase = 3 381 tokBranch = 4 382 tokParenOpen = 5 383 tokParenClose = 6 384 tokError = 7 385 tokNegate = 8 386 tokRegex = 9 387 tokOr = 10 388 tokContent = 11 389 tokLang = 12 390 tokSym = 13 391 tokType = 14 392 tokArchived = 15 393 tokPublic = 16 394 tokFork = 17 395) 396 397var tokNames = map[int]string{ 398 tokArchived: "Archived", 399 tokBranch: "Branch", 400 tokCase: "Case", 401 tokError: "Error", 402 tokFile: "File", 403 tokFork: "Fork", 404 tokNegate: "Negate", 405 tokOr: "Or", 406 tokParenClose: "ParenClose", 407 tokParenOpen: "ParenOpen", 408 tokPublic: "Public", 409 tokRegex: "Regex", 410 tokRepo: "Repo", 411 tokText: "Text", 412 tokLang: "Language", 413 tokSym: "Symbol", 414 tokType: "Type", 415} 416 417var prefixes = map[string]int{ 418 "archived:": tokArchived, 419 "b:": tokBranch, 420 "branch:": tokBranch, 421 "c:": tokContent, 422 "case:": tokCase, 423 "content:": tokContent, 424 "f:": tokFile, 425 "file:": tokFile, 426 "fork:": tokFork, 427 "public:": tokPublic, 428 "r:": tokRepo, 429 "regex:": tokRegex, 430 "repo:": tokRepo, 431 "lang:": tokLang, 432 "sym:": tokSym, 433 "t:": tokType, 434 "type:": tokType, 435} 436 437var reservedWords = map[string]int{ 438 "or": tokOr, 439} 440 441func (t *token) setType() { 442 // After we consumed the input, we have to interpret some of the text, 443 // eg. to distinguish between ")" the text and ) the query grouping 444 // parenthesis. 445 if len(t.Text) == 1 && t.Text[0] == '(' { 446 t.Type = tokParenOpen 447 } 448 if len(t.Text) == 1 && t.Text[0] == ')' { 449 t.Type = tokParenClose 450 } 451 452 for w, typ := range reservedWords { 453 if string(t.Text) == w && string(t.Input) == w { 454 t.Type = typ 455 break 456 } 457 } 458 459 for pref, typ := range prefixes { 460 if !bytes.HasPrefix(t.Input, []byte(pref)) { 461 continue 462 } 463 464 t.Text = t.Text[len(pref):] 465 t.Type = typ 466 break 467 } 468} 469 470// nextToken returns the next token from the given input. 471func nextToken(in []byte) (*token, error) { 472 left := in[:] 473 parenCount := 0 474 var cur token 475 if len(left) == 0 { 476 return nil, nil 477 } 478 479 if left[0] == '-' { 480 return &token{ 481 Type: tokNegate, 482 Text: []byte{'-'}, 483 Input: in[:1], 484 }, nil 485 } 486 487 foundSpace := false 488 489loop: 490 for len(left) > 0 { 491 c := left[0] 492 switch c { 493 case '(': 494 parenCount++ 495 cur.Text = append(cur.Text, c) 496 left = left[1:] 497 case ')': 498 if parenCount == 0 { 499 if len(cur.Text) == 0 { 500 cur.Text = []byte{')'} 501 left = left[1:] 502 } 503 break loop 504 } 505 506 cur.Text = append(cur.Text, c) 507 left = left[1:] 508 parenCount-- 509 510 case '"': 511 t, n, err := parseStringLiteral(left) 512 if err != nil { 513 return nil, err 514 } 515 cur.Text = append(cur.Text, t...) 516 left = left[n:] 517 case '\\': 518 left = left[1:] 519 if len(left) == 0 { 520 return nil, fmt.Errorf("query: lone \\ at end") 521 } 522 c := left[0] 523 cur.Text = append(cur.Text, '\\', c) 524 left = left[1:] 525 526 case ' ', '\n', '\t': 527 if parenCount > 0 { 528 foundSpace = true 529 } 530 break loop 531 default: 532 cur.Text = append(cur.Text, c) 533 left = left[1:] 534 } 535 } 536 537 if len(cur.Text) == 0 { 538 return nil, nil 539 } 540 541 if foundSpace && cur.Text[0] == '(' { 542 cur.Text = cur.Text[:1] 543 cur.Input = in[:1] 544 } else { 545 cur.Input = in[:len(in)-len(left)] 546 } 547 cur.setType() 548 return &cur, nil 549}