fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

index: support searching for symbols

Only substring queries are supported for now. Symbol search is implemented
by intersecting the candidate matches produced by the trigram search with
the document sections.

Change-Id: I4f11a3574c7ec979cbbd4d35ac2459da7e318958

+94
+44
index_test.go
··· 1570 1570 t.Fatalf("got %v, want 2 results in f3", res.Files) 1571 1571 } 1572 1572 } 1573 + 1574 + func TestSymbolAtom(t *testing.T) { 1575 + content := []byte("bla\nsymblabla\nbla") 1576 + // ----------------0123 456789012 1577 + 1578 + b := testIndexBuilder(t, &Repository{Name: "reponame"}, 1579 + Document{ 1580 + Name: "f1", 1581 + Content: content, 1582 + Symbols: []DocumentSection{{4, 12}}, 1583 + }, 1584 + ) 1585 + q := &query.Symbol{&query.Substring{Pattern: "bla"}} 1586 + res := searchForTest(t, b, q) 1587 + if len(res.Files) != 1 && len(res.Files[0].LineMatches) != 1 { 1588 + t.Fatalf("got %v, want 1 line in 1 file", res.Files) 1589 + } 1590 + m := res.Files[0].LineMatches[0].LineFragments[0] 1591 + if m.Offset != 7 || m.MatchLength != 3 { 1592 + t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength) 1593 + } 1594 + } 1595 + 1596 + func TestSymbolAtomExact(t *testing.T) { 1597 + content := []byte("bla\nsym\nbla\nsym\nasymb") 1598 + // ----------------0123 4567 89012 1599 + 1600 + b := testIndexBuilder(t, &Repository{Name: "reponame"}, 1601 + Document{ 1602 + Name: "f1", 1603 + Content: content, 1604 + Symbols: []DocumentSection{{4, 7}}, 1605 + }, 1606 + ) 1607 + q := &query.Symbol{&query.Substring{Pattern: "sym"}} 1608 + res := searchForTest(t, b, q) 1609 + if len(res.Files) != 1 && len(res.Files[0].LineMatches) != 1 { 1610 + t.Fatalf("got %v, want 1 line in 1 file", res.Files) 1611 + } 1612 + m := res.Files[0].LineMatches[0].LineFragments[0] 1613 + if m.Offset != 4 { 1614 + t.Fatalf("got offset %d, want 7", m.Offset) 1615 + } 1616 + }
+50
matchtree.go
··· 532 532 return &docMatchTree{ 533 533 docs: docs, 534 534 }, nil 535 + 536 + case *query.Symbol: 537 + mt, err := d.newSubstringMatchTree(s.Atom, stats) 538 + if err != nil { 539 + return nil, err 540 + } 541 + 542 + if _, ok := mt.(*regexpMatchTree); ok { 543 + return nil, fmt.Errorf("regexps and short queries not implemented for symbol search") 544 + } 545 + subMT, ok := mt.(*substrMatchTree) 546 + if !ok { 547 + return nil, fmt.Errorf("found %T inside query.Symbol", mt) 548 + } 549 + 550 + subMT.cands = d.trimByDocSection(s.Atom, subMT.cands, d.runeDocSections) 551 + return subMT, nil 535 552 } 536 553 log.Panicf("type %T", q) 537 554 return nil, nil ··· 565 582 stats.IndexBytesLoaded += int64(result.bytesRead) 566 583 return st, nil 567 584 } 585 + 586 + func (d *indexData) trimByDocSection(q *query.Substring, ms []*candidateMatch, secs []DocumentSection) []*candidateMatch { 587 + trimmed := ms[:0] 588 + 589 + patSize := utf8.RuneCount([]byte(q.Pattern)) 590 + for len(secs) > 0 && len(ms) > 0 { 591 + var fileStart uint32 592 + if ms[0].file > 0 { 593 + fileStart = d.fileEndRunes[ms[0].file-1] 594 + } 595 + 596 + start := fileStart + ms[0].runeOffset 597 + end := start + uint32(patSize) 598 + if start >= secs[0].End { 599 + secs = secs[1:] 600 + continue 601 + } 602 + 603 + if start < secs[0].Start { 604 + ms = ms[1:] 605 + continue 606 + } 607 + 608 + // here we have: sec.Start <= start < sec.End 609 + if end <= secs[0].End { 610 + // complete match falls inside section. 611 + trimmed = append(trimmed, ms[0]) 612 + } 613 + 614 + ms = ms[1:] 615 + } 616 + return trimmed 617 + }