fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

all: adjust field order for match structs (#716)

Structs related to matches can occur a lot in memory. As such there is
some value to ensuring the order of the fields is aligned to avoid
unnecessary padding.

The "fieldalignment" tool was used to find these changes.

Test Plan: go test

+84 -48
+40 -37
api.go
··· 34 34 35 35 // FileMatch contains all the matches within a file. 36 36 type FileMatch struct { 37 - // Ranking; the higher, the better. 38 - Score float64 // TODO - hide this field? 37 + FileName string 38 + 39 + // Repository is the globally unique name of the repo of the 40 + // match 41 + Repository string 42 + 43 + // SubRepositoryName is the globally unique name of the repo, 44 + // if it came from a subrepository 45 + SubRepositoryName string 46 + 47 + // SubRepositoryPath holds the prefix where the subrepository 48 + // was mounted. 49 + SubRepositoryPath string 50 + 51 + // Commit SHA1 (hex) of the (sub)repo holding the file. 52 + Version string 53 + 54 + // Detected language of the result. 55 + Language string 39 56 40 57 // For debugging. Needs DebugScore set, but public so tests in 41 58 // other packages can print some diagnostics. 42 59 Debug string 43 60 44 - FileName string 45 - 46 - // Repository is the globally unique name of the repo of the 47 - // match 48 - Repository string 49 - Branches []string 61 + Branches []string 50 62 51 63 // One of LineMatches or ChunkMatches will be returned depending on whether 52 64 // the SearchOptions.ChunkMatches is set. 53 65 LineMatches []LineMatch 54 66 ChunkMatches []ChunkMatch 55 67 56 - // RepositoryID is a Sourcegraph extension. This is the ID of Repository in 57 - // Sourcegraph. 58 - RepositoryID uint32 59 - 60 - // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to 61 - // order results from different repositories relative to each other. 62 - RepositoryPriority float64 63 - 64 68 // Only set if requested 65 69 Content []byte 66 70 67 71 // Checksum of the content. 68 72 Checksum []byte 69 73 70 - // Detected language of the result. 71 - Language string 74 + // Ranking; the higher, the better. 75 + Score float64 // TODO - hide this field? 
72 76 73 - // SubRepositoryName is the globally unique name of the repo, 74 - // if it came from a subrepository 75 - SubRepositoryName string 76 - 77 - // SubRepositoryPath holds the prefix where the subrepository 78 - // was mounted. 79 - SubRepositoryPath string 77 + // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to 78 + // order results from different repositories relative to each other. 79 + RepositoryPriority float64 80 80 81 - // Commit SHA1 (hex) of the (sub)repo holding the file. 82 - Version string 81 + // RepositoryID is a Sourcegraph extension. This is the ID of Repository in 82 + // Sourcegraph. 83 + RepositoryID uint32 83 84 } 84 85 85 86 func (m *FileMatch) sizeBytes() (sz uint64) { ··· 134 135 // ChunkMatch is a set of non-overlapping matches within a contiguous range of 135 136 // lines in the file. 136 137 type ChunkMatch struct { 138 + DebugScore string 139 + 137 140 // Content is a contiguous range of complete lines that fully contains Ranges. 138 141 Content []byte 139 - // ContentStart is the location (inclusive) of the beginning of content 140 - // relative to the beginning of the file. It will always be at the 141 - // beginning of a line (Column will always be 1). 142 - ContentStart Location 143 - 144 - // FileName indicates whether this match is a match on the file name, in 145 - // which case Content will contain the file name. 146 - FileName bool 147 142 148 143 // Ranges is a set of matching ranges within this chunk. Each range is relative 149 144 // to the beginning of the file (not the beginning of Content). ··· 153 148 // its length will equal that of Ranges. Any of its elements may be nil. 154 149 SymbolInfo []*Symbol 155 150 156 - Score float64 157 - DebugScore string 151 + // FileName indicates whether this match is a match on the file name, in 152 + // which case Content will contain the file name. 
153 + FileName bool 154 + 155 + // ContentStart is the location (inclusive) of the beginning of content 156 + // relative to the beginning of the file. It will always be at the 157 + // beginning of a line (Column will always be 1). 158 + ContentStart Location 159 + 160 + Score float64 158 161 } 159 162 160 163 func (cm *ChunkMatch) sizeBytes() (sz uint64) {
+29
api_test.go
··· 17 17 import ( 18 18 "bytes" 19 19 "encoding/gob" 20 + "reflect" 20 21 "strings" 21 22 "testing" 22 23 "time" ··· 136 137 t.Fatalf("want %d, got %d", wantBytes, cm.sizeBytes()) 137 138 } 138 139 } 140 + 141 + func TestMatchSize(t *testing.T) { 142 + cases := []struct { 143 + v any 144 + size int 145 + }{{ 146 + v: FileMatch{}, 147 + size: 256, 148 + }, { 149 + v: ChunkMatch{}, 150 + size: 112, 151 + }, { 152 + v: candidateMatch{}, 153 + size: 72, 154 + }, { 155 + v: candidateChunk{}, 156 + size: 40, 157 + }} 158 + for _, c := range cases { 159 + got := reflect.TypeOf(c.v).Size() 160 + if int(got) != c.size { 161 + t.Errorf(`sizeof struct %T has changed from %d to %d. 162 + These are match structs that occur a lot in memory, so we optimize size. 163 + When changing, please ensure there isn't unnecessary padding via the 164 + tool fieldalignment then update this test.`, c.v, c.size, got) 165 + } 166 + } 167 + }
+3 -3
contentprovider.go
··· 369 369 } 370 370 371 371 type candidateChunk struct { 372 + candidates []*candidateMatch 372 373 firstLine uint32 // 1-based, inclusive 373 374 lastLine uint32 // 1-based, inclusive 374 375 minOffset uint32 // 0-based, inclusive 375 376 maxOffset uint32 // 0-based, exclusive 376 - candidates []*candidateMatch 377 377 } 378 378 379 379 // chunkCandidates groups a set of sorted, non-overlapping candidate matches by line number. Adjacent ··· 566 566 567 567 func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, language string, debug bool) (float64, string) { 568 568 type debugScore struct { 569 - score float64 570 569 what string 570 + score float64 571 571 } 572 572 573 573 score := &debugScore{} ··· 654 654 655 655 func (p *contentProvider) matchScore(secs []DocumentSection, m *LineMatch, language string, debug bool) (float64, string) { 656 656 type debugScore struct { 657 - score float64 658 657 what string 658 + score float64 659 659 } 660 660 661 661 score := &debugScore{}
+2 -2
hititer.go
··· 35 35 36 36 // distanceHitIterator looks for hits at a fixed distance apart. 37 37 type distanceHitIterator struct { 38 - started bool 39 - distance uint32 40 38 i1 hitIterator 41 39 i2 hitIterator 40 + distance uint32 41 + started bool 42 42 } 43 43 44 44 func (i *distanceHitIterator) String() string {
+10 -6
matchiter.go
··· 20 20 ) 21 21 22 22 // candidateMatch is a candidate match for a substring. 23 + // 24 + // Note: a lot of these can be in memory, so think about fieldalignment when 25 + // modify the fields of this structure. 23 26 type candidateMatch struct { 24 - caseSensitive bool 25 - fileName bool 26 - symbol bool 27 - symbolIdx uint32 28 - 29 27 substrBytes []byte 30 28 substrLowered []byte 31 29 32 - file uint32 30 + file uint32 31 + symbolIdx uint32 33 32 34 33 // Offsets are relative to the start of the filename or file contents. 35 34 runeOffset uint32 36 35 byteOffset uint32 37 36 byteMatchSz uint32 37 + 38 + // bools at end for struct field alignment 39 + caseSensitive bool 40 + fileName bool 41 + symbol bool 38 42 } 39 43 40 44 // Matches content against the substring, and populates byteMatchSz on success