fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 3.2 kB View raw
1package index 2 3import ( 4 "errors" 5 6 "github.com/go-enry/go-enry/v2" 7) 8 9// FileCategory represents the category of a file, as determined by go-enry. It is non-exhaustive 10// but tries to the major cases like whether the file is a test, generated, etc. 11// 12// A file's category is used in search scoring to determine the weight of a file match. 13type FileCategory byte 14 15const ( 16 // FileCategoryMissing is a sentinel value that indicates we never computed the file category during indexing 17 // (which means we're reading from an old index version). This value can never be written to the index. 18 FileCategoryMissing FileCategory = iota 19 FileCategoryDefault 20 FileCategoryTest 21 FileCategoryVendored 22 FileCategoryGenerated 23 FileCategoryConfig 24 FileCategoryDotFile 25 FileCategoryBinary 26 FileCategoryDocumentation 27) 28 29func DetermineFileCategory(doc *Document) { 30 if doc.SkipReason == SkipReasonBinary { 31 doc.Category = FileCategoryBinary 32 return 33 } 34 35 name := doc.Name 36 content := doc.Content 37 38 // If this document was skipped (too large, binary, or missing from the repo), 39 // guess the category based on the filename to avoid examining the contents. 40 // Note: passing nil content is allowed by the go-enry contract. 41 if doc.SkipReason == SkipReasonTooLarge || doc.SkipReason == SkipReasonBinary || doc.SkipReason == SkipReasonMissing { 42 content = nil 43 } 44 45 category := FileCategoryDefault 46 if enry.IsTest(name) { 47 category = FileCategoryTest 48 } else if enry.IsDotFile(name) { 49 category = FileCategoryDotFile 50 } else if enry.IsVendor(name) { 51 category = FileCategoryVendored 52 } else if enry.IsGenerated(name, content) { 53 category = FileCategoryGenerated 54 } else if enry.IsConfiguration(name) { 55 category = FileCategoryConfig 56 } else if enry.IsDocumentation(name) { 57 category = FileCategoryDocumentation 58 } 59 60 doc.Category = category 61} 62 63// lowPriority returns true if this file category is considered 'low priority'. This is used 64// in search scoring to down-weight matches in these files. 65func (c FileCategory) lowPriority() bool { 66 return c == FileCategoryTest || c == FileCategoryVendored || c == FileCategoryGenerated || c == FileCategoryBinary 67} 68 69func (c FileCategory) encode() (byte, error) { 70 switch c { 71 case FileCategoryMissing: 72 return 0, errors.New("cannot encode missing file category") 73 case FileCategoryDefault: 74 return 1, nil 75 case FileCategoryTest: 76 return 2, nil 77 case FileCategoryVendored: 78 return 3, nil 79 case FileCategoryGenerated: 80 return 4, nil 81 case FileCategoryConfig: 82 return 5, nil 83 case FileCategoryDotFile: 84 return 6, nil 85 case FileCategoryDocumentation: 86 return 7, nil 87 case FileCategoryBinary: 88 return 8, nil 89 default: 90 return 0, errors.New("unrecognized file category") 91 } 92} 93 94func decodeCategory(c byte) (FileCategory, error) { 95 switch c { 96 case 1: 97 return FileCategoryDefault, nil 98 case 2: 99 return FileCategoryTest, nil 100 case 3: 101 return FileCategoryVendored, nil 102 case 4: 103 return FileCategoryGenerated, nil 104 case 5: 105 return FileCategoryConfig, nil 106 case 6: 107 return FileCategoryDotFile, nil 108 case 7: 109 return FileCategoryDocumentation, nil 110 case 8: 111 return FileCategoryBinary, nil 112 default: 113 return FileCategoryMissing, errors.New("unrecognized file category") 114 } 115}