fork of https://github.com/sourcegraph/zoekt
1package index
2
3import (
4 "errors"
5
6 "github.com/go-enry/go-enry/v2"
7)
8
// FileCategory classifies a file into a coarse bucket (test, generated, vendored, ...), as
// determined by go-enry. The set of categories is non-exhaustive, but tries to cover the major
// cases.
//
// A file's category is used in search scoring to determine the weight of a file match.
//
// The in-memory values below are not the on-disk representation: use encode and decodeCategory
// to convert to and from the byte stored in the index.
type FileCategory byte

const (
	// FileCategoryMissing is a sentinel value that indicates we never computed the file category
	// during indexing (which means we're reading from an old index version). This value can never
	// be written to the index.
	FileCategoryMissing = FileCategory(iota)
	// FileCategoryDefault is assigned by DetermineFileCategory when no other category applies.
	FileCategoryDefault
	// FileCategoryTest marks files that enry.IsTest reports as tests.
	FileCategoryTest
	// FileCategoryVendored marks files that enry.IsVendor reports as vendored.
	FileCategoryVendored
	// FileCategoryGenerated marks files that enry.IsGenerated reports as generated.
	FileCategoryGenerated
	// FileCategoryConfig marks files that enry.IsConfiguration reports as configuration.
	FileCategoryConfig
	// FileCategoryDotFile marks files that enry.IsDotFile reports as dot files.
	FileCategoryDotFile
	// FileCategoryBinary marks documents that were skipped during indexing for being binary.
	FileCategoryBinary
	// FileCategoryDocumentation marks files that enry.IsDocumentation reports as documentation.
	FileCategoryDocumentation
)
28
29func DetermineFileCategory(doc *Document) {
30 if doc.SkipReason == SkipReasonBinary {
31 doc.Category = FileCategoryBinary
32 return
33 }
34
35 name := doc.Name
36 content := doc.Content
37
38 // If this document was skipped because it was too large, just guess the category based on the filename to avoid
39 // examining the contents. Note: passing nil content is allowed by the go-enry contract.
40 if doc.SkipReason == SkipReasonTooLarge || doc.SkipReason == SkipReasonBinary {
41 content = nil
42 }
43
44 category := FileCategoryDefault
45 if enry.IsTest(name) {
46 category = FileCategoryTest
47 } else if enry.IsDotFile(name) {
48 category = FileCategoryDotFile
49 } else if enry.IsVendor(name) {
50 category = FileCategoryVendored
51 } else if enry.IsGenerated(name, content) {
52 category = FileCategoryGenerated
53 } else if enry.IsConfiguration(name) {
54 category = FileCategoryConfig
55 } else if enry.IsDocumentation(name) {
56 category = FileCategoryDocumentation
57 }
58
59 doc.Category = category
60}
61
62// lowPriority returns true if this file category is considered 'low priority'. This is used
63// in search scoring to down-weight matches in these files.
64func (c FileCategory) lowPriority() bool {
65 return c == FileCategoryTest || c == FileCategoryVendored || c == FileCategoryGenerated || c == FileCategoryBinary
66}
67
68func (c FileCategory) encode() (byte, error) {
69 switch c {
70 case FileCategoryMissing:
71 return 0, errors.New("cannot encode missing file category")
72 case FileCategoryDefault:
73 return 1, nil
74 case FileCategoryTest:
75 return 2, nil
76 case FileCategoryVendored:
77 return 3, nil
78 case FileCategoryGenerated:
79 return 4, nil
80 case FileCategoryConfig:
81 return 5, nil
82 case FileCategoryDotFile:
83 return 6, nil
84 case FileCategoryDocumentation:
85 return 7, nil
86 case FileCategoryBinary:
87 return 8, nil
88 default:
89 return 0, errors.New("unrecognized file category")
90 }
91}
92
93func decodeCategory(c byte) (FileCategory, error) {
94 switch c {
95 case 1:
96 return FileCategoryDefault, nil
97 case 2:
98 return FileCategoryTest, nil
99 case 3:
100 return FileCategoryVendored, nil
101 case 4:
102 return FileCategoryGenerated, nil
103 case 5:
104 return FileCategoryConfig, nil
105 case 6:
106 return FileCategoryDotFile, nil
107 case 7:
108 return FileCategoryDocumentation, nil
109 case 8:
110 return FileCategoryBinary, nil
111 default:
112 return FileCategoryMissing, errors.New("unrecognized file category")
113 }
114}