fork of https://github.com/sourcegraph/zoekt
1package languages
2
3import (
4 "slices"
5 "strings"
6 "testing"
7
8 "github.com/go-enry/go-enry/v2"
9 enrydata "github.com/go-enry/go-enry/v2/data"
10 "github.com/stretchr/testify/require"
11)
12
// nonAmbiguousExtensionsCheck maps a file extension to the one language name
// it is expected to resolve to. These are the languages/extensions that we
// don't want to regress.
var nonAmbiguousExtensionsCheck = map[string]string{
	// Apex
	".apex":    "Apex",
	".apxt":    "Apex",
	".apxc":    "Apex",
	".trigger": "Apex",

	// JavaScript / TypeScript family
	".js": "JavaScript",
	// Linguist removed JSX (but not TSX) as a separate language:
	// https://github.com/github-linguist/linguist/pull/5133
	".jsx": "JavaScript",
	".ts":  "TypeScript",
	".tsx": "TSX",

	// C++
	// ".c" is not included as we consider it ambiguous (C or C++) (SPLF-1309)
	".cpp": "C++",
	".cxx": "C++",
	".hpp": "C++",
	".hxx": "C++",

	// Other programming languages
	".cs":    "C#",
	".dart":  "Dart",
	".fs":    "F#",
	".go":    "Go",
	".java":  "Java",
	".kt":    "Kotlin",
	".lua":   "Lua",
	".magik": "Magik",
	".py":    "Python",
	".rb":    "Ruby",
	".rs":    "Rust",
	".scala": "Scala",
	".swift": "Swift",

	// Markup, style sheet, and data formats
	".css":  "CSS",
	".html": "HTML",
	".json": "JSON",
	".pkl":  "Pkl",
	".xml":  "XML",
	".yml":  "YAML",
}
50
51func TestGetLanguageByAlias_UnsupportedLanguages(t *testing.T) {
52 for alias, name := range unsupportedByEnryAliasMap {
53 resName, _ := GetLanguageByNameOrAlias(alias)
54 require.Equal(t, name, resName,
55 "maybe a typo in `unsupportedByEnryAliasMap`?")
56 }
57}
58
59func TestGetLanguageByAlias_NonAmbiguousLanguages(t *testing.T) {
60 for _, language := range nonAmbiguousExtensionsCheck {
61 _, ok := GetLanguageByNameOrAlias(language)
62 require.True(t, ok,
63 "unable to find language %s in go-enry", language)
64 }
65}
66
67func TestGetLanguageExtensions_UnsupportedExtensions(t *testing.T) {
68 for language, exts := range unsupportedByEnryNameToExtensionMap {
69 extensions := GetLanguageExtensions(language)
70 for _, ext := range exts {
71 require.Contains(t, extensions, ext,
72 "maybe a typo in `unsupportedByEnryNameToExtensionMap`?")
73 }
74 }
75}
76
77func TestGetLanguageExtensions_NonAmbiguousExtensions(t *testing.T) {
78 langMap := reverseMap(nonAmbiguousExtensionsCheck)
79 for language, exts := range langMap {
80 extensions := GetLanguageExtensions(language)
81 for _, ext := range exts {
82 require.Contains(t, extensions, ext,
83 "If this test fails when updating enry, maybe `overrideAmbiguousExtensionsMap` needs updating")
84 }
85 }
86}
87
88func TestGetLanguagesByExtension_UnsupportedExtensions(t *testing.T) {
89 for ext, language := range unsupportedByEnryExtensionToNameMap {
90 filename := "foo" + ext
91 languages, _ := getLanguagesByExtension(filename)
92 require.Contains(t, languages, language,
93 "maybe a typo in `unsupportedByEnryExtensionToNameMap`?")
94 }
95}
96
97func TestGetLanguagesByExtension_OverrideExtensions(t *testing.T) {
98 for ext, language := range overrideAmbiguousExtensionsMap {
99 filename := "foo" + ext
100 enryLangs := enry.GetLanguagesByExtension(filename, nil, nil)
101
102 require.Contains(t, enryLangs, language,
103 "maybe a typo in `overrideAmbiguousExtensionsMap`?")
104 require.Greaterf(t, len(enryLangs), 1,
105 "extension %v is not ambiguous according to enry, remove it from `overrideAmbiguousExtensionsMap`",
106 ext)
107 }
108}
109
110func TestGetLanguagesByExtension_NonAmbiguousExtensions(t *testing.T) {
111 for ext, language := range nonAmbiguousExtensionsCheck {
112 filename := "foo" + ext
113 languages, isLikelyBinaryFile := getLanguagesByExtension(filename)
114 require.False(t, isLikelyBinaryFile)
115 require.Equal(t, []string{language}, languages,
116 "If this test fails when updating enry, maybe `overrideAmbiguousExtensionsMap` needs updating")
117 }
118}
119
120func TestGetLanguagesByExtension_BinaryExtensions(t *testing.T) {
121 for _, ext := range []string{".png", ".jpg", ".gif"} {
122 filename := "foo" + ext
123 _, isLikelyBinary := getLanguagesByExtension(filename)
124 require.Truef(t, isLikelyBinary, "filename: %v was not guessed to be binary;"+
125 "bug in extension matching logic in getLanguagesByExtension maybe?",
126 filename)
127 }
128}
129
130func TestExtensionsConsistency(t *testing.T) {
131 for ext, overrideLang := range overrideAmbiguousExtensionsMap {
132 filepath := "foo" + ext
133 enryLangsForExt := enry.GetLanguagesByExtension(filepath, nil, nil)
134 require.Containsf(t, enryLangsForExt, overrideLang, "overrideAmbiguousExtensionsMap maps extension %q to language %q but "+
135 "that mapping is not present in enry's list %v", ext, overrideLang, enryLangsForExt)
136 require.Greaterf(t, len(enryLangsForExt), 1, "overrideAmbiguousExtensionsMap states that"+
137 "%q extension is ambiguous, but only found langs: %v", ext, enryLangsForExt)
138
139 candidates, isLikelyBinary := getLanguagesByExtension(filepath)
140 require.False(t, isLikelyBinary, "ambiguous files are all source code")
141 require.True(t, len(candidates) == 1, "getLanguagesByExtension should respect overrideAmbiguousExtensionsMap")
142
143 shouldBeIgnoredLangsForExt := slices.DeleteFunc(enryLangsForExt, func(s string) bool {
144 return s == overrideLang
145 })
146 for _, shouldBeIgnoredLang := range shouldBeIgnoredLangsForExt {
147 ignoredExts, found := nicheExtensionUsages[shouldBeIgnoredLang]
148 require.Truef(t, found, "expected lang: %q to have an entry in nicheExtensionUsages for consistency with GetLanguagesByExtension", shouldBeIgnoredLang)
149 require.Truef(t, len(ignoredExts) >= 1, "sets in nicheExtensionUsages must be non-empty")
150
151 nonNicheExts := GetLanguageExtensions(shouldBeIgnoredLang)
152 for ignoredExt := range ignoredExts {
153 require.Falsef(t, slices.Contains(nonNicheExts, ignoredExt),
154 "GetLanguageExtensions should not return %q for lang %q for consistency with GetLanguagesByExtension",
155 ignoredExt, shouldBeIgnoredLang)
156 }
157 }
158 }
159}
160
161func TestExtensionsConsistency2(t *testing.T) {
162 for lang := range enrydata.ExtensionsByLanguage {
163 for _, ext := range GetLanguageExtensions(lang) {
164 if strings.Count(ext, ".") > 1 {
165 // Ignore unusual edge cases like .coffee.md for Literate CoffeeScript
166 continue
167 }
168 langsByExt, isLikelyBinary := getLanguagesByExtension("foo" + ext)
169 if !isLikelyBinary {
170 require.Truef(t, slices.Contains(langsByExt, lang),
171 "expected getLanguagesByExtension result %v to contain %q (extension: %q)", langsByExt, lang, ext)
172 }
173 }
174 }
175}
176
177func TestUnsupportedByEnry(t *testing.T) {
178 for lang := range unsupportedByEnryNameToExtensionMap {
179 enry_extensions, found := enrydata.ExtensionsByLanguage[lang]
180 if found {
181 validateLanguageAgainstGoEnry(t, "unsupportedByEnryNameToExtensionMap", enry_extensions, lang)
182 }
183 }
184 for _, lang := range unsupportedByEnryAliasMap {
185 enry_extensions, found := enrydata.ExtensionsByLanguage[lang]
186 if found {
187 validateLanguageAgainstGoEnry(t, "unsupportedByEnryAliasMap", enry_extensions, lang)
188 }
189 }
190 for _, lang := range unsupportedByEnryExtensionToNameMap {
191 enry_extensions, found := enrydata.ExtensionsByLanguage[lang]
192 if found {
193 validateLanguageAgainstGoEnry(t, "unsupportedByEnryExtensionToNameMap", enry_extensions, lang)
194 }
195 }
196}
197
198func validateLanguageAgainstGoEnry(t *testing.T, name string, enryExtensions []string, lang string) {
199 enryExtensions = slices.Clone(enryExtensions)
200 slices.Sort(enryExtensions)
201 sgExtensions := slices.Clone(unsupportedByEnryNameToExtensionMap[lang])
202 slices.Sort(sgExtensions)
203
204 require.NotEqualf(t, enryExtensions, sgExtensions, "looks like language %q is supported by enry with the same extensions; remove it from %q", lang, name)
205}