fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

copy languages package from Sourcegraph to Zoekt (#979)

We want Zoekt and Sourcegraph to use the same languages package. In this PR we move the languages package from Sourcegraph to Zoekt, so that Zoekt can use it directly and Sourcegraph can import it from Zoekt.

Notes:
- Zoekt doesn't need to fetch content asynchronously, which is why I added a small helper function, `GetLanguagesFromContent`, to make the call sites in Zoekt less awkward.
- Sourcegraph's languages package always classified `.cls` files as Apex, while Zoekt did a content-based check. With this PR we follow Zoekt's approach. Specifically, I removed `.cls` from `unsupportedByEnryExtensionToNameMap`. I added an additional unit test to cover this case.

Test plan:
I appended the test cases from the old Zoekt languages package to the tests I copied over from Sourcegraph.

author
Stefan Hengl
committer
GitHub
date (Aug 21, 2025, 12:02 PM +0200) commit fb492e2d parent 69b7ba71
+1063 -193
+1
go.mod
··· 54 54 golang.org/x/sys v0.30.0 55 55 google.golang.org/grpc v1.69.4 56 56 google.golang.org/protobuf v1.36.3 57 + pgregory.net/rapid v1.2.0 57 58 ) 58 59 59 60 require (
+2
go.sum
··· 576 576 honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 577 577 mvdan.cc/gofumpt v0.4.0 h1:JVf4NN1mIpHogBj7ABpgOyZc65/UUOkKQFkoURsz4MM= 578 578 mvdan.cc/gofumpt v0.4.0/go.mod h1:PljLOHDeZqgS8opHRKLzp2It2VBuSdteAgqUfzMTxlQ= 579 + pgregory.net/rapid v1.2.0 h1:keKAYRcjm+e1F0oAuU5F5+YPAWcyxNNRK2wud503Gnk= 580 + pgregory.net/rapid v1.2.0/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04= 579 581 rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
+14 -10
index/shard_builder.go
··· 23 23 "net/url" 24 24 "os" 25 25 "path/filepath" 26 + "slices" 26 27 "sort" 27 28 "strings" 28 29 "text/template" 29 30 "time" 30 31 "unicode/utf8" 31 32 32 - "slices" 33 - 34 33 "github.com/sourcegraph/zoekt" 35 - "github.com/sourcegraph/zoekt/internal/languages" 34 + "github.com/sourcegraph/zoekt/languages" 36 35 ) 37 36 38 37 var _ = log.Println ··· 404 403 return 405 404 } 406 405 407 - if doc.SkipReason != SkipReasonNone { 408 - // If this document has been skipped, it's likely very large, or it's a non-code file like binary. 409 - // In this case, we just guess the language based on file name to avoid examining the contents. 410 - // Note: passing nil content is allowed by the go-enry contract (the underlying library we use here). 411 - doc.Language = languages.GetLanguage(doc.Name, nil) 412 - } else { 413 - doc.Language = languages.GetLanguage(doc.Name, doc.Content) 406 + // If this document has been skipped (doc.SkipReason != SkipReasonNone), it's 407 + // likely very large, or it's a non-code file like binary. In this case, we just 408 + // guess the language based on the file name to avoid examining the contents. 409 + // Note: passing nil content is allowed by the go-enry contract (the underlying 410 + // library we use here). 411 + var content []byte 412 + if doc.SkipReason == SkipReasonNone { 413 + content = doc.Content 414 + } 415 + langs := languages.GetLanguagesFromContent(doc.Name, content) 416 + if len(langs) > 0 { 417 + doc.Language = langs[0] 414 418 } 415 419 } 416 420
-74
internal/languages/language.go
··· 1 - // This file wraps the logic of go-enry (https://github.com/go-enry/go-enry) to support additional languages. 2 - // go-enry is based off of a package called Linguist (https://github.com/github/linguist) 3 - // and sometimes programming languages may not be supported by Linguist 4 - // or may take a while to get merged in and make it into go-enry. This wrapper 5 - // gives us flexibility to support languages in those cases. We list additional languages 6 - // in this file and remove them once they make it into Linguist and go-enry. 7 - // This logic is similar to what we have in the sourcegraph/sourcegraph repo, in the future 8 - // we plan to refactor both into a common library to share between the two repos. 9 - package languages 10 - 11 - import ( 12 - "path/filepath" 13 - "strings" 14 - 15 - "github.com/go-enry/go-enry/v2" 16 - ) 17 - 18 - var unsupportedByLinguistAliasMap = map[string]string{ 19 - // Extensions for the Apex programming language 20 - // See https://developer.salesforce.com/docs/atlas.en-us.apexcode.meta/apexcode/apex_dev_guide.htm 21 - "apex": "Apex", 22 - // Pkl Configuration Language (https://pkl-lang.org/) 23 - // Add to linguist on 6/7/24 24 - // can remove once go-enry package updates 25 - // to that linguist version 26 - "pkl": "Pkl", 27 - // Magik Language 28 - "magik": "Magik", 29 - } 30 - 31 - var unsupportedByLinguistExtensionToNameMap = map[string]string{ 32 - ".apex": "Apex", 33 - ".apxt": "Apex", 34 - ".apxc": "Apex", 35 - ".cls": "Apex", 36 - ".trigger": "Apex", 37 - // Pkl Configuration Language (https://pkl-lang.org/) 38 - ".pkl": "Pkl", 39 - // Magik Language 40 - ".magik": "Magik", 41 - } 42 - 43 - // getLanguagesByAlias is a replacement for enry.GetLanguagesByAlias 44 - // It supports languages that are missing in linguist 45 - func GetLanguageByAlias(alias string) (language string, ok bool) { 46 - language, ok = enry.GetLanguageByAlias(alias) 47 - if !ok { 48 - normalizedAlias := strings.ToLower(alias) 49 - language, 
ok = unsupportedByLinguistAliasMap[normalizedAlias] 50 - } 51 - 52 - return 53 - } 54 - 55 - // GetLanguage is a replacement for enry.GetLanguage 56 - // to find out the most probable language to return but includes support 57 - // for languages missing from linguist 58 - func GetLanguage(filename string, content []byte) (language string) { 59 - language = enry.GetLanguage(filename, content) 60 - 61 - // If go-enry failed to find language, fall back on our 62 - // internal check for languages missing in linguist 63 - if language == "" { 64 - ext := filepath.Ext(filename) 65 - normalizedExt := strings.ToLower(ext) 66 - if ext == "" { 67 - return 68 - } 69 - if lang, ok := unsupportedByLinguistExtensionToNameMap[normalizedExt]; ok { 70 - language = lang 71 - } 72 - } 73 - return 74 - }
-107
internal/languages/language_test.go
··· 1 - package languages 2 - 3 - import "testing" 4 - 5 - func TestGetLanguageByAlias(t *testing.T) { 6 - tests := []struct { 7 - name string 8 - alias string 9 - want string 10 - wantOk bool 11 - }{ 12 - { 13 - name: "empty alias", 14 - alias: "", 15 - want: "", 16 - wantOk: false, 17 - }, 18 - { 19 - name: "unknown alias", 20 - alias: "unknown", 21 - want: "", 22 - wantOk: false, 23 - }, 24 - { 25 - name: "supported alias", 26 - alias: "go", 27 - want: "Go", 28 - wantOk: true, 29 - }, 30 - { 31 - name: "unsupported by linguist alias", 32 - alias: "magik", 33 - want: "Magik", 34 - wantOk: true, 35 - }, 36 - { 37 - name: "unsupported by linguist alias normalized", 38 - alias: "mAgIk", 39 - want: "Magik", 40 - wantOk: true, 41 - }, 42 - { 43 - name: "apex example unsupported by linguist alias", 44 - alias: "apex", 45 - want: "Apex", 46 - wantOk: true, 47 - }, 48 - } 49 - 50 - for _, tt := range tests { 51 - t.Run(tt.name, func(t *testing.T) { 52 - got, ok := GetLanguageByAlias(tt.alias) 53 - if got != tt.want || ok != tt.wantOk { 54 - t.Errorf("GetLanguageByAlias(%q) = %q, %t, want %q, %t", tt.alias, got, ok, tt.want, tt.wantOk) 55 - } 56 - }) 57 - } 58 - } 59 - 60 - func TestGetLanguage(t *testing.T) { 61 - tests := []struct { 62 - name string 63 - filename string 64 - content []byte 65 - want string 66 - }{ 67 - { 68 - name: "empty filename", 69 - filename: "", 70 - content: []byte(""), 71 - want: "", 72 - }, 73 - { 74 - name: "unknown extension", 75 - filename: "file.unknown", 76 - content: []byte(""), 77 - want: "", 78 - }, 79 - { 80 - name: "supported extension", 81 - filename: "file.go", 82 - content: []byte("package main"), 83 - want: "Go", 84 - }, 85 - { 86 - name: "magik: unsupported by linguist extension", 87 - filename: "file.magik", 88 - content: []byte(""), 89 - want: "Magik", 90 - }, 91 - { 92 - name: "apex: unsupported by linguist extension", 93 - filename: "file.apxc", 94 - content: []byte(""), 95 - want: "Apex", 96 - }, 97 - } 98 - 99 - for _, tt 
:= range tests { 100 - t.Run(tt.name, func(t *testing.T) { 101 - got := GetLanguage(tt.filename, tt.content) 102 - if got != tt.want { 103 - t.Errorf("GetLanguage(%q, %q) = %q, want %q", tt.filename, tt.content, got, tt.want) 104 - } 105 - }) 106 - } 107 - }
+15
languages/enry_vendored.go
··· 1 + package languages 2 + 3 + import "strings" 4 + 5 + // This file contains private functions 6 + // vendored from the go-enry codebase. 7 + 8 + // convertToAliasKey is vendored from go-enry to make sure 9 + // we're normalizing strings the same way. 10 + func convertToAliasKey(langName string) string { 11 + ak := strings.SplitN(langName, `,`, 2)[0] 12 + ak = strings.Replace(ak, ` `, `_`, -1) 13 + ak = strings.ToLower(ak) 14 + return ak 15 + }
+489
languages/extensions.go
··· 1 + package languages 2 + 3 + import ( 4 + "path/filepath" 5 + "slices" 6 + "strings" 7 + 8 + "github.com/go-enry/go-enry/v2" 9 + enrydata "github.com/go-enry/go-enry/v2/data" 10 + ) 11 + 12 + // GetLanguageByNameOrAlias returns the standardized name for 13 + // a language based on its name (in which case this is an identity operation) 14 + // or based on its alias, which is potentially an alternate name for 15 + // the language. 16 + // 17 + // Aliases are fully lowercase, and map N-1 to languages. 18 + // 19 + // For example, 20 + // 21 + // GetLanguageByNameOrAlias("ada") == "Ada", true 22 + // GetLanguageByNameOrAlias("ada95") == "Ada", true 23 + // 24 + // Historical note: This function was added for replacing usages of 25 + // enry.GetLanguageByAlias, which, unlike the name suggests, also 26 + // handles non-normalized names such as those with spaces. 27 + func GetLanguageByNameOrAlias(nameOrAlias string) (lang string, ok bool) { 28 + alias := convertToAliasKey(nameOrAlias) 29 + if lang, ok = unsupportedByEnryAliasMap[alias]; ok { 30 + return lang, true 31 + } 32 + 33 + return enry.GetLanguageByAlias(alias) 34 + } 35 + 36 + // GetLanguageExtensions returns the list of file extensions for a given 37 + // language. Returned extensions are always prefixed with a '.'. 38 + // 39 + // The returned slice will be empty iff the language is not known. 40 + // 41 + // Handles more languages than enry.GetLanguageExtensions. 42 + // 43 + // Mutually consistent with getLanguagesByExtension, see the tests 44 + // for the exact invariants. 
45 + func GetLanguageExtensions(language string) []string { 46 + if langs, ok := unsupportedByEnryNameToExtensionMap[language]; ok { 47 + return langs 48 + } 49 + 50 + ignoreExts, isNiche := nicheExtensionUsages[language] 51 + // Force a copy to avoid accidentally modifying the global variable 52 + exts := slices.Clone(enry.GetLanguageExtensions(language)) 53 + for ext, lang := range sgExtraLangsForExts { // Map is tiny, so linear lookup is fine 54 + if language == lang { 55 + exts = append(exts, ext) 56 + } 57 + } 58 + if !isNiche { 59 + return exts 60 + } 61 + return slices.DeleteFunc(exts, func(ext string) bool { 62 + _, shouldIgnore := ignoreExts[ext] 63 + return shouldIgnore 64 + }) 65 + } 66 + 67 + // getLanguagesByExtension is a replacement for enry.GetLanguagesByExtension 68 + // to work around the following limitations: 69 + // - For some extensions which are overwhelmingly used by a certain file type 70 + // in practice, such as '.ts', '.md' and '.yaml', it returns ambiguous results. 71 + // - It does not provide any information about binary files. 72 + // - Some languages are not supported by enry yet (e.g. Magik) 73 + func getLanguagesByExtension(path string) (candidates []string, isLikelyBinaryFile bool) { 74 + // Lowercase extension before lookups to match enry's behavior. 
75 + ext := strings.ToLower(filepath.Ext(path)) 76 + if ext == "" { 77 + return nil, false 78 + } 79 + if lang, ok := unsupportedByEnryExtensionToNameMap[ext]; ok { 80 + return []string{lang}, false 81 + } 82 + if _, ok := commonBinaryFileExtensions[ext[1:]]; ok { 83 + return nil, true 84 + } 85 + if lang, ok := overrideAmbiguousExtensionsMap[ext]; ok { 86 + return []string{lang}, false 87 + } 88 + candidates = enry.GetLanguagesByExtension(path, nil, nil) 89 + if extra, ok := sgExtraLangsForExts[ext]; ok { 90 + candidates = append(candidates, extra) 91 + } 92 + return candidates, false 93 + } 94 + 95 + var commonBinaryFileExtensions = func() map[string]struct{} { 96 + m := map[string]struct{}{} 97 + for _, s := range commonBinaryFileExtensionsList { 98 + m[s] = struct{}{} 99 + } 100 + return m 101 + }() 102 + 103 + var sgExtraLangsForExts = map[string]string{ 104 + ".c": "C++", 105 + // NOTE: Downstream code does linear lookups on this map, so 106 + // be careful if you're adding lots of entries here. 107 + } 108 + 109 + var sgExtraContentHeuristics = map[string]*enrydata.Heuristics{ 110 + ".c": enrydata.ContentHeuristics[".h"], 111 + } 112 + 113 + // overrideAmbiguousExtensionsMap represents extensions which are ambiguous according to 114 + // enry but not for Sourcegraph. 115 + var overrideAmbiguousExtensionsMap = map[string]string{ 116 + // Ignoring the uncommon usage of '.cs' for Smalltalk. 117 + ".cs": "C#", 118 + // The other languages are Filterscript, Forth, GLSL. Out of that, 119 + // Forth and GLSL commonly use other extensions. Ignore Filterscript 120 + // as it is niche. 121 + ".fs": "F#", 122 + // Ignoring the uncommon usage of '.html' for Ecmarkup. 123 + ".html": "HTML", 124 + // Ignoring other variants of JSON, such as OASv2-json and OASv3-json 125 + ".json": "JSON", 126 + // Not considering "GCC Machine Description". 127 + ".md": "Markdown", 128 + // The other main language using '.rs' is RenderScript, but that's deprecated. 
129 + // See https://developer.android.com/guide/topics/renderscript/compute 130 + ".rs": "Rust", 131 + // In i18n contexts, there are XML files with '.ts' and '.tsx' extensions, 132 + // but we ignore those for now to avoid penalizing the common case. 133 + ".tsx": "TSX", 134 + ".ts": "TypeScript", 135 + // Ignoring "Adblock Filter List" and "Vim Help File". 136 + ".txt": "Text", 137 + // Ignoring other variants of YAML, such as MiniYAML, OASv2-yaml, OASv3-yaml. 138 + ".yaml": "YAML", 139 + ".yml": "YAML", 140 + // The PR adding Pkl support also listed another language called Pickle in 141 + // its heuristics, but doesn't have any real support for it. Just ignore 142 + // it. 143 + // https://github.com/github-linguist/linguist/pull/6730/files#diff-c2d2d7946540ab501a5ef7a7f54a57c530d8da599e41c2beb0fd2f5635d2fd50R539 144 + ".pkl": "Pkl", 145 + } 146 + 147 + // unsupportedByEnryExtensionToNameMap contains extension->name mappings 148 + // for languages not tracked by go-enry. 149 + var unsupportedByEnryExtensionToNameMap = map[string]string{ 150 + // Extensions for the Apex programming language 151 + // See https://developer.salesforce.com/docs/atlas.en-us.apexcode.meta/apexcode/apex_dev_guide.htm 152 + ".apex": "Apex", 153 + ".apxt": "Apex", 154 + ".apxc": "Apex", 155 + ".trigger": "Apex", 156 + ".magik": "Magik", 157 + } 158 + 159 + // nicheExtensionUsage keeps track of which (lang, extension) mappings 160 + // should not be considered. 161 + // 162 + // We cannot wholesale ignore these languages, as this list includes 163 + // languages like XML, but it can contain unusual extensions like '.tsx' 164 + // which we generally want to classify as TypeScript. 
165 + var nicheExtensionUsages = func() map[string]map[string]struct{} { 166 + niche := map[string]map[string]struct{}{} 167 + considered := map[string]struct{}{} 168 + for _, lang := range overrideAmbiguousExtensionsMap { 169 + considered[lang] = struct{}{} 170 + } 171 + for ext := range overrideAmbiguousExtensionsMap { 172 + langs := enry.GetLanguagesByExtension("foo"+ext, nil, nil) 173 + for _, lang := range langs { 174 + if _, found := considered[lang]; !found { 175 + if m, hasMap := niche[lang]; hasMap { 176 + m[ext] = struct{}{} 177 + } else { 178 + niche[lang] = map[string]struct{}{ext: {}} 179 + } 180 + } 181 + } 182 + } 183 + for specialOverrideExt, lang := range unsupportedByEnryExtensionToNameMap { 184 + considered[lang] = struct{}{} 185 + langs := enry.GetLanguagesByExtension("foo"+specialOverrideExt, nil, nil) 186 + for _, lang := range langs { 187 + if _, found := considered[lang]; !found { 188 + if m, hasMap := niche[lang]; hasMap { 189 + m[specialOverrideExt] = struct{}{} 190 + } else { 191 + niche[lang] = map[string]struct{}{specialOverrideExt: {}} 192 + } 193 + } 194 + } 195 + } 196 + return niche 197 + }() 198 + 199 + // unsupportedByEnryNameToExtensionMap contains language->extension mappings 200 + // for languages not tracked by go-enry. 201 + var unsupportedByEnryNameToExtensionMap = reverseMap(unsupportedByEnryExtensionToNameMap) 202 + 203 + // unsupportedByEnryAliasMap maps alias -> language name for languages 204 + // not tracked by go-enry. 
205 + var unsupportedByEnryAliasMap = func() map[string]string { 206 + out := map[string]string{} 207 + for _, lang := range unsupportedByEnryExtensionToNameMap { 208 + out[convertToAliasKey(lang)] = lang 209 + } 210 + return out 211 + }() 212 + 213 + func reverseMap(m map[string]string) map[string][]string { 214 + n := make(map[string][]string, len(m)) 215 + for k, v := range m { 216 + n[v] = append(n[v], k) 217 + } 218 + return n 219 + } 220 + 221 + // Source: https://github.com/sindresorhus/binary-extensions/blob/main/binary-extensions.json 222 + // License: https://github.com/sindresorhus/binary-extensions/blob/main/license 223 + // Replace the contents with 224 + // curl -L https://raw.githubusercontent.com/sindresorhus/binary-extensions/main/binary-extensions.json | jq '.[]' | awk '{print $1 ","}' 225 + // 226 + // Not adding a leading '.' here to make it easier to update/compare the list. 227 + var commonBinaryFileExtensionsList = []string{ 228 + "3dm", 229 + "3ds", 230 + "3g2", 231 + "3gp", 232 + "7z", 233 + "a", 234 + "aac", 235 + "adp", 236 + "afdesign", 237 + "afphoto", 238 + "afpub", 239 + "ai", 240 + "aif", 241 + "aiff", 242 + "alz", 243 + "ape", 244 + "apk", 245 + "appimage", 246 + "ar", 247 + "arj", 248 + "asf", 249 + "au", 250 + "avi", 251 + "bak", 252 + "baml", 253 + "bh", 254 + "bin", 255 + "bk", 256 + "bmp", 257 + "btif", 258 + "bz2", 259 + "bzip2", 260 + "cab", 261 + "caf", 262 + "cgm", 263 + "class", 264 + "cmx", 265 + "cpio", 266 + "cr2", 267 + "cur", 268 + "dat", 269 + "dcm", 270 + "deb", 271 + "dex", 272 + "djvu", 273 + "dll", 274 + "dmg", 275 + "dng", 276 + "doc", 277 + "docm", 278 + "docx", 279 + "dot", 280 + "dotm", 281 + "dra", 282 + "DS_Store", 283 + "dsk", 284 + "dts", 285 + "dtshd", 286 + "dvb", 287 + "dwg", 288 + "dxf", 289 + "ecelp4800", 290 + "ecelp7470", 291 + "ecelp9600", 292 + "egg", 293 + "eol", 294 + "eot", 295 + "epub", 296 + "exe", 297 + "f4v", 298 + "fbs", 299 + "fh", 300 + "fla", 301 + "flac", 302 + "flatpak", 303 + "fli", 
304 + "flv", 305 + "fpx", 306 + "fst", 307 + "fvt", 308 + "g3", 309 + "gh", 310 + "gif", 311 + "graffle", 312 + "gz", 313 + "gzip", 314 + "h261", 315 + "h263", 316 + "h264", 317 + "icns", 318 + "ico", 319 + "ief", 320 + "img", 321 + "ipa", 322 + "iso", 323 + "jar", 324 + "jpeg", 325 + "jpg", 326 + "jpgv", 327 + "jpm", 328 + "jxr", 329 + "key", 330 + "ktx", 331 + "lha", 332 + "lib", 333 + "lvp", 334 + "lz", 335 + "lzh", 336 + "lzma", 337 + "lzo", 338 + "m3u", 339 + "m4a", 340 + "m4v", 341 + "mar", 342 + "mdi", 343 + "mht", 344 + "mid", 345 + "midi", 346 + "mj2", 347 + "mka", 348 + "mkv", 349 + "mmr", 350 + "mng", 351 + "mobi", 352 + "mov", 353 + "movie", 354 + "mp3", 355 + "mp4", 356 + "mp4a", 357 + "mpeg", 358 + "mpg", 359 + "mpga", 360 + "mxu", 361 + "nef", 362 + "npx", 363 + "numbers", 364 + "nupkg", 365 + "o", 366 + "odp", 367 + "ods", 368 + "odt", 369 + "oga", 370 + "ogg", 371 + "ogv", 372 + "otf", 373 + "ott", 374 + "pages", 375 + "pbm", 376 + "pcx", 377 + "pdb", 378 + "pdf", 379 + "pea", 380 + "pgm", 381 + "pic", 382 + "png", 383 + "pnm", 384 + "pot", 385 + "potm", 386 + "potx", 387 + "ppa", 388 + "ppam", 389 + "ppm", 390 + "pps", 391 + "ppsm", 392 + "ppsx", 393 + "ppt", 394 + "pptm", 395 + "pptx", 396 + "psd", 397 + "pya", 398 + "pyc", 399 + "pyo", 400 + "pyv", 401 + "qt", 402 + "rar", 403 + "ras", 404 + "raw", 405 + "resources", 406 + "rgb", 407 + "rip", 408 + "rlc", 409 + "rmf", 410 + "rmvb", 411 + "rpm", 412 + "rtf", 413 + "rz", 414 + "s3m", 415 + "s7z", 416 + "scpt", 417 + "sgi", 418 + "shar", 419 + "snap", 420 + "sil", 421 + "sketch", 422 + "slk", 423 + "smv", 424 + "snk", 425 + "so", 426 + "stl", 427 + "suo", 428 + "sub", 429 + "swf", 430 + "tar", 431 + "tbz", 432 + "tbz2", 433 + "tga", 434 + "tgz", 435 + "thmx", 436 + "tif", 437 + "tiff", 438 + "tlz", 439 + "ttc", 440 + "ttf", 441 + "txz", 442 + "udf", 443 + "uvh", 444 + "uvi", 445 + "uvm", 446 + "uvp", 447 + "uvs", 448 + "uvu", 449 + "viv", 450 + "vob", 451 + "war", 452 + "wav", 453 + "wax", 454 + 
"wbmp", 455 + "wdp", 456 + "weba", 457 + "webm", 458 + "webp", 459 + "whl", 460 + "wim", 461 + "wm", 462 + "wma", 463 + "wmv", 464 + "wmx", 465 + "woff", 466 + "woff2", 467 + "wrm", 468 + "wvx", 469 + "xbm", 470 + "xif", 471 + "xla", 472 + "xlam", 473 + "xls", 474 + "xlsb", 475 + "xlsm", 476 + "xlsx", 477 + "xlt", 478 + "xltm", 479 + "xltx", 480 + "xm", 481 + "xmind", 482 + "xpi", 483 + "xpm", 484 + "xwd", 485 + "xz", 486 + "z", 487 + "zip", 488 + "zipx", 489 + }
+205
languages/extensions_test.go
··· 1 + package languages 2 + 3 + import ( 4 + "slices" 5 + "strings" 6 + "testing" 7 + 8 + "github.com/go-enry/go-enry/v2" 9 + enrydata "github.com/go-enry/go-enry/v2/data" 10 + "github.com/stretchr/testify/require" 11 + ) 12 + 13 + // Languages/extensions that we don't want to regress 14 + var nonAmbiguousExtensionsCheck = map[string]string{ 15 + ".apex": "Apex", 16 + ".apxt": "Apex", 17 + ".apxc": "Apex", 18 + ".trigger": "Apex", 19 + ".js": "JavaScript", 20 + // Linguist removed JSX (but not TSX) as a separate language: 21 + // https://github.com/github-linguist/linguist/pull/5133 22 + ".jsx": "JavaScript", 23 + ".ts": "TypeScript", 24 + ".tsx": "TSX", 25 + ".py": "Python", 26 + ".rb": "Ruby", 27 + ".go": "Go", 28 + ".java": "Java", 29 + ".kt": "Kotlin", 30 + ".magik": "Magik", 31 + ".scala": "Scala", 32 + ".cs": "C#", 33 + ".fs": "F#", 34 + ".rs": "Rust", 35 + // ".c" is not included as we consider it ambiguous (C or C++) (SPLF-1309) 36 + ".cpp": "C++", 37 + ".cxx": "C++", 38 + ".html": "HTML", 39 + ".hpp": "C++", 40 + ".hxx": "C++", 41 + ".lua": "Lua", 42 + ".dart": "Dart", 43 + ".swift": "Swift", 44 + ".css": "CSS", 45 + ".json": "JSON", 46 + ".yml": "YAML", 47 + ".xml": "XML", 48 + ".pkl": "Pkl", 49 + } 50 + 51 + func TestGetLanguageByAlias_UnsupportedLanguages(t *testing.T) { 52 + for alias, name := range unsupportedByEnryAliasMap { 53 + resName, _ := GetLanguageByNameOrAlias(alias) 54 + require.Equal(t, name, resName, 55 + "maybe a typo in `unsupportedByEnryAliasMap`?") 56 + } 57 + } 58 + 59 + func TestGetLanguageByAlias_NonAmbiguousLanguages(t *testing.T) { 60 + for _, language := range nonAmbiguousExtensionsCheck { 61 + _, ok := GetLanguageByNameOrAlias(language) 62 + require.True(t, ok, 63 + "unable to find language %s in go-enry", language) 64 + } 65 + } 66 + 67 + func TestGetLanguageExtensions_UnsupportedExtensions(t *testing.T) { 68 + for language, exts := range unsupportedByEnryNameToExtensionMap { 69 + extensions := GetLanguageExtensions(language) 
70 + for _, ext := range exts { 71 + require.Contains(t, extensions, ext, 72 + "maybe a typo in `unsupportedByEnryNameToExtensionMap`?") 73 + } 74 + } 75 + } 76 + 77 + func TestGetLanguageExtensions_NonAmbiguousExtensions(t *testing.T) { 78 + langMap := reverseMap(nonAmbiguousExtensionsCheck) 79 + for language, exts := range langMap { 80 + extensions := GetLanguageExtensions(language) 81 + for _, ext := range exts { 82 + require.Contains(t, extensions, ext, 83 + "If this test fails when updating enry, maybe `overrideAmbiguousExtensionsMap` needs updating") 84 + } 85 + } 86 + } 87 + 88 + func TestGetLanguagesByExtension_UnsupportedExtensions(t *testing.T) { 89 + for ext, language := range unsupportedByEnryExtensionToNameMap { 90 + filename := "foo" + ext 91 + languages, _ := getLanguagesByExtension(filename) 92 + require.Contains(t, languages, language, 93 + "maybe a typo in `unsupportedByEnryExtensionToNameMap`?") 94 + } 95 + } 96 + 97 + func TestGetLanguagesByExtension_OverrideExtensions(t *testing.T) { 98 + for ext, language := range overrideAmbiguousExtensionsMap { 99 + filename := "foo" + ext 100 + enryLangs := enry.GetLanguagesByExtension(filename, nil, nil) 101 + 102 + require.Contains(t, enryLangs, language, 103 + "maybe a typo in `overrideAmbiguousExtensionsMap`?") 104 + require.Greaterf(t, len(enryLangs), 1, 105 + "extension %v is not ambiguous according to enry, remove it from `overrideAmbiguousExtensionsMap`", 106 + ext) 107 + } 108 + } 109 + 110 + func TestGetLanguagesByExtension_NonAmbiguousExtensions(t *testing.T) { 111 + for ext, language := range nonAmbiguousExtensionsCheck { 112 + filename := "foo" + ext 113 + languages, isLikelyBinaryFile := getLanguagesByExtension(filename) 114 + require.False(t, isLikelyBinaryFile) 115 + require.Equal(t, []string{language}, languages, 116 + "If this test fails when updating enry, maybe `overrideAmbiguousExtensionsMap` needs updating") 117 + } 118 + } 119 + 120 + func 
TestGetLanguagesByExtension_BinaryExtensions(t *testing.T) { 121 + for _, ext := range []string{".png", ".jpg", ".gif"} { 122 + filename := "foo" + ext 123 + _, isLikelyBinary := getLanguagesByExtension(filename) 124 + require.Truef(t, isLikelyBinary, "filename: %v was not guessed to be binary;"+ 125 + "bug in extension matching logic in getLanguagesByExtension maybe?", 126 + filename) 127 + } 128 + } 129 + 130 + func TestExtensionsConsistency(t *testing.T) { 131 + for ext, overrideLang := range overrideAmbiguousExtensionsMap { 132 + filepath := "foo" + ext 133 + enryLangsForExt := enry.GetLanguagesByExtension(filepath, nil, nil) 134 + require.Containsf(t, enryLangsForExt, overrideLang, "overrideAmbiguousExtensionsMap maps extension %q to language %q but "+ 135 + "that mapping is not present in enry's list %v", ext, overrideLang, enryLangsForExt) 136 + require.Greaterf(t, len(enryLangsForExt), 1, "overrideAmbiguousExtensionsMap states that"+ 137 + "%q extension is ambiguous, but only found langs: %v", ext, enryLangsForExt) 138 + 139 + candidates, isLikelyBinary := getLanguagesByExtension(filepath) 140 + require.False(t, isLikelyBinary, "ambiguous files are all source code") 141 + require.True(t, len(candidates) == 1, "getLanguagesByExtension should respect overrideAmbiguousExtensionsMap") 142 + 143 + shouldBeIgnoredLangsForExt := slices.DeleteFunc(enryLangsForExt, func(s string) bool { 144 + return s == overrideLang 145 + }) 146 + for _, shouldBeIgnoredLang := range shouldBeIgnoredLangsForExt { 147 + ignoredExts, found := nicheExtensionUsages[shouldBeIgnoredLang] 148 + require.Truef(t, found, "expected lang: %q to have an entry in nicheExtensionUsages for consistency with GetLanguagesByExtension", shouldBeIgnoredLang) 149 + require.Truef(t, len(ignoredExts) >= 1, "sets in nicheExtensionUsages must be non-empty") 150 + 151 + nonNicheExts := GetLanguageExtensions(shouldBeIgnoredLang) 152 + for ignoredExt := range ignoredExts { 153 + require.Falsef(t, 
slices.Contains(nonNicheExts, ignoredExt), 154 + "GetLanguageExtensions should not return %q for lang %q for consistency with GetLanguagesByExtension", 155 + ignoredExt, shouldBeIgnoredLang) 156 + } 157 + } 158 + } 159 + } 160 + 161 + func TestExtensionsConsistency2(t *testing.T) { 162 + for lang := range enrydata.ExtensionsByLanguage { 163 + for _, ext := range GetLanguageExtensions(lang) { 164 + if strings.Count(ext, ".") > 1 { 165 + // Ignore unusual edge cases like .coffee.md for Literate CoffeeScript 166 + continue 167 + } 168 + langsByExt, isLikelyBinary := getLanguagesByExtension("foo" + ext) 169 + if !isLikelyBinary { 170 + require.Truef(t, slices.Contains(langsByExt, lang), 171 + "expected getLanguagesByExtension result %v to contain %q (extension: %q)", langsByExt, lang, ext) 172 + } 173 + } 174 + } 175 + } 176 + 177 + func TestUnsupportedByEnry(t *testing.T) { 178 + for lang := range unsupportedByEnryNameToExtensionMap { 179 + enry_extensions, found := enrydata.ExtensionsByLanguage[lang] 180 + if found { 181 + validateLanguageAgainstGoEnry(t, "unsupportedByEnryNameToExtensionMap", enry_extensions, lang) 182 + } 183 + } 184 + for _, lang := range unsupportedByEnryAliasMap { 185 + enry_extensions, found := enrydata.ExtensionsByLanguage[lang] 186 + if found { 187 + validateLanguageAgainstGoEnry(t, "unsupportedByEnryAliasMap", enry_extensions, lang) 188 + } 189 + } 190 + for _, lang := range unsupportedByEnryExtensionToNameMap { 191 + enry_extensions, found := enrydata.ExtensionsByLanguage[lang] 192 + if found { 193 + validateLanguageAgainstGoEnry(t, "unsupportedByEnryExtensionToNameMap", enry_extensions, lang) 194 + } 195 + } 196 + } 197 + 198 + func validateLanguageAgainstGoEnry(t *testing.T, name string, enryExtensions []string, lang string) { 199 + enryExtensions = slices.Clone(enryExtensions) 200 + slices.Sort(enryExtensions) 201 + sgExtensions := slices.Clone(unsupportedByEnryNameToExtensionMap[lang]) 202 + slices.Sort(sgExtensions) 203 + 204 + 
require.NotEqualf(t, enryExtensions, sgExtensions, "looks like language %q is supported by enry with the same extensions; remove it from %q", lang, name) 205 + }
+151
languages/languages.go
··· 1 + // Package languages provides enhanced language detection capabilities on top of 2 + // go-enry, with additional heuristics and mappings for better accuracy. 3 + package languages 4 + 5 + import ( 6 + "path/filepath" 7 + "slices" 8 + "strings" 9 + 10 + "github.com/go-enry/go-enry/v2" 11 + ) 12 + 13 + // Make sure all names are lowercase here, since they are normalized 14 + var enryLanguageMappings = map[string]string{ 15 + "c++": "cpp", 16 + "c#": "c_sharp", 17 + } 18 + 19 + // NormalizeLanguage converts the language name to lowercase and maps known 20 + // aliases to their canonical names. 21 + func NormalizeLanguage(filetype string) string { 22 + normalized := strings.ToLower(filetype) 23 + if mapped, ok := enryLanguageMappings[normalized]; ok { 24 + normalized = mapped 25 + } 26 + 27 + return normalized 28 + } 29 + 30 + // GetLanguages is a replacement for enry.GetLanguages which 31 + // avoids incorrect fallback behavior that is present in DefaultStrategies, 32 + // where it will misclassify '.h' header files as C when file contents 33 + // are not available. 34 + // 35 + // The content can be optionally passed via a callback instead of directly, so 36 + // that in the common case, the caller can avoid fetching the content. The full 37 + // content returned by getContent will be used for language detection. 38 + // 39 + // getContent is not called if the file is likely to be a binary file, 40 + // as enry only covers programming languages. 41 + // 42 + // The buffer provided by the getContent callback is not modified. 43 + // 44 + // Returns: 45 + // - An error if the getContent func returns an error 46 + // - An empty slice if language detection failed 47 + // - A single-element slice if the language was determined exactly 48 + // - A multi-element slice if the language was ambiguous. For example, 49 + // for simple `.h` files with just comments and macros, they may 50 + // be valid C, C++ or any of their derivative languages (e.g. Objective-C). 
51 + func GetLanguages(path string, getContent func() ([]byte, error)) ([]string, error) { 52 + impl := func() ([]string, error) { 53 + langs := enry.GetLanguagesByFilename(path, nil, nil) 54 + if len(langs) == 1 { 55 + return langs, nil 56 + } 57 + newLangs, isLikelyBinaryFile := getLanguagesByExtension(path) 58 + if isLikelyBinaryFile { 59 + return nil, nil 60 + } 61 + switch len(newLangs) { 62 + case 0: 63 + break 64 + case 1: 65 + return newLangs, nil 66 + default: 67 + langs = newLangs 68 + } 69 + if getContent == nil { 70 + return langs, nil 71 + } 72 + content, err := getContent() 73 + if err != nil { 74 + return nil, err 75 + } 76 + if len(content) == 0 { 77 + return langs, nil 78 + } 79 + if enry.IsBinary(content) { 80 + return nil, nil 81 + } 82 + 83 + // enry doesn't expose a way to call GetLanguages with a specific set of 84 + // strategies, so just hand-roll that code here. 85 + var languages = langs 86 + for _, strategy := range []enry.Strategy{enry.GetLanguagesByModeline, getLanguagesByShebang, getLanguagesByContent, enry.GetLanguagesByClassifier} { 87 + candidates := strategy(path, content, languages) 88 + switch len(candidates) { 89 + case 0: 90 + continue 91 + case 1: 92 + return candidates, nil 93 + default: 94 + languages = candidates 95 + } 96 + } 97 + 98 + return languages, nil 99 + } 100 + 101 + langs, err := impl() 102 + return slices.Clone(langs), err 103 + } 104 + 105 + // GetLanguagesFromContent is a convenience wrapper around GetLanguages that 106 + // allows passing the content directly instead of a callback. 107 + func GetLanguagesFromContent(path string, content []byte) (langs []string) { 108 + // We can ignore the error here, because the callback will never return an error 109 + langs, _ = GetLanguages(path, func() ([]byte, error) { return content, nil }) 110 + return 111 + } 112 + 113 + // getLanguagesByContent is a wrapper for enry.GetLanguagesByContent. 
114 + // 115 + // It applies additional heuristics for file extensions that need special handling. 116 + func getLanguagesByContent(path string, content []byte, candidates []string) []string { 117 + ext := strings.ToLower(filepath.Ext(path)) 118 + if heuristic, ok := sgExtraContentHeuristics[ext]; ok { 119 + return heuristic.Match(content) 120 + } 121 + return enry.GetLanguagesByContent(path, content, candidates) 122 + } 123 + 124 + // getLanguagesByShebang is a replacement for enry.GetLanguagesByShebang. 125 + // 126 + // The enry function considers non-programming languages such as 'Pod'/'Pod 6' 127 + // also for shebangs, so work around that. 128 + func getLanguagesByShebang(path string, content []byte, candidates []string) []string { 129 + languages := enry.GetLanguagesByShebang(path, content, candidates) 130 + if len(languages) == 2 { 131 + // See https://sourcegraph.com/github.com/go-enry/go-enry@40f2a1e5b90eec55c20441c2a5911dcfc298a447/-/blob/data/interpreter.go?L95-96 132 + if slices.Equal(languages, []string{"Perl", "Pod"}) { 133 + return []string{"Perl"} 134 + } 135 + if slices.Equal(languages, []string{"Pod 6", "Raku"}) { 136 + return []string{"Raku"} 137 + } 138 + } 139 + return slices.Clone(languages) 140 + } 141 + 142 + // IsLikelyVendoredFile returns true if the file is likely to be a vendored file. 143 + // 144 + // 1. This method is not 100% foolproof, as it relies on conventions 145 + // around file paths which may or may not be followed. 146 + // 2. The caller must not pass a directory path to this function 147 + // for short-circuiting, as there is no guarantee that if a path 148 + // p1 returns true, then Join(p1, p2) also returns true. 149 + func IsLikelyVendoredFile(path string) bool { 150 + return enry.IsVendor(path) 151 + }
+184
languages/languages_test.go
··· 1 + package languages 2 + 3 + import ( 4 + "testing" 5 + 6 + "github.com/go-enry/go-enry/v2" 7 + "github.com/stretchr/testify/require" 8 + "pgregory.net/rapid" 9 + ) 10 + 11 + var cppCapitalExtContent = `// Sample C++ file from the ROSE compiler project 12 + // Original source: https://github.com/rose-compiler/rose 13 + // This file is used for testing C++ language detection for files with .C extension 14 + // Attribution: ROSE Compiler Team - Lawrence Livermore National Laboratory 15 + 16 + #include "sage3basic.h" 17 + #include "rose_config.h" 18 + 19 + #include "SageTreeBuilder.h" 20 + #include "Jovial_to_ROSE_translation.h" 21 + #include "ModuleBuilder.h" 22 + 23 + #include <boost/optional/optional_io.hpp> 24 + #include <iostream> 25 + 26 + namespace Rose { 27 + namespace builder { 28 + 29 + using namespace Rose::Diagnostics; 30 + 31 + namespace SB = SageBuilder; 32 + namespace SI = SageInterface; 33 + namespace LT = LanguageTranslation; 34 + ` 35 + 36 + func TestGetLanguages(t *testing.T) { 37 + const matlabContent = "function [out] = square(x)\nout = x * x;\nend" 38 + const mathematicaContent = "f[x_] := x ^ 2\ng[y_] := f[y]" 39 + const cppContent = "namespace x { }" 40 + const cContent = "typedef struct { int x; } Int;" 41 + const emptyContent = "" 42 + 43 + testCases := []struct { 44 + path string 45 + content string 46 + expectedLanguages []string 47 + compareFirstOnly bool 48 + }{ 49 + {path: "perlscript", content: "#!/usr/bin/env perl\n$version = $ARGV[0];", expectedLanguages: []string{"Perl"}}, 50 + {path: "rakuscript", content: "#!/usr/bin/env perl6\n$version = $ARGV[0];", expectedLanguages: []string{"Raku"}}, 51 + {path: "ambiguous.h", content: emptyContent, expectedLanguages: []string{"C", "C++", "Objective-C"}}, 52 + {path: "cpp.h", content: cppContent, expectedLanguages: []string{"C++"}}, 53 + {path: "c.h", content: cContent, expectedLanguages: []string{"C"}}, 54 + {path: "matlab.m", content: matlabContent, expectedLanguages: []string{"MATLAB"}, 
compareFirstOnly: true}, 55 + {path: "mathematica.m", content: mathematicaContent, expectedLanguages: []string{"Mathematica"}, compareFirstOnly: true}, 56 + { 57 + path: "mathematica2.m", 58 + content: ` 59 + s := StringRiffle[{"a", "b", "c", "d", "e"}, ", "] 60 + Flatten[{{a, b}, {c, {d}, e}, {f, {g, h}}}] 61 + square[x_] := x ^ 2 62 + fourthpower[x_] := square[square[x]] 63 + `, 64 + expectedLanguages: []string{"Mathematica"}, 65 + compareFirstOnly: true, 66 + }, 67 + {path: "SageTreeBuilder.C", content: cppCapitalExtContent, expectedLanguages: []string{"C++"}}, 68 + // Ported cases from internal/languages TestGetLanguage 69 + {path: "", content: emptyContent, expectedLanguages: nil}, 70 + {path: "file.unknown", content: emptyContent, expectedLanguages: nil}, 71 + {path: "file.go", content: "package main", expectedLanguages: []string{"Go"}}, 72 + {path: "file.magik", content: emptyContent, expectedLanguages: []string{"Magik"}}, 73 + {path: "file.apxc", content: emptyContent, expectedLanguages: []string{"Apex"}}, 74 + // Check that we classify cls files by content and not just by extension 75 + {path: "tex.cls", content: `\DeclareOption*{}`, expectedLanguages: []string{"TeX", "Apex", "ObjectScript", "Visual Basic 6.0", "OpenEdge ABL", "VBA"}}, 76 + {path: "tex.cls", content: `public class HelloWorld {`, expectedLanguages: []string{"Apex", "Visual Basic 6.0", "TeX", "OpenEdge ABL", "ObjectScript", "VBA"}}, 77 + } 78 + 79 + for _, testCase := range testCases { 80 + var getContent func() ([]byte, error) 81 + if testCase.content != "" { 82 + getContent = func() ([]byte, error) { return []byte(testCase.content), nil } 83 + } 84 + gotLanguages, err := GetLanguages(testCase.path, getContent) 85 + require.NoError(t, err) 86 + if testCase.compareFirstOnly { 87 + require.Equal(t, testCase.expectedLanguages, gotLanguages[0:1]) 88 + continue 89 + } 90 + require.Equal(t, testCase.expectedLanguages, gotLanguages) 91 + } 92 + 93 + rapid.Check(t, func(t *rapid.T) { 94 + path := 
rapid.String().Draw(t, "path") 95 + content := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "contents") 96 + require.NotPanics(t, func() { 97 + langs, err := GetLanguages(path, func() ([]byte, error) { return content, nil }) 98 + require.NoError(t, err) 99 + if len(langs) != 0 { 100 + for _, l := range langs { 101 + require.NotEqual(t, enry.OtherLanguage, l) 102 + } 103 + } 104 + }) 105 + }) 106 + 107 + rapid.Check(t, func(t *rapid.T) { 108 + baseName := "abcd" 109 + exts := []string{".h", ".m", ".unknown", ""} 110 + extGens := []*rapid.Generator[string]{} 111 + for _, ext := range exts { 112 + extGens = append(extGens, rapid.Just(ext)) 113 + } 114 + extension := rapid.OneOf(extGens...).Draw(t, "extension") 115 + path := baseName + extension 116 + contentGens := []*rapid.Generator[string]{} 117 + for _, content := range []string{cContent, cppContent, mathematicaContent, matlabContent, emptyContent} { 118 + contentGens = append(contentGens, rapid.Just(content)) 119 + } 120 + content := rapid.OneOf(contentGens...).Draw(t, "content") 121 + langs, err := GetLanguages(path, func() ([]byte, error) { 122 + return []byte(content), nil 123 + }) 124 + require.NoError(t, err) 125 + for _, lang := range langs { 126 + require.NotEqual(t, enry.OtherLanguage, lang) 127 + } 128 + }) 129 + } 130 + 131 + func TestGetLanguageByNameOrAlias(t *testing.T) { 132 + tests := []struct { 133 + name string 134 + alias string 135 + want string 136 + wantOk bool 137 + }{ 138 + { 139 + name: "empty alias", 140 + alias: "", 141 + want: "", 142 + wantOk: false, 143 + }, 144 + { 145 + name: "unknown alias", 146 + alias: "unknown", 147 + want: "", 148 + wantOk: false, 149 + }, 150 + { 151 + name: "supported alias", 152 + alias: "go", 153 + want: "Go", 154 + wantOk: true, 155 + }, 156 + { 157 + name: "unsupported by linguist alias", 158 + alias: "magik", 159 + want: "Magik", 160 + wantOk: true, 161 + }, 162 + { 163 + name: "unsupported by linguist alias normalized", 164 + alias: "mAgIk", 165 + want: 
"Magik", 166 + wantOk: true, 167 + }, 168 + { 169 + name: "apex example unsupported by linguist alias", 170 + alias: "apex", 171 + want: "Apex", 172 + wantOk: true, 173 + }, 174 + } 175 + 176 + for _, tt := range tests { 177 + t.Run(tt.name, func(t *testing.T) { 178 + got, ok := GetLanguageByNameOrAlias(tt.alias) 179 + if got != tt.want || ok != tt.wantOk { 180 + t.Errorf("GetLanguageByNameOrAlias(%q) = %q, %t, want %q, %t", tt.alias, got, ok, tt.want, tt.wantOk) 181 + } 182 + }) 183 + } 184 + }
+2 -2
query/parse.go
··· 22 22 23 23 "github.com/grafana/regexp" 24 24 25 - "github.com/sourcegraph/zoekt/internal/languages" 25 + "github.com/sourcegraph/zoekt/languages" 26 26 ) 27 27 28 28 var _ = log.Printf ··· 177 177 } 178 178 expr = q 179 179 case tokLang: 180 - canonical, ok := languages.GetLanguageByAlias(text) 180 + canonical, ok := languages.GetLanguageByNameOrAlias(text) 181 181 if !ok { 182 182 expr = &Const{false} 183 183 } else {