// Fork of https://github.com/sourcegraph/zoekt
1package languages
2
3import (
4 "testing"
5
6 "github.com/go-enry/go-enry/v2"
7 "github.com/stretchr/testify/require"
8 "pgregory.net/rapid"
9)
10
// cppCapitalExtContent is the C++ fixture for the "SageTreeBuilder.C" case in
// TestGetLanguages, which expects a file with the capital .C extension to be
// detected as C++. The attribution header is part of the fixture text itself.
//
// It is a constant, like the other content fixtures in this file, so no test
// can modify it by accident.
const cppCapitalExtContent = `// Sample C++ file from the ROSE compiler project
// Original source: https://github.com/rose-compiler/rose
// This file is used for testing C++ language detection for files with .C extension
// Attribution: ROSE Compiler Team - Lawrence Livermore National Laboratory

#include "sage3basic.h"
#include "rose_config.h"

#include "SageTreeBuilder.h"
#include "Jovial_to_ROSE_translation.h"
#include "ModuleBuilder.h"

#include <boost/optional/optional_io.hpp>
#include <iostream>

namespace Rose {
namespace builder {

using namespace Rose::Diagnostics;

namespace SB = SageBuilder;
namespace SI = SageInterface;
namespace LT = LanguageTranslation;
`
35
36func TestGetLanguages(t *testing.T) {
37 const matlabContent = "function [out] = square(x)\nout = x * x;\nend"
38 const mathematicaContent = "f[x_] := x ^ 2\ng[y_] := f[y]"
39 const cppContent = "namespace x { }"
40 const cContent = "typedef struct { int x; } Int;"
41 const emptyContent = ""
42
43 testCases := []struct {
44 path string
45 content string
46 expectedLanguages []string
47 compareFirstOnly bool
48 }{
49 {path: "perlscript", content: "#!/usr/bin/env perl\n$version = $ARGV[0];", expectedLanguages: []string{"Perl"}},
50 {path: "rakuscript", content: "#!/usr/bin/env perl6\n$version = $ARGV[0];", expectedLanguages: []string{"Raku"}},
51 {path: "ambiguous.h", content: emptyContent, expectedLanguages: []string{"C", "C++", "Objective-C"}},
52 {path: "cpp.h", content: cppContent, expectedLanguages: []string{"C++"}},
53 {path: "c.h", content: cContent, expectedLanguages: []string{"C"}},
54 {path: "matlab.m", content: matlabContent, expectedLanguages: []string{"MATLAB"}, compareFirstOnly: true},
55 {path: "mathematica.m", content: mathematicaContent, expectedLanguages: []string{"Mathematica"}, compareFirstOnly: true},
56 {
57 path: "mathematica2.m",
58 content: `
59s := StringRiffle[{"a", "b", "c", "d", "e"}, ", "]
60Flatten[{{a, b}, {c, {d}, e}, {f, {g, h}}}]
61square[x_] := x ^ 2
62fourthpower[x_] := square[square[x]]
63`,
64 expectedLanguages: []string{"Mathematica"},
65 compareFirstOnly: true,
66 },
67 {path: "SageTreeBuilder.C", content: cppCapitalExtContent, expectedLanguages: []string{"C++"}},
68 // Ported cases from internal/languages TestGetLanguage
69 {path: "", content: emptyContent, expectedLanguages: nil},
70 {path: "file.unknown", content: emptyContent, expectedLanguages: nil},
71 {path: "file.go", content: "package main", expectedLanguages: []string{"Go"}},
72 {path: "file.magik", content: emptyContent, expectedLanguages: []string{"Magik"}},
73 {path: "file.apxc", content: emptyContent, expectedLanguages: []string{"Apex"}},
74 // Check that we classify cls files by content and not just by extension
75 {path: "tex.cls", content: `\DeclareOption*{}`, expectedLanguages: []string{"TeX", "Apex", "ObjectScript", "Visual Basic 6.0", "OpenEdge ABL", "VBA"}},
76 {path: "tex.cls", content: `public class HelloWorld {`, expectedLanguages: []string{"Apex", "Visual Basic 6.0", "TeX", "OpenEdge ABL", "ObjectScript", "VBA"}},
77 }
78
79 for _, testCase := range testCases {
80 var getContent func() ([]byte, error)
81 if testCase.content != "" {
82 getContent = func() ([]byte, error) { return []byte(testCase.content), nil }
83 }
84 gotLanguages, err := GetLanguages(testCase.path, getContent)
85 require.NoError(t, err)
86 if testCase.compareFirstOnly {
87 require.Equal(t, testCase.expectedLanguages, gotLanguages[0:1])
88 continue
89 }
90 require.Equal(t, testCase.expectedLanguages, gotLanguages)
91 }
92
93 rapid.Check(t, func(t *rapid.T) {
94 path := rapid.String().Draw(t, "path")
95 content := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "contents")
96 require.NotPanics(t, func() {
97 langs, err := GetLanguages(path, func() ([]byte, error) { return content, nil })
98 require.NoError(t, err)
99 if len(langs) != 0 {
100 for _, l := range langs {
101 require.NotEqual(t, enry.OtherLanguage, l)
102 }
103 }
104 })
105 })
106
107 rapid.Check(t, func(t *rapid.T) {
108 baseName := "abcd"
109 exts := []string{".h", ".m", ".unknown", ""}
110 extGens := []*rapid.Generator[string]{}
111 for _, ext := range exts {
112 extGens = append(extGens, rapid.Just(ext))
113 }
114 extension := rapid.OneOf(extGens...).Draw(t, "extension")
115 path := baseName + extension
116 contentGens := []*rapid.Generator[string]{}
117 for _, content := range []string{cContent, cppContent, mathematicaContent, matlabContent, emptyContent} {
118 contentGens = append(contentGens, rapid.Just(content))
119 }
120 content := rapid.OneOf(contentGens...).Draw(t, "content")
121 langs, err := GetLanguages(path, func() ([]byte, error) {
122 return []byte(content), nil
123 })
124 require.NoError(t, err)
125 for _, lang := range langs {
126 require.NotEqual(t, enry.OtherLanguage, lang)
127 }
128 })
129}
130
131func TestGetLanguageByNameOrAlias(t *testing.T) {
132 tests := []struct {
133 name string
134 alias string
135 want string
136 wantOk bool
137 }{
138 {
139 name: "empty alias",
140 alias: "",
141 want: "",
142 wantOk: false,
143 },
144 {
145 name: "unknown alias",
146 alias: "unknown",
147 want: "",
148 wantOk: false,
149 },
150 {
151 name: "supported alias",
152 alias: "go",
153 want: "Go",
154 wantOk: true,
155 },
156 {
157 name: "unsupported by linguist alias",
158 alias: "magik",
159 want: "Magik",
160 wantOk: true,
161 },
162 {
163 name: "unsupported by linguist alias normalized",
164 alias: "mAgIk",
165 want: "Magik",
166 wantOk: true,
167 },
168 {
169 name: "apex example unsupported by linguist alias",
170 alias: "apex",
171 want: "Apex",
172 wantOk: true,
173 },
174 }
175
176 for _, tt := range tests {
177 t.Run(tt.name, func(t *testing.T) {
178 got, ok := GetLanguageByNameOrAlias(tt.alias)
179 if got != tt.want || ok != tt.wantOk {
180 t.Errorf("GetLanguageByNameOrAlias(%q) = %q, %t, want %q, %t", tt.alias, got, ok, tt.want, tt.wantOk)
181 }
182 })
183 }
184}