mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-06-18 22:23:07 -03:00
java implementation of enry
Signed-off-by: Miguel Molina <miguel@erizocosmi.co>
This commit is contained in:
224
java/src/main/java/tech/sourced/enry/Enry.java
Normal file
224
java/src/main/java/tech/sourced/enry/Enry.java
Normal file
@ -0,0 +1,224 @@
|
||||
package tech.sourced.enry;
|
||||
|
||||
import tech.sourced.enry.nativelib.*;
|
||||
|
||||
import static tech.sourced.enry.GoUtils.*;
|
||||
|
||||
public class Enry {
|
||||
private static final EnryLibrary nativeLib = EnryLibrary.INSTANCE;
|
||||
|
||||
/**
|
||||
* Returns whether the given language is auxiliary or not.
|
||||
*
|
||||
* @param language name of the language, e.g. PHP, HTML, ...
|
||||
* @return if it's an auxiliary language
|
||||
*/
|
||||
public static boolean isAuxiliaryLanguage(String language) {
|
||||
return toJavaBool(nativeLib.IsAuxiliaryLanguage(toGoString(language)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the language of the given file based on the filename and its
|
||||
* contents.
|
||||
*
|
||||
* @param filename name of the file with the extension
|
||||
* @param content array of bytes with the contents of the file (the code)
|
||||
* @return the guessed language
|
||||
*/
|
||||
public static String getLanguage(String filename, byte[] content) {
|
||||
return toJavaString(nativeLib.GetLanguage(
|
||||
toGoString(filename),
|
||||
toGoByteSlice(content)
|
||||
));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns detected language by its content.
|
||||
* If there are more than one possible language, it returns the first
|
||||
* language in alphabetical order and safe to false.
|
||||
*
|
||||
* @param content of the file
|
||||
* @return guessed result
|
||||
*/
|
||||
public static Guess getLanguageByContent(byte[] content) {
|
||||
GetLanguageByContent_return.ByValue res = nativeLib.GetLanguageByContent(toGoByteSlice(content));
|
||||
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns detected language by its emacs modeline.
|
||||
* If there are more than one possible language, it returns the first
|
||||
* language in alphabetical order and safe to false.
|
||||
*
|
||||
* @param content of the file
|
||||
* @return guessed result
|
||||
*/
|
||||
public static Guess getLanguageByEmacsModeline(byte[] content) {
|
||||
GetLanguageByEmacsModeline_return.ByValue res = nativeLib.GetLanguageByEmacsModeline(toGoByteSlice(content));
|
||||
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns detected language by the extension of the filename.
|
||||
* If there are more than one possible languages, it returns
|
||||
* the first language in alphabetical order and safe to false.
|
||||
*
|
||||
* @param filename of the file
|
||||
* @return guessed result
|
||||
*/
|
||||
public static Guess getLanguageByExtension(String filename) {
|
||||
GetLanguageByExtension_return.ByValue res = nativeLib.GetLanguageByExtension(toGoString(filename));
|
||||
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns detected language by its shebang.
|
||||
* If there are more than one possible language, it returns the first
|
||||
* language in alphabetical order and safe to false.
|
||||
*
|
||||
* @param content of the file
|
||||
* @return guessed result
|
||||
*/
|
||||
public static Guess getLanguageByShebang(byte[] content) {
|
||||
GetLanguageByShebang_return.ByValue res = nativeLib.GetLanguageByShebang(toGoByteSlice(content));
|
||||
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns detected language by its filename.
|
||||
* If there are more than one possible language, it returns the first
|
||||
* language in alphabetical order and safe to false.
|
||||
*
|
||||
* @param filename of the file
|
||||
* @return guessed result
|
||||
*/
|
||||
public static Guess getLanguageByFilename(String filename) {
|
||||
GetLanguageByFilename_return.ByValue res = nativeLib.GetLanguageByFilename(toGoString(filename));
|
||||
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns detected language by its modeline.
|
||||
* If there are more than one possible language, it returns the first
|
||||
* language in alphabetical order and safe to false.
|
||||
*
|
||||
* @param content of the file
|
||||
* @return guessed result
|
||||
*/
|
||||
public static Guess getLanguageByModeline(byte[] content) {
|
||||
GetLanguageByModeline_return.ByValue res = nativeLib.GetLanguageByModeline(toGoByteSlice(content));
|
||||
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns detected language by its vim modeline.
|
||||
* If there are more than one possible language, it returns the first
|
||||
* language in alphabetical order and safe to false.
|
||||
*
|
||||
* @param content of the file
|
||||
* @return guessed result
|
||||
*/
|
||||
public static Guess getLanguageByVimModeline(byte[] content) {
|
||||
GetLanguageByVimModeline_return.ByValue res = nativeLib.GetLanguageByVimModeline(toGoByteSlice(content));
|
||||
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all the possible extensions for a file in the given language.
|
||||
*
|
||||
* @param language to get extensions from
|
||||
* @return extensions
|
||||
*/
|
||||
public static String[] getLanguageExtensions(String language) {
|
||||
GoSlice result = new GoSlice();
|
||||
nativeLib.GetLanguageExtensions(toGoString(language), result);
|
||||
return toJavaStringArray(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all possible languages for the given file.
|
||||
*
|
||||
* @param filename of the file
|
||||
* @param content of the file
|
||||
* @return all possible languages
|
||||
*/
|
||||
public static String[] getLanguages(String filename, byte[] content) {
|
||||
GoSlice result = new GoSlice();
|
||||
nativeLib.GetLanguages(toGoString(filename), toGoByteSlice(content), result);
|
||||
return toJavaStringArray(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the mime type of the file.
|
||||
*
|
||||
* @param path of the file
|
||||
* @param language of the file
|
||||
* @return mime type
|
||||
*/
|
||||
public static String getMimeType(String path, String language) {
|
||||
return toJavaString(nativeLib.GetMimeType(toGoString(path), toGoString(language)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Reports whether the given file content is binary or not.
|
||||
*
|
||||
* @param content of the file
|
||||
* @return whether it's binary or not
|
||||
*/
|
||||
public static boolean isBinary(byte[] content) {
|
||||
return toJavaBool(nativeLib.IsBinary(toGoByteSlice(content)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Reports whether the given file or directory is a config file or directory.
|
||||
*
|
||||
* @param path of the file or directory
|
||||
* @return whether it's config or not
|
||||
*/
|
||||
public static boolean isConfiguration(String path) {
|
||||
return toJavaBool(nativeLib.IsConfiguration(toGoString(path)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Reports whether the given file or directory it's documentation.
|
||||
*
|
||||
* @param path of the file or directory. It must not contain its parents and
|
||||
* if it's a directory it must end in a slash e.g. "docs/" or
|
||||
* "foo.json".
|
||||
* @return whether it's docs or not
|
||||
*/
|
||||
public static boolean isDocumentation(String path) {
|
||||
return toJavaBool(nativeLib.IsDocumentation(toGoString(path)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Reports whether the given file is a dotfile.
|
||||
*
|
||||
* @param path of the file
|
||||
* @return whether it's a dotfile or not
|
||||
*/
|
||||
public static boolean isDotFile(String path) {
|
||||
return toJavaBool(nativeLib.IsDotFile(toGoString(path)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Reports whether the given path is an image or not.
|
||||
*
|
||||
* @param path of the file
|
||||
* @return whether it's an image or not
|
||||
*/
|
||||
public static boolean isImage(String path) {
|
||||
return toJavaBool(nativeLib.IsImage(toGoString(path)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Reports whether the given path is a vendor path or not.
|
||||
*
|
||||
* @param path of the file or directory
|
||||
* @return whether it's vendor or not
|
||||
*/
|
||||
public static boolean isVendor(String path) {
|
||||
return toJavaBool(nativeLib.IsVendor(toGoString(path)));
|
||||
}
|
||||
|
||||
}
|
73
java/src/main/java/tech/sourced/enry/GoUtils.java
Normal file
73
java/src/main/java/tech/sourced/enry/GoUtils.java
Normal file
@ -0,0 +1,73 @@
|
||||
package tech.sourced.enry;
|
||||
|
||||
import com.sun.jna.Memory;
import com.sun.jna.Pointer;
import tech.sourced.enry.nativelib.GoSlice;
import tech.sourced.enry.nativelib.GoString;

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
|
||||
|
||||
class GoUtils {
|
||||
|
||||
static GoString.ByValue toGoString(String str) {
|
||||
byte[] bytes;
|
||||
try {
|
||||
bytes = str.getBytes("utf-8");
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
bytes = str.getBytes();
|
||||
}
|
||||
|
||||
GoString.ByValue val = new GoString.ByValue();
|
||||
val.n = bytes.length;
|
||||
Pointer ptr = new Memory(bytes.length);
|
||||
ptr.write(0, bytes, 0, bytes.length);
|
||||
val.p = ptr;
|
||||
return val;
|
||||
}
|
||||
|
||||
static String toJavaString(GoString str) {
|
||||
if (str.n == 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
byte[] bytes = new byte[(int) str.n];
|
||||
str.p.read(0, bytes, 0, (int) str.n);
|
||||
try {
|
||||
return new String(bytes, "utf-8");
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
throw new RuntimeException("utf-8 encoding is not supported");
|
||||
}
|
||||
}
|
||||
|
||||
static String[] toJavaStringArray(GoSlice slice) {
|
||||
String[] result = new String[(int) slice.len];
|
||||
Pointer[] ptrArr = slice.data.getPointerArray(0, (int) slice.len);
|
||||
for (int i = 0; i < (int) slice.len; i++) {
|
||||
result[i] = ptrArr[i].getString(0);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static GoSlice.ByValue toGoByteSlice(byte[] bytes) {
|
||||
return sliceFromPtr(bytes.length, ptrFromBytes(bytes));
|
||||
}
|
||||
|
||||
static GoSlice.ByValue sliceFromPtr(int len, Pointer ptr) {
|
||||
GoSlice.ByValue val = new GoSlice.ByValue();
|
||||
val.cap = len;
|
||||
val.len = len;
|
||||
val.data = ptr;
|
||||
return val;
|
||||
}
|
||||
|
||||
static Pointer ptrFromBytes(byte[] bytes) {
|
||||
Pointer ptr = new Memory(bytes.length);
|
||||
ptr.write(0, bytes, 0, bytes.length);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static boolean toJavaBool(byte goBool) {
|
||||
return goBool == 1;
|
||||
}
|
||||
|
||||
}
|
23
java/src/main/java/tech/sourced/enry/Guess.java
Normal file
23
java/src/main/java/tech/sourced/enry/Guess.java
Normal file
@ -0,0 +1,23 @@
|
||||
package tech.sourced.enry;
|
||||
|
||||
/**
|
||||
* Guess denotes a language detection result of which enry can be
|
||||
* completely sure or not.
|
||||
*/
|
||||
public class Guess {
|
||||
/**
|
||||
* Result is the resultant language of the detection.
|
||||
*/
|
||||
public String result;
|
||||
|
||||
/**
|
||||
* Sure indicates whether the enry was completely sure the language is
|
||||
* the correct one or it might not be.
|
||||
*/
|
||||
public boolean sure;
|
||||
|
||||
public Guess(String result, boolean sure) {
|
||||
this.result = result;
|
||||
this.sure = sure;
|
||||
}
|
||||
}
|
150
java/src/test/java/tech/sourced/enry/EnryTest.java
Normal file
150
java/src/test/java/tech/sourced/enry/EnryTest.java
Normal file
@ -0,0 +1,150 @@
|
||||
package tech.sourced.enry;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
public class EnryTest {
|
||||
|
||||
@Test
|
||||
public void isAuxiliaryLanguage() {
|
||||
assertTrue(Enry.isAuxiliaryLanguage("HTML"));
|
||||
assertFalse(Enry.isAuxiliaryLanguage("Go"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getLanguage() {
|
||||
String code = "<?php $foo = bar();";
|
||||
assertEquals("PHP", Enry.getLanguage("foobar.php", code.getBytes()));
|
||||
}
|
||||
|
||||
// TODO: this is a bug in enry, fix when it's fixed there
|
||||
@Test(expected = AssertionError.class)
|
||||
public void getLanguageByContent() {
|
||||
String code = "<?php $foo = bar();";
|
||||
assertGuess(
|
||||
"PHP",
|
||||
true,
|
||||
Enry.getLanguageByContent(code.getBytes())
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getLanguageByEmacsModeline() {
|
||||
String code = "// -*- font:bar;mode:c++ -*-\n" +
|
||||
"template <typename X> class { X i; };";
|
||||
assertGuess(
|
||||
"C++",
|
||||
true,
|
||||
Enry.getLanguageByEmacsModeline(code.getBytes())
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getLanguageByExtension() {
|
||||
assertGuess(
|
||||
"Ruby",
|
||||
true,
|
||||
Enry.getLanguageByExtension("foo.rb")
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getLanguageByShebang() {
|
||||
String code = "#!/usr/bin/env python";
|
||||
assertGuess(
|
||||
"Python",
|
||||
true,
|
||||
Enry.getLanguageByShebang(code.getBytes())
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getLanguageByModeline() {
|
||||
String code = "// -*- font:bar;mode:c++ -*-\n" +
|
||||
"template <typename X> class { X i; };";
|
||||
assertGuess(
|
||||
"C++",
|
||||
true,
|
||||
Enry.getLanguageByModeline(code.getBytes())
|
||||
);
|
||||
|
||||
code = "# vim: noexpandtab: ft=javascript";
|
||||
assertGuess(
|
||||
"JavaScript",
|
||||
true,
|
||||
Enry.getLanguageByModeline(code.getBytes())
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getLanguageByVimModeline() {
|
||||
String code = "# vim: noexpandtab: ft=javascript";
|
||||
assertGuess(
|
||||
"JavaScript",
|
||||
true,
|
||||
Enry.getLanguageByVimModeline(code.getBytes())
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getLanguageExtensions() {
|
||||
String[] exts = Enry.getLanguageExtensions("Go");
|
||||
String[] expected = {".go"};
|
||||
assertArrayEquals(expected, exts);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getLanguages() {
|
||||
String code = "#include <stdio.h>" +
|
||||
"" +
|
||||
"extern int foo(void *bar);";
|
||||
|
||||
String[] result = Enry.getLanguages("foo.h", code.getBytes());
|
||||
String[] expected = {"C", "C++", "Objective-C"};
|
||||
assertArrayEquals(expected, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getMimeType() {
|
||||
assertEquals(
|
||||
"text/x-ruby",
|
||||
Enry.getMimeType("foo.rb", "Ruby")
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isBinary() {
|
||||
assertFalse(Enry.isBinary("hello = 'world'".getBytes()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isConfiguration() {
|
||||
assertTrue(Enry.isConfiguration("config.yml"));
|
||||
assertFalse(Enry.isConfiguration("FooServiceProviderImplementorFactory.java"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isDocumentation() {
|
||||
assertTrue(Enry.isDocumentation("docs/"));
|
||||
assertFalse(Enry.isDocumentation("src/"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isDotFile() {
|
||||
assertTrue(Enry.isDotFile(".env"));
|
||||
assertFalse(Enry.isDotFile("config.json"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isImage() {
|
||||
assertTrue(Enry.isImage("yup.jpg"));
|
||||
assertFalse(Enry.isImage("nope.go"));
|
||||
}
|
||||
|
||||
void assertGuess(String language, boolean sure, Guess result) {
|
||||
assertEquals(language, result.result);
|
||||
assertEquals(sure, result.sure);
|
||||
}
|
||||
|
||||
}
|
Reference in New Issue
Block a user