java implementation of enry

Signed-off-by: Miguel Molina <miguel@erizocosmi.co>
This commit is contained in:
Miguel Molina
2017-08-08 12:02:27 +02:00
parent 33ff3ba8cd
commit d8fc4fe92f
12 changed files with 661 additions and 1 deletions

View File

@ -0,0 +1,224 @@
package tech.sourced.enry;
import tech.sourced.enry.nativelib.*;
import static tech.sourced.enry.GoUtils.*;
public class Enry {
private static final EnryLibrary nativeLib = EnryLibrary.INSTANCE;
/**
* Returns whether the given language is auxiliary or not.
*
* @param language name of the language, e.g. PHP, HTML, ...
* @return if it's an auxiliary language
*/
public static boolean isAuxiliaryLanguage(String language) {
return toJavaBool(nativeLib.IsAuxiliaryLanguage(toGoString(language)));
}
/**
* Returns the language of the given file based on the filename and its
* contents.
*
* @param filename name of the file with the extension
* @param content array of bytes with the contents of the file (the code)
* @return the guessed language
*/
public static String getLanguage(String filename, byte[] content) {
return toJavaString(nativeLib.GetLanguage(
toGoString(filename),
toGoByteSlice(content)
));
}
/**
* Returns detected language by its content.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByContent(byte[] content) {
GetLanguageByContent_return.ByValue res = nativeLib.GetLanguageByContent(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its emacs modeline.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByEmacsModeline(byte[] content) {
GetLanguageByEmacsModeline_return.ByValue res = nativeLib.GetLanguageByEmacsModeline(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by the extension of the filename.
* If there are more than one possible languages, it returns
* the first language in alphabetical order and safe to false.
*
* @param filename of the file
* @return guessed result
*/
public static Guess getLanguageByExtension(String filename) {
GetLanguageByExtension_return.ByValue res = nativeLib.GetLanguageByExtension(toGoString(filename));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its shebang.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByShebang(byte[] content) {
GetLanguageByShebang_return.ByValue res = nativeLib.GetLanguageByShebang(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its filename.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param filename of the file
* @return guessed result
*/
public static Guess getLanguageByFilename(String filename) {
GetLanguageByFilename_return.ByValue res = nativeLib.GetLanguageByFilename(toGoString(filename));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its modeline.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByModeline(byte[] content) {
GetLanguageByModeline_return.ByValue res = nativeLib.GetLanguageByModeline(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its vim modeline.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByVimModeline(byte[] content) {
GetLanguageByVimModeline_return.ByValue res = nativeLib.GetLanguageByVimModeline(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns all the possible extensions for a file in the given language.
*
* @param language to get extensions from
* @return extensions
*/
public static String[] getLanguageExtensions(String language) {
GoSlice result = new GoSlice();
nativeLib.GetLanguageExtensions(toGoString(language), result);
return toJavaStringArray(result);
}
/**
* Returns all possible languages for the given file.
*
* @param filename of the file
* @param content of the file
* @return all possible languages
*/
public static String[] getLanguages(String filename, byte[] content) {
GoSlice result = new GoSlice();
nativeLib.GetLanguages(toGoString(filename), toGoByteSlice(content), result);
return toJavaStringArray(result);
}
/**
* Returns the mime type of the file.
*
* @param path of the file
* @param language of the file
* @return mime type
*/
public static String getMimeType(String path, String language) {
return toJavaString(nativeLib.GetMimeType(toGoString(path), toGoString(language)));
}
/**
* Reports whether the given file content is binary or not.
*
* @param content of the file
* @return whether it's binary or not
*/
public static boolean isBinary(byte[] content) {
return toJavaBool(nativeLib.IsBinary(toGoByteSlice(content)));
}
/**
* Reports whether the given file or directory is a config file or directory.
*
* @param path of the file or directory
* @return whether it's config or not
*/
public static boolean isConfiguration(String path) {
return toJavaBool(nativeLib.IsConfiguration(toGoString(path)));
}
/**
* Reports whether the given file or directory it's documentation.
*
* @param path of the file or directory. It must not contain its parents and
* if it's a directory it must end in a slash e.g. "docs/" or
* "foo.json".
* @return whether it's docs or not
*/
public static boolean isDocumentation(String path) {
return toJavaBool(nativeLib.IsDocumentation(toGoString(path)));
}
/**
* Reports whether the given file is a dotfile.
*
* @param path of the file
* @return whether it's a dotfile or not
*/
public static boolean isDotFile(String path) {
return toJavaBool(nativeLib.IsDotFile(toGoString(path)));
}
/**
* Reports whether the given path is an image or not.
*
* @param path of the file
* @return whether it's an image or not
*/
public static boolean isImage(String path) {
return toJavaBool(nativeLib.IsImage(toGoString(path)));
}
/**
* Reports whether the given path is a vendor path or not.
*
* @param path of the file or directory
* @return whether it's vendor or not
*/
public static boolean isVendor(String path) {
return toJavaBool(nativeLib.IsVendor(toGoString(path)));
}
}

View File

@ -0,0 +1,73 @@
package tech.sourced.enry;
import com.sun.jna.Memory;
import com.sun.jna.Pointer;
import tech.sourced.enry.nativelib.GoSlice;
import tech.sourced.enry.nativelib.GoString;
import java.io.UnsupportedEncodingException;
/**
 * Conversions between Java values and the Go values (strings, slices and
 * booleans) exchanged with the native library through JNA.
 *
 * All text is encoded and decoded as UTF-8.
 */
class GoUtils {

    /** Charset used for every string crossing the Java/Go boundary. */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Converts a Java string into a Go string backed by native memory.
     *
     * @param str string to convert, must not be null
     * @return Go string holding the UTF-8 bytes of {@code str}; for an empty
     *         string the length is 0 and the data pointer is null
     */
    static GoString.ByValue toGoString(String str) {
        // Using the Charset overload guarantees UTF-8; there is no checked
        // exception and therefore no fallback to the platform charset.
        byte[] bytes = str.getBytes(UTF_8);
        GoString.ByValue val = new GoString.ByValue();
        val.n = bytes.length;
        val.p = ptrFromBytes(bytes);
        return val;
    }

    /**
     * Converts a Go string into a Java string.
     *
     * @param str Go string to convert
     * @return decoded string, or the empty string when {@code str} has no
     *         bytes or no data pointer
     */
    static String toJavaString(GoString str) {
        if (str.n <= 0 || str.p == null) {
            return "";
        }
        int length = (int) str.n;
        byte[] bytes = new byte[length];
        str.p.read(0, bytes, 0, length);
        return new String(bytes, UTF_8);
    }

    /**
     * Converts a Go slice of pointers to NUL-terminated strings into a Java
     * string array.
     *
     * @param slice slice whose data is an array of {@code len} string pointers
     * @return one Java string per element, or an empty array when the slice
     *         has no elements or no data pointer
     */
    static String[] toJavaStringArray(GoSlice slice) {
        int length = (int) slice.len;
        if (length <= 0 || slice.data == null) {
            return new String[0];
        }
        String[] result = new String[length];
        Pointer[] ptrArr = slice.data.getPointerArray(0, length);
        for (int i = 0; i < length; i++) {
            // Decode explicitly as UTF-8 instead of relying on JNA's default
            // encoding, so the result does not depend on the platform.
            result[i] = ptrArr[i].getString(0, "UTF-8");
        }
        return result;
    }

    /**
     * Converts a Java byte array into a Go byte slice backed by native memory.
     *
     * @param bytes content of the slice, must not be null
     * @return Go slice with length and capacity equal to {@code bytes.length}
     */
    static GoSlice.ByValue toGoByteSlice(byte[] bytes) {
        return sliceFromPtr(bytes.length, ptrFromBytes(bytes));
    }

    /**
     * Builds a Go slice whose length and capacity are both {@code len}.
     *
     * @param len number of elements
     * @param ptr pointer to the first element, may be null when {@code len} is 0
     * @return the slice
     */
    static GoSlice.ByValue sliceFromPtr(int len, Pointer ptr) {
        GoSlice.ByValue val = new GoSlice.ByValue();
        val.cap = len;
        val.len = len;
        val.data = ptr;
        return val;
    }

    /**
     * Copies the given bytes into newly allocated native memory.
     *
     * @param bytes bytes to copy, must not be null
     * @return pointer to the copy, or a null pointer when {@code bytes} is
     *         empty
     */
    static Pointer ptrFromBytes(byte[] bytes) {
        if (bytes.length == 0) {
            // JNA's Memory rejects zero-sized allocations with an
            // IllegalArgumentException; a null pointer with length 0 is how
            // an empty string or slice is represented instead.
            return Pointer.NULL;
        }
        Pointer ptr = new Memory(bytes.length);
        ptr.write(0, bytes, 0, bytes.length);
        return ptr;
    }

    /**
     * Converts a Go boolean, received as a byte, into a Java boolean.
     *
     * @param goBool byte received from the native library
     * @return false for 0, true for any other value
     */
    static boolean toJavaBool(byte goBool) {
        return goBool != 0;
    }
}

View File

@ -0,0 +1,23 @@
package tech.sourced.enry;
/**
* Guess denotes a language detection result of which enry can be
* completely sure or not.
*/
public class Guess {
/**
* Result is the resultant language of the detection.
*/
public String result;
/**
* Sure indicates whether the enry was completely sure the language is
* the correct one or it might not be.
*/
public boolean sure;
public Guess(String result, boolean sure) {
this.result = result;
this.sure = sure;
}
}

View File

@ -0,0 +1,150 @@
package tech.sourced.enry;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Tests for the static methods of {@link Enry}.
 *
 * File contents are always encoded as UTF-8 so the tests do not depend on
 * the default charset of the machine running them.
 */
public class EnryTest {

    @Test
    public void isAuxiliaryLanguage() {
        assertTrue(Enry.isAuxiliaryLanguage("HTML"));
        assertFalse(Enry.isAuxiliaryLanguage("Go"));
    }

    @Test
    public void getLanguage() {
        String code = "<?php $foo = bar();";
        assertEquals("PHP", Enry.getLanguage("foobar.php", utf8(code)));
    }

    // TODO: this is a bug in enry, fix when it's fixed there
    @Test(expected = AssertionError.class)
    public void getLanguageByContent() {
        String code = "<?php $foo = bar();";
        assertGuess(
                "PHP",
                true,
                Enry.getLanguageByContent(utf8(code))
        );
    }

    @Test
    public void getLanguageByEmacsModeline() {
        String code = "// -*- font:bar;mode:c++ -*-\n" +
                "template <typename X> class { X i; };";
        assertGuess(
                "C++",
                true,
                Enry.getLanguageByEmacsModeline(utf8(code))
        );
    }

    @Test
    public void getLanguageByExtension() {
        assertGuess(
                "Ruby",
                true,
                Enry.getLanguageByExtension("foo.rb")
        );
    }

    @Test
    public void getLanguageByShebang() {
        String code = "#!/usr/bin/env python";
        assertGuess(
                "Python",
                true,
                Enry.getLanguageByShebang(utf8(code))
        );
    }

    @Test
    public void getLanguageByModeline() {
        // Emacs style modeline.
        String code = "// -*- font:bar;mode:c++ -*-\n" +
                "template <typename X> class { X i; };";
        assertGuess(
                "C++",
                true,
                Enry.getLanguageByModeline(utf8(code))
        );

        // Vim style modeline.
        code = "# vim: noexpandtab: ft=javascript";
        assertGuess(
                "JavaScript",
                true,
                Enry.getLanguageByModeline(utf8(code))
        );
    }

    @Test
    public void getLanguageByVimModeline() {
        String code = "# vim: noexpandtab: ft=javascript";
        assertGuess(
                "JavaScript",
                true,
                Enry.getLanguageByVimModeline(utf8(code))
        );
    }

    @Test
    public void getLanguageExtensions() {
        String[] exts = Enry.getLanguageExtensions("Go");
        String[] expected = {".go"};
        assertArrayEquals(expected, exts);
    }

    @Test
    public void getLanguages() {
        // NOTE(review): the pieces are joined without any line break, so the
        // content is a single line; presumably newlines were intended.
        // Left untouched because the expected result was obtained with it.
        String code = "#include <stdio.h>" +
                "" +
                "extern int foo(void *bar);";
        String[] result = Enry.getLanguages("foo.h", utf8(code));
        String[] expected = {"C", "C++", "Objective-C"};
        assertArrayEquals(expected, result);
    }

    @Test
    public void getMimeType() {
        assertEquals(
                "text/x-ruby",
                Enry.getMimeType("foo.rb", "Ruby")
        );
    }

    @Test
    public void isBinary() {
        assertFalse(Enry.isBinary(utf8("hello = 'world'")));
    }

    @Test
    public void isConfiguration() {
        assertTrue(Enry.isConfiguration("config.yml"));
        assertFalse(Enry.isConfiguration("FooServiceProviderImplementorFactory.java"));
    }

    @Test
    public void isDocumentation() {
        assertTrue(Enry.isDocumentation("docs/"));
        assertFalse(Enry.isDocumentation("src/"));
    }

    @Test
    public void isDotFile() {
        assertTrue(Enry.isDotFile(".env"));
        assertFalse(Enry.isDotFile("config.json"));
    }

    @Test
    public void isImage() {
        assertTrue(Enry.isImage("yup.jpg"));
        assertFalse(Enry.isImage("nope.go"));
    }

    /**
     * Asserts that a guess has the expected language and certainty.
     *
     * @param language expected language
     * @param sure     expected certainty
     * @param result   guess to check
     */
    static void assertGuess(String language, boolean sure, Guess result) {
        assertEquals(language, result.result);
        assertEquals(sure, result.sure);
    }

    /**
     * Encodes the given text as UTF-8, independently of the platform
     * default charset.
     *
     * @param text text to encode
     * @return UTF-8 bytes of the text
     */
    private static byte[] utf8(String text) {
        return text.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }
}