java implementation of enry

Signed-off-by: Miguel Molina <miguel@erizocosmi.co>
This commit is contained in:
Miguel Molina 2017-08-08 12:02:27 +02:00
parent 33ff3ba8cd
commit d8fc4fe92f
No known key found for this signature in database
GPG Key ID: D8826D1B86D051EA
12 changed files with 661 additions and 1 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
benchmarks/output
.ci
Makefile.main
.shared

View File

@ -24,13 +24,25 @@ LOCAL_COMMIT := $(shell git rev-parse --short HEAD)
LOCAL_BUILD := $(shell date +"%m-%d-%Y_%H_%M_%S")
LOCAL_LDFLAGS = -s -X main.version=$(LOCAL_TAG) -X main.build=$(LOCAL_BUILD) -X main.commit=$(LOCAL_COMMIT)
# shared objects
# Root directory that receives the built shared libraries and the C header.
RESOURCES_DIR=./.shared
# Output directory and library path for the Linux build.
LINUX_DIR=$(RESOURCES_DIR)/linux-x86-64
LINUX_SHARED_LIB=$(LINUX_DIR)/libenry.so
# Output directory and library path for the Darwin build.
DARWIN_DIR=$(RESOURCES_DIR)/darwin
DARWIN_SHARED_LIB=$(DARWIN_DIR)/libenry.dylib
# Name of the C header; the shared-library recipes move it from the per-OS
# directory into RESOURCES_DIR.
HEADER_FILE=libenry.h
# Go source file handed to `go build -buildmode=c-shared`.
NATIVE_LIB=./shared/enry.go
# Clone the linguist repository into $(LINGUIST_PATH) (the target itself).
$(LINGUIST_PATH):
git clone https://github.com/github/linguist.git $@
# Remove the linguist checkout.
clean-linguist:
rm -rf $(LINGUIST_PATH)
clean: clean-linguist
# Remove the directory that holds the built shared libraries and the header.
clean-shared:
rm -rf $(RESOURCES_DIR)
# NOTE(review): `clean` is declared twice in this view; this looks like the old
# and new sides of a diff shown together — confirm only this declaration is kept.
clean: clean-linguist clean-shared
code-generate: $(LINGUIST_PATH)
mkdir -p data
@ -48,3 +60,17 @@ benchmarks-slow: $(LINGUST_PATH)
# Build the command-line binary `enry` from cli/enry/main.go, passing
# $(LOCAL_LDFLAGS) to the Go linker.
build-cli:
go build -o enry -ldflags "$(LOCAL_LDFLAGS)" cli/enry/main.go
# Convenience aliases for the per-OS shared library files. They do not name
# files themselves, so they are declared phony to keep working even if a file
# called `linux-shared` or `darwin-shared` ever appears in the tree.
.PHONY: linux-shared darwin-shared
linux-shared: $(LINUX_SHARED_LIB)
darwin-shared: $(DARWIN_SHARED_LIB)

# Build the Darwin shared library from $(NATIVE_LIB). The header that ends up
# next to the library is then moved up into $(RESOURCES_DIR).
$(DARWIN_SHARED_LIB):
	mkdir -p $(@D) && \
	GOOS=darwin GOARCH=amd64 go build -buildmode=c-shared -o $@ $(NATIVE_LIB) && \
	mv $(@D)/$(HEADER_FILE) $(RESOURCES_DIR)/$(HEADER_FILE)

# Build the Linux shared library from $(NATIVE_LIB). The header that ends up
# next to the library is then moved up into $(RESOURCES_DIR).
$(LINUX_SHARED_LIB):
	mkdir -p $(@D) && \
	GOOS=linux GOARCH=amd64 go build -buildmode=c-shared -o $@ $(NATIVE_LIB) && \
	mv $(@D)/$(HEADER_FILE) $(RESOURCES_DIR)/$(HEADER_FILE)

27
java/.gitignore vendored Normal file
View File

@ -0,0 +1,27 @@
# Compiled class file
*.class
# Log file
*.log
# BlueJ files
*.ctxt
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
*.jar
*.war
*.ear
*.zip
*.tar.gz
*.rar
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
target
.idea
.jnaerator
shared
*.jar

61
java/Makefile Normal file
View File

@ -0,0 +1,61 @@
# Commit of the JNAerator repository that is checked out before building it.
JNAERATOR_VERSION=ac73c9e
# Directory and path of the jar generated by JNAerator.
JARS_DIR=./lib
JAR=$(JARS_DIR)/enry.jar
# Directory the JNAerator repository is cloned into, and the jar built from it.
JNAERATOR_DIR=./.jnaerator
JNAERATOR_JAR=$(JNAERATOR_DIR)/jnaerator.jar
# RESOURCES_SRC is the parent project's shared-library output directory;
# RESOURCES_DIR is the local copy of it made by this Makefile.
RESOURCES_SRC=../.shared
RESOURCES_DIR=./shared
# C header (inside the local copy) that is passed to JNAerator.
HEADER_FILE=$(RESOURCES_DIR)/libenry.h
# Default goal: build the jar with the generated bindings.
# `all` is a command name, not a file, so it is declared phony.
.PHONY: all
all: $(JAR)

# Run JNAerator on the C header to produce a standalone jar in $(JARS_DIR).
# Needs the local copy of the shared resources (which contains the header) and
# the JNAerator jar itself.
$(JAR): $(RESOURCES_DIR) $(JNAERATOR_JAR)
	mkdir -p $(JARS_DIR) && \
	java -jar $(JNAERATOR_JAR) \
		-package tech.sourced.enry.nativelib \
		-library enry \
		$(HEADER_FILE) \
		-o $(JARS_DIR) \
		-mode StandaloneJar \
		-runtime JNA
# Copy the contents of the parent project's shared-library directory into the
# local resources directory, after building the library for this OS.
# The `/.` suffix copies the directory's *contents*: a plain
# `cp -R src dst` nests the copy as dst/<src name> once dst already exists,
# which happens on every run after the first.
$(RESOURCES_DIR): os-shared-lib
	mkdir -p $@ && \
	cp -R $(RESOURCES_SRC)/. $@
# Clone JNAerator, check out the pinned commit and build its shaded jar.
#
# - $(RESOURCES_DIR) is an order-only prerequisite: it is still built first,
#   but a change of the directory's mtime no longer re-triggers this recipe
#   (which would fail, because the clone directory already exists).
# - Any leftover clone from an earlier failed attempt is removed first so the
#   recipe can be re-run.
# - The clone is not shallow: with `--depth 1` only the branch tip is fetched,
#   so checking out $(JNAERATOR_VERSION) fails unless it happens to be the tip.
$(JNAERATOR_JAR): | $(RESOURCES_DIR)
	rm -rf $(JNAERATOR_DIR) && \
	git clone https://github.com/nativelibs4java/jnaerator.git $(JNAERATOR_DIR) && \
	cd $(JNAERATOR_DIR) && \
	git checkout $(JNAERATOR_VERSION) && \
	mvn clean install && \
	mv jnaerator/target/jnaerator-*-shaded.jar ./jnaerator.jar
# Build the shared library that matches the operating system reported by
# `uname -s`; fail with a message on anything other than Linux or Darwin.
# String comparison uses `=`: `==` inside `[ ]` is a bash extension and is
# rejected by strict POSIX shells, which make may use as /bin/sh.
.PHONY: os-shared-lib
os-shared-lib:
	@os_name="$$(uname -s)" && \
	if [ "$$os_name" = "Linux" ]; then \
		$(MAKE) linux-shared; \
	elif [ "$$os_name" = "Darwin" ]; then \
		$(MAKE) darwin-shared; \
	else \
		echo "Unsupported operating system, can't build shared library"; \
		exit 1; \
	fi
# Both targets delegate to the target of the same name in the parent
# directory's Makefile.
linux-shared:
	$(MAKE) -C .. linux-shared

darwin-shared:
	$(MAKE) -C .. darwin-shared
# Command targets: none of them names a file, so they are declared phony to
# keep them from being skipped if a file or directory with the same name exists.
.PHONY: test package clean

# Run the test suite through sbt.
test:
	sbt clean test

# Build the assembly jar through sbt.
package:
	sbt clean assembly

# Remove the generated bindings jar and the local copy of the shared resources.
clean:
	rm -rf $(JAR)
	rm -rf $(RESOURCES_DIR)

35
java/README.md Normal file
View File

@ -0,0 +1,35 @@
# enry-java
### Requirements
* `sbt`
* `Java` (tested with Java 1.8)
* `maven` installed and on the PATH (only for local usage)
* `Go` (only for building the shared objects for your operating system)
### Generate jar with Java bindings and shared libraries
You need to do this before exporting the jar and/or testing.
```
make
```
This will download JNAerator and build its jar (hence the need for `mvn` to be installed), then use it to generate the Java bindings from the `libenry.h` header file. The resulting jar will be placed under `lib`.
The shared libraries for your operating system will be built if needed and copied inside the `shared` directory.
For IntelliJ and other IDEs remember to mark `shared` folder as sources and add `lib/enry.jar` as library. If you use `sbt` from the command line directly that's already taken care of.
### Run tests
```
make test
```
### Export jar
```
make package
```
Jar will be located in `./target/enry-java-assembly-X.X.X.jar`.

38
java/build.sbt Normal file
View File

@ -0,0 +1,38 @@
// Project coordinates.
name := "enry-java"
organization := "tech.sourced"
version := "1.0"
// Turn off crossPaths and autoScalaLibrary (presumably because the project
// contains only Java sources — confirm).
crossPaths := false
autoScalaLibrary := false
publishMavenStyle := true
exportJars := true
// JUnit bridge for sbt, test scope only.
libraryDependencies += "com.novocode" % "junit-interface" % "0.11" % Test
// Unmanaged jars are looked up in ./lib.
unmanagedBase := baseDirectory.value / "lib"
// Put the ./shared directory on the Test, Runtime and Compile classpaths.
unmanagedClasspath in Test += baseDirectory.value / "shared"
unmanagedClasspath in Runtime += baseDirectory.value / "shared"
unmanagedClasspath in Compile += baseDirectory.value / "shared"
testOptions += Tests.Argument(TestFrameworks.JUnit)
// Task that shells out to `make` and fails the build when it exits non-zero.
lazy val buildNative = taskKey[Unit]("builds native code")
buildNative := {
  val exitCode = "make".!
  if (exitCode != 0) {
    throw new RuntimeException("unable to generate shared libraries and native jar bindings")
  }
}
// Redefine `test`, `compile` and `assembly` so that each of them also pulls in
// buildNative before delegating to the wrapped task.
// NOTE(review): sbt treats `.value` calls as task dependencies rather than as
// sequential statements — confirm that buildNative is actually guaranteed to
// finish before the wrapped task starts.
test := {
buildNative.value
(test in Test).value
}
compile := {
buildNative.value
(compile in Compile).value
}
// Refers to the previous definition of `assembly` on the right-hand side.
assembly := {
buildNative.value
assembly.value
}

View File

@ -0,0 +1 @@
sbt.version = 0.13.16

1
java/project/plugins.sbt Normal file
View File

@ -0,0 +1 @@
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.5")

View File

@ -0,0 +1,224 @@
package tech.sourced.enry;
import tech.sourced.enry.nativelib.*;
import static tech.sourced.enry.GoUtils.*;
public class Enry {
private static final EnryLibrary nativeLib = EnryLibrary.INSTANCE;
/**
* Returns whether the given language is auxiliary or not.
*
* @param language name of the language, e.g. PHP, HTML, ...
* @return if it's an auxiliary language
*/
public static boolean isAuxiliaryLanguage(String language) {
return toJavaBool(nativeLib.IsAuxiliaryLanguage(toGoString(language)));
}
/**
* Returns the language of the given file based on the filename and its
* contents.
*
* @param filename name of the file with the extension
* @param content array of bytes with the contents of the file (the code)
* @return the guessed language
*/
public static String getLanguage(String filename, byte[] content) {
return toJavaString(nativeLib.GetLanguage(
toGoString(filename),
toGoByteSlice(content)
));
}
/**
* Returns detected language by its content.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByContent(byte[] content) {
GetLanguageByContent_return.ByValue res = nativeLib.GetLanguageByContent(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its emacs modeline.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByEmacsModeline(byte[] content) {
GetLanguageByEmacsModeline_return.ByValue res = nativeLib.GetLanguageByEmacsModeline(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by the extension of the filename.
* If there are more than one possible languages, it returns
* the first language in alphabetical order and safe to false.
*
* @param filename of the file
* @return guessed result
*/
public static Guess getLanguageByExtension(String filename) {
GetLanguageByExtension_return.ByValue res = nativeLib.GetLanguageByExtension(toGoString(filename));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its shebang.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByShebang(byte[] content) {
GetLanguageByShebang_return.ByValue res = nativeLib.GetLanguageByShebang(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its filename.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param filename of the file
* @return guessed result
*/
public static Guess getLanguageByFilename(String filename) {
GetLanguageByFilename_return.ByValue res = nativeLib.GetLanguageByFilename(toGoString(filename));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its modeline.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByModeline(byte[] content) {
GetLanguageByModeline_return.ByValue res = nativeLib.GetLanguageByModeline(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its vim modeline.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByVimModeline(byte[] content) {
GetLanguageByVimModeline_return.ByValue res = nativeLib.GetLanguageByVimModeline(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns all the possible extensions for a file in the given language.
*
* @param language to get extensions from
* @return extensions
*/
public static String[] getLanguageExtensions(String language) {
GoSlice result = new GoSlice();
nativeLib.GetLanguageExtensions(toGoString(language), result);
return toJavaStringArray(result);
}
/**
* Returns all possible languages for the given file.
*
* @param filename of the file
* @param content of the file
* @return all possible languages
*/
public static String[] getLanguages(String filename, byte[] content) {
GoSlice result = new GoSlice();
nativeLib.GetLanguages(toGoString(filename), toGoByteSlice(content), result);
return toJavaStringArray(result);
}
/**
* Returns the mime type of the file.
*
* @param path of the file
* @param language of the file
* @return mime type
*/
public static String getMimeType(String path, String language) {
return toJavaString(nativeLib.GetMimeType(toGoString(path), toGoString(language)));
}
/**
* Reports whether the given file content is binary or not.
*
* @param content of the file
* @return whether it's binary or not
*/
public static boolean isBinary(byte[] content) {
return toJavaBool(nativeLib.IsBinary(toGoByteSlice(content)));
}
/**
* Reports whether the given file or directory is a config file or directory.
*
* @param path of the file or directory
* @return whether it's config or not
*/
public static boolean isConfiguration(String path) {
return toJavaBool(nativeLib.IsConfiguration(toGoString(path)));
}
/**
* Reports whether the given file or directory it's documentation.
*
* @param path of the file or directory. It must not contain its parents and
* if it's a directory it must end in a slash e.g. "docs/" or
* "foo.json".
* @return whether it's docs or not
*/
public static boolean isDocumentation(String path) {
return toJavaBool(nativeLib.IsDocumentation(toGoString(path)));
}
/**
* Reports whether the given file is a dotfile.
*
* @param path of the file
* @return whether it's a dotfile or not
*/
public static boolean isDotFile(String path) {
return toJavaBool(nativeLib.IsDotFile(toGoString(path)));
}
/**
* Reports whether the given path is an image or not.
*
* @param path of the file
* @return whether it's an image or not
*/
public static boolean isImage(String path) {
return toJavaBool(nativeLib.IsImage(toGoString(path)));
}
/**
* Reports whether the given path is a vendor path or not.
*
* @param path of the file or directory
* @return whether it's vendor or not
*/
public static boolean isVendor(String path) {
return toJavaBool(nativeLib.IsVendor(toGoString(path)));
}
}

View File

@ -0,0 +1,73 @@
package tech.sourced.enry;
import com.sun.jna.Memory;
import com.sun.jna.Pointer;
import tech.sourced.enry.nativelib.GoSlice;
import tech.sourced.enry.nativelib.GoString;

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
/**
 * Conversions between Java values and the Go value layouts (strings, slices,
 * booleans) used by the generated native bindings.
 */
class GoUtils {

    /**
     * Encodes a Java string as UTF-8 and wraps it in a Go string value.
     *
     * @param str string to convert
     * @return Go string whose length is the number of UTF-8 bytes; for an
     *         empty string the data pointer is null
     */
    static GoString.ByValue toGoString(String str) {
        // Always UTF-8: no silent fallback to the platform default charset.
        byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
        GoString.ByValue val = new GoString.ByValue();
        val.n = bytes.length;
        val.p = ptrFromBytes(bytes);
        return val;
    }

    /**
     * Decodes a Go string as UTF-8.
     *
     * @param str Go string to convert
     * @return the decoded text, or "" when the Go string has length zero
     */
    static String toJavaString(GoString str) {
        if (str.n == 0) {
            return "";
        }

        byte[] bytes = new byte[(int) str.n];
        str.p.read(0, bytes, 0, (int) str.n);
        return new String(bytes, StandardCharsets.UTF_8);
    }

    /**
     * Reads a Go slice of native string pointers into a Java array.
     *
     * @param slice slice whose data points to {@code slice.len} string pointers
     * @return one Java string per element; empty when the slice has no
     *         elements or no data pointer
     */
    static String[] toJavaStringArray(GoSlice slice) {
        int length = (int) slice.len;
        // An empty slice may carry a null data pointer; do not dereference it.
        if (length <= 0 || slice.data == null) {
            return new String[0];
        }

        String[] result = new String[length];
        Pointer[] ptrArr = slice.data.getPointerArray(0, length);
        for (int i = 0; i < length; i++) {
            result[i] = ptrArr[i].getString(0);
        }
        return result;
    }

    /**
     * Copies a byte array into native memory and wraps it in a Go slice.
     *
     * @param bytes bytes to convert
     * @return Go slice with len and cap equal to {@code bytes.length}
     */
    static GoSlice.ByValue toGoByteSlice(byte[] bytes) {
        return sliceFromPtr(bytes.length, ptrFromBytes(bytes));
    }

    /**
     * Builds a Go slice value over already-allocated native memory.
     *
     * @param len number of elements; used for both len and cap
     * @param ptr pointer to the first element (may be null when len is 0)
     * @return the slice value
     */
    static GoSlice.ByValue sliceFromPtr(int len, Pointer ptr) {
        GoSlice.ByValue val = new GoSlice.ByValue();
        val.cap = len;
        val.len = len;
        val.data = ptr;
        return val;
    }

    /**
     * Copies a byte array into freshly allocated native memory.
     *
     * @param bytes bytes to copy
     * @return pointer to the copy, or null for an empty array
     */
    static Pointer ptrFromBytes(byte[] bytes) {
        // Memory rejects zero-sized allocations, so an empty input is
        // represented by a null pointer (paired with a length of 0).
        if (bytes.length == 0) {
            return null;
        }

        Pointer ptr = new Memory(bytes.length);
        ptr.write(0, bytes, 0, bytes.length);
        return ptr;
    }

    /**
     * Converts a Go boolean, received as a byte, to a Java boolean.
     *
     * @param goBool byte value of the Go boolean
     * @return true only when the byte equals 1
     */
    static boolean toJavaBool(byte goBool) {
        return goBool == 1;
    }
}

View File

@ -0,0 +1,23 @@
package tech.sourced.enry;
/**
 * A language detection result together with a flag telling whether enry was
 * completely sure about it.
 */
public class Guess {

    /**
     * The language produced by the detection.
     */
    public String result;

    /**
     * Whether enry was completely sure that the language is the correct one;
     * false means it might not be.
     */
    public boolean sure;

    /**
     * Creates a guess from its two parts.
     *
     * @param result detected language
     * @param sure   whether the detection is certain
     */
    public Guess(String result, boolean sure) {
        this.sure = sure;
        this.result = result;
    }
}

View File

@ -0,0 +1,150 @@
package tech.sourced.enry;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Exercises the public methods of {@link Enry} against the native library.
 */
public class EnryTest {

    // Snippets shared by more than one test.
    private static final String PHP_CODE = "<?php $foo = bar();";
    private static final String EMACS_MODELINE_CODE = "// -*- font:bar;mode:c++ -*-\n" +
            "template <typename X> class { X i; };";
    private static final String VIM_MODELINE_CODE = "# vim: noexpandtab: ft=javascript";

    @Test
    public void isAuxiliaryLanguage() {
        assertTrue(Enry.isAuxiliaryLanguage("HTML"));
        assertFalse(Enry.isAuxiliaryLanguage("Go"));
    }

    @Test
    public void getLanguage() {
        String detected = Enry.getLanguage("foobar.php", PHP_CODE.getBytes());
        assertEquals("PHP", detected);
    }

    // TODO: this is a bug in enry, fix when it's fixed there
    @Test(expected = AssertionError.class)
    public void getLanguageByContent() {
        Guess guess = Enry.getLanguageByContent(PHP_CODE.getBytes());
        assertGuess("PHP", true, guess);
    }

    @Test
    public void getLanguageByEmacsModeline() {
        Guess guess = Enry.getLanguageByEmacsModeline(EMACS_MODELINE_CODE.getBytes());
        assertGuess("C++", true, guess);
    }

    @Test
    public void getLanguageByExtension() {
        Guess guess = Enry.getLanguageByExtension("foo.rb");
        assertGuess("Ruby", true, guess);
    }

    @Test
    public void getLanguageByShebang() {
        String code = "#!/usr/bin/env python";
        Guess guess = Enry.getLanguageByShebang(code.getBytes());
        assertGuess("Python", true, guess);
    }

    @Test
    public void getLanguageByModeline() {
        // An emacs modeline first, then a vim one.
        Guess emacsGuess = Enry.getLanguageByModeline(EMACS_MODELINE_CODE.getBytes());
        assertGuess("C++", true, emacsGuess);

        Guess vimGuess = Enry.getLanguageByModeline(VIM_MODELINE_CODE.getBytes());
        assertGuess("JavaScript", true, vimGuess);
    }

    @Test
    public void getLanguageByVimModeline() {
        Guess guess = Enry.getLanguageByVimModeline(VIM_MODELINE_CODE.getBytes());
        assertGuess("JavaScript", true, guess);
    }

    @Test
    public void getLanguageExtensions() {
        String[] expected = {".go"};
        assertArrayEquals(expected, Enry.getLanguageExtensions("Go"));
    }

    @Test
    public void getLanguages() {
        String code = "#include <stdio.h>" +
                "" +
                "extern int foo(void *bar);";
        String[] expected = {"C", "C++", "Objective-C"};
        assertArrayEquals(expected, Enry.getLanguages("foo.h", code.getBytes()));
    }

    @Test
    public void getMimeType() {
        String mimeType = Enry.getMimeType("foo.rb", "Ruby");
        assertEquals("text/x-ruby", mimeType);
    }

    @Test
    public void isBinary() {
        byte[] text = "hello = 'world'".getBytes();
        assertFalse(Enry.isBinary(text));
    }

    @Test
    public void isConfiguration() {
        assertTrue(Enry.isConfiguration("config.yml"));
        assertFalse(Enry.isConfiguration("FooServiceProviderImplementorFactory.java"));
    }

    @Test
    public void isDocumentation() {
        assertTrue(Enry.isDocumentation("docs/"));
        assertFalse(Enry.isDocumentation("src/"));
    }

    @Test
    public void isDotFile() {
        assertTrue(Enry.isDotFile(".env"));
        assertFalse(Enry.isDotFile("config.json"));
    }

    @Test
    public void isImage() {
        assertTrue(Enry.isImage("yup.jpg"));
        assertFalse(Enry.isImage("nope.go"));
    }

    /**
     * Checks both parts of a guess: the language first, then the certainty flag.
     */
    void assertGuess(String language, boolean sure, Guess result) {
        assertEquals(language, result.result);
        assertEquals(sure, result.sure);
    }
}