Merge pull request #92 from erizocosmico/feature/java-impl

java bindings for Enry
This commit is contained in:
Alfredo Beaumont 2017-08-09 17:18:59 +02:00 committed by GitHub
commit 574cae8a84
14 changed files with 800 additions and 2 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
benchmarks/output
.ci
Makefile.main
.shared

View File

@ -7,6 +7,23 @@ go:
matrix:
allow_failures:
- go: tip
include:
- language: scala
jdk: oraclejdk8
install:
- GIMME_OUTPUT=$(gimme 1.8 | tee -a $HOME/.bashrc) && eval "$GIMME_OUTPUT"
- export GOPATH=$HOME/gopath
- mkdir -p $GOPATH/src/gopkg.in/src-d/enry.v1
- rsync -az ${TRAVIS_BUILD_DIR}/ $GOPATH/src/gopkg.in/src-d/enry.v1
- go get -v gopkg.in/src-d/enry.v1/...
before_script:
- cd java
- make
script:
- make test
install:
- rm -rf $GOPATH/src/gopkg.in/src-d
@ -26,7 +43,7 @@ before_deploy:
deploy:
provider: releases
api_key: $GITHUB_TOKEN
api_key: $GITHUB_TOKEN
file_glob: true
file: build/*.tar.gz
skip_cleanup: true

View File

@ -24,13 +24,25 @@ LOCAL_COMMIT := $(shell git rev-parse --short HEAD)
LOCAL_BUILD := $(shell date +"%m-%d-%Y_%H_%M_%S")
LOCAL_LDFLAGS = -s -X main.version=$(LOCAL_TAG) -X main.build=$(LOCAL_BUILD) -X main.commit=$(LOCAL_COMMIT)
# shared objects
# Root of the output tree for the cgo shared libraries (one sub-directory per OS).
RESOURCES_DIR=./.shared
# Linux build: directory and full path of the shared object.
LINUX_DIR=$(RESOURCES_DIR)/linux-x86-64
LINUX_SHARED_LIB=$(LINUX_DIR)/libenry.so
# macOS build: directory and full path of the shared object.
DARWIN_DIR=$(RESOURCES_DIR)/darwin
DARWIN_SHARED_LIB=$(DARWIN_DIR)/libenry.dylib
# C header that the shared-library recipes move from the per-OS directory into RESOURCES_DIR.
HEADER_FILE=libenry.h
# Go source handed to `go build -buildmode=c-shared`.
NATIVE_LIB=./shared/enry.go
# Fetch a checkout of github/linguist into $(LINGUIST_PATH).
$(LINGUIST_PATH):
git clone https://github.com/github/linguist.git $@
# Remove the linguist checkout.
clean-linguist:
rm -rf $(LINGUIST_PATH)
# `clean` has no recipe of its own; it only aggregates the clean-* targets listed as prerequisites.
clean: clean-linguist
# Remove every generated shared library and header.
clean-shared:
rm -rf $(RESOURCES_DIR)
clean: clean-linguist clean-shared
code-generate: $(LINGUIST_PATH)
mkdir -p data
@ -48,3 +60,17 @@ benchmarks-slow: $(LINGUST_PATH)
# Build the `enry` command-line binary, passing $(LOCAL_LDFLAGS) to the Go linker.
build-cli:
go build -o enry -ldflags "$(LOCAL_LDFLAGS)" cli/enry/main.go
# The two aliases below are command names, never files on disk.
.PHONY: linux-shared darwin-shared

# Convenience aliases for the per-OS shared libraries.
linux-shared: $(LINUX_SHARED_LIB)

darwin-shared: $(DARWIN_SHARED_LIB)

# Build the macOS (amd64) shared library. `go build -buildmode=c-shared`
# writes the C header next to the library; it is then moved to
# $(RESOURCES_DIR) so both platforms share one copy.
# Depending on $(NATIVE_LIB) rebuilds the library when the exported Go
# wrappers change.
$(DARWIN_SHARED_LIB): $(NATIVE_LIB)
	mkdir -p $(@D) && \
	GOOS=darwin GOARCH=amd64 go build -buildmode=c-shared -o $@ $(NATIVE_LIB) && \
	mv $(@D)/$(HEADER_FILE) $(RESOURCES_DIR)/$(HEADER_FILE)

# Build the Linux (amd64) shared library; same layout as the macOS rule.
$(LINUX_SHARED_LIB): $(NATIVE_LIB)
	mkdir -p $(@D) && \
	GOOS=linux GOARCH=amd64 go build -buildmode=c-shared -o $@ $(NATIVE_LIB) && \
	mv $(@D)/$(HEADER_FILE) $(RESOURCES_DIR)/$(HEADER_FILE)

27
java/.gitignore vendored Normal file
View File

@ -0,0 +1,27 @@
# Compiled class file
*.class
# Log file
*.log
# BlueJ files
*.ctxt
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
*.jar
*.war
*.ear
*.zip
*.tar.gz
*.rar
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
target
.idea
.jnaerator
shared
*.jar

60
java/Makefile Normal file
View File

@ -0,0 +1,60 @@
# JNAerator release (jitpack commit id) used to generate the Java bindings.
JNAERATOR_VERSION := ac73c9e

# Where the generated bindings jar ends up.
JARS_DIR := ./lib
JAR := $(JARS_DIR)/enry.jar

# Local cache for the downloaded JNAerator tool.
JNAERATOR_DIR := ./.jnaerator
JNAERATOR_JAR := $(JNAERATOR_DIR)/jnaerator.jar
JNAERATOR_JAR_URL := "https://jitpack.io/com/github/nativelibs4java/JNAerator/jnaerator/$(JNAERATOR_VERSION)/jnaerator-$(JNAERATOR_VERSION)-shaded.jar"

# Shared libraries are built by the parent Makefile into RESOURCES_SRC
# and copied into RESOURCES_DIR for this project.
RESOURCES_SRC := ../.shared
RESOURCES_DIR := ./shared
HEADER_FILE := $(RESOURCES_DIR)/libenry.h

# Recipes are run with bash.
SHELL := /bin/bash
.PHONY: all

# Default goal: generate the jar with the Java bindings.
all: $(JAR)

# Run JNAerator over the C header to produce a standalone jar with the
# JNA bindings inside $(JARS_DIR).
$(JAR): $(RESOURCES_DIR) $(JNAERATOR_JAR)
	mkdir -p $(JARS_DIR) && \
	java -jar $(JNAERATOR_JAR) \
		-package tech.sourced.enry.nativelib \
		-library enry \
		$(HEADER_FILE) \
		-o $(JARS_DIR) \
		-mode StandaloneJar \
		-runtime JNA

# Copy the shared libraries and header built by the parent Makefile.
# The prerequisite is a command target, so this recipe runs on every
# invocation. Copying the *contents* (`/.`) into an existing directory
# keeps it idempotent; a plain `cp -R src dst` would nest `shared/.shared`
# on the second run.
$(RESOURCES_DIR): os-shared-lib
	mkdir -p $@ && \
	cp -R $(RESOURCES_SRC)/. $@

# Download the JNAerator tool once. It does not need the shared libraries,
# so it has no prerequisites and is not re-downloaded on every run.
# The download goes to a temporary name first so an interrupted transfer
# never leaves a truncated jar that looks up to date.
$(JNAERATOR_JAR):
	mkdir -p $(@D) && \
	wget $(JNAERATOR_JAR_URL) -O $@.tmp && \
	mv -f $@.tmp $@
# None of these targets produce a file with their own name.
.PHONY: os-shared-lib linux-shared darwin-shared

# Build the shared library that matches the host operating system.
# The OS is detected by the recipe's own shell when the recipe runs.
os-shared-lib:
	@case "$$(uname -s)" in \
		Linux) \
			$(MAKE) linux-shared ;; \
		Darwin) \
			$(MAKE) darwin-shared ;; \
		*) \
			echo "Unsupported operating system, can't build shared library"; \
			exit 1 ;; \
	esac

# Delegate to the parent Makefile, which owns the Go build.
linux-shared:
	$(MAKE) -C .. linux-shared

darwin-shared:
	$(MAKE) -C .. darwin-shared
# Command targets: declared phony so a file or directory named `test`,
# `package` or `clean` can never make them look up to date.
.PHONY: test package clean

# Run the test suite through sbt.
test:
	sbt clean test

# Assemble the distributable jar through sbt.
package:
	sbt clean assembly

# Remove the generated bindings jar and the copied shared libraries.
clean:
	rm -rf $(JAR) $(RESOURCES_DIR)

35
java/README.md Normal file
View File

@ -0,0 +1,35 @@
# enry-java
### Requirements
* `sbt`
* `Java` (tested with Java 1.8)
* `wget`
* `Go` (only for building the shared objects for your operating system)
### Generate jar with Java bindings and shared libraries
You need to do this before exporting the jar and/or testing.
```
make
```
This downloads the JNAerator jar and uses it to generate the Java bindings from the `libenry.h` header file; the generated `enry.jar` is placed under `lib`.
The shared libraries for your operating system will be built if needed and copied inside the `shared` directory.
For IntelliJ and other IDEs remember to mark `shared` folder as sources and add `lib/enry.jar` as library. If you use `sbt` from the command line directly that's already taken care of.
### Run tests
```
make test
```
### Export jar
```
make package
```
Jar will be located in `./target/enry-java-assembly-X.X.X.jar`.

17
java/build.sbt Normal file
View File

@ -0,0 +1,17 @@
// Project coordinates.
name := "enry-java"
organization := "tech.sourced"
version := "1.0"
// Pure Java project: no Scala version suffix in paths and no scala-library dependency.
crossPaths := false
autoScalaLibrary := false
publishMavenStyle := true
exportJars := true
// JUnit bridge so sbt can discover and run the JUnit tests.
libraryDependencies += "com.novocode" % "junit-interface" % "0.11" % Test
// Jars dropped into `lib` (the generated enry.jar) are picked up as unmanaged dependencies.
unmanagedBase := baseDirectory.value / "lib"
// Put the `shared` directory (native libraries copied by the Makefile) on every classpath.
unmanagedClasspath in Test += baseDirectory.value / "shared"
unmanagedClasspath in Runtime += baseDirectory.value / "shared"
unmanagedClasspath in Compile += baseDirectory.value / "shared"
testOptions += Tests.Argument(TestFrameworks.JUnit)

View File

@ -0,0 +1 @@
sbt.version = 0.13.16

1
java/project/plugins.sbt Normal file
View File

@ -0,0 +1 @@
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.5")

View File

@ -0,0 +1,228 @@
package tech.sourced.enry;
import tech.sourced.enry.nativelib.*;
import static tech.sourced.enry.GoUtils.*;
public class Enry {
private static final EnryLibrary nativeLib = EnryLibrary.INSTANCE;
/**
* Returns whether the given language is auxiliary or not.
*
* @param language name of the language, e.g. PHP, HTML, ...
* @return if it's an auxiliary language
*/
public static boolean isAuxiliaryLanguage(String language) {
return toJavaBool(nativeLib.IsAuxiliaryLanguage(toGoString(language)));
}
/**
* Returns the language of the given file based on the filename and its
* contents.
*
* @param filename name of the file with the extension
* @param content array of bytes with the contents of the file (the code)
* @return the guessed language
*/
public static String getLanguage(String filename, byte[] content) {
return toJavaString(nativeLib.GetLanguage(
toGoString(filename),
toGoByteSlice(content)
));
}
/**
* Returns detected language by its content.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param filename name of the file with the extension
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByContent(String filename, byte[] content) {
GetLanguageByContent_return.ByValue res = nativeLib.GetLanguageByContent(
toGoString(filename),
toGoByteSlice(content)
);
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its emacs modeline.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByEmacsModeline(byte[] content) {
GetLanguageByEmacsModeline_return.ByValue res = nativeLib.GetLanguageByEmacsModeline(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by the extension of the filename.
* If there are more than one possible languages, it returns
* the first language in alphabetical order and safe to false.
*
* @param filename of the file
* @return guessed result
*/
public static Guess getLanguageByExtension(String filename) {
GetLanguageByExtension_return.ByValue res = nativeLib.GetLanguageByExtension(toGoString(filename));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its shebang.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByShebang(byte[] content) {
GetLanguageByShebang_return.ByValue res = nativeLib.GetLanguageByShebang(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its filename.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param filename of the file
* @return guessed result
*/
public static Guess getLanguageByFilename(String filename) {
GetLanguageByFilename_return.ByValue res = nativeLib.GetLanguageByFilename(toGoString(filename));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its modeline.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByModeline(byte[] content) {
GetLanguageByModeline_return.ByValue res = nativeLib.GetLanguageByModeline(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns detected language by its vim modeline.
* If there are more than one possible language, it returns the first
* language in alphabetical order and safe to false.
*
* @param content of the file
* @return guessed result
*/
public static Guess getLanguageByVimModeline(byte[] content) {
GetLanguageByVimModeline_return.ByValue res = nativeLib.GetLanguageByVimModeline(toGoByteSlice(content));
return new Guess(toJavaString(res.r0), toJavaBool(res.r1));
}
/**
* Returns all the possible extensions for a file in the given language.
*
* @param language to get extensions from
* @return extensions
*/
public static String[] getLanguageExtensions(String language) {
GoSlice result = new GoSlice();
nativeLib.GetLanguageExtensions(toGoString(language), result);
return toJavaStringArray(result);
}
/**
* Returns all possible languages for the given file.
*
* @param filename of the file
* @param content of the file
* @return all possible languages
*/
public static String[] getLanguages(String filename, byte[] content) {
GoSlice result = new GoSlice();
nativeLib.GetLanguages(toGoString(filename), toGoByteSlice(content), result);
return toJavaStringArray(result);
}
/**
* Returns the mime type of the file.
*
* @param path of the file
* @param language of the file
* @return mime type
*/
public static String getMimeType(String path, String language) {
return toJavaString(nativeLib.GetMimeType(toGoString(path), toGoString(language)));
}
/**
* Reports whether the given file content is binary or not.
*
* @param content of the file
* @return whether it's binary or not
*/
public static boolean isBinary(byte[] content) {
return toJavaBool(nativeLib.IsBinary(toGoByteSlice(content)));
}
/**
* Reports whether the given file or directory is a config file or directory.
*
* @param path of the file or directory
* @return whether it's config or not
*/
public static boolean isConfiguration(String path) {
return toJavaBool(nativeLib.IsConfiguration(toGoString(path)));
}
/**
* Reports whether the given file or directory it's documentation.
*
* @param path of the file or directory. It must not contain its parents and
* if it's a directory it must end in a slash e.g. "docs/" or
* "foo.json".
* @return whether it's docs or not
*/
public static boolean isDocumentation(String path) {
return toJavaBool(nativeLib.IsDocumentation(toGoString(path)));
}
/**
* Reports whether the given file is a dotfile.
*
* @param path of the file
* @return whether it's a dotfile or not
*/
public static boolean isDotFile(String path) {
return toJavaBool(nativeLib.IsDotFile(toGoString(path)));
}
/**
* Reports whether the given path is an image or not.
*
* @param path of the file
* @return whether it's an image or not
*/
public static boolean isImage(String path) {
return toJavaBool(nativeLib.IsImage(toGoString(path)));
}
/**
* Reports whether the given path is a vendor path or not.
*
* @param path of the file or directory
* @return whether it's vendor or not
*/
public static boolean isVendor(String path) {
return toJavaBool(nativeLib.IsVendor(toGoString(path)));
}
}

View File

@ -0,0 +1,73 @@
package tech.sourced.enry;
import com.sun.jna.Memory;
import com.sun.jna.Pointer;
import tech.sourced.enry.nativelib.GoSlice;
import tech.sourced.enry.nativelib.GoString;
import java.io.UnsupportedEncodingException;
/**
 * Conversions between Java values and the Go value layouts (string, slice,
 * bool) used by the generated native bindings.
 */
class GoUtils {

    /** Charset used for every string crossing the boundary in this class. */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Copies a Java string into native memory as UTF-8 and wraps it in a Go
     * string header.
     *
     * @param str string to convert, must not be null
     * @return Go string pointing at the copied bytes; for the empty string
     *         the pointer is null and the length is 0
     */
    static GoString.ByValue toGoString(String str) {
        byte[] bytes = str.getBytes(UTF_8);
        GoString.ByValue val = new GoString.ByValue();
        val.n = bytes.length;
        val.p = ptrFromBytes(bytes);
        return val;
    }

    /**
     * Reads the bytes referenced by a Go string and decodes them as UTF-8.
     *
     * @param str Go string header returned by the native library
     * @return decoded string, or "" when the Go string is empty
     */
    static String toJavaString(GoString str) {
        if (str.n == 0 || str.p == null) {
            return "";
        }

        byte[] bytes = new byte[(int) str.n];
        str.p.read(0, bytes, 0, bytes.length);
        return new String(bytes, UTF_8);
    }

    /**
     * Converts a Go slice of C string pointers into a Java string array.
     *
     * @param slice slice filled by the native library
     * @return one Java string per element; an empty array when the slice has
     *         no elements or was never filled (null data pointer)
     */
    static String[] toJavaStringArray(GoSlice slice) {
        int length = (int) slice.len;
        // The native side leaves the slice untouched when it has nothing to
        // append, so data may still be null here.
        if (length <= 0 || slice.data == null) {
            return new String[0];
        }

        Pointer[] ptrArr = slice.data.getPointerArray(0, length);
        String[] result = new String[length];
        for (int i = 0; i < length; i++) {
            result[i] = ptrArr[i].getString(0);
        }
        return result;
    }

    /**
     * Copies a byte array into native memory and wraps it in a Go slice
     * header whose length and capacity both equal the array length.
     *
     * @param bytes bytes to copy, must not be null
     * @return Go slice over the copied bytes
     */
    static GoSlice.ByValue toGoByteSlice(byte[] bytes) {
        return sliceFromPtr(bytes.length, ptrFromBytes(bytes));
    }

    /**
     * Builds a Go slice header over existing native memory.
     *
     * @param len number of elements, used for both len and cap
     * @param ptr pointer to the first element (may be null when len is 0)
     * @return Go slice header
     */
    static GoSlice.ByValue sliceFromPtr(int len, Pointer ptr) {
        GoSlice.ByValue val = new GoSlice.ByValue();
        val.cap = len;
        val.len = len;
        val.data = ptr;
        return val;
    }

    /**
     * Copies a byte array into freshly allocated native memory.
     *
     * @param bytes bytes to copy, must not be null
     * @return pointer to the copy, or null for an empty array, because
     *         {@link Memory} refuses zero-sized allocations
     */
    static Pointer ptrFromBytes(byte[] bytes) {
        if (bytes.length == 0) {
            return null;
        }

        Pointer ptr = new Memory(bytes.length);
        ptr.write(0, bytes, 0, bytes.length);
        return ptr;
    }

    /**
     * Converts a Go bool, received as a byte, to a Java boolean.
     *
     * @param goBool byte returned by the native library
     * @return true only when the byte equals 1
     */
    static boolean toJavaBool(byte goBool) {
        return goBool == 1;
    }
}

View File

@ -0,0 +1,23 @@
package tech.sourced.enry;
/**
 * Result of a language detection, together with a flag telling whether
 * enry is completely sure about it.
 */
public class Guess {
    /**
     * Language produced by the detection.
     */
    public String language;

    /**
     * True when enry was completely sure that {@link #language} is the
     * correct one, false when it might not be.
     */
    public boolean safe;

    /**
     * Creates a guess holding the given language and safety flag.
     */
    public Guess(String language, boolean safe) {
        this.safe = safe;
        this.language = language;
    }
}

View File

@ -0,0 +1,149 @@
package tech.sourced.enry;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Smoke tests for the Java bindings: each test calls one {@link Enry} method
 * with a small, fixed input and checks the value reported by the native
 * library.
 */
public class EnryTest {

    @Test
    public void isAuxiliaryLanguage() {
        assertTrue(Enry.isAuxiliaryLanguage("HTML"));
        assertFalse(Enry.isAuxiliaryLanguage("Go"));
    }

    @Test
    public void getLanguage() {
        byte[] php = "<?php $foo = bar();".getBytes();
        String language = Enry.getLanguage("foobar.php", php);
        assertEquals("PHP", language);
    }

    @Test
    public void getLanguageByContent() {
        byte[] php = "<?php $foo = bar();".getBytes();
        Guess guess = Enry.getLanguageByContent("foo.php", php);
        assertGuess("PHP", true, guess);
    }

    @Test
    public void getLanguageByEmacsModeline() {
        String cpp = "// -*- font:bar;mode:c++ -*-\n" +
                "template <typename X> class { X i; };";
        Guess guess = Enry.getLanguageByEmacsModeline(cpp.getBytes());
        assertGuess("C++", true, guess);
    }

    @Test
    public void getLanguageByExtension() {
        Guess guess = Enry.getLanguageByExtension("foo.rb");
        assertGuess("Ruby", true, guess);
    }

    @Test
    public void getLanguageByShebang() {
        byte[] script = "#!/usr/bin/env python".getBytes();
        Guess guess = Enry.getLanguageByShebang(script);
        assertGuess("Python", true, guess);
    }

    @Test
    public void getLanguageByModeline() {
        // Emacs-style modeline.
        String emacs = "// -*- font:bar;mode:c++ -*-\n" +
                "template <typename X> class { X i; };";
        assertGuess("C++", true, Enry.getLanguageByModeline(emacs.getBytes()));

        // Vim-style modeline.
        String vim = "# vim: noexpandtab: ft=javascript";
        assertGuess("JavaScript", true, Enry.getLanguageByModeline(vim.getBytes()));
    }

    @Test
    public void getLanguageByVimModeline() {
        byte[] vim = "# vim: noexpandtab: ft=javascript".getBytes();
        Guess guess = Enry.getLanguageByVimModeline(vim);
        assertGuess("JavaScript", true, guess);
    }

    @Test
    public void getLanguageExtensions() {
        String[] expected = {".go"};
        String[] actual = Enry.getLanguageExtensions("Go");
        assertArrayEquals(expected, actual);
    }

    @Test
    public void getLanguages() {
        String header = "#include <stdio.h>" +
                "" +
                "extern int foo(void *bar);";
        String[] expected = {"C", "C++", "Objective-C"};
        String[] actual = Enry.getLanguages("foo.h", header.getBytes());
        assertArrayEquals(expected, actual);
    }

    @Test
    public void getMimeType() {
        String mime = Enry.getMimeType("foo.rb", "Ruby");
        assertEquals("text/x-ruby", mime);
    }

    @Test
    public void isBinary() {
        byte[] text = "hello = 'world'".getBytes();
        assertFalse(Enry.isBinary(text));
    }

    @Test
    public void isConfiguration() {
        assertTrue(Enry.isConfiguration("config.yml"));
        assertFalse(Enry.isConfiguration("FooServiceProviderImplementorFactory.java"));
    }

    @Test
    public void isDocumentation() {
        assertTrue(Enry.isDocumentation("docs/"));
        assertFalse(Enry.isDocumentation("src/"));
    }

    @Test
    public void isDotFile() {
        assertTrue(Enry.isDotFile(".env"));
        assertFalse(Enry.isDotFile("config.json"));
    }

    @Test
    public void isImage() {
        assertTrue(Enry.isImage("yup.jpg"));
        assertFalse(Enry.isImage("nope.go"));
    }

    /**
     * Asserts that a guess carries the expected language and safety flag.
     */
    void assertGuess(String language, boolean safe, Guess guess) {
        assertEquals(language, guess.language);
        assertEquals(safe, guess.safe);
    }
}

140
shared/enry.go Normal file
View File

@ -0,0 +1,140 @@
// +build darwin,cgo linux,cgo
// +build amd64
package main
import "C"
import "gopkg.in/src-d/enry.v1"
// GetLanguage exposes enry.GetLanguage to C callers.
//export GetLanguage
func GetLanguage(filename string, content []byte) string {
	language := enry.GetLanguage(filename, content)
	return language
}

// GetLanguageByContent exposes enry.GetLanguageByContent to C callers.
//export GetLanguageByContent
func GetLanguageByContent(filename string, content []byte) (language string, safe bool) {
	language, safe = enry.GetLanguageByContent(filename, content)
	return
}
// GetLanguageByEmacsModeline exposes enry.GetLanguageByEmacsModeline to C
// callers. It must not delegate to the generic modeline detector: that is
// exported separately as GetLanguageByModeline.
//export GetLanguageByEmacsModeline
func GetLanguageByEmacsModeline(content []byte) (language string, safe bool) {
	return enry.GetLanguageByEmacsModeline(content)
}
// GetLanguageByExtension exposes enry.GetLanguageByExtension to C callers.
//export GetLanguageByExtension
func GetLanguageByExtension(filename string) (language string, safe bool) {
	language, safe = enry.GetLanguageByExtension(filename)
	return
}

// GetLanguageByFilename exposes enry.GetLanguageByFilename to C callers.
//export GetLanguageByFilename
func GetLanguageByFilename(filename string) (language string, safe bool) {
	language, safe = enry.GetLanguageByFilename(filename)
	return
}

// GetLanguageByModeline exposes enry.GetLanguageByModeline to C callers.
//export GetLanguageByModeline
func GetLanguageByModeline(content []byte) (language string, safe bool) {
	language, safe = enry.GetLanguageByModeline(content)
	return
}

// GetLanguageByShebang exposes enry.GetLanguageByShebang to C callers.
//export GetLanguageByShebang
func GetLanguageByShebang(content []byte) (language string, safe bool) {
	language, safe = enry.GetLanguageByShebang(content)
	return
}

// GetLanguageByVimModeline exposes enry.GetLanguageByVimModeline to C callers.
//export GetLanguageByVimModeline
func GetLanguageByVimModeline(content []byte) (language string, safe bool) {
	language, safe = enry.GetLanguageByVimModeline(content)
	return
}
// GetLanguageExtensions appends the result of enry.GetLanguageExtensions to
// result as C strings.
//export GetLanguageExtensions
func GetLanguageExtensions(language string, result *[]*C.char) {
	extensions := enry.GetLanguageExtensions(language)
	strSliceCopy(result, extensions)
}

// GetLanguages appends the result of enry.GetLanguages to result as C strings.
//export GetLanguages
func GetLanguages(filename string, content []byte, result *[]*C.char) {
	languages := enry.GetLanguages(filename, content)
	strSliceCopy(result, languages)
}

// GetLanguagesByContent appends the result of enry.GetLanguagesByContent to
// result as C strings.
//export GetLanguagesByContent
func GetLanguagesByContent(filename string, content []byte, candidates []string, result *[]*C.char) {
	languages := enry.GetLanguagesByContent(filename, content, candidates)
	strSliceCopy(result, languages)
}

// GetLanguagesByEmacsModeline appends the result of
// enry.GetLanguagesByEmacsModeline to result as C strings.
//export GetLanguagesByEmacsModeline
func GetLanguagesByEmacsModeline(filename string, content []byte, candidates []string, result *[]*C.char) {
	languages := enry.GetLanguagesByEmacsModeline(filename, content, candidates)
	strSliceCopy(result, languages)
}

// GetLanguagesByExtension appends the result of enry.GetLanguagesByExtension
// to result as C strings.
//export GetLanguagesByExtension
func GetLanguagesByExtension(filename string, content []byte, candidates []string, result *[]*C.char) {
	languages := enry.GetLanguagesByExtension(filename, content, candidates)
	strSliceCopy(result, languages)
}

// GetLanguagesByFilename appends the result of enry.GetLanguagesByFilename to
// result as C strings.
//export GetLanguagesByFilename
func GetLanguagesByFilename(filename string, content []byte, candidates []string, result *[]*C.char) {
	languages := enry.GetLanguagesByFilename(filename, content, candidates)
	strSliceCopy(result, languages)
}

// GetLanguagesByModeline appends the result of enry.GetLanguagesByModeline to
// result as C strings.
//export GetLanguagesByModeline
func GetLanguagesByModeline(filename string, content []byte, candidates []string, result *[]*C.char) {
	languages := enry.GetLanguagesByModeline(filename, content, candidates)
	strSliceCopy(result, languages)
}

// GetLanguagesByShebang appends the result of enry.GetLanguagesByShebang to
// result as C strings.
//export GetLanguagesByShebang
func GetLanguagesByShebang(filename string, content []byte, candidates []string, result *[]*C.char) {
	languages := enry.GetLanguagesByShebang(filename, content, candidates)
	strSliceCopy(result, languages)
}

// GetLanguagesByVimModeline appends the result of
// enry.GetLanguagesByVimModeline to result as C strings.
//export GetLanguagesByVimModeline
func GetLanguagesByVimModeline(filename string, content []byte, candidates []string, result *[]*C.char) {
	languages := enry.GetLanguagesByVimModeline(filename, content, candidates)
	strSliceCopy(result, languages)
}
// GetMimeType exposes enry.GetMimeType to C callers.
//export GetMimeType
func GetMimeType(path string, language string) string {
	mime := enry.GetMimeType(path, language)
	return mime
}

// IsAuxiliaryLanguage exposes enry.IsAuxiliaryLanguage to C callers.
//export IsAuxiliaryLanguage
func IsAuxiliaryLanguage(lang string) bool {
	auxiliary := enry.IsAuxiliaryLanguage(lang)
	return auxiliary
}

// IsBinary exposes enry.IsBinary to C callers.
//export IsBinary
func IsBinary(data []byte) bool {
	binary := enry.IsBinary(data)
	return binary
}

// IsConfiguration exposes enry.IsConfiguration to C callers.
//export IsConfiguration
func IsConfiguration(path string) bool {
	configuration := enry.IsConfiguration(path)
	return configuration
}

// IsDocumentation exposes enry.IsDocumentation to C callers.
//export IsDocumentation
func IsDocumentation(path string) bool {
	documentation := enry.IsDocumentation(path)
	return documentation
}

// IsDotFile exposes enry.IsDotFile to C callers.
//export IsDotFile
func IsDotFile(path string) bool {
	dotFile := enry.IsDotFile(path)
	return dotFile
}

// IsImage exposes enry.IsImage to C callers.
//export IsImage
func IsImage(path string) bool {
	image := enry.IsImage(path)
	return image
}

// IsVendor exposes enry.IsVendor to C callers.
//export IsVendor
func IsVendor(path string) bool {
	vendor := enry.IsVendor(path)
	return vendor
}
// strSliceCopy appends every string of slice to *result, converted with
// C.CString. Each C string is allocated on the C heap, and nothing in this
// file frees it.
// NOTE(review): the backing array grown by append is Go-allocated while the
// slice header is handed back to the caller; confirm this is safe with
// respect to the cgo pointer-passing rules.
func strSliceCopy(result *[]*C.char, slice []string) {
	for i := range slice {
		cstr := C.CString(slice[i])
		*result = append(*result, cstr)
	}
}

// main is intentionally empty: this package is only built as a C shared
// library (`-buildmode=c-shared`).
func main() {}