Merge pull request #13 from go-enry/python-wrapper

Python: API to expose highest-level enry.GetLanguage
This commit is contained in:
Alexander 2020-04-24 20:57:37 +02:00 committed by GitHub
commit 4b468762b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 60 additions and 40 deletions

View File

@ -1,16 +1,15 @@
# Python bindings for enry
Python bindings through cFFI (API, out-of-line) for calling enry Go functions through CGo wrapper.
Python bindings through cFFI (API, out-of-line) for calling enry Go functions exposed by CGo wrapper.
## Build
```
$ make static
$ python enry_build.py
$ cd .. && make static
$ python build_enry.py
```
Will build static library for Cgo wrapper `libenry`, then generate and build `enry.c`
- a CPython extension that
Will build a static library for Cgo wrapper `libenry`, then generate and build `enry.c` - a CPython extension that provides actual bindings.
## Run
@ -21,9 +20,9 @@ $ python enry.py
```
## TODOs
- [ ] try ABI mode, to avoid dependency on C compiler on install (+perf test?)
- [ ] read `libenry.h` and generate `ffibuilder.cdef` content
- [ ] helpers for sending/receiving Go slices to C
- [x] helpers for sending/receiving Go slices to C
- [ ] read `libenry.h` and generate `ffibuilder.cdef(...)` content
- [ ] cover the rest of enry API
- [ ] add `setup.py`
- [ ] build/release automation on CI (publish on pypi)
- [ ] build/release automation on CI (publish on pypi)
- [ ] try ABI mode, to avoid dependency on C compiler on install (+perf test?)

View File

@ -3,10 +3,19 @@ ffibuilder = FFI()
# cdef() expects a single string declaring the C types, functions and
# globals needed to use the shared object. It must be in valid C syntax.
# Taken from java/shared/libenry.h
ffibuilder.cdef("""
typedef unsigned char GoUint8;
typedef long long GoInt64;
typedef GoInt64 GoInt;
typedef struct { const char *p; ptrdiff_t n; } _GoString_;
typedef _GoString_ GoString;
typedef unsigned char GoUint8;
typedef struct { void *data; GoInt len; GoInt cap; } GoSlice;
extern GoString GetLanguage(GoString p0, GoSlice p1);
/* Return type for GetLanguageByExtension */
struct GetLanguageByExtension_return {

View File

@ -4,73 +4,85 @@ Python library calling enry Go implementation through cFFI (API, out-of-line) and
from _c_enry import ffi, lib
## Helpers
## cgo -> ffi helpers
def py_bytes_to_go(py_bytes: bytes):
    """Wrap Python ``bytes`` in a cgo ``GoSlice`` struct.

    Args:
        py_bytes: the raw bytes to hand over to the Go side.

    Returns:
        A ``(go_slice, c_bytes)`` tuple. ``go_slice`` is the ``GoSlice``
        struct to pass to the library; ``c_bytes`` is the cffi-owned
        ``char[]`` buffer its ``data`` pointer refers to. The caller must
        keep ``c_bytes`` referenced for as long as ``go_slice`` is in use,
        since the struct only stores a raw pointer to it.
    """
    size = len(py_bytes)
    # Initialise the buffer *from the bytes*: passing an integer to
    # ffi.new("char[]", ...) only allocates that many zeroed bytes and the
    # payload would never reach the Go side.
    c_bytes = ffi.new("char[]", py_bytes)
    # len/cap describe the payload only, not any terminator cffi may append.
    go_slice = ffi.new("GoSlice *", [c_bytes, size, size])
    return (go_slice[0], c_bytes)
def py_str_to_go(py_str: str):
    """Wrap a Python ``str`` in a cgo ``_GoString_`` struct.

    The string is encoded with ``str.encode()`` defaults and copied into a
    cffi-owned ``char[]`` buffer.

    Returns:
        A ``(go_string, c_buf)`` tuple: the ``_GoString_`` struct and the
        buffer its ``p`` field points at. Keep ``c_buf`` referenced while the
        struct is in use, as the struct only holds a raw pointer to it.
    """
    encoded = py_str.encode()
    c_buf = ffi.new("char[]", encoded)
    go_string_ptr = ffi.new("_GoString_ *", [c_buf, len(encoded)])
    return (go_string_ptr[0], c_buf)
def go_str_to_py(go_str) -> str:
    """Convert a cgo ``GoString`` struct into a Python ``str``.

    Args:
        go_str: an object exposing ``p`` (pointer to the character data) and
            ``n`` (length), i.e. the ``_GoString_`` struct declared in the
            cdef. It is a cffi struct, not a Python ``str``.

    Returns:
        The decoded text, or ``""`` when the reported length is not positive.
    """
    str_len = go_str.n
    if str_len > 0:
        # Go strings are not NUL-terminated, so read exactly `n` bytes.
        return ffi.unpack(go_str.p, str_len).decode()
    return ""
# NOTE(review): this is a second definition of py_str_to_go; being later in
# the file it replaces the earlier one that returns (go_str, c_str). It
# returns only the struct, while the callers in this file unpack two values.
# Presumably a stale pre-change copy -- confirm and remove.
# NOTE(review): c_str is not returned, so nothing keeps the char[] buffer
# referenced after this function exits while go_str.p still points at it --
# verify against the cffi ownership rules for ffi.new().
def py_str_to_go(py_str):
str_bytes = py_str.encode()
c_str = ffi.new("char[]", str_bytes)
go_str = ffi.new("_GoString_ *", [c_str, len(str_bytes)])
return go_str[0]
def go_bool_to_py(go_bool) -> bool:
    """Convert a boolean returned by the Go library into a Python ``bool``.

    Args:
        go_bool: the value returned from the C side; presumably a ``GoUint8``
            (``unsigned char``) per the cdef typedef, not a Python ``bool``.

    Returns:
        ``True`` only when the value equals 1, ``False`` otherwise.
    """
    return go_bool == 1
## API
## API, TODO(bzz): add docstrings
def language(filename: str, content: bytes) -> str:
    """Detect a file's language from its name and content.

    Calls ``lib.GetLanguage`` with the converted arguments.

    Args:
        filename: name (or path) of the file.
        content: raw file content.

    Returns:
        The language name reported by the library, or ``""`` when it returns
        an empty string.
    """
    # The buffers are bound to locals only so they stay referenced while the
    # Go call runs; the structs hold raw pointers into them.
    go_name, name_buf = py_str_to_go(filename)
    go_content, content_buf = py_bytes_to_go(content)
    return go_str_to_py(lib.GetLanguage(go_name, go_content))
def language_by_extension(filename: str) -> str:
    """Detect a file's language from its name via ``lib.GetLanguageByExtension``.

    Args:
        filename: name (or path) of the file.

    Returns:
        The ``r0`` field of the library's result converted to ``str``;
        ``""`` when that string is empty.
    """
    # Convert once; c_str is bound only to keep the buffer referenced for the
    # duration of the call.
    fName, c_str = py_str_to_go(filename)
    guess = lib.GetLanguageByExtension(fName)
    return go_str_to_py(guess.r0)
def language_by_filename(filename: str) -> str:
    """Detect a file's language from its name via ``lib.GetLanguageByFilename``.

    Args:
        filename: name (or path) of the file.

    Returns:
        The ``r0`` field of the library's result converted to ``str``;
        ``""`` when that string is empty.
    """
    # Convert once; c_str is bound only to keep the buffer referenced for the
    # duration of the call.
    fName, c_str = py_str_to_go(filename)
    guess = lib.GetLanguageByFilename(fName)
    return go_str_to_py(guess.r0)
def is_vendor(filename: str) -> bool:
    """Report whether the library's ``IsVendor`` check matches ``filename``.

    Args:
        filename: name (or path) of the file.

    Returns:
        ``True`` when ``lib.IsVendor`` returns 1, ``False`` otherwise.
    """
    # Convert once; c_str is bound only to keep the buffer referenced for the
    # duration of the call.
    fName, c_str = py_str_to_go(filename)
    guess = lib.IsVendor(fName)
    return go_bool_to_py(guess)
## Tests
from collections import namedtuple
def main():
    """Smoke-test the bindings against a handful of well-known file names.

    Prints the result of every detection strategy for each sample file, then
    asserts that the combined ``language()`` lookup yields the expected
    language for each of them.

    Raises:
        AssertionError: if ``language()`` disagrees with the expected value.
    """
    TestFile = namedtuple("TestFile", "name, content, lang")
    files = [
        TestFile("Parse.hs", b"", "Haskell"), TestFile("some.cpp", b"", "C++"),
        TestFile("orand.go", b"", "Go"), TestFile("type.h", b"", "C"),
        TestFile(".bashrc", b"", "Shell"), TestFile(".gitignore", b"", "Ignore List")
    ]

    print("\nstrategy: extension")
    for f in files:
        lang = language_by_extension(f.name)
        print("\tfile: {:10s} language: '{}'".format(f.name, lang))

    print("\nstrategy: filename")
    for f in files:
        lang = language_by_filename(f.name)
        print("\tfile: {:10s} language: '{}'".format(f.name, lang))

    print("\ncheck: is vendor?")
    for f in files:
        vendor = is_vendor(f.name)
        print("\tfile: {:10s} vendor: '{}'".format(f.name, vendor))

    print("\nstrategy: all")
    for f in files:
        lang = language(f.name, f.content)
        print("\tfile: {:10s} language: '{}'".format(f.name, lang))
        assert lang == f.lang, "Expected '{}' but got '{}'".format(f.lang, lang)
# Run the smoke test only when this module is executed as a script.
if __name__ == "__main__":
main()