From 35575d0a3e89a4c00d7b33227483eb9448258532 Mon Sep 17 00:00:00 2001 From: Alexander Bezzubov Date: Sun, 17 Nov 2019 15:13:41 +0100 Subject: [PATCH] py: expose highest-level enry.language() Signed-off-by: Alexander Bezzubov --- python/README.md | 17 +++--- python/{enry_build.py => build_enry.py} | 11 +++- python/enry.py | 72 ++++++++++++++----------- 3 files changed, 60 insertions(+), 40 deletions(-) rename python/{enry_build.py => build_enry.py} (87%) diff --git a/python/README.md b/python/README.md index fe4ec13..eeb3cd2 100644 --- a/python/README.md +++ b/python/README.md @@ -1,16 +1,15 @@ # Python bindings for enry -Python bingings thoug cFFI (API, out-of-line) for calling enr Go functions though CGo wrapper. +Python bindings through cFFI (API, out-of-line) for calling enry Go functions exposed by CGo wrapper. ## Build ``` -$ make static -$ python enry_build.py +$ cd .. && make static +$ python build_enry.py ``` -Will build static library for Cgo wrapper `libenry`, then generate and build `enry.c` -- a CPython extension that +Will build a static library for Cgo wrapper `libenry`, then generate and build `enry.c` - a CPython extension that provides actual bindings. ## Run @@ -21,9 +20,9 @@ $ python enry.py ``` ## TODOs - - [ ] try ABI mode, to aviod dependency on C compiler on install (+perf test?) - - [ ] ready `libenry.h` and generate `ffibuilder.cdef` content - - [ ] helpers for sending/recieving Go slices to C + - [x] helpers for sending/receiving Go slices to C + - [ ] read `libenry.h` and generate `ffibuilder.cdef(...)` content - [ ] cover the rest of enry API - [ ] add `setup.py` - - [ ] build/release automation on CI (publish on pypi) \ No newline at end of file + - [ ] build/release automation on CI (publish on pypi) + - [ ] try ABI mode, to avoid dependency on C compiler on install (+perf test?) \ No newline at end of file diff --git a/python/enry_build.py b/python/build_enry.py similarity index 87% rename from python/enry_build.py rename to python/build_enry.py index 96ca8f6..a8d5444 100644 --- a/python/enry_build.py +++ b/python/build_enry.py @@ -3,10 +3,19 @@ ffibuilder = FFI() # cdef() expects a single string declaring the C types, functions and # globals needed to use the shared object. It must be in valid C syntax. +# Taken from java/shared/libenry.h ffibuilder.cdef(""" +typedef unsigned char GoUint8; +typedef long long GoInt64; +typedef GoInt64 GoInt; + typedef struct { const char *p; ptrdiff_t n; } _GoString_; typedef _GoString_ GoString; -typedef unsigned char GoUint8; + +typedef struct { void *data; GoInt len; GoInt cap; } GoSlice; + + +extern GoString GetLanguage(GoString p0, GoSlice p1); /* Return type for GetLanguageByExtension */ struct GetLanguageByExtension_return { diff --git a/python/enry.py b/python/enry.py index cd388ba..436c3fd 100644 --- a/python/enry.py +++ b/python/enry.py @@ -4,73 +4,85 @@ Python library calling enry Go implementation trough cFFI (API, out-of-line) and from _c_enry import ffi, lib -## Helpers +## cgo -> ffi helpers +def py_bytes_to_go(py_bytes: bytes): + c_bytes = ffi.new("char[]", len(py_bytes)) + go_slice = ffi.new("GoSlice *", [c_bytes, len(py_bytes), len(py_bytes)]) + return (go_slice[0], c_bytes) +def py_str_to_go(py_str: str): + str_bytes = py_str.encode() + c_str = ffi.new("char[]", str_bytes) + go_str = ffi.new("_GoString_ *", [c_str, len(str_bytes)]) + return (go_str[0], c_str) -def go_str_to_py(go_str): +def go_str_to_py(go_str: str): str_len = go_str.n if str_len > 0: return ffi.unpack(go_str.p, go_str.n).decode() return "" - -def py_str_to_go(py_str): - str_bytes = py_str.encode() - c_str = ffi.new("char[]", str_bytes) - go_str = ffi.new("_GoString_ *", [c_str, len(str_bytes)]) - return go_str[0] - - -def go_bool_to_py(go_bool): +def go_bool_to_py(go_bool: bool): return go_bool == 1 -## API - +## API, TODO(bzz): add docstrings +def language(filename: str, content: bytes) -> str: + fName, c_str = py_str_to_go(filename) + fContent, c_bytes = py_bytes_to_go(content) + guess = lib.GetLanguage(fName, fContent) + lang = go_str_to_py(guess) + return lang def language_by_extension(filename: str) -> str: - fName = py_str_to_go(filename) + fName, c_str = py_str_to_go(filename) guess = lib.GetLanguageByExtension(fName) lang = go_str_to_py(guess.r0) return lang - def language_by_filename(filename: str) -> str: - fName = py_str_to_go(filename) + fName, c_str = py_str_to_go(filename) guess = lib.GetLanguageByFilename(fName) lang = go_str_to_py(guess.r0) return lang - def is_vendor(filename: str) -> bool: - fName = py_str_to_go(filename) + fName, c_str = py_str_to_go(filename) guess = lib.IsVendor(fName) return go_bool_to_py(guess) ## Tests - +from collections import namedtuple def main(): + TestFile = namedtuple("TestFile", "name, content, lang") files = [ - "Parse.hs", "some.cpp", "and.go", "type.h", ".bashrc", ".gitignore" + TestFile("Parse.hs", b"", "Haskell"), TestFile("some.cpp", b"", "C++"), + TestFile("orand.go", b"", "Go"), TestFile("type.h", b"", "C"), + TestFile(".bashrc", b"", "Shell"), TestFile(".gitignore", b"", "Ignore List") ] - print("strategy: extension") - for filename in files: - lang = language_by_extension(filename) - print("file: {:10s} language: '{}'".format(filename, lang)) + print("\nstrategy: extension") + for f in files: + lang = language_by_extension(f.name) + print("\tfile: {:10s} language: '{}'".format(f.name, lang)) print("\nstrategy: filename") - for filename in files: - lang = language_by_filename(filename) - print("file: {:10s} language: '{}'".format(filename, lang)) + for f in files: + lang = language_by_filename(f.name) + print("\tfile: {:10s} language: '{}'".format(f.name, lang)) print("\ncheck: is vendor?") - for filename in files: - vendor = is_vendor(filename) - print("file: {:10s} vendor: '{}'".format(filename, vendor)) + for f in files: + vendor = is_vendor(f.name) + print("\tfile: {:10s} vendor: '{}'".format(f.name, vendor)) + print("\nstrategy: all") + for f in files: + lang = language(f.name, f.content) + print("\tfile: {:10s} language: '{}'".format(f.name, lang)) + assert lang == f.lang, "Expected '{}' but got '{}'".format(f.lang, lang) if __name__ == "__main__": main()