Merge pull request #29 from vsmaxim/master

python: cover the rest of python bindings from shared library, add tests, add docstrings for API
This commit is contained in:
Alexander 2020-08-12 14:35:58 +02:00 committed by GitHub
commit 5d58b1aaaf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 756 additions and 20 deletions

161
python/.gitignore vendored Normal file
View File

@ -0,0 +1,161 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
.idea/
# CMake
cmake-build-*/
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Per-project ignores

View File

@ -21,8 +21,8 @@ $ python enry.py
## TODOs ## TODOs
- [x] helpers for sending/receiving Go slices to C - [x] helpers for sending/receiving Go slices to C
- [ ] read `libenry.h` and generate `ffibuilder.cdef(...)` content - [x] read `libenry.h` and generate `ffibuilder.cdef(...)` content
- [ ] cover the rest of enry API - [x] cover the rest of enry API
- [ ] add `setup.py` - [x] add `setup.py`
- [ ] build/release automation on CI (publish on pypi) - [ ] build/release automation on CI (publish on pypi)
- [ ] try ABI mode, to avoid dependency on C compiler on install (+perf test?) - [ ] try ABI mode, to avoid dependency on C compiler on install (+perf test?)

View File

@ -1,10 +1,14 @@
from cffi import FFI from cffi import FFI
import os
from pathlib import Path
ffibuilder = FFI() ffibuilder = FFI()
# cdef() expects a single string declaring the C types, functions and # cdef() expects a single string declaring the C types, functions and
# globals needed to use the shared object. It must be in valid C syntax. # globals needed to use the shared object. It must be in valid C syntax.
# Taken from java/shared/libenry.h # Taken from java/shared/libenry.h
ffibuilder.cdef(""" ffibuilder.cdef(
"""
typedef unsigned char GoUint8; typedef unsigned char GoUint8;
typedef long long GoInt64; typedef long long GoInt64;
typedef GoInt64 GoInt; typedef GoInt64 GoInt;
@ -17,6 +21,22 @@ typedef struct { void *data; GoInt len; GoInt cap; } GoSlice;
extern GoString GetLanguage(GoString p0, GoSlice p1); extern GoString GetLanguage(GoString p0, GoSlice p1);
/* Return type for GetLanguageByContent */
struct GetLanguageByContent_return {
GoString r0; /* language */
GoUint8 r1; /* safe */
};
extern struct GetLanguageByContent_return GetLanguageByContent(GoString p0, GoSlice p1);
/* Return type for GetLanguageByEmacsModeline */
struct GetLanguageByEmacsModeline_return {
GoString r0; /* language */
GoUint8 r1; /* safe */
};
extern struct GetLanguageByEmacsModeline_return GetLanguageByEmacsModeline(GoSlice p0);
/* Return type for GetLanguageByExtension */ /* Return type for GetLanguageByExtension */
struct GetLanguageByExtension_return { struct GetLanguageByExtension_return {
GoString r0; /* language */ GoString r0; /* language */
@ -33,20 +53,82 @@ struct GetLanguageByFilename_return {
extern struct GetLanguageByFilename_return GetLanguageByFilename(GoString p0); extern struct GetLanguageByFilename_return GetLanguageByFilename(GoString p0);
/* Return type for GetLanguageByModeline */
struct GetLanguageByModeline_return {
GoString r0; /* language */
GoUint8 r1; /* safe */
};
extern struct GetLanguageByModeline_return GetLanguageByModeline(GoSlice p0);
/* Return type for GetLanguageByShebang */
struct GetLanguageByShebang_return {
GoString r0; /* language */
GoUint8 r1; /* safe */
};
extern struct GetLanguageByShebang_return GetLanguageByShebang(GoSlice p0);
/* Return type for GetLanguageByVimModeline */
struct GetLanguageByVimModeline_return {
GoString r0; /* language */
GoUint8 r1; /* safe */
};
extern struct GetLanguageByVimModeline_return GetLanguageByVimModeline(GoSlice p0);
extern void GetLanguageExtensions(GoString p0, GoSlice* p1);
extern void GetLanguages(GoString p0, GoSlice p1, GoSlice* p2);
extern void GetLanguagesByContent(GoString p0, GoSlice p1, GoSlice p2, GoSlice* p3);
extern void GetLanguagesByEmacsModeline(GoString p0, GoSlice p1, GoSlice p2, GoSlice* p3);
extern void GetLanguagesByExtension(GoString p0, GoSlice p1, GoSlice p2, GoSlice* p3);
extern void GetLanguagesByFilename(GoString p0, GoSlice p1, GoSlice p2, GoSlice* p3);
extern void GetLanguagesByModeline(GoString p0, GoSlice p1, GoSlice p2, GoSlice* p3);
extern void GetLanguagesByShebang(GoString p0, GoSlice p1, GoSlice p2, GoSlice* p3);
extern void GetLanguagesByVimModeline(GoString p0, GoSlice p1, GoSlice p2, GoSlice* p3);
extern GoString GetMimeType(GoString p0, GoString p1);
extern GoUint8 IsBinary(GoSlice p0);
extern GoUint8 IsConfiguration(GoString p0);
extern GoUint8 IsDocumentation(GoString p0);
extern GoUint8 IsDotFile(GoString p0);
extern GoUint8 IsImage(GoString p0);
extern GoUint8 IsVendor(GoString p0); extern GoUint8 IsVendor(GoString p0);
""")
extern GoUint8 IsGenerated(GoString p0, GoSlice p1);
extern GoString GetColor(GoString p0);
"""
)
# set_source() gives the name of the python extension module to # set_source() gives the name of the python extension module to
# produce, and some C source code as a string. This C code needs # produce, and some C source code as a string. This C code needs
# to make the declared functions, types and globals available, # to make the declared functions, types and globals available,
# so it is often just the "#include". # so it is often just the "#include".
ffibuilder.set_source("_c_enry", lib_dir = Path(__file__).resolve().parent.parent / ".shared"
""" lib_header = lib_dir / "libenry.h"
#include "../.shared/libenry.h" // the C header of the library
""", ffibuilder.set_source(
libraries=['enry'], "_c_enry",
library_dirs=['../.shared' f'#include "{lib_header.absolute()}"',
]) # library name, for the linker libraries=["enry"],
library_dirs=[str(lib_dir.absolute())],
) # library name, for the linker
if __name__ == "__main__": if __name__ == "__main__":
ffibuilder.compile(verbose=True) ffibuilder.compile(verbose=True)

26
python/enry/__init__.py Normal file
View File

@ -0,0 +1,26 @@
from enry.definitions import get_color, get_language, get_language_by_content, get_language_by_emacs_modeline, \
get_language_by_extension, get_language_by_filename, get_language_by_modeline, get_language_by_shebang, \
get_language_by_vim_modeline, get_languages, get_mime_type, is_binary, is_configuration, is_documentation, \
is_dot_file, is_generated, is_image, is_vendor, get_language_extensions
# Public API of the package: every name listed here is re-exported from
# enry.definitions by the import statement above.
__all__ = [
    # General lookups.
    "get_color",
    "get_language",
    "get_language_extensions",
    "get_languages",
    "get_mime_type",
    # Single-strategy language detection; each returns a Guess.
    "get_language_by_vim_modeline",
    "get_language_by_extension",
    "get_language_by_content",
    "get_language_by_emacs_modeline",
    "get_language_by_modeline",
    "get_language_by_filename",
    "get_language_by_shebang",
    # File classification predicates.
    "is_vendor",
    "is_binary",
    "is_image",
    "is_generated",
    "is_documentation",
    "is_dot_file",
    "is_configuration",
]

240
python/enry/definitions.py Normal file
View File

@ -0,0 +1,240 @@
"""
Python library calling the enry Go implementation through CFFI (API, out-of-line) and Cgo.
"""
from typing import List
from _c_enry import lib
from enry.types import Guess
from enry.utils import transform_types, transform_types_ret_str_slice
# Low-level bindings: each name wraps a function exported by the compiled
# `_c_enry` extension so that it accepts the listed Python argument types and
# returns the listed Python result type.

# Language detection returning a plain string or a Guess.
GetLanguage = transform_types([str, bytes], str)(lib.GetLanguage)
GetLanguageByContent = transform_types([str, bytes], Guess)(lib.GetLanguageByContent)
GetLanguageByExtension = transform_types([str], Guess)(lib.GetLanguageByExtension)
GetLanguageByFilename = transform_types([str], Guess)(lib.GetLanguageByFilename)
GetLanguageByModeline = transform_types([bytes], Guess)(lib.GetLanguageByModeline)
GetLanguageByShebang = transform_types([bytes], Guess)(lib.GetLanguageByShebang)
GetLanguageByEmacsModeline = transform_types([bytes], Guess)(lib.GetLanguageByEmacsModeline)
GetLanguageByVimModeline = transform_types([bytes], Guess)(lib.GetLanguageByVimModeline)

# Functions whose result is written into an output slice of strings.
GetLanguages = transform_types_ret_str_slice([str, bytes])(lib.GetLanguages)
GetLanguageExtensions = transform_types_ret_str_slice([str])(lib.GetLanguageExtensions)

# String-valued lookups.
GetMimeType = transform_types([str, str], str)(lib.GetMimeType)
GetColor = transform_types([str], str)(lib.GetColor)

# Boolean predicates.
IsVendor = transform_types([str], bool)(lib.IsVendor)
IsGenerated = transform_types([str, bytes], bool)(lib.IsGenerated)
IsBinary = transform_types([bytes], bool)(lib.IsBinary)
IsConfiguration = transform_types([str], bool)(lib.IsConfiguration)
IsDocumentation = transform_types([str], bool)(lib.IsDocumentation)
IsDotFile = transform_types([str], bool)(lib.IsDotFile)
IsImage = transform_types([str], bool)(lib.IsImage)
def get_language(filename: str, content: bytes) -> str:
    """
    Guess the language of a file from its name and its contents.

    :param filename: name of the file, including its extension
    :param content: raw bytes of the file (the code)
    :return: the guessed language
    """
    language = GetLanguage(filename, content)
    return language
def get_language_by_content(filename: str, content: bytes) -> Guess:
    """
    Detect the language of a file from its content.

    When several languages are possible, the first one in alphabetical
    order is returned together with safe = False.

    :param filename: path of the file
    :param content: raw bytes of the file (the code)
    :return: guessed result
    """
    guess = GetLanguageByContent(filename, content)
    return guess
def get_language_by_extension(filename: str) -> Guess:
    """
    Detect the language of a file from the extension of its name.

    When several languages are possible, the first one in alphabetical
    order is returned together with safe = False.

    :param filename: path of the file
    :return: guessed result
    """
    guess = GetLanguageByExtension(filename)
    return guess
def get_language_by_filename(filename: str) -> Guess:
    """
    Detect the language of a file from its file name.

    When several languages are possible, the first one in alphabetical
    order is returned together with safe = False.

    :param filename: path of the file
    :return: guessed result
    """
    guess = GetLanguageByFilename(filename)
    return guess
def get_language_by_modeline(content: bytes) -> Guess:
    """
    Detect the language of a file from its modeline.

    When several languages are possible, the first one in alphabetical
    order is returned together with safe = False.

    :param content: raw bytes of the file (the code)
    :return: guessed result
    """
    guess = GetLanguageByModeline(content)
    return guess
def get_language_by_vim_modeline(content: bytes) -> Guess:
    """
    Detect the language of a file from its vim modeline.

    When several languages are possible, the first one in alphabetical
    order is returned together with safe = False.

    :param content: raw bytes of the file (the code)
    :return: guessed result
    """
    guess = GetLanguageByVimModeline(content)
    return guess
def get_language_by_emacs_modeline(content: bytes) -> Guess:
    """
    Detect the language of a file from its emacs modeline.

    When several languages are possible, the first one in alphabetical
    order is returned together with safe = False.

    :param content: raw bytes of the file (the code)
    :return: guessed result
    """
    guess = GetLanguageByEmacsModeline(content)
    return guess
def get_language_by_shebang(content: bytes) -> Guess:
    """
    Detect the language of a file from its shebang.

    When several languages are possible, the first one in alphabetical
    order is returned together with safe = False.

    :param content: raw bytes of the file (the code)
    :return: guessed result
    """
    guess = GetLanguageByShebang(content)
    return guess
def get_languages(filename: str, content: bytes) -> List[str]:
    """
    List every language the given file could be written in.

    :param filename: name of the file
    :param content: raw bytes of the file (the code)
    :return: all possible languages
    """
    candidates = GetLanguages(filename, content)
    return candidates
def get_language_extensions(language: str) -> List[str]:
    """
    List every file extension associated with the given language.

    :param language: language to get extensions from
    :return: extensions for the given language
    """
    extensions = GetLanguageExtensions(language)
    return extensions
def get_mime_type(path: str, language: str) -> str:
    """
    Look up the mime type of a file.

    :param path: path of the file
    :param language: language to get the mime type from
    :return: mime type
    """
    mime_type = GetMimeType(path, language)
    return mime_type
def get_color(language: str) -> str:
    """
    Look up the color code associated with a language.

    :param language: name of the language
    :return: color in hex format
    """
    color = GetColor(language)
    return color
def is_vendor(filename: str) -> bool:
    """
    Tell whether the given file is a vendor file.

    :param filename: path of the file
    :return: True if it is a vendor file, False otherwise
    """
    vendored = IsVendor(filename)
    return vendored
def is_generated(filename: str, content: bytes) -> bool:
    """
    Tell whether the given file is a generated file.

    :param filename: path of the file
    :param content: raw bytes of the file (the code)
    :return: True if it is generated, False otherwise
    """
    generated = IsGenerated(filename, content)
    return generated
def is_binary(content: bytes) -> bool:
    """
    Tell whether the given content belongs to a binary file.

    :param content: raw bytes of the file (the code)
    :return: True if it is binary, False otherwise
    """
    binary = IsBinary(content)
    return binary
def is_configuration(path: str) -> bool:
    """
    Tell whether the given file is a configuration file.

    :param path: path of the file
    :return: True if it is a configuration file, False otherwise
    """
    configuration = IsConfiguration(path)
    return configuration
def is_documentation(path: str) -> bool:
    """
    Tell whether the given file is a documentation file.

    :param path: path of the file
    :return: True if it is documentation, False otherwise
    """
    documentation = IsDocumentation(path)
    return documentation
def is_dot_file(path: str) -> bool:
    """
    Tell whether the given file is a dot file.

    :param path: path of the file
    :return: True if it is a dot file, False otherwise
    """
    dot_file = IsDotFile(path)
    return dot_file
def is_image(path: str) -> bool:
    """
    Tell whether the given file is an image file.

    :param path: path of the file
    :return: True if it is an image, False otherwise
    """
    image = IsImage(path)
    return image

6
python/enry/types.py Normal file
View File

@ -0,0 +1,6 @@
from typing import NamedTuple
class Guess(NamedTuple):
    """Result of a single language-detection call: a language plus a flag."""

    # Name of the detected language.
    language: str
    # Flag returned next to the language (labelled "safe" in the C header).
    safe: bool

77
python/enry/utils.py Normal file
View File

@ -0,0 +1,77 @@
from _c_enry import ffi
from enry.types import Guess
from functools import wraps
from typing import Hashable, List, Sequence
def py_bytes_to_go(py_bytes: bytes):
    """
    Build a ``GoSlice`` value describing the given Python bytes.

    :param py_bytes: bytes to expose to the Go side
    :return: tuple of (GoSlice struct, C char buffer backing it); the buffer
        is returned alongside the struct, presumably so the caller can keep
        it referenced while the slice is in use
    """
    backing = ffi.new("char[]", py_bytes)
    size = len(py_bytes)
    # Length and capacity are both set to the number of input bytes.
    slice_ptr = ffi.new("GoSlice *", [backing, size, size])
    return slice_ptr[0], backing
def py_str_to_go(py_str: str):
    """
    Build a ``_GoString_`` value describing the given Python string.

    :param py_str: string to expose to the Go side; encoded with the
        default ``str.encode()`` codec
    :return: tuple of (_GoString_ struct, C char buffer backing it); the
        buffer is returned alongside the struct, presumably so the caller
        can keep it referenced while the string is in use
    """
    encoded = py_str.encode()
    backing = ffi.new("char[]", encoded)
    # The Go string length is the encoded byte count, not the character count.
    string_ptr = ffi.new("_GoString_ *", [backing, len(encoded)])
    return string_ptr[0], backing
def go_str_to_py(go_str: str):
    """
    Convert a Go string value into a Python ``str``.

    The argument is read through its ``p`` (data pointer) and ``n`` (length)
    attributes, so despite the annotation it is the Go string structure
    rather than a Python string.

    :param go_str: Go string structure exposing ``p`` and ``n``
    :return: decoded text, or an empty string when the length is not positive
    """
    length = go_str.n
    if not length > 0:
        # Nothing to unpack; avoid touching the data pointer at all.
        return ""
    raw = ffi.unpack(go_str.p, go_str.n)
    return raw.decode()
def init_go_slice():
    """
    Allocate a new ``GoSlice`` with no initializer.

    :return: pointer to the newly allocated ``GoSlice``
    """
    empty_slice = ffi.new("GoSlice *")
    return empty_slice
def go_str_slice_to_py(str_slice) -> List[str]:
    """
    Convert a Go slice of C strings into a list of Python strings.

    The slice data is reinterpreted as ``char **`` and each entry is read
    with ``ffi.string``, then decoded.

    :param str_slice: slice structure exposing ``len`` and ``data``
    :return: decoded strings, in slice order
    """
    count = str_slice.len
    c_strings = ffi.cast("char **", str_slice.data)
    decoded = []
    for index in range(count):
        decoded.append(ffi.string(c_strings[index]).decode())
    return decoded
def go_bool_to_py(go_bool: bool):
    """
    Convert a boolean value returned by the Go library into a Python bool.

    :param go_bool: value returned by the library
    :return: True only when the value equals 1, False otherwise
    """
    is_true = go_bool == 1
    return is_true
def go_guess_to_py(guess) -> Guess:
    """
    Convert a two-field result structure from the library into a ``Guess``.

    :param guess: structure whose ``r0`` holds the language string and
        ``r1`` holds the boolean flag
    :return: the equivalent ``Guess`` tuple
    """
    language = go_str_to_py(guess.r0)
    safe = go_bool_to_py(guess.r1)
    return Guess(language=language, safe=safe)
# Converters for arguments, keyed by the Python type of the argument.
# Each converter returns a (go_value, backing_buffer) pair.
py_to_go = {
    str: py_str_to_go,
    bytes: py_bytes_to_go,
}
# Converters for return values, keyed by the Python type to produce.
go_to_py = {
    str: go_str_to_py,
    bool: go_bool_to_py,
    Guess: go_guess_to_py,
}
def transform_types(in_types: Sequence[Hashable], out_type: Hashable):
    """
    Build a decorator that adapts a library function to Python types.

    The wrapped function converts each positional argument with the
    ``py_to_go`` converter registered for its declared type, calls the
    underlying function with the converted values, and converts the result
    with the ``go_to_py`` converter registered for ``out_type``.

    :param in_types: Python type of each positional argument, in order
    :param out_type: Python type of the result
    :return: decorator producing the adapted function
    :raises TypeError: (from the adapted function) when the number of
        positional arguments differs from ``len(in_types)``
    """
    expected = len(in_types)

    def decorator(fn):
        @wraps(fn)
        def inner(*args):
            # zip() would silently drop surplus arguments, so check arity first.
            if len(args) != expected:
                name = getattr(fn, "__name__", "function")
                raise TypeError(
                    f"{name}() takes {expected} positional argument(s) "
                    f"but {len(args)} were given"
                )
            # Each converter returns (go_value, backing_buffer); the list keeps
            # the buffers referenced until this call returns.
            converted = [py_to_go[type_](arg) for type_, arg in zip(in_types, args)]
            go_result = fn(*(pair[0] for pair in converted))
            return go_to_py[out_type](go_result)

        return inner

    return decorator
def transform_types_ret_str_slice(in_types: Sequence[Hashable]):
    """
    Build a decorator for library functions that fill an output string slice.

    The wrapped function converts each positional argument with the
    ``py_to_go`` converter registered for its declared type, allocates a new
    slice, passes it as the final argument to the underlying function, and
    returns the slice contents as a list of Python strings.

    :param in_types: Python type of each positional argument, in order
    :return: decorator producing the adapted function
    :raises TypeError: (from the adapted function) when the number of
        positional arguments differs from ``len(in_types)``
    """
    expected = len(in_types)

    def decorator(fn):
        @wraps(fn)
        def inner(*args):
            # zip() would silently drop surplus arguments, so check arity first.
            if len(args) != expected:
                name = getattr(fn, "__name__", "function")
                raise TypeError(
                    f"{name}() takes {expected} positional argument(s) "
                    f"but {len(args)} were given"
                )
            out_slice = init_go_slice()
            # Each converter returns (go_value, backing_buffer); the list keeps
            # the buffers referenced until this call returns.
            converted = [py_to_go[type_](arg) for type_, arg in zip(in_types, args)]
            fn(*(pair[0] for pair in converted), out_slice)
            return go_str_slice_to_py(out_slice)

        return inner

    return decorator

View File

@ -0,0 +1 @@
pytest==6.0.1

View File

@ -1,4 +1,2 @@
cffi==1.12.3 cffi==1.14.1
Click==7.0 pycparser==2.20
pycparser==2.19
yapf==0.27.0

43
python/setup.py Normal file
View File

@ -0,0 +1,43 @@
from logging import getLogger
import shutil
import subprocess
from setuptools import setup, find_packages
from setuptools.command.develop import develop
from setuptools.command.install import install
logger = getLogger(__name__)
def build_go_archive():
    """
    Build the static C archive of the Go library by running ``make static``.

    The command runs in the parent of the directory containing this file, so
    the result does not depend on the current working directory.

    :raises EnvironmentError: when the ``go`` executable is not on the PATH
    :raises subprocess.CalledProcessError: when ``make static`` fails
    """
    from pathlib import Path  # local import: only needed by this build step

    logger.info("Building C archive with static library")
    if shutil.which("go") is None:
        raise EnvironmentError("You should have go installed and available on your path in order to build this module")
    # Anchor on this file's location instead of a cwd-relative "../".
    project_root = Path(__file__).resolve().parent.parent
    subprocess.check_output(["make", "static"], cwd=str(project_root))
    logger.info("C archive successfully built")
class build_static_and_develop(develop):
    """``develop`` command that builds the Go archive before running."""

    def run(self):
        # Build the native archive first, then continue with the stock command.
        build_go_archive()
        super().run()
class build_static_and_install(install):
    """``install`` command that builds the Go archive before running."""

    def run(self):
        # Build the native archive first, then continue with the stock command.
        build_go_archive()
        super().run()
# Package metadata and build configuration for the enry Python bindings.
setup(
    name="enry",
    version="0.1.1",
    description="Python bindings for go-enry package",
    # cffi is needed at build time to process ``cffi_modules``.
    setup_requires=["cffi>=1.0.0"],
    # Build the extension from the ``ffibuilder`` object in build_enry.py.
    cffi_modules=["build_enry.py:ffibuilder"],
    packages=find_packages(),
    install_requires=["cffi>=1.0.0"],
    # Both commands build the Go archive before running the stock command.
    cmdclass={"develop": build_static_and_develop, "install": build_static_and_install}
)

0
python/tests/__init__.py Normal file
View File

102
python/tests/test_enry.py Normal file
View File

@ -0,0 +1,102 @@
from enry import *
import pytest
@pytest.mark.parametrize(
    "filename,content,language",
    [
        ("test.py", "import os", "Python"),
        ("", "#!/usr/bin/bash", "Shell"),
        ("test.hs", "", "Haskell"),
    ],
)
def test_get_language(filename: str, content: str, language: str):
    """Detect the language from a file name combined with its content."""
    detected = get_language(filename, content.encode())
    assert detected == language
def test_get_language_by_filename():
    """A well-known file name maps to its language."""
    guess = get_language_by_filename("pom.xml")
    assert guess.language == "Maven POM"
def test_get_language_by_content():
    """File content is used to detect the language."""
    content = "<?php $foo = bar();".encode()
    guess = get_language_by_content("test.php", content)
    assert guess.language == "PHP"
def test_get_language_by_emacs_modeline():
    """An emacs modeline selects the language."""
    source = "// -*- font:bar;mode:c++ -*-\ntemplate <typename X> class { X i; };"
    guess = get_language_by_emacs_modeline(source.encode())
    assert guess.language == "C++"
def test_get_language_by_vim_modeline():
    """A vim modeline selects the language."""
    source = "# vim: noexpandtab: ft=javascript"
    guess = get_language_by_vim_modeline(source.encode())
    assert guess.language == "JavaScript"
@pytest.mark.parametrize(
    "modeline,language",
    [
        ("// -*- font:bar;mode:c++ -*-\ntemplate <typename X> class { X i; };", "C++"),
        ("# vim: noexpandtab: ft=javascript", "JavaScript"),
    ],
)
def test_get_language_by_modeline(modeline: str, language: str):
    """The generic modeline detection handles both emacs and vim modelines."""
    guess = get_language_by_modeline(modeline.encode())
    assert guess.language == language
def test_get_language_by_extension():
    """The file extension is used to detect the language."""
    guess = get_language_by_extension("test.lisp")
    assert guess.language == "Common Lisp"
def test_get_language_by_shebang():
    """A shebang line selects the language."""
    content = "#!/usr/bin/python3".encode()
    guess = get_language_by_shebang(content)
    assert guess.language == "Python"
def test_get_mime_type():
    """The mime type is looked up for a path and language pair."""
    mime_type = get_mime_type("test.rb", "Ruby")
    assert mime_type == "text/x-ruby"
def test_is_binary():
    """Plain source text must not be classified as binary."""
    content = "println!('Hello world!\n');".encode()
    # Identity check instead of `== False`: also rejects falsy non-bool results.
    assert is_binary(content) is False
@pytest.mark.parametrize(
    "path,is_documentation_actual",
    [
        ("sss/documentation/", True),
        ("docs/", True),
        ("test/", False),
    ],
)
def test_is_documentation(path: str, is_documentation_actual: bool):
    """Documentation paths are recognised; other paths are not."""
    result = is_documentation(path)
    assert result == is_documentation_actual
@pytest.mark.parametrize(
    "path,is_dot_actual",
    [
        (".env", True),
        ("something.py", False),
    ],
)
def test_is_dot(path: str, is_dot_actual: bool):
    """Dot files are recognised; regular files are not."""
    result = is_dot_file(path)
    assert result == is_dot_actual
@pytest.mark.parametrize(
    "path,is_config_actual",
    [
        ("configuration.yml", True),
        ("some_code.py", False),
    ],
)
def test_is_configuration(path: str, is_config_actual: bool):
    """Configuration files are recognised; source files are not."""
    result = is_configuration(path)
    assert result == is_config_actual
@pytest.mark.parametrize(
    "path,is_image_actual",
    [
        ("nsfw.jpg", True),
        ("shrek-picture.png", True),
        ("openjdk-1000.parquet", False),
    ],
)
def test_is_image(path: str, is_image_actual: bool):
    """Image files are recognised; other files are not."""
    result = is_image(path)
    assert result == is_image_actual
def test_get_color():
    """The color of a known language is returned in hex format."""
    color = get_color("Go")
    assert color == "#00ADD8"
def test_get_languages():
    """At least one candidate language is reported for a Python file."""
    content = "import os".encode()
    languages = get_languages("test.py", content)
    assert languages
def test_get_language_extensions():
    """The extensions reported for Python match the expected list exactly."""
    expected = [
        ".py", ".cgi", ".fcgi", ".gyp", ".gypi", ".lmi", ".py3", ".pyde",
        ".pyi", ".pyp", ".pyt", ".pyw", ".rpy", ".smk", ".spec", ".tac",
        ".wsgi", ".xpy",
    ]
    extensions = get_language_extensions("Python")
    assert extensions == expected