From a1c2ca7d06f9de3ec2296609382ff5651150d8e0 Mon Sep 17 00:00:00 2001 From: Roberto Alsina Date: Tue, 19 Jul 2022 11:56:46 -0300 Subject: [PATCH] Version inicial --- .gitignore | 164 ++++++++++++++++++++++++++ LICENSE | 21 ++++ nombres/README.md | 5 + nombres/busqueda/__init__.py | 0 nombres/busqueda/handler.py | 183 +++++++++++++++++++++++++++++ nombres/busqueda/handler_test.py | 10 ++ nombres/busqueda/requirements.txt | 3 + nombres/busqueda/tox.ini | 41 +++++++ nombres/deploy.sh | 15 +++ nombres/historico/__init__.py | 0 nombres/historico/handler.py | 62 ++++++++++ nombres/historico/handler_test.py | 10 ++ nombres/historico/requirements.txt | 3 + nombres/historico/tox.ini | 41 +++++++ nombres/nombres.yml | 13 ++ 15 files changed, 571 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 nombres/README.md create mode 100644 nombres/busqueda/__init__.py create mode 100644 nombres/busqueda/handler.py create mode 100644 nombres/busqueda/handler_test.py create mode 100644 nombres/busqueda/requirements.txt create mode 100644 nombres/busqueda/tox.ini create mode 100755 nombres/deploy.sh create mode 100644 nombres/historico/__init__.py create mode 100644 nombres/historico/handler.py create mode 100644 nombres/historico/handler_test.py create mode 100644 nombres/historico/requirements.txt create mode 100644 nombres/historico/tox.ini create mode 100644 nombres/nombres.yml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8d0aa3a --- /dev/null +++ b/.gitignore @@ -0,0 +1,164 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to 
inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +.venv +.vscode/ +template/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..455579a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Roberto Alsina + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/nombres/README.md b/nombres/README.md
new file mode 100644
index 0000000..365941c
--- /dev/null
+++ b/nombres/README.md
@@ -0,0 +1,5 @@
+Scripts para hacer cosas con la data de nombres de Argentina.
+
+Página con esto funcionando: http://nombres.ralsina.me
+
+* historico/ y busqueda/ son funciones OpenFAAS para implementar el sitio
diff --git a/nombres/busqueda/__init__.py b/nombres/busqueda/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nombres/busqueda/handler.py b/nombres/busqueda/handler.py
new file mode 100644
index 0000000..87ee6f6
--- /dev/null
+++ b/nombres/busqueda/handler.py
@@ -0,0 +1,219 @@
+import unicodedata
+import urllib
+from collections import namedtuple as nt
+from dataclasses import dataclass
+from json import loads
+import logging
+
+import pygal
+import pyrqlite.dbapi2 as dbapi2
+import requests
+
+# NOTE: the rqlite connection is opened once, at import time, and shared
+# by every request served by this function instance.
+connection = dbapi2.connect(
+    host="10.61.0.1",
+    user="root",
+    port=4001,
+    password="",
+)
+
+# In-process cache of gender classifications, keyed by accent-free first
+# name. It replaces the ``Género.get`` lookup that a plain dataclass does
+# not provide; it is lost whenever the function instance restarts.
+_generos = {}
+
+
+def remove_accents(input_str):
+    """Return ``input_str`` with its combining marks (accents) removed."""
+    nfkd_form = unicodedata.normalize("NFKD", input_str)
+    return "".join([c for c in nfkd_form if not unicodedata.combining(c)])
+
+
+@dataclass
+class Género:
+    """Gender classification of a first name.
+
+    ``masculinidad`` is the probability (0 to 1) that the name is male, or
+    ``None`` when genderize.io could not classify it.
+    """
+
+    nombre: str = ""
+    masculinidad: float = 0
+
+
+def _clasificador(fila):
+    """Return the accent-free first word of the name held by ``fila``.
+
+    ``fila`` is either a ``(count, name)`` tuple, as built by ``handle``,
+    or an object exposing the name as a ``nombre`` attribute.
+    """
+    nombre = fila.nombre if hasattr(fila, "nombre") else fila[1]
+    # genderize does not handle accents well for AR
+    return remove_accents(nombre.split()[0])
+
+
+def split_por_genero(nombres):
+    """Split name rows into feminine and masculine lists.
+
+    First names missing from the local cache are looked up on genderize.io
+    and cached. Rows whose name is unclassified, or ambiguous (masculinity
+    between 0.4 and 0.6), are placed in both lists.
+
+    Args:
+        nombres: iterable of rows accepted by ``_clasificador``.
+
+    Returns:
+        dict: ``{"f": [...], "m": [...]}``, each list in input order.
+    """
+    nombres = list(nombres)
+    # Find out which of these names are not classified yet
+    no_clasificados = sorted(
+        {c for c in map(_clasificador, nombres) if c not in _generos}
+    )
+    if no_clasificados:
+        print(f"Tengo {len(no_clasificados)} sin clasificar")
+
+    # Look them up in blocks of 10 (genderize API limit). Stepping by 10
+    # avoids the empty trailing request the old ``len // 10 + 1`` loop sent.
+    for i in range(0, len(no_clasificados), 10):
+        chunk = [urllib.parse.quote(n) for n in no_clasificados[i : i + 10]]
+        url = f'https://api.genderize.io/?name[]={"&name[]=".join(chunk)}&country_id=AR'
+        try:
+            respuesta = requests.get(url, timeout=10)
+            respuesta.raise_for_status()
+            resultados = respuesta.json()
+        except (requests.RequestException, ValueError) as e:
+            # Best effort: names left unclassified end up in both lists
+            print(f"Error consultando genderize: {e}")
+            break
+        for resultado in resultados:
+            if not resultado["name"]:
+                continue  # Not interesting
+            if resultado["gender"] == "male":
+                masc = resultado["probability"]
+            elif resultado["gender"] == "female":
+                masc = 1 - resultado["probability"]
+            else:
+                # Probably an accent or something like that
+                print(f"Raro:{resultado}")
+                masc = None
+            # Store it in the cache
+            print(f"Clasificando {resultado}: {masc}")
+            _generos[resultado["name"]] = Género(
+                nombre=resultado["name"], masculinidad=masc
+            )
+
+    nombres_f = []
+    nombres_m = []
+    for fila in nombres:
+        genero = _generos.get(_clasificador(fila))
+        if genero is None or genero.masculinidad is None:
+            # Not classified: goes in both
+            nombres_f.append(fila)
+            nombres_m.append(fila)
+            continue
+        # Independent tests, so the 0.4-0.6 band lands in both lists
+        if genero.masculinidad > 0.4:
+            nombres_m.append(fila)
+        if genero.masculinidad < 0.6:
+            nombres_f.append(fila)
+    return {"f": nombres_f, "m": nombres_m}
+
+
+def _buscar(prefijo, año):
+    """Return up to 50 ``(count, name)`` rows, most popular first.
+
+    With no filters the all-time totals are returned; otherwise the yearly
+    counts are filtered by year and/or name prefix.
+    """
+    with connection.cursor() as cursor:
+        if prefijo is None and año is None:  # Global totals
+            sql = """
+                SELECT total, nombre
+                FROM totales
+                ORDER BY total DESC
+                LIMIT 50
+            """
+            cursor.execute(sql)
+            return [(r["total"], r["nombre"]) for r in cursor.fetchall()]
+
+        condiciones = []
+        params = {}
+        if año is not None:
+            condiciones.append("anio = :anio")
+            params["anio"] = año
+        if prefijo is not None:
+            condiciones.append("nombre LIKE :nombre")
+            # f-string: the original bound the literal text "{prefijo}%"
+            params["nombre"] = f"{prefijo}%"
+        where = " AND ".join(condiciones)
+        # Only the fixed fragments above are interpolated into the SQL;
+        # user-supplied values always travel as bound parameters.
+        sql = f"""
+            SELECT contador, nombre
+            FROM nombres
+            WHERE {where}
+            ORDER BY contador DESC
+            LIMIT 50
+        """
+        cursor.execute(sql, params)
+        return [(r["contador"], r["nombre"]) for r in cursor.fetchall()]
+
+
+def handle(req):
+    """Render an SVG bar chart of the most popular names for a query.
+
+    Args:
+        req (str): request body, a JSON object with optional keys
+
+        {
+            p: name prefix,
+            g: gender of the name ("f" or "m"),
+            a: year of birth
+        }
+
+        Missing or invalid values are treated as absent.
+
+    Returns:
+        tuple: (SVG document, HTTP status 200, response headers)
+    """
+    try:
+        data = loads(req)
+    except Exception:
+        data = {}
+    if not isinstance(data, dict):
+        data = {}
+
+    prefijo = data.get("p") or None
+    genero = data.get("g") or None
+    try:
+        año = int(data.get("a"))
+    except (TypeError, ValueError, OverflowError):
+        año = None
+
+    if prefijo is not None:
+        # A blank prefix means "no prefix" rather than "match everything"
+        prefijo = str(prefijo).strip().lower() or None
+
+    if genero not in ("f", "m"):
+        genero = None
+
+    datos = _buscar(prefijo, año)
+
+    if genero:
+        datos = split_por_genero(datos)[genero]
+
+    datos = datos[:10]
+
+    chart = pygal.HorizontalBar(height=400, show_legend=False, show_y_labels=True)
+    chart.x_labels = [nombre.title() for _, nombre in datos[::-1]]
+    if len(datos) > 1:
+        chart.title = f"¿Puede ser ... {datos[0][1].title()}? ¿O capaz que {datos[1][1].title()}? ¡Contáme más!"
+    elif len(datos) == 1:
+        chart.title = f"¡Hola {datos[0][1].title()}!"
+    else:
+        chart.title = "¡No esssistís!"
+    chart.add("", [contador for contador, _ in datos[::-1]])
+
+    return chart.render(is_unicode=True), 200, {"Content-Type": "image/svg+xml"}
diff --git a/nombres/busqueda/handler_test.py b/nombres/busqueda/handler_test.py
new file mode 100644
index 0000000..b07d5bf
--- /dev/null
+++ b/nombres/busqueda/handler_test.py
@@ -0,0 +1,10 @@
+from .handler import handle
+
+# Test your handler here
+
+# To disable testing, you can set the build_arg `TEST_ENABLED=false` on the CLI or in your stack.yml
+# https://docs.openfaas.com/reference/yaml/#function-build-args-build-args
+
+def test_handle():
+    # assert handle("input") == "input"
+    pass
diff --git a/nombres/busqueda/requirements.txt b/nombres/busqueda/requirements.txt
new file mode 100644
index 0000000..cbc289f
--- /dev/null
+++ b/nombres/busqueda/requirements.txt
@@ -0,0 +1,3 @@
+pygal
+requests
+pyrqlite
diff --git a/nombres/busqueda/tox.ini b/nombres/busqueda/tox.ini
new file mode 100644
index 0000000..a64a800
--- /dev/null
+++ b/nombres/busqueda/tox.ini
@@ -0,0 +1,41 @@
+# If you would like to disable
+# automated testing during faas-cli build,
+
+# Replace the content of this file with
+#   [tox]
+#   skipsdist = true
+
+# You can also edit, remove, or add additional test steps
+# by editing, removing, or adding new testenv sections
+
+
+# find out more about tox: https://tox.readthedocs.io/en/latest/
+[tox]
+envlist = lint,test
+skipsdist = true
+
+[testenv:test]
+deps =
+  flask
+  pytest
+  -rrequirements.txt
+commands =
+  # run unit tests with pytest
+  # https://docs.pytest.org/en/stable/
+  # configure by adding a pytest.ini to your handler
+  pytest
+
+[testenv:lint]
+deps =
+  flake8
+commands =
+  flake8 .
+
+[flake8]
+count = true
+max-line-length = 127
+max-complexity = 10
+statistics = true
+# stop the build if there are Python syntax errors or undefined names
+select = E9,F63,F7,F82
+show-source = true
diff --git a/nombres/deploy.sh b/nombres/deploy.sh
new file mode 100755
index 0000000..1d00e2d
--- /dev/null
+++ b/nombres/deploy.sh
@@ -0,0 +1,16 @@
+#!/bin/sh -x
+set -e
+
+# My FAAS is arm64, so need to install this to cross-compile
+docker run --rm --privileged \
+  multiarch/qemu-user-static \
+  --reset -p yes
+
+# Build and deploy
+# faas-cli pulls templates into ./template; fetch them only when missing
+if [ ! -d template ]
+then
+  faas-cli template store pull python3-http
+fi
+faas-cli publish -f nombres.yml --platforms linux/arm64 --build-arg 'TEST_ENABLED=false'
+faas-cli deploy -f nombres.yml
diff --git a/nombres/historico/__init__.py b/nombres/historico/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nombres/historico/handler.py b/nombres/historico/handler.py
new file mode 100644
index 0000000..890c0bf
--- /dev/null
+++ b/nombres/historico/handler.py
@@ -0,0 +1,72 @@
+import unicodedata
+import urllib
+from collections import defaultdict as ddict
+from dataclasses import dataclass
+from json import loads
+
+import pygal
+import pyrqlite.dbapi2 as dbapi2
+import requests
+
+# NOTE: the rqlite connection is opened once, at import time, and shared
+# by every request served by this function instance.
+connection = dbapi2.connect(
+    host="10.61.0.1",
+    user="root",
+    port=4001,
+    password="",
+)
+
+# Years plotted on the x axis: 1922 to 2014 inclusive
+AÑOS = range(1922, 2015)
+
+
+def remove_accents(input_str):
+    """Return ``input_str`` with its combining marks (accents) removed."""
+    nfkd_form = unicodedata.normalize("NFKD", input_str)
+    return "".join([c for c in nfkd_form if not unicodedata.combining(c)])
+
+
+def handle(req):
+    """Render an SVG line chart of yearly counts for one or more names.
+
+    Args:
+        req (str): request body, a comma-separated list of names.
+            Each name is stripped, lowercased and has its accents
+            removed before being looked up. When the body cannot be
+            parsed or holds no names, "maria" and "juan" are charted.
+
+    Returns:
+        tuple: (SVG document, HTTP status 200, response headers)
+    """
+    try:
+        # Normalize each name ``x``; the original normalized the whole
+        # of ``req`` once per name, so multi-name requests never matched
+        nombres = [remove_accents(x.strip().lower()) for x in req.split(",")]
+        nombres = [nombre for nombre in nombres if nombre]
+    except (AttributeError, TypeError):
+        nombres = []
+    if not nombres:
+        nombres = ["maria", "juan"]
+
+    chart = pygal.Line(
+        height=200, fill=True, human_readable=True, show_minor_x_labels=False
+    )
+    for nombre in nombres:
+        # Years with no row for this name are plotted as 0
+        datos = ddict(int)
+        with connection.cursor() as cursor:
+            sql = """
+                SELECT anio, contador, nombre
+                FROM nombres
+                WHERE nombre = :nombre
+                ORDER BY anio
+            """
+            cursor.execute(sql, {"nombre": nombre})
+            datos.update({r["anio"]: r["contador"] for r in cursor.fetchall()})
+        chart.add(nombre.title(), [datos[x] for x in AÑOS])
+
+    chart.x_labels = [str(n) for n in AÑOS]
+    chart.x_labels_major = [str(n) for n in range(1920, 2020, 10)]
+
+    return chart.render(is_unicode=True), 200, {"Content-Type": "image/svg+xml"}
diff --git a/nombres/historico/handler_test.py b/nombres/historico/handler_test.py
new file mode 100644
index 0000000..b07d5bf
--- /dev/null
+++ b/nombres/historico/handler_test.py
@@ -0,0 +1,10 @@
+from .handler import handle
+
+# Test your handler here
+
+# To disable testing, you can set the build_arg `TEST_ENABLED=false` on the CLI or in your stack.yml
+# https://docs.openfaas.com/reference/yaml/#function-build-args-build-args
+
+def test_handle():
+    # assert handle("input") == "input"
+    pass
diff --git a/nombres/historico/requirements.txt b/nombres/historico/requirements.txt
new file mode 100644
index 0000000..cbc289f
--- /dev/null
+++ b/nombres/historico/requirements.txt
@@ -0,0 +1,3 @@
+pygal
+requests
+pyrqlite
diff --git a/nombres/historico/tox.ini b/nombres/historico/tox.ini
new file mode 100644
index 0000000..a64a800
--- /dev/null
+++ b/nombres/historico/tox.ini
@@ -0,0 +1,41 @@
+# If you would like to disable
+# automated testing during faas-cli build,
+
+# Replace the content of this file with
+#   [tox]
+#   skipsdist = true
+
+# You can also edit, remove, or add additional test steps
+# by editing, removing, or adding new testenv sections
+
+
+# find out more about tox: https://tox.readthedocs.io/en/latest/
+[tox]
+envlist = lint,test
+skipsdist = true
+
+[testenv:test]
+deps =
+  flask
+  pytest
+  -rrequirements.txt
+commands =
+  # run unit tests with pytest
+  # https://docs.pytest.org/en/stable/
+  # configure by adding a pytest.ini to your handler
+  pytest
+
+[testenv:lint]
+deps =
+  flake8
+commands =
+  flake8 .
+
+[flake8]
+count = true
+max-line-length = 127
+max-complexity = 10
+statistics = true
+# stop the build if there are Python syntax errors or undefined names
+select = E9,F63,F7,F82
+show-source = true
diff --git a/nombres/nombres.yml b/nombres/nombres.yml
new file mode 100644
index 0000000..7665d82
--- /dev/null
+++ b/nombres/nombres.yml
@@ -0,0 +1,13 @@
+version: 1.0
+provider:
+  name: openfaas
+  gateway: http://pinky:8082
+functions:
+  busqueda:
+    lang: python3-flask
+    handler: ./busqueda
+    image: ralsina/nombres_busqueda:latest
+  historico:
+    lang: python3-flask
+    handler: ./historico
+    image: ralsina/nombres_historico:latest