commit
a1c2ca7d06
15 changed files with 571 additions and 0 deletions
@@ -0,0 +1,164 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

.venv
.vscode/
template/
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2020 Roberto Alsina

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -0,0 +1,5 @@
Scripts for doing things with Argentina's given-name data.

Live site using this: http://nombres.ralsina.me

* historico/ and busqueda/ are OpenFAAS functions that implement the site (see the usage sketch below)
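Once deployed, both functions answer plain HTTP requests through the OpenFAAS gateway. A minimal client sketch follows, assuming the standard OpenFAAS `/function/<name>` route and the gateway address from nombres.yml (adjust the host to your own deployment):

```python
# Hypothetical client for the two functions; the endpoint paths assume the
# default OpenFAAS "/function/<name>" routing.
import requests

GATEWAY = "http://pinky:8082"  # gateway from nombres.yml; replace as needed

# busqueda: top names, optionally filtered by prefix (p), gender (g) and year (a)
r = requests.post(
    f"{GATEWAY}/function/busqueda",
    json={"p": "mar", "g": "f", "a": 1990},
)
with open("busqueda.svg", "w") as f:
    f.write(r.text)  # the handler returns an SVG chart

# historico: per-year counts for a comma-separated list of names
r = requests.post(f"{GATEWAY}/function/historico", data="maria,juan")
with open("historico.svg", "w") as f:
    f.write(r.text)
```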
@@ -0,0 +1,183 @@
import unicodedata
import urllib.parse
from collections import namedtuple as nt
from dataclasses import dataclass
from json import loads

import pygal
import pyrqlite.dbapi2 as dbapi2
import requests

connection = dbapi2.connect(
    host="10.61.0.1",
    user="root",
    port=4001,
    password="",
)

# Each result row: (count, name)
Dato = nt("Dato", ["contador", "nombre"])


def remove_accents(input_str):
    nfkd_form = unicodedata.normalize("NFKD", input_str)
    return "".join([c for c in nfkd_form if not unicodedata.combining(c)])


@dataclass
class Género:
    """Gender classification for a first name.

    The code below calls Género.get() as if this were an ORM model; as a
    minimal stand-in, instances register themselves in an in-process cache
    keyed by name (an assumption: nothing is persisted across requests).
    """

    nombre: str = ""
    masculinidad: float = 0

    # Class-level registry, not a dataclass field (left unannotated on purpose).
    _registro = {}

    def __post_init__(self):
        Género._registro[self.nombre.lower()] = self

    @classmethod
    def get(cls, nombre):
        return cls._registro.get(nombre.lower())


def split_por_genero(nombres):
    no_clasificados = set()
    # See which of these names are already classified
    for nombre in nombres:
        clasificador = remove_accents(
            nombre.nombre.split()[0]
        )  # genderize does not like accents for AR
        genero = Género.get(nombre=clasificador)
        if not genero:
            # Not classified yet
            no_clasificados.add(urllib.parse.quote(clasificador))

    if no_clasificados:
        print(f"Have {len(no_clasificados)} unclassified names")
        no_clasificados = list(no_clasificados)
        # Look up the unclassified ones
        # Split into chunks of 10 (genderize API limit)
        for i in range(len(no_clasificados) // 10 + 1):
            chunk = no_clasificados[i * 10 : (i + 1) * 10]
            if not chunk:
                continue
            url = f'https://api.genderize.io/?name[]={"&name[]=".join(chunk)}&country_id=AR'
            clasificados = requests.get(url)
            for resultado in clasificados.json():
                if not resultado["name"]:
                    continue  # Don't care
                if resultado["gender"] == "male":
                    masc = resultado["probability"]
                elif resultado["gender"] == "female":
                    masc = 1 - resultado["probability"]
                else:
                    # Probably an accent or something like that
                    print(f"Odd: {resultado}")
                    masc = None
                # Store it
                print(f"Classifying {resultado}: {masc}")
                Género(nombre=resultado["name"], masculinidad=masc)

    nombres_f = []
    nombres_m = []
    for nombre in nombres:
        clasificador = remove_accents(nombre.nombre.split()[0])
        genero = Género.get(nombre=clasificador)
        if not genero or genero.masculinidad is None:  # Not classified: goes in both
            nombres_f.append(nombre)
            nombres_m.append(nombre)
        else:
            # Ambiguous names (masculinidad between 0.4 and 0.6) go in both lists
            if genero.masculinidad > 0.4:
                nombres_m.append(nombre)
            if genero.masculinidad < 0.6:
                nombres_f.append(nombre)
    return {"f": nombres_f, "m": nombres_m}


def handle(req):
    """Handle a request to the function.

    Args:
        req (str): request body, a JSON object like

        {
            p: name prefix,
            g: gender of the name ("f" or "m"),
            a: year of birth
        }

    """

    try:
        data = loads(req)
    except Exception:
        data = {}

    try:
        prefijo = data.get("p") or None
        genero = data.get("g") or None
        try:
            año = int(data.get("a"))
        except Exception:
            año = None
    except Exception:
        prefijo = genero = año = None

    if prefijo is not None:
        prefijo = prefijo.strip().lower()

    if genero not in ("f", "m"):
        genero = None

    if prefijo is None and año is None:  # Global totals
        with connection.cursor() as cursor:
            sql = """
                SELECT total, nombre
                FROM totales
                ORDER BY total DESC
                LIMIT 50
            """
            cursor.execute(sql)
            datos = [Dato(r["total"], r["nombre"]) for r in cursor.fetchall()]

    elif prefijo is None and año is not None:  # Totals per year
        with connection.cursor() as cursor:
            sql = """
                SELECT contador, nombre
                FROM nombres
                WHERE
                    anio = :anio
                ORDER BY contador DESC
                LIMIT 50
            """
            cursor.execute(sql, {"anio": año})
            datos = [Dato(r["contador"], r["nombre"]) for r in cursor.fetchall()]

    elif prefijo is not None and año is None:  # Totals per prefix
        with connection.cursor() as cursor:
            sql = """
                SELECT contador, nombre
                FROM nombres
                WHERE
                    nombre LIKE :nombre
                ORDER BY contador DESC
                LIMIT 50
            """
            cursor.execute(sql, {"nombre": f"{prefijo}%"})
            datos = [Dato(r["contador"], r["nombre"]) for r in cursor.fetchall()]
    else:  # Prefix and year
        with connection.cursor() as cursor:
            sql = """
                SELECT contador, nombre
                FROM nombres
                WHERE
                    anio = :anio AND
                    nombre LIKE :nombre
                ORDER BY contador DESC
                LIMIT 50
            """
            cursor.execute(sql, {"anio": año, "nombre": f"{prefijo}%"})
            datos = [Dato(r["contador"], r["nombre"]) for r in cursor.fetchall()]

    if genero:
        datos = split_por_genero(datos)[genero]

    datos = datos[:10]

    chart = pygal.HorizontalBar(height=400, show_legend=False, show_y_labels=True)
    chart.x_labels = [nombre.title() for _, nombre in datos[::-1]]
    if len(datos) > 1:
        chart.title = f"¿Puede ser ... {datos[0][1].title()}? ¿O capaz que {datos[1][1].title()}? ¡Contáme más!"
    elif len(datos) == 1:
        chart.title = f"¡Hola {datos[0][1].title()}!"
    else:
        chart.title = "¡No esssistís!"
    chart.add("", [contador for contador, _ in datos[::-1]])

    return chart.render(is_unicode=True), 200, {"Content-Type": "image/svg+xml"}
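For reference, this is the shape of the genderize.io batch response that `split_por_genero()` parses; the values below are invented for illustration only:

```python
# Illustrative only: a decoded genderize.io batch response as the handler
# above expects it (the names and probabilities are made up).
ejemplo = [
    {"name": "maria", "gender": "female", "probability": 0.98},
    {"name": "juan", "gender": "male", "probability": 0.99},
    {"name": "xqzw", "gender": None, "probability": 0.0},
]
# masculinidad is probability for "male", 1 - probability for "female",
# and None when genderize cannot classify the name.
```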
@@ -0,0 +1,10 @@
from .handler import handle


# Test your handler here

# To disable testing, you can set the build_arg `TEST_ENABLED=false` on the CLI or in your stack.yml
# https://docs.openfaas.com/reference/yaml/#function-build-args-build-args


def test_handle():
    # assert handle("input") == "input"
    pass
@@ -0,0 +1,3 @@
pygal
requests
pyrqlite
@@ -0,0 +1,41 @@
# If you would like to disable
# automated testing during faas-cli build,

# Replace the content of this file with
# [tox]
# skipsdist = true

# You can also edit, remove, or add additional test steps
# by editing, removing, or adding new testenv sections


# find out more about tox: https://tox.readthedocs.io/en/latest/
[tox]
envlist = lint,test
skipsdist = true

[testenv:test]
deps =
    flask
    pytest
    -rrequirements.txt
commands =
    # run unit tests with pytest
    # https://docs.pytest.org/en/stable/
    # configure by adding a pytest.ini to your handler
    pytest

[testenv:lint]
deps =
    flake8
commands =
    flake8 .

[flake8]
count = true
max-line-length = 127
max-complexity = 10
statistics = true
# stop the build if there are Python syntax errors or undefined names
select = E9,F63,F7,F82
show-source = true
@@ -0,0 +1,15 @@
#!/bin/sh -x
set -e

# My FAAS is arm64, so need to install this to cross-compile
docker run --rm --privileged \
    multiarch/qemu-user-static \
    --reset -p yes

# Build and deploy
if [ ! -d template ]
then
    faas-cli template store pull python3-http
fi
faas-cli publish -f nombres.yml --platforms linux/arm64 --build-arg 'TEST_ENABLED=false'
faas-cli deploy -f nombres.yml
@@ -0,0 +1,62 @@
import unicodedata
from collections import defaultdict as ddict

import pygal
import pyrqlite.dbapi2 as dbapi2

connection = dbapi2.connect(
    host="10.61.0.1",
    user="root",
    port=4001,
    password="",
)


def remove_accents(input_str):
    nfkd_form = unicodedata.normalize("NFKD", input_str)
    return "".join([c for c in nfkd_form if not unicodedata.combining(c)])


def handle(req):
    """Handle a request to the function.

    Args:
        req (str): request body, a comma-separated list of names,
            e.g. "maria,juan"

    Returns a pygal line chart (SVG) with the per-year counts of each name.
    """
    try:
        nombres = [remove_accents(x.strip().lower()) for x in req.split(",")]
    except Exception:
        nombres = ["maria", "juan"]

    chart = pygal.Line(
        height=200, fill=True, human_readable=True, show_minor_x_labels=False
    )
    for nombre in nombres:
        datos = ddict(int)
        with connection.cursor() as cursor:
            sql = """
                SELECT anio, contador, nombre
                FROM nombres
                WHERE nombre = :nombre
                ORDER BY anio
            """
            cursor.execute(sql, {"nombre": nombre})
            datos.update({r["anio"]: r["contador"] for r in cursor.fetchall()})
        chart.add(nombre.title(), [datos[x] for x in range(1922, 2015)])

    chart.x_labels = [str(n) for n in range(1922, 2015)]
    chart.x_labels_major = [str(n) for n in range(1920, 2020, 10)]

    return chart.render(is_unicode=True), 200, {"Content-Type": "image/svg+xml"}
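Both handlers assume an rqlite database with a per-year `nombres` table and a precomputed `totales` table. A sketch of that schema, inferred from the SELECT statements above (column types are a guess; the actual data load is not part of this commit):

```python
# Hypothetical schema setup, inferred from the queries in the two handlers.
import pyrqlite.dbapi2 as dbapi2

connection = dbapi2.connect(host="10.61.0.1", port=4001, user="root", password="")
with connection.cursor() as cursor:
    # One row per (name, year) with the number of registered births
    cursor.execute(
        "CREATE TABLE IF NOT EXISTS nombres (nombre TEXT, anio INTEGER, contador INTEGER)"
    )
    # Precomputed totals per name across all years
    cursor.execute(
        "CREATE TABLE IF NOT EXISTS totales (nombre TEXT, total INTEGER)"
    )
```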
@@ -0,0 +1,10 @@
from .handler import handle


# Test your handler here

# To disable testing, you can set the build_arg `TEST_ENABLED=false` on the CLI or in your stack.yml
# https://docs.openfaas.com/reference/yaml/#function-build-args-build-args


def test_handle():
    # assert handle("input") == "input"
    pass
@@ -0,0 +1,3 @@
pygal
requests
pyrqlite
@@ -0,0 +1,41 @@
# If you would like to disable
# automated testing during faas-cli build,

# Replace the content of this file with
# [tox]
# skipsdist = true

# You can also edit, remove, or add additional test steps
# by editing, removing, or adding new testenv sections


# find out more about tox: https://tox.readthedocs.io/en/latest/
[tox]
envlist = lint,test
skipsdist = true

[testenv:test]
deps =
    flask
    pytest
    -rrequirements.txt
commands =
    # run unit tests with pytest
    # https://docs.pytest.org/en/stable/
    # configure by adding a pytest.ini to your handler
    pytest

[testenv:lint]
deps =
    flake8
commands =
    flake8 .

[flake8]
count = true
max-line-length = 127
max-complexity = 10
statistics = true
# stop the build if there are Python syntax errors or undefined names
select = E9,F63,F7,F82
show-source = true
@@ -0,0 +1,13 @@
version: 1.0
provider:
  name: openfaas
  gateway: http://pinky:8082
functions:
  busqueda:
    lang: python3-flask
    handler: ./busqueda
    image: ralsina/nombres_busqueda:latest
  historico:
    lang: python3-flask
    handler: ./historico
    image: ralsina/nombres_historico:latest