2024-05-15 21:07:25 +00:00
|
|
|
require "http/client"
|
2023-06-04 14:53:24 +00:00
|
|
|
require "http/headers"
|
2024-05-15 21:07:25 +00:00
|
|
|
require "http/request"
|
2023-06-04 14:53:24 +00:00
|
|
|
require "ishi/html"
|
|
|
|
require "json"
|
2024-05-15 21:07:25 +00:00
|
|
|
require "uuid"
|
|
|
|
require "db"
|
|
|
|
require "pg"
|
|
|
|
|
2024-05-15 22:47:54 +00:00
|
|
|
# Database credentials are read from the OpenFaaS secret files; surrounding
# whitespace (such as a trailing newline) is stripped.
USER = File.read("/var/openfaas/secrets/nombres-user").strip
PASS = File.read("/var/openfaas/secrets/nombres-pass").strip

# Connection string handed to DB.open for the "nombres" PostgreSQL database.
DB_URL = String.build do |url|
  url << "postgres://" << USER << ":" << PASS << "@10.61.0.1:5432/nombres"
end
|
2023-06-04 14:53:24 +00:00
|
|
|
|
|
|
|
# Function handler: looks up the most popular given names matching an optional
# prefix, birth year and gender, and answers with an HTML fragment containing
# a gnuplot (Ishi) bar chart.
class Handler
  # Single-row query returning the female and male frequency of a name.
  # The sub-selects guarantee exactly one row even when the name is unknown,
  # and the name is bound as `$1` instead of being interpolated.
  FEMINIDAD_SQL = <<-SQL
    SELECT COALESCE((SELECT frecuencia FROM mujeres WHERE nombre=$1), 0) AS mujeres,
           COALESCE((SELECT frecuencia FROM hombres WHERE nombre=$1), 0) AS hombres
    SQL

  # Process the gnuplot output so it works in the page.
  #
  # * `buffer` is the Ishi output.
  # * `canvas_name` replaces every occurrence of `gnuplot_canvas`, so we can
  #   have multiple charts in a page.
  # * `title` is added, in bold, on top of the chart.
  #
  # Only the lines from the `<script type="text/javascript">` line through the
  # following `</script>` line are kept from `buffer`; the canvas markup is
  # appended after them. Returns the resulting HTML as a String.
  def format_buffer(buffer, canvas_name, title = "")
    lines = buffer.to_s.split("\n")
    first = lines.index("<script type=\"text/javascript\">")
    # Look for the closing tag *after* the opening one, so an earlier
    # `</script>` line cannot produce an empty or inverted slice.
    last = first ? lines.index("</script>", first) : lines.index("</script>")
    script = lines[first..last]

    chart = <<-HTML
      <div class="gnuplot">
      <canvas id="Tile" width="32" height="32" hidden></canvas>
      <table class="plot">
      <tr><td>
      <canvas id="gnuplot_canvas" width="800" height="300" tabindex="0">
      Sorry, your browser seems not to support the HTML 5 canvas element
      </canvas>
      </td></tr>
      </table>
      <script type="text/javascript" defer>
      gnuplot.init(); gnuplot_canvas();
      </script>
      </div>
      HTML

    html = "<b>#{title}</b>#{script.join("\n")}\n#{chart}\n"

    # This ID needs to be unique in case we have 2 charts in the same page.
    html.gsub("gnuplot_canvas", canvas_name)
  end

  # Removes diacritics and every character that is not an ASCII letter
  # (spaces and digits included), then lowercases the result.
  def normalize_name(s)
    s.unicode_normalize(:nfkd).chars.select(&.ascii_letter?).join.downcase
  end

  # Returns how feminine `nombre` is, as the share of female registrations
  # among all registrations of that name (a Float64), or 0.5 when the name
  # appears in neither table.
  #
  # `cursor` is an open database handle/connection that supports `query_one`.
  def feminidad(cursor, nombre)
    # Yes this database is upper case
    nombre = nombre.to_s.upcase
    mujeres, hombres = cursor.query_one(FEMINIDAD_SQL, nombre, as: {Int32, Int32})
    return 0.5 if hombres == 0 && mujeres == 0
    mujeres / (hombres + mujeres)
  end

  # Try to find most popular names based on a prefix, year and gender.
  #
  # Request body is JSON in this form:
  #
  # {
  #   p: name prefix,
  #   g: gender of the name ("f" or "m"),
  #   y: year of birth
  # }
  #
  # Every key is optional; a missing or empty body means "no filters".
  # Returns a named tuple with `body`, `status_code` and `headers`.
  def run(request : HTTP::Request)
    query = parse_query(request)

    # Sanitize input. Each one ends up either a valid value or nil.
    prefijo = query.fetch("p", "")
    # A prefix with no ASCII letters normalizes to "" and counts as "no prefix".
    prefijo = prefijo.empty? ? nil : normalize_name(prefijo).presence
    genero = query.fetch("g", "")
    genero = nil unless {"f", "m"}.includes?(genero)
    year = query.fetch("y", "").to_i?

    sql = build_sql(prefijo, year)
    puts "QUERY: #{sql}"

    # One connection serves both the main query and the per-name gender lookups.
    datos = DB.open(DB_URL) do |cursor|
      filas = cursor.query_all(sql, as: {Int32, String})
      puts "Data gathered"

      # In this context, remove all composite names
      filas.reject! { |fila| fila[1].includes?(" ") }
      filas = filtrar_por_genero(cursor, filas, genero) if genero
      filas
    end

    # NOTE(review): `[..10]` keeps 11 rows (indices 0 through 10), as before;
    # confirm whether 10 was intended.
    datos = datos[..10]

    # Nothing found, or nothing left after filtering: there is nothing to plot.
    return sin_resultados if datos.empty?

    primero = datos[0][1].titleize
    title = if datos.size > 1
              "¿Puede ser ... #{primero}? ¿O capaz que #{datos[1][1].titleize}? ¡Contame más!"
            else
              "Me parece que ... #{primero}!"
            end

    {
      body:        format_buffer(grafico(datos), "busqueda", title),
      status_code: 200,
      headers:     HTTP::Headers{"Content-Type" => "text/html"},
    }
  end

  # Reads the JSON request body into a String => String hash.
  # A missing or blank body yields an empty hash; malformed JSON still raises.
  private def parse_query(request : HTTP::Request) : Hash(String, String)
    raw = request.body.try(&.gets_to_end) || ""
    return {} of String => String if raw.blank?
    Hash(String, String).from_json(raw)
  end

  # Builds the SELECT for the requested filters.
  #
  # With a year the per-year `nombres` table (column `contador`) is used,
  # otherwise the `totales` table (column `total`). The count is always cast
  # to integer because rows are read as Int32.
  #
  # Interpolation is safe here: `prefijo` only contains lowercase ASCII
  # letters (see `normalize_name`) and `year` is an Int32.
  private def build_sql(prefijo : String?, year : Int32?) : String
    if year
      columna = "contador"
      tabla = "nombres"
    else
      columna = "total"
      tabla = "totales"
    end

    filtros = [] of String
    filtros << "anio = '#{year}'" if year
    filtros << "nombre LIKE '#{prefijo}%'" if prefijo
    where = filtros.empty? ? "" : "WHERE #{filtros.join(" AND ")}"

    "SELECT #{columna}::integer, nombre FROM #{tabla} #{where} ORDER BY #{columna} DESC LIMIT 50"
  end

  # Keeps only the rows whose name matches `genero` ("f" or "m").
  # The overlap at exactly 0.5 is intentional: such names match both genders.
  private def filtrar_por_genero(cursor, datos, genero : String)
    filtrados = datos.select do |fila|
      fem = feminidad(cursor, fila[1])
      genero == "f" ? fem >= 0.5 : fem <= 0.5
    end
    puts "Data split by gender"
    filtrados
  end

  # Renders `datos` (non-empty array of {count, name}) as a bar chart and
  # returns the buffer holding Ishi's HTML output.
  private def grafico(datos)
    buffer = IO::Memory.new
    Ishi.new(buffer) do
      x = (0..datos.size - 1).to_a
      # Counts are plotted in thousands.
      y = datos.map { |fila| fila[0].to_f / 1000 }
      yrange(0..(y.max*1.1).to_i + 1)

      xtics = Hash(Float64, String).new
      datos.each_with_index { |fila, i| xtics[i.to_f] = fila[1].titleize }

      canvas_size(800, 300)
      plot(x, y, style: :boxes, fs: 0.25)
        .boxwidth(0.5)
        .show_key(false)
        .ylabel("Popularidad (miles)")
        .xtics(xtics)
    end
    buffer
  end

  # Response used when there is no name to suggest.
  private def sin_resultados
    {
      body:        "Que raro, no tengo *idea*!",
      status_code: 200,
      headers:     HTTP::Headers{"Content-Type" => "text/html"},
    }
  end
end
|