Functions/old-busqueda/handler.cr

229 lines
6.3 KiB
Crystal
Raw Permalink Normal View History

require "http/client"
require "http/headers"
require "http/request"
require "ishi/html"
require "json"
require "uuid"
require "db"
require "pg"
2024-05-15 22:47:54 +00:00
USER = File.read("/var/openfaas/secrets/nombres-user").strip
PASS = File.read("/var/openfaas/secrets/nombres-pass").strip
DB_URL = "postgres://#{USER}:#{PASS}@10.61.0.1:5432/nombres"
class Handler
2024-05-15 23:48:29 +00:00
# This class is the entry point for the OpenFaaS function.
# run() is the important bit
2023-06-04 21:16:21 +00:00
def format_buffer(buffer, canvas_name, title = "")
# Process the gnuplot output so it works in the page
#
# buffer is the Ishi output
# name is a string to replace for gnuplot_canvas so
# we can have multiple charts in a page
# title is added on top of the chart
html = buffer.to_s.split("\n")
html = html[html.index("<script type=\"text/javascript\">")..html.index("</script>")]
html = "<b>#{title}</b>" + html.join("\n") + %(
<div class="gnuplot">
<canvas id="Tile" width="32" height="32" hidden></canvas>
<table class="plot">
<tr><td>
<canvas id="gnuplot_canvas" width="800" height="300" tabindex="0">
Sorry, your browser seems not to support the HTML 5 canvas element
</canvas>
</td></tr>
</table>
<script type="text/javascript" defer>
gnuplot.init(); gnuplot_canvas();
</script>
</div>
)
# This ID needs to be unique in case
# we have 2 charts in the same page
html.gsub("gnuplot_canvas", canvas_name)
end
def normalize_name(s)
# Remove diacritics, turn lowercase
normalized = s.unicode_normalize(:nfkd).chars
2024-05-15 22:47:54 +00:00
normalized.reject! { |character|
!character.ascii_letter?
}.join("").downcase
end
def run(request : HTTP::Request)
2023-06-04 22:08:36 +00:00
# Try to find most popular names based on a prefix, year and gender.
#
# Request body is JSON in this form:
#
# {
# p: prefijo del nombre,
# g: genero del nombre,
2024-05-15 22:47:54 +00:00
# y: year de nacimiento
2023-06-04 22:08:36 +00:00
# }
2024-05-15 22:47:54 +00:00
if (body = request.body).nil?
2024-05-15 23:18:09 +00:00
query = {"p": "", "g": "", "a": ""}
2024-05-15 22:47:54 +00:00
else
query = Hash(String, String).from_json(body)
end
# Sanitize input.
# Each one either a valid string or nil
prefijo = query.fetch("p", "")
genero = query.fetch("g", "")
2024-05-15 22:47:54 +00:00
year = query.fetch("y", "")
if !prefijo.empty?
prefijo = normalize_name(prefijo)
else
prefijo = nil
end
if !["f", "m"].includes?(genero)
genero = nil
end
2024-05-15 22:47:54 +00:00
year = year.to_i?
2024-05-15 22:58:51 +00:00
datos = [] of Tuple(Int32, String)
2024-05-15 23:48:29 +00:00
DB.open(DB_URL) do |cursor|
2024-05-16 18:31:09 +00:00
if prefijo.nil? && year.nil?
# Global totals
result_set = cursor.query("
SELECT total::integer, nombre
FROM totales
ORDER BY total DESC
LIMIT 50")
elsif prefijo.nil? && !year.nil?
# Per-year totals
result_set = cursor.query("
SELECT contador::integer, nombre
FROM nombres
WHERE
anio = $1
ORDER BY contador DESC
LIMIT 50", year)
elsif !prefijo.nil? && year.nil?
# Filter only by prefix
result_set = cursor.query("
SELECT total::integer, nombre
FROM totales
WHERE
nombre LIKE $1
ORDER BY total DESC
LIMIT 50", prefijo + "%")
elsif !prefijo.nil? && !year.nil?
# We have both
result_set = cursor.query("
SELECT contador::integer, nombre
FROM nombres
WHERE
anio = $1 AND
nombre LIKE $2
ORDER BY contador DESC
LIMIT 50", year, prefijo + "%")
end
if !result_set.nil?
2024-05-15 22:47:54 +00:00
result_set.each do
valor = result_set.read(Int32)
nombre = result_set.read(String)
datos.push({valor, nombre})
end
2024-05-16 18:31:09 +00:00
result_set.close
2024-05-15 22:47:54 +00:00
end
end
2024-05-15 23:32:17 +00:00
puts "Data gathered"
2024-05-15 22:47:54 +00:00
if datos.empty?
# This is bad 😀
return {
body: "Que raro, no tengo *idea*!",
status_code: 200,
headers: HTTP::Headers{"Content-Type" => "text/html"},
}
end
# In this context, remove all composite names
2024-05-15 22:47:54 +00:00
datos.reject! { |row|
row[1].to_s.includes? " "
}
if genero
2024-05-15 23:48:29 +00:00
DB.open(DB_URL) do |cursor|
datos.reject! { |row|
# How feminine is this name?
# Yes this database is upper case
puts "Checking #{row[0]} #{row[1]}"
feminidad = 0
sql = %(
SELECT COALESCE((SELECT frecuencia FROM mujeres WHERE nombre='#{row[1]?.to_s.upcase}'), 0) AS mujeres,
COALESCE((SELECT frecuencia FROM hombres WHERE nombre='#{row[1]?.to_s.upcase}'), 0) AS hombres
)
puts "SQL: #{sql}"
cursor.query sql do |result_set|
result_set.each do
mujeres = result_set.read(Int32)
hombres = result_set.read(Int32)
puts "frecuencias: #{mujeres} #{hombres}"
if hombres == mujeres == 0
feminidad = 0.5
else
feminidad = mujeres / (hombres + mujeres)
end
end
end
# El overlap en 0.5 es intencional!
if (feminidad >= 0.5 && genero == "f") ||
(feminidad <= 0.5 && genero == "m")
false
else
true
end
}
puts "Data split by gender"
end
end
datos = datos[..10]
if datos.size > 1
2023-06-04 20:39:03 +00:00
title = "¿Puede ser ... #{datos[0][1].to_s.titleize}? ¿O capaz que #{datos[1][1].to_s.titleize}? ¡Contame más!"
elsif datos.size == 1
title = "Me parece que ... #{datos[0][1].to_s.titleize}!"
else
title = "No tengo idea!"
end
buffer = IO::Memory.new
Ishi.new(buffer) do
x = (0..datos.size - 1).to_a
2024-05-15 22:47:54 +00:00
y = datos.map { |row|
row[0].to_f / 1000
}
2023-06-05 19:02:54 +00:00
yrange(0..(y.max*1.1).to_i + 1)
xtics = Hash(Float64, String).new
2024-05-15 22:47:54 +00:00
datos.each_with_index { |row, i|
xtics[i.to_f] = row[1].to_s.titleize
}
canvas_size(800, 300)
plot(x, y, style: :boxes, fs: 0.25)
.boxwidth(0.5)
.show_key(false)
.ylabel("Popularidad (miles)")
.xtics(xtics)
end
{
body: format_buffer(buffer, "busqueda", title),
status_code: 200,
headers: HTTP::Headers{"Content-Type" => "text/html"},
}
end
end