Make split by gender faster
This commit is contained in:
parent
e4c44dcae1
commit
76e16ca803
@ -12,6 +12,9 @@ PASS = File.read("/var/openfaas/secrets/nombres-pass").strip
|
||||
DB_URL = "postgres://#{USER}:#{PASS}@10.61.0.1:5432/nombres"
|
||||
|
||||
class Handler
|
||||
# This class is the entry point for the OpenFaaS function.
|
||||
# run() is the important bit
|
||||
|
||||
def format_buffer(buffer, canvas_name, title = "")
|
||||
# Process the gnuplot output so it works in the page
|
||||
#
|
||||
@ -42,27 +45,6 @@ class Handler
|
||||
html.gsub("gnuplot_canvas", canvas_name)
|
||||
end
|
||||
|
||||
def query(sql)
  # Runs a SQL query against the database.
  #
  # sql: a statement selecting two integer columns per row, read in order
  #      as (year, count).
  #
  # Returns an array of tuples [{year, count}, ...]. When the query matches
  # no rows the array is empty (nil is never returned).
  rows = [] of Tuple(Int32, Int32)
  DB.open(DB_URL) do |cursor|
    # query_all reads each row as a {Int32, Int32} tuple, in column order.
    rows = cursor.query_all(sql, as: {Int32, Int32})
  end
  rows
end
|
||||
|
||||
def normalize_name(s)
|
||||
# Remove diacritics, turn lowercase
|
||||
normalized = s.unicode_normalize(:nfkd).chars
|
||||
@ -71,53 +53,6 @@ class Handler
|
||||
}.join("").downcase
|
||||
end
|
||||
|
||||
def feminidad(nombre)
  # Estimates how feminine a name is from its recorded frequencies.
  #
  # nombre: the name to look up. It is converted to a string and upcased,
  #         because this database stores names in upper case.
  #
  # Returns mujeres / (hombres + mujeres) as a Float64, where both values
  # are the `frecuencia` of the name in the `mujeres` and `hombres` tables.
  # Returns 0.5 when both frequencies are 0.
  nombre = nombre.to_s.upcase

  sql_mujeres = %(
    SELECT COALESCE(frecuencia,0)
    FROM mujeres WHERE nombre=$1
    LIMIT 1
  )
  sql_hombres = %(
    SELECT COALESCE(frecuencia,0)
    FROM hombres WHERE nombre=$1
    LIMIT 1
  )

  hombres = mujeres = 0
  DB.open(DB_URL) do |cursor|
    # The name is bound as a parameter instead of being interpolated, so a
    # quote inside it cannot change the statement.
    # query_one? yields nil when the name has no row in the table; that
    # case counts as a frequency of 0.
    mujeres = cursor.query_one?(sql_mujeres, nombre, as: Int32) || 0
    hombres = cursor.query_one?(sql_hombres, nombre, as: Int32) || 0
  end

  # Unknown name: no evidence either way.
  return 0.5 if hombres == 0 && mujeres == 0

  # Force floating-point division so the ratio is never truncated.
  mujeres.to_f / (hombres + mujeres)
end
|
||||
|
||||
def split_por_genero(nombres)
  # Splits a list of {count, name} tuples into feminine and masculine lists.
  #
  # Each entry is scored with feminidad(name). Entries scoring >= 0.5 go to
  # the "f" list and entries scoring <= 0.5 go to the "m" list, preserving
  # the input order.
  #
  # Returns a named tuple with keys "f" and "m".
  lista_f = [] of Tuple(Int32, String)
  lista_m = [] of Tuple(Int32, String)

  nombres.each do |entrada|
    puntaje = feminidad(entrada[1])
    # The overlap at 0.5 is intentional: such a name lands in both lists.
    lista_f.push(entrada) if puntaje >= 0.5
    lista_m.push(entrada) if puntaje <= 0.5
  end

  {
    "f": lista_f,
    "m": lista_m,
  }
end
|
||||
|
||||
def run(request : HTTP::Request)
|
||||
# Try to find most popular names based on a prefix, year and gender.
|
||||
#
|
||||
@ -155,7 +90,6 @@ class Handler
|
||||
|
||||
if prefijo.nil? && year.nil?
|
||||
# Global totals
|
||||
# FIXME: SLOW
|
||||
sql = %(
|
||||
SELECT total::integer, nombre
|
||||
FROM totales
|
||||
@ -198,9 +132,8 @@ class Handler
|
||||
puts "QUERY: #{sql}"
|
||||
|
||||
datos = [] of Tuple(Int32, String)
|
||||
DB.open("postgres://#{USER}:#{PASS}@10.61.0.1:5432/nombres") do |cursor|
|
||||
DB.open(DB_URL) do |cursor|
|
||||
cursor.query sql do |result_set|
|
||||
puts "loop"
|
||||
result_set.each do
|
||||
valor = result_set.read(Int32)
|
||||
nombre = result_set.read(String)
|
||||
@ -225,12 +158,41 @@ class Handler
|
||||
row[1].to_s.includes? " "
|
||||
}
|
||||
|
||||
|
||||
if genero
|
||||
datos = split_por_genero(datos)[genero]
|
||||
puts "Data split by gender"
|
||||
DB.open(DB_URL) do |cursor|
|
||||
datos.reject! { |row|
|
||||
# How feminine is this name?
|
||||
# Yes this database is upper case
|
||||
puts "Checking #{row[0]} #{row[1]}"
|
||||
feminidad = 0
|
||||
sql = %(
|
||||
SELECT COALESCE((SELECT frecuencia FROM mujeres WHERE nombre='#{row[1]?.to_s.upcase}'), 0) AS mujeres,
|
||||
COALESCE((SELECT frecuencia FROM hombres WHERE nombre='#{row[1]?.to_s.upcase}'), 0) AS hombres
|
||||
)
|
||||
puts "SQL: #{sql}"
|
||||
cursor.query sql do |result_set|
|
||||
result_set.each do
|
||||
mujeres = result_set.read(Int32)
|
||||
hombres = result_set.read(Int32)
|
||||
puts "frecuencias: #{mujeres} #{hombres}"
|
||||
if hombres == mujeres == 0
|
||||
feminidad = 0.5
|
||||
else
|
||||
feminidad = mujeres / (hombres + mujeres)
|
||||
end
|
||||
end
|
||||
end
|
||||
# El overlap en 0.5 es intencional!
|
||||
if (feminidad >= 0.5 && genero == "f") ||
|
||||
(feminidad <= 0.5 && genero == "m")
|
||||
false
|
||||
else
|
||||
true
|
||||
end
|
||||
}
|
||||
puts "Data split by gender"
|
||||
end
|
||||
end
|
||||
|
||||
datos = datos[..10]
|
||||
|
||||
if datos.size > 1
|
||||
|
Loading…
Reference in New Issue
Block a user