Compare commits

...

6 Commits

Author SHA1 Message Date
ea9e65a20b Make split by gender faster 2024-05-15 21:22:12 -03:00
19f25017f0 Make split by gender faster 2024-05-15 21:15:34 -03:00
b05327fd8d Make split by gender faster 2024-05-15 20:58:16 -03:00
9a84e979ac Make split by gender faster 2024-05-15 20:57:00 -03:00
539c0839db Make split by gender faster 2024-05-15 20:50:05 -03:00
cb9a24f4aa Make split by gender faster 2024-05-15 20:48:29 -03:00

View File

@@ -42,27 +42,6 @@ class Handler
html.gsub("gnuplot_canvas", canvas_name)
end
def query(sql)
  # Executes `sql` on the "nombres" Postgres database and collects each
  # row as a pair of Int32 columns.
  #
  # Returns an array of tuples [{year, count}, ...] built from the first
  # two columns of every row, read in column order.
  #
  # NOTE(review): the array is returned from inside the query block even
  # when it is empty, so the trailing nil looks reachable only if that
  # block never runs - confirm against the DB library.
  DB.open("postgres://#{USER}:#{PASS}@10.61.0.1:5432/nombres") do |db|
    db.query sql do |rows|
      pairs = [] of Tuple(Int32, Int32)
      rows.each do
        # Columns must be consumed left to right: year first, then count.
        anio = rows.read(Int32)
        cantidad = rows.read(Int32)
        pairs << {anio, cantidad}
      end
      return pairs
    end
  end
  # Fallback when nothing was returned above.
  nil
end
def normalize_name(s)
# Remove diacritics, turn lowercase
normalized = s.unicode_normalize(:nfkd).chars
@@ -71,13 +50,12 @@ class Handler
}.join("").downcase
end
def feminidad(nombre)
def feminidad(cursor, nombre)
# Yes this database is upper case
nombre = nombre.to_s.upcase
sql1 = %(
SELECT COALESCE(frecuencia,0)
FROM mujeres WHERE nombre='#{nombre}'
)
sql2 = %(
SELECT COALESCE(frecuencia,0)
@@ -85,38 +63,36 @@ class Handler
)
hombres = mujeres = 0
DB.open("postgres://#{USER}:#{PASS}@10.61.0.1:5432/nombres") do |cursor|
cursor.query sql1 do |result|
mujeres = result.read(Int32)
end
cursor.query sql2 do |result|
hombres = result.read(Int32)
end
end
if hombres == mujeres == 0
return 0.5
end
mujeres / (hombres + mujeres)
end
def split_por_genero(nombres)
  # Sorts name entries into feminine and masculine buckets.
  #
  # nombres: collection of {Int32, String} tuples; the String at index 1
  #          is passed to `feminidad` to obtain a score.
  #
  # Returns a named tuple with keys "f" and "m", each holding the
  # entries that landed in that bucket, in input order.
  femeninos = [] of Tuple(Int32, String)
  masculinos = [] of Tuple(Int32, String)
  nombres.each do |entrada|
    puntaje = feminidad(entrada[1])
    # The overlap at 0.5 is intentional: an entry scoring exactly 0.5
    # is added to both buckets.
    femeninos.push(entrada) if puntaje >= 0.5
    masculinos.push(entrada) if puntaje <= 0.5
  end
  {
    "f": femeninos,
    "m": masculinos,
  }
end
# def split_por_genero(cursor, nombres)
# femeninos = Array(Tuple(Int32, String)).new
# masculinos = Array(Tuple(Int32, String)).new
# nombres.map { |nombre|
# fem = feminidad(cursor, nombre[1])
# # El overlap en 0.5 es intencional!
# if fem >= 0.5
# femeninos << nombre
# end
# if fem <= 0.5
# masculinos << nombre
# end
# }
# {
# "f": femeninos,
# "m": masculinos,
# }
# end
def run(request : HTTP::Request)
# Try to find most popular names based on a prefix, year and gender.
@@ -198,10 +174,10 @@ class Handler
puts "QUERY: #{sql}"
datos = [] of Tuple(Int32, String)
DB.open("postgres://#{USER}:#{PASS}@10.61.0.1:5432/nombres") do |cursor|
DB.open(DB_URL) do |cursor|
cursor.query sql do |result_set|
puts "loop"
result_set.each do
puts "loop"
valor = result_set.read(Int32)
nombre = result_set.read(String)
datos.push({valor, nombre})
@@ -225,12 +201,38 @@ class Handler
row[1].to_s.includes? " "
}
if genero
datos = split_por_genero(datos)[genero]
DB.open(DB_URL) do |cursor|
filtered = Array(Tuple(Int32, String)).new
datos.map { |item|
# How feminine is this name?
# Yes this database is upper case
nombre = item[1].upcase
feminidad = 0
sql = %(
SELECT COALESCE((SELECT frecuencia FROM mujeres WHERE nombre='#{nombre}'), 0) AS mujeres,
COALESCE((SELECT frecuencia FROM hombres WHERE nombre='#{nombre}'), 0) AS hombres
)
cursor.query sql do |result|
mujeres = result.read(Int32)
hombres = result.read(Int32)
if hombres == mujeres == 0
feminidad = 0.5
else
feminidad = mujeres / (hombres + mujeres)
end
end
# El overlap en 0.5 es intencional!
if feminidad >= 0.5 && genero == "f"
filtered << item
elsif feminidad <= 0.5 && genero == "m"
filtered << item
end
}
datos = filtered
puts "Data split by gender"
end
end
datos = datos[..10]
if datos.size > 1