From 76e16ca803f7c2a9f5a8e8e87b72de4b5f2ddc19 Mon Sep 17 00:00:00 2001 From: Roberto Alsina Date: Wed, 15 May 2024 20:48:29 -0300 Subject: [PATCH] Make split by gender faster --- c-busqueda/handler.cr | 112 ++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 75 deletions(-) diff --git a/c-busqueda/handler.cr b/c-busqueda/handler.cr index 10ff6df..4a7b5b3 100644 --- a/c-busqueda/handler.cr +++ b/c-busqueda/handler.cr @@ -12,6 +12,9 @@ PASS = File.read("/var/openfaas/secrets/nombres-pass").strip DB_URL = "postgres://#{USER}:#{PASS}@10.61.0.1:5432/nombres" class Handler + # This class is the entry point for the OpenFaaS function. + # run() is the important bit + def format_buffer(buffer, canvas_name, title = "") # Process the gnuplot output so it works in the page # @@ -42,27 +45,6 @@ class Handler html.gsub("gnuplot_canvas", canvas_name) end - def query(sql) - # Runs a SQL query against the database. - # - # Returns an array of values [[Year,Count]...] - # Or nil if there are no results - - DB.open("postgres://#{USER}:#{PASS}@10.61.0.1:5432/nombres") do |cursor| - cursor.query sql do |result_set| - result = [] of Tuple(Int32, Int32) - result_set.each do - year = result_set.read(Int32) - contador = result_set.read(Int32) - result.push({year, contador}) - end - return result - end - end - # No result, return nil - nil - end - def normalize_name(s) # Remove diacritics, turn lowercase normalized = s.unicode_normalize(:nfkd).chars @@ -71,53 +53,6 @@ class Handler }.join("").downcase end - def feminidad(nombre) - # Yes this database is upper case - nombre = nombre.to_s.upcase - sql1 = %( - SELECT COALESCE(frecuencia,0) - FROM mujeres WHERE nombre='#{nombre}' - - ) - sql2 = %( - SELECT COALESCE(frecuencia,0) - FROM hombres WHERE nombre='#{nombre}' - ) - - hombres = mujeres = 0 - DB.open("postgres://#{USER}:#{PASS}@10.61.0.1:5432/nombres") do |cursor| - cursor.query sql1 do |result| - mujeres = result.read(Int32) - end - cursor.query sql2 do |result| - hombres = result.read(Int32) - end - end - if hombres == mujeres == 0 - return 0.5 - end - mujeres / (hombres + mujeres) - end - - def split_por_genero(nombres) - femeninos = Array(Tuple(Int32, String)).new - masculinos = Array(Tuple(Int32, String)).new - nombres.map { |nombre| - fem = feminidad(nombre[1]) - # El overlap en 0.5 es intencional! - if fem >= 0.5 - femeninos << nombre - end - if fem <= 0.5 - masculinos << nombre - end - } - { - "f": femeninos, - "m": masculinos, - } - end - def run(request : HTTP::Request) # Try to find most popular names based on a prefix, year and gender. # @@ -155,7 +90,6 @@ class Handler if prefijo.nil? && year.nil? # Global totals - # FIXME: SLOW sql = %( SELECT total::integer, nombre FROM totales @@ -198,9 +132,8 @@ class Handler puts "QUERY: #{sql}" datos = [] of Tuple(Int32, String) - DB.open("postgres://#{USER}:#{PASS}@10.61.0.1:5432/nombres") do |cursor| + DB.open(DB_URL) do |cursor| cursor.query sql do |result_set| - puts "loop" result_set.each do valor = result_set.read(Int32) nombre = result_set.read(String) @@ -225,12 +158,41 @@ class Handler row[1].to_s.includes? " " } - if genero - datos = split_por_genero(datos)[genero] - puts "Data split by gender" + DB.open(DB_URL) do |cursor| + datos.reject! { |row| + # How feminine is this name? + # Yes this database is upper case + puts "Checking #{row[0]} #{row[1]}" + feminidad = 0 + sql = %( + SELECT COALESCE((SELECT frecuencia FROM mujeres WHERE nombre='#{row[1]?.to_s.upcase}'), 0) AS mujeres, + COALESCE((SELECT frecuencia FROM hombres WHERE nombre='#{row[1]?.to_s.upcase}'), 0) AS hombres + ) + puts "SQL: #{sql}" + cursor.query sql do |result_set| + result_set.each do + mujeres = result_set.read(Int32) + hombres = result_set.read(Int32) + puts "frecuencias: #{mujeres} #{hombres}" + if hombres == mujeres == 0 + feminidad = 0.5 + else + feminidad = mujeres / (hombres + mujeres) + end + end + end + # El overlap en 0.5 es intencional! + if (feminidad >= 0.5 && genero == "f") || + (feminidad <= 0.5 && genero == "m") + false + else + true + end + } + puts "Data split by gender" + end end - datos = datos[..10] if datos.size > 1