Skip to content

Commit

Permalink
optimizing queries
Browse files Browse the repository at this point in the history
  • Loading branch information
rafapereirabr committed Jan 6, 2025
1 parent 51bdacd commit cb5a5b6
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 20 deletions.
8 changes: 4 additions & 4 deletions R/geocode_rafa_local.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ geocode_rafa_local <- function(addresses_table,
state_select <- paste0("'", gsub("'", "''", input_states), "'", collapse = ", ")

query_filter_cnefe <- glue::glue("
CREATE OR REPLACE TEMPORARY TABLE filtered_cnefe AS
CREATE TEMPORARY TABLE filtered_cnefe AS
SELECT estado, municipio, logradouro_sem_numero, numero, cep, localidade, lat, lon
FROM logradouro_numero_cep_localidade
WHERE estado IN ({state_select}) AND municipio IN ({muni_select});"
Expand Down Expand Up @@ -222,7 +222,7 @@ geocode_rafa_local <- function(addresses_table,
all_output_tbs <- output_tables[!grepl('empty', output_tables)]

# save output to db
output_query <- paste("CREATE OR REPLACE TEMPORARY VIEW output_db AS",
output_query <- paste("CREATE TEMPORARY VIEW output_db AS",
paste0("SELECT ", paste0('*', " FROM ", all_output_tbs),
collapse = " UNION ALL ")
)
Expand Down Expand Up @@ -464,7 +464,7 @@ geocode_rafa_local2 <- function(addresses_table,
all_output_tbs <- output_tables[!grepl('empty', output_tables)]

# save output to db
output_query <- paste("CREATE OR REPLACE TEMPORARY VIEW output_db AS",
output_query <- paste("CREATE TEMPORARY VIEW output_db AS",
paste0("SELECT ", paste0('*', " FROM ", all_output_tbs),
collapse = " UNION ALL ")
)
Expand Down Expand Up @@ -693,7 +693,7 @@ geocode_rafa_local_arrow <- function(addresses_table,
all_output_tbs <- output_tables[!grepl('empty', output_tables)]

# save output to db
output_query <- paste("CREATE OR REPLACE TEMPORARY VIEW output_db AS",
output_query <- paste("CREATE TEMPORARY VIEW output_db AS",
paste0("SELECT ", paste0('*', " FROM ", all_output_tbs),
collapse = " UNION ALL ")
)
Expand Down
26 changes: 21 additions & 5 deletions R/match_aggregated_cases.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
#' @keywords internal
match_aggregated_cases <- function(con, x, y, output_tb, key_cols, match_type){

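# Benchmark timings for each table/view combination.
# NOTE(review): units and the meaning of each word are not recorded here --
# presumably seconds, with the first word describing pre_aggregated_cnefe and
# the second the output table; TODO confirm.
#   table / table: 7.993404
#   view  / table: 7.047993
#   table / view:  8.874689
#   view  / view:  7.780372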

# Build the dynamic select and group statement
cols_select <- paste0(paste(key_cols, collapse = ", "),",")
cols_group <- paste(key_cols, collapse = ", ")
Expand Down Expand Up @@ -45,7 +50,7 @@ match_aggregated_cases <- function(con, x, y, output_tb, key_cols, match_type){

# query for left join
query_match <- glue::glue(
"CREATE OR REPLACE TEMPORARY TABLE {output_tb} AS
"CREATE TEMPORARY TABLE {output_tb} AS
SELECT {x}.id, pre_aggregated_cnefe.lon, pre_aggregated_cnefe.lat, {match_type} as match_type
FROM {x}
LEFT JOIN pre_aggregated_cnefe
Expand Down Expand Up @@ -94,13 +99,18 @@ match_aggregated_cases <- function(con, x, y, output_tb, key_cols, match_type){
#' @keywords internal
match_aggregated_cases_local <- function(con, x, y, output_tb, key_cols, match_type){

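# Benchmark timings for each table/view combination.
# NOTE(review): units and the meaning of each word are not recorded here --
# presumably seconds, with the first word describing pre_aggregated_cnefe and
# the second the output table; TODO confirm.
#   table / table:  8.904772
#   view  / table: 16.78523
#   table / view:  23.72793
#   view  / view:  25.58421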

# Build the dynamic select and group statement
cols_select <- paste0(paste(key_cols, collapse = ", "),",")
cols_group <- paste(key_cols, collapse = ", ")

# pre-aggregate cnefe
query_aggregate <- glue::glue(
"CREATE OR REPLACE TEMPORARY VIEW pre_aggregated_cnefe AS
"CREATE OR REPLACE TEMPORARY TABLE pre_aggregated_cnefe AS
SELECT {cols_select} AVG(lon) AS lon, AVG(lat) AS lat
FROM {y}
WHERE {y}.numero IS NOT NULL
Expand All @@ -126,7 +136,7 @@ match_aggregated_cases_local <- function(con, x, y, output_tb, key_cols, match_t

# query for left join
# TODO: decide whether the output should be a TEMPORARY VIEW instead of a TABLE
query_match <- glue::glue(
"CREATE OR REPLACE TEMPORARY TABLE {output_tb} AS
"CREATE TEMPORARY TABLE {output_tb} AS
SELECT {x}.id, pre_aggregated_cnefe.lon, pre_aggregated_cnefe.lat, {match_type} as match_type
FROM {x}
LEFT JOIN pre_aggregated_cnefe
Expand Down Expand Up @@ -183,6 +193,12 @@ match_aggregated_cases_local2 <- function(con,
input_municipio
){

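# Benchmark timings for each table/view combination.
# NOTE(review): units and the meaning of each word are not recorded here --
# presumably seconds, with the second word describing the output of the
# left join (table vs. view); TODO confirm what the first word refers to.
#   table / table: 13
#   view  / table:  8.4
#   table / view:  20.61975
#   view  / view:   6.269719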


table_name <- paste(key_cols, collapse = "_")
table_name <- gsub('estado_municipio_logradouro_sem_numero', 'logradouro', table_name)
y <- table_name
Expand Down Expand Up @@ -222,7 +238,7 @@ match_aggregated_cases_local2 <- function(con,

# query for left join
query_match <- glue::glue(
"CREATE OR REPLACE TEMPORARY TABLE {output_tb} AS
"CREATE TEMPORARY VIEW {output_tb} AS
SELECT {x}.id, filtered_cnefe.lon, filtered_cnefe.lat, {match_type} as match_type
FROM {x}
LEFT JOIN filtered_cnefe
Expand Down Expand Up @@ -318,7 +334,7 @@ match_aggregated_cases_local_arrow <- function(con,

# query for left join
query_match <- glue::glue(
"CREATE OR REPLACE TEMPORARY TABLE {output_tb} AS
"CREATE TEMPORARY TABLE {output_tb} AS
SELECT {x}.id, filtered_cnefe.lon, filtered_cnefe.lat, {match_type} as match_type
FROM {x}
LEFT JOIN filtered_cnefe
Expand Down
41 changes: 30 additions & 11 deletions tests/tests_rafa/benchmark_LIKE.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ ncores <- 7



rafa_loop <- function(){
rafa_loop <- function(){ message('rafa_loop')
fields <- geocodebr::setup_address_fields(
logradouro = 'logradouro',
numero = 'numero',
Expand All @@ -94,7 +94,7 @@ rafa_loop <- function(){
}


rafa_loc <- function(){
rafa_loc <- function(){ message('rafa_loc')
fields <- geocodebr::setup_address_fields(
logradouro = 'logradouro',
numero = 'numero',
Expand All @@ -115,7 +115,7 @@ rafa_loc <- function(){



rafa_loc2 <- function(){
rafa_loc2 <- function(){ message('rafa_loc2')
fields <- geocodebr::setup_address_fields(
logradouro = 'logradouro',
numero = 'numero',
Expand All @@ -135,7 +135,7 @@ rafa_loc2 <- function(){
}


rafa_loc_arrow <- function(){
rafa_loc_arrow <- function(){ message('rafa_loc_arrow')
fields <- geocodebr::setup_address_fields(
logradouro = 'logradouro',
numero = 'numero',
Expand All @@ -154,7 +154,7 @@ rafa_loc_arrow <- function(){
)
}

dani <- function(){
dani <- function(){ message('dani')
fields <- geocodebr::setup_address_fields(
logradouro = 'logradouro',
numero = 'numero',
Expand Down Expand Up @@ -464,7 +464,13 @@ d <- cnf |>



# 5.314673
# 5.467121
#expr min lq mean median uq max neval
#rafa_arrow_many_tab 4.926556 4.951964 5.314673 5.088485 5.625931 5.980431 5
#rafa_arrow_many_tab 4.944741 5.340474 5.467121 5.521033 5.742732 5.786627 5

devtools::load_all('.')
library(data.table)

mb01 <- microbenchmark::microbenchmark(
Expand All @@ -475,9 +481,16 @@ mb01 <- microbenchmark::microbenchmark(
rafa_arrow_many_tab = rafa_loc_arrow(),
# dani_L = dani_like(),
# rafa_like = rafa_like(),
times = 10
times = 5
)

# expr min lq mean median uq max neval
# dani 10.375007 12.247182 12.237117 12.800108 12.845822 12.917467 5
# rafa 7.327960 7.402672 8.153484 7.655536 8.580643 9.800608 5
# rafa_db_1tab 7.206572 9.197473 11.098398 11.685918 13.108194 14.293833 5
# rafa_db_many_tab 3.354288 4.821311 4.743353 4.932382 5.204425 5.404357 5
# rafa_arrow_many_tab 5.585149 5.673980 5.896249 5.811516 5.984594 6.426007 5

mb02 <- microbenchmark::microbenchmark(
dani = dani(),
rafa = rafa_loop(),
Expand All @@ -486,7 +499,7 @@ mb02 <- microbenchmark::microbenchmark(
rafa_arrow_many_tab = rafa_loc_arrow(),
# dani_L = dani_like(),
# rafa_like = rafa_like(),
times = 10
times = 5
)


Expand All @@ -498,7 +511,7 @@ mb03 <- microbenchmark::microbenchmark(
rafa_arrow_many_tab = rafa_loc_arrow(),
# dani_L = dani_like(),
# rafa_like = rafa_like(),
times = 10
times = 5
)

mb04 <- microbenchmark::microbenchmark(
Expand All @@ -509,7 +522,7 @@ mb04 <- microbenchmark::microbenchmark(
rafa_arrow_many_tab = rafa_loc_arrow(),
# dani_L = dani_like(),
# rafa_like = rafa_like(),
times = 10
times = 5
)

mb05 <- microbenchmark::microbenchmark(
Expand All @@ -520,7 +533,7 @@ mb05 <- microbenchmark::microbenchmark(
rafa_arrow_many_tab = rafa_loc_arrow(),
# dani_L = dani_like(),
# rafa_like = rafa_like(),
times = 10
times = 5
)

mb06 <- microbenchmark::microbenchmark(
Expand All @@ -531,7 +544,7 @@ mb06 <- microbenchmark::microbenchmark(
rafa_arrow_many_tab = rafa_loc_arrow(),
# dani_L = dani_like(),
# rafa_like = rafa_like(),
times = 10
times = 5
)

get_df <- function(mb, round){
Expand All @@ -557,3 +570,9 @@ df <- data.table::rbindlist(
df_mb05,
df_mb06)
)

library(ggplot2)

# Line chart of the benchmark results collected in `df`: one coloured line per
# `expr`, with `round` treated as a discrete x axis and `V1` on the y axis.
# NOTE(review): `V1` is presumably the timing summary built by get_df() --
# confirm against that helper.
ggplot() +
  geom_line(
    data = df,
    aes(
      x = factor(round),
      y = V1,
      group = expr,
      color = expr
    )
  )

0 comments on commit cb5a5b6

Please sign in to comment.