diff --git a/manage.py b/manage.py index 9e4f48f..c9dfcc3 100755 --- a/manage.py +++ b/manage.py @@ -165,7 +165,7 @@ def dependency_graph(files): for object_name in re.findall(r'\b(?:FROM|JOIN)\s+(\w+)', content, flags=re.MULTILINE): imports[identifier].add(object_name) imports[identifier].update(imports_from_function) - for object_name in re.findall(r'\bWITH\s+(\w+)\s+AS', content, flags=re.MULTILINE): + for object_name in re.findall(r'(?:,\n|\bWITH\s+)(\w+)\s+AS', content, flags=re.MULTILINE): imports[identifier].discard(object_name) imports[identifier].difference_update(exports | ignore) diff --git a/pyproject.toml b/pyproject.toml index 758cd12..af405de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,17 +45,14 @@ log_level = "INFO" [tool.sqlfluff.core] dialect = "postgres" large_file_skip_byte_limit = 40000 -max_line_length = 100 exclude_rules = [ + # Preferences "ST07", # structure.using - - # TBD + # Preserve existing column order "L034", # structure.column_order - "ST05", # structure.subquery - "LT05", # layout.long_lines - - # Unfixable + # Require (some) manual fixes "AM04", # ambiguous.column_count + "LT05", # layout.long_lines "RF02", # references.qualification "RF04", # references.keywords ] diff --git a/sql/middle/agg_parties.sql b/sql/middle/agg_parties.sql index 35643f6..762c4fd 100644 --- a/sql/middle/agg_parties.sql +++ b/sql/middle/agg_parties.sql @@ -1,19 +1,5 @@ CREATE TABLE tmp_release_party_aggregates AS -SELECT - id, - parties_role_counts, - total_parties_roles, - total_parties -FROM ( - SELECT - id, - count(*) AS total_parties - FROM - parties_summary - GROUP BY - id -) AS total_parties -LEFT JOIN ( +WITH parties_role_counts AS ( SELECT id, sum(total_parties_roles) AS total_parties_roles, @@ -33,6 +19,22 @@ LEFT JOIN ( ) AS id_role GROUP BY id -) AS parties_role_counts USING (id); +) + +SELECT + id, + parties_role_counts, + total_parties_roles, + total_parties +FROM ( + SELECT + id, + count(*) AS total_parties + FROM + parties_summary + GROUP BY + id +) AS total_parties +LEFT JOIN parties_role_counts USING (id); CREATE UNIQUE INDEX tmp_release_party_aggregates_id ON tmp_release_party_aggregates (id); diff --git a/sql/middle/awards_summary.sql b/sql/middle/awards_summary.sql index 128b81b..f45c34e 100644 --- a/sql/middle/awards_summary.sql +++ b/sql/middle/awards_summary.sql @@ -1,4 +1,40 @@ CREATE TABLE awards_summary_no_data AS +WITH document_documenttype_counts AS ( + SELECT + id, + award_index, + jsonb_object_agg(coalesce(documenttype, ''), total_documenttypes) AS document_documenttype_counts, + count(*) AS total_documents + FROM ( + SELECT + id, + award_index, + documenttype, + count(*) AS total_documenttypes + FROM + award_documents_summary + GROUP BY + id, + award_index, + documenttype + ) AS d + GROUP BY + id, + award_index +), + +items_counts AS ( + SELECT + id, + award_index, + count(*) AS total_items + FROM + award_items_summary + GROUP BY + id, + award_index +) + SELECT r.id, r.award_index, @@ -30,40 +66,8 @@ SELECT total_items FROM tmp_awards_summary AS r -LEFT JOIN ( - SELECT - id, - award_index, - jsonb_object_agg(coalesce(documenttype, ''), total_documenttypes) AS document_documenttype_counts, - count(*) AS total_documents - FROM ( - SELECT - id, - award_index, - documenttype, - count(*) AS total_documenttypes - FROM - award_documents_summary - GROUP BY - id, - award_index, - documenttype - ) AS d - GROUP BY - id, - award_index -) AS document_documenttype_counts USING (id, award_index) -LEFT JOIN ( - SELECT - id, - award_index, - count(*) AS total_items - FROM - award_items_summary - GROUP BY - id, - award_index -) AS items_counts USING (id, award_index); +LEFT JOIN document_documenttype_counts USING (id, award_index) +LEFT JOIN items_counts USING (id, award_index); CREATE UNIQUE INDEX awards_summary_no_data_id ON awards_summary_no_data (id, award_index); diff --git a/sql/middle/contracts_summary.sql b/sql/middle/contracts_summary.sql index 6ea0d06..c902f69 100644 --- a/sql/middle/contracts_summary.sql +++ b/sql/middle/contracts_summary.sql @@ -1,41 +1,11 @@ -CREATE TABLE contracts_summary_no_data AS SELECT DISTINCT ON (r.id, r.contract_index) - r.id, - r.contract_index, - r.release_type, - r.collection_id, - r.ocid, - r.release_id, - r.data_id, - r.awardid, - (aws.awardid IS NOT NULL)::integer AS link_to_awards, - contract ->> 'id' AS contract_id, - contract ->> 'title' AS title, - contract ->> 'status' AS status, - contract ->> 'description' AS description, - convert_to_numeric(contract -> 'value' ->> 'amount') AS value_amount, - contract -> 'value' ->> 'currency' AS value_currency, - convert_to_timestamp(contract ->> 'dateSigned') AS datesigned, - convert_to_timestamp(contract -> 'period' ->> 'startDate') AS period_startdate, - convert_to_timestamp(contract -> 'period' ->> 'endDate') AS period_enddate, - convert_to_timestamp(contract -> 'period' ->> 'maxExtentDate') AS period_maxextentdate, - convert_to_numeric(contract -> 'period' ->> 'durationInDays') AS period_durationindays, - document_documenttype_counts.total_documents, - document_documenttype_counts.document_documenttype_counts, - total_milestones, - milestone_type_counts, - items_counts.total_items, - total_implementation_documents, - implementation_document_documenttype_counts, - total_implementation_milestones, - implementation_milestone_type_counts, - total_implementation_transactions -FROM - tmp_contracts_summary AS r -LEFT JOIN (SELECT - award_id AS awardid, - * -FROM awards_summary) AS aws USING (id, awardid) -LEFT JOIN ( +CREATE TABLE contracts_summary_no_data AS WITH aws AS ( + SELECT + award_id AS awardid, + * + FROM awards_summary +), + +items_counts AS ( SELECT id, contract_index, @@ -45,8 +15,9 @@ LEFT JOIN ( GROUP BY id, contract_index -) AS items_counts USING (id, contract_index) -LEFT JOIN ( +), + +implementation_transactions_counts AS ( SELECT id, contract_index, @@ -56,8 +27,9 @@ LEFT JOIN ( GROUP BY id, contract_index -) AS implementation_transactions_counts USING (id, contract_index) -LEFT JOIN ( +), + +document_documenttype_counts AS ( SELECT id, contract_index, @@ -79,8 +51,9 @@ LEFT JOIN ( GROUP BY id, contract_index -) AS document_documenttype_counts USING (id, contract_index) -LEFT JOIN ( +), + +implementation_document_documenttype_counts AS ( SELECT id, contract_index, @@ -102,8 +75,9 @@ LEFT JOIN ( GROUP BY id, contract_index -) AS implementation_document_documenttype_counts USING (id, contract_index) -LEFT JOIN ( +), + +milestone_type_counts AS ( SELECT id, contract_index, @@ -125,8 +99,9 @@ LEFT JOIN ( GROUP BY id, contract_index -) AS milestone_type_counts USING (id, contract_index) -LEFT JOIN ( +), + +implementation_milestone_type_counts AS ( SELECT id, contract_index, @@ -148,7 +123,48 @@ LEFT JOIN ( GROUP BY id, contract_index -) AS implementation_milestone_type_counts USING (id, contract_index); +) + +SELECT DISTINCT ON (r.id, r.contract_index) + r.id, + r.contract_index, + r.release_type, + r.collection_id, + r.ocid, + r.release_id, + r.data_id, + r.awardid, + (aws.awardid IS NOT NULL)::integer AS link_to_awards, + contract ->> 'id' AS contract_id, + contract ->> 'title' AS title, + contract ->> 'status' AS status, + contract ->> 'description' AS description, + convert_to_numeric(contract -> 'value' ->> 'amount') AS value_amount, + contract -> 'value' ->> 'currency' AS value_currency, + convert_to_timestamp(contract ->> 'dateSigned') AS datesigned, + convert_to_timestamp(contract -> 'period' ->> 'startDate') AS period_startdate, + convert_to_timestamp(contract -> 'period' ->> 'endDate') AS period_enddate, + convert_to_timestamp(contract -> 'period' ->> 'maxExtentDate') AS period_maxextentdate, + convert_to_numeric(contract -> 'period' ->> 'durationInDays') AS period_durationindays, + document_documenttype_counts.total_documents, + document_documenttype_counts.document_documenttype_counts, + total_milestones, + milestone_type_counts, + items_counts.total_items, + total_implementation_documents, + implementation_document_documenttype_counts, + total_implementation_milestones, + implementation_milestone_type_counts, + total_implementation_transactions +FROM + tmp_contracts_summary AS r +LEFT JOIN aws USING (id, awardid) +LEFT JOIN items_counts USING (id, contract_index) +LEFT JOIN implementation_transactions_counts USING (id, contract_index) +LEFT JOIN document_documenttype_counts USING (id, contract_index) +LEFT JOIN implementation_document_documenttype_counts USING (id, contract_index) +LEFT JOIN milestone_type_counts USING (id, contract_index) +LEFT JOIN implementation_milestone_type_counts USING (id, contract_index); CREATE UNIQUE INDEX contracts_summary_no_data_id ON contracts_summary_no_data (id, contract_index); diff --git a/sql/middle/planning_summary.sql b/sql/middle/planning_summary.sql index 97afea6..ef843bd 100644 --- a/sql/middle/planning_summary.sql +++ b/sql/middle/planning_summary.sql @@ -1,21 +1,5 @@ CREATE TABLE planning_summary_no_data AS -SELECT - r.id, - r.release_type, - r.collection_id, - r.ocid, - r.release_id, - r.data_id, - convert_to_numeric(planning -> 'budget' -> 'amount' ->> 'amount') AS budget_amount_amount, - planning -> 'budget' -> 'amount' ->> 'currency' AS budget_amount_currency, - planning -> 'budget' ->> 'projectID' AS budget_projectid, - total_documents, - document_documenttype_counts, - total_milestones, - milestone_type_counts -FROM - tmp_planning_summary AS r -LEFT JOIN ( +WITH document_documenttype_counts AS ( SELECT id, jsonb_object_agg(coalesce(documenttype, ''), total_documenttypes) AS document_documenttype_counts, @@ -33,8 +17,9 @@ LEFT JOIN ( ) AS d GROUP BY id -) AS document_documenttype_counts USING (id) -LEFT JOIN ( +), + +milestone_type_counts AS ( SELECT id, jsonb_object_agg(coalesce(type, ''), total_milestonetypes) AS milestone_type_counts, @@ -52,7 +37,26 @@ LEFT JOIN ( ) AS d GROUP BY id -) AS milestone_type_counts USING (id); +) + +SELECT + r.id, + r.release_type, + r.collection_id, + r.ocid, + r.release_id, + r.data_id, + convert_to_numeric(planning -> 'budget' -> 'amount' ->> 'amount') AS budget_amount_amount, + planning -> 'budget' -> 'amount' ->> 'currency' AS budget_amount_currency, + planning -> 'budget' ->> 'projectID' AS budget_projectid, + total_documents, + document_documenttype_counts, + total_milestones, + milestone_type_counts +FROM + tmp_planning_summary AS r +LEFT JOIN document_documenttype_counts USING (id) +LEFT JOIN milestone_type_counts USING (id); CREATE UNIQUE INDEX planning_summary_no_data_id ON planning_summary_no_data (id); diff --git a/sql/middle/release_summary.sql b/sql/middle/release_summary.sql index 916ecc5..ede1541 100644 --- a/sql/middle/release_summary.sql +++ b/sql/middle/release_summary.sql @@ -1,18 +1,14 @@ CREATE TABLE release_summary_no_data AS -SELECT * -FROM - tmp_release_summary -LEFT JOIN tmp_release_party_aggregates USING (id) -LEFT JOIN ( +WITH planning_summary AS ( SELECT id, total_documents AS total_planning_documents, total_milestones AS total_planning_milestones FROM planning_summary -) AS planning_summary USING (id) -LEFT JOIN tmp_planning_documents_aggregates USING (id) -LEFT JOIN ( +), + +tender_summary AS ( SELECT id, total_tenderers AS total_tender_tenderers, @@ -21,7 +17,15 @@ LEFT JOIN ( total_items AS total_tender_items FROM tender_summary -) AS tender_summary USING (id) +) + +SELECT * +FROM + tmp_release_summary +LEFT JOIN tmp_release_party_aggregates USING (id) +LEFT JOIN planning_summary USING (id) +LEFT JOIN tmp_planning_documents_aggregates USING (id) +LEFT JOIN tender_summary USING (id) LEFT JOIN tmp_tender_documents_aggregates USING (id) LEFT JOIN tmp_awards_aggregates USING (id) LEFT JOIN tmp_award_suppliers_aggregates USING (id) diff --git a/sql/middle/tender_summary.sql b/sql/middle/tender_summary.sql index 3c38b56..52e2b05 100644 --- a/sql/middle/tender_summary.sql +++ b/sql/middle/tender_summary.sql @@ -1,4 +1,54 @@ CREATE TABLE tender_summary_no_data AS +WITH document_documenttype_counts AS ( + SELECT + id, + jsonb_object_agg(coalesce(documenttype, ''), total_documenttypes) AS document_documenttype_counts, + count(*) AS total_documents + FROM ( + SELECT + id, + documenttype, + count(*) AS total_documenttypes + FROM + tender_documents_summary + GROUP BY + id, + documenttype + ) AS d + GROUP BY + id +), + +milestone_type_counts AS ( + SELECT + id, + jsonb_object_agg(coalesce(type, ''), total_milestonetypes) AS milestone_type_counts, + count(*) AS total_milestones + FROM ( + SELECT + id, + type, + count(*) AS total_milestonetypes + FROM + tender_milestones_summary + GROUP BY + id, + type + ) AS d + GROUP BY + id +), + +items_counts AS ( + SELECT + id, + count(*) AS total_items + FROM + tender_items_summary + GROUP BY + id +) + SELECT r.id, r.release_type, @@ -52,53 +102,9 @@ SELECT total_items FROM tmp_tender_summary AS r -LEFT JOIN ( - SELECT - id, - jsonb_object_agg(coalesce(documenttype, ''), total_documenttypes) AS document_documenttype_counts, - count(*) AS total_documents - FROM ( - SELECT - id, - documenttype, - count(*) AS total_documenttypes - FROM - tender_documents_summary - GROUP BY - id, - documenttype - ) AS d - GROUP BY - id -) AS document_documenttype_counts USING (id) -LEFT JOIN ( - SELECT - id, - jsonb_object_agg(coalesce(type, ''), total_milestonetypes) AS milestone_type_counts, - count(*) AS total_milestones - FROM ( - SELECT - id, - type, - count(*) AS total_milestonetypes - FROM - tender_milestones_summary - GROUP BY - id, - type - ) AS d - GROUP BY - id -) AS milestone_type_counts USING (id) -LEFT JOIN ( - SELECT - id, - count(*) AS total_items - FROM - tender_items_summary - GROUP BY - id -) AS items_counts USING (id); +LEFT JOIN document_documenttype_counts USING (id) +LEFT JOIN milestone_type_counts USING (id) +LEFT JOIN items_counts USING (id); CREATE UNIQUE INDEX tender_summary_no_data_id ON tender_summary_no_data (id);