-
Notifications
You must be signed in to change notification settings - Fork 37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SRCH-5154 Bulk delete zombie records from Elastic Search #1743
base: main
Are you sure you want to change the base?
Changes from all commits
18a9d2f
7714aba
f597ccf
b58dcdb
502a5fb
a3bc228
8c00b6f
8497dde
139cfb4
7b4929d
5b1923a
d894950
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,40 +1,34 @@ | ||
# frozen_string_literal: true | ||
|
||
module Admin | ||
class BulkAffiliateStylesUploadController < AdminController | ||
def index | ||
@page_title = 'Bulk Affiliate Styles Upload' | ||
end | ||
|
||
def upload | ||
begin | ||
@file = params[:bulk_upload_affiliate_styles] | ||
BulkAffiliateStyles::FileValidator.new(@file).validate! | ||
enqueue_job | ||
flash[:success] = success_message(@file.original_filename) | ||
rescue BulkAffiliateStylesUploader::Error => e | ||
Rails.logger.error e | ||
flash[:error] = e.message | ||
end | ||
class Admin::BulkAffiliateStylesUploadController < Admin::BulkUploadController | ||
def upload | ||
handle_bulk_upload( | ||
params_key: :bulk_upload_affiliate_styles, | ||
validator_class: BulkAffiliateStyles::FileValidator, | ||
error_class: BulkAffiliateStylesUploader::Error, | ||
success_path: admin_bulk_affiliate_styles_upload_index_path, | ||
logger_message: 'Bulk Affiliate Styles upload failed' | ||
) | ||
end | ||
|
||
redirect_to admin_bulk_affiliate_styles_upload_index_path | ||
end | ||
private | ||
|
||
private | ||
def set_page_title | ||
@page_title = 'Bulk Affiliate Styles Upload' | ||
end | ||
|
||
def success_message(filename) | ||
<<~SUCCESS_MESSAGE | ||
Successfully uploaded #{filename} for processing. | ||
The results will be emailed to you. | ||
SUCCESS_MESSAGE | ||
end | ||
def success_message(filename) | ||
<<~SUCCESS_MESSAGE | ||
Successfully uploaded #{filename} for processing. | ||
The results will be emailed to you. | ||
SUCCESS_MESSAGE | ||
end | ||
|
||
def enqueue_job | ||
BulkAffiliateStylesUploaderJob.perform_later( | ||
current_user, | ||
@file.original_filename, | ||
@file.tempfile.path | ||
) | ||
end | ||
def enqueue_job | ||
BulkAffiliateStylesUploaderJob.perform_later( | ||
current_user, | ||
@file.original_filename, | ||
@file.tempfile.path | ||
) | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# frozen_string_literal: true | ||
|
||
class Admin::BulkUploadController < Admin::AdminController | ||
include BulkUploadHandler | ||
before_action :set_page_title | ||
|
||
def index; end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# frozen_string_literal: true | ||
|
||
class Admin::BulkZombieUrlUploadController < Admin::BulkUploadController | ||
include BulkUploadHandler | ||
before_action :set_page_title | ||
|
||
def upload | ||
handle_bulk_upload( | ||
params_key: :bulk_upload_zombie_urls, | ||
validator_class: BulkZombieUrls::FileValidator, | ||
error_class: BulkZombieUrlUploader::Error, | ||
success_path: admin_bulk_zombie_url_upload_index_path, | ||
logger_message: 'Zombie Url upload failed' | ||
) | ||
end | ||
|
||
private | ||
|
||
def set_page_title | ||
@page_title = 'Bulk Zombie Url Upload' | ||
end | ||
|
||
def success_message(filename) | ||
<<~SUCCESS_MESSAGE | ||
Successfully uploaded #{filename} for processing. | ||
The results will be emailed to you. | ||
SUCCESS_MESSAGE | ||
end | ||
|
||
def enqueue_job | ||
s3_client = Aws::S3::Client.new(region: ENV['AWS_REGION']) | ||
filepath = "#{Rails.env}/file_uploads/#{SecureRandom.uuid}/#{@file.original_filename}" | ||
|
||
s3_client.put_object( | ||
bucket: ENV['AWS_BUCKET'], | ||
key: filepath, | ||
body: @file.tempfile.set_encoding('UTF-8') | ||
) | ||
|
||
BulkZombieUrlUploaderJob.perform_now( | ||
current_user, | ||
@file.original_filename, | ||
filepath, | ||
) | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# frozen_string_literal: true | ||
|
||
module BulkUploadHandler | ||
def handle_bulk_upload(params_key:, validator_class:, error_class:, success_path:, logger_message:) | ||
@file = params[params_key] | ||
validate_file(@file, validator_class) | ||
perform_upload(@file) | ||
rescue error_class => e | ||
handle_upload_error(logger_message, e) | ||
ensure | ||
redirect_to success_path | ||
end | ||
|
||
private | ||
|
||
def validate_file(file, validator_class) | ||
validator_class.new(file).validate! | ||
end | ||
|
||
def perform_upload(file) | ||
enqueue_job | ||
flash[:success] = success_message(file.original_filename) | ||
end | ||
|
||
def handle_upload_error(logger_message, error) | ||
Rails.logger.error logger_message, error | ||
flash[:error] = error.message | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# frozen_string_literal: true | ||
|
||
class BulkZombieUrlUploaderJob < ApplicationJob | ||
queue_as :searchgov | ||
|
||
delegate :upload, to: :@uploader | ||
|
||
def perform(user, filename, filepath) | ||
@user = user | ||
@filename = filename | ||
|
||
s3_client = Aws::S3::Client.new(region: ENV['AWS_REGION']) | ||
response = s3_client.get_object(bucket: ENV['AWS_BUCKET'], key: filepath) | ||
|
||
local_filepath = Rails.root.join('tmp', filename) | ||
File.open(local_filepath, 'wb') { |file| file.write(response.body.read) } | ||
|
||
@uploader = BulkZombieUrlUploader.new(filename, local_filepath) | ||
upload | ||
|
||
File.delete(local_filepath) if File.exist?(local_filepath) | ||
report_results | ||
end | ||
|
||
private | ||
|
||
def report_results | ||
log_results | ||
send_results_email | ||
end | ||
|
||
def log_results | ||
results = @uploader.results | ||
Rails.logger.info(BulkZombieUrlUploaderJob: results.file_name, total_urls: results.total_count, errors: results.error_count) | ||
end | ||
|
||
def send_results_email | ||
results = @uploader.results | ||
email = BulkZombieUrlUploadResultsMailer.with(user: @user, results:).results_email | ||
email.deliver_now! | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# frozen_string_literal: true | ||
|
||
class BulkZombieUrlUploadResultsMailer < ApplicationMailer | ||
def results_email | ||
@results = params[:results] | ||
mail(to: params[:user].email, subject: "Bulk Zombie URL upload results for #{@results.file_name}") | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# frozen_string_literal: true | ||
|
||
class BulkZombieUrlUploader | ||
attr_reader :results | ||
|
||
class Error < StandardError; end | ||
|
||
def initialize(filename, filepath) | ||
@file_name = filename | ||
@file_path = filepath | ||
@results = nil | ||
end | ||
|
||
def upload | ||
initialize_results | ||
process_upload | ||
rescue StandardError => e | ||
Rails.logger.error("Failed to process bulk zombie URL document", e, filename: @file_name) | ||
ensure | ||
@results ||= BulkZombieUrls::Results.new(@file_name) | ||
end | ||
|
||
private | ||
|
||
def initialize_results | ||
@results = BulkZombieUrls::Results.new(@file_name) | ||
raise Error, 'Results object not initialized' unless @results | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need to raise an error in here. Maybe it can fail if the file doesn't exist but if that's the case the error should say that. |
||
end | ||
|
||
def process_upload | ||
CSV.parse(File.read(@file_path), headers: true).each { |row| process_row(row) } | ||
end | ||
|
||
def process_row(row) | ||
raise Error, 'Results object not initialized' unless @results | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is no need to raise |
||
|
||
url = row['URL']&.strip | ||
document_id = row['DOC_ID']&.strip | ||
|
||
return log_missing_document_id(row, url) if document_id.blank? | ||
|
||
handle_url_processing(url, document_id, row) | ||
end | ||
|
||
def handle_url_processing(url, document_id, row) | ||
process_url(url, document_id) | ||
update_results | ||
rescue StandardError => e | ||
handle_processing_error(e, url, document_id, row) | ||
end | ||
|
||
def update_results | ||
@results.delete_ok | ||
@results.increment_updated | ||
end | ||
|
||
def log_missing_document_id(row, url) | ||
@results.add_error('Document ID is missing', url || 'Unknown') | ||
Rails.logger.error("Skipping row: #{row.inspect}. Document ID is mandatory.") | ||
end | ||
|
||
def handle_processing_error(error, url, document_id, row) | ||
key = url.presence || document_id | ||
@results&.add_error(error.message, key) | ||
Rails.logger.error('Failure to process bulk upload zombie URL row:', error, url:, document_id:) | ||
end | ||
|
||
def process_url(url, document_id) | ||
if url.present? | ||
process_url_with_searchgov(url, document_id) | ||
else | ||
I14yDocument.delete(handle: 'searchgov', document_id:) | ||
end | ||
end | ||
|
||
def process_url_with_searchgov(url, document_id) | ||
searchgov_url = SearchgovUrl.find_by(url:) | ||
searchgov_url.destroy if searchgov_url | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# frozen_string_literal: true | ||
|
||
class BulkZombieUrls::FileValidator | ||
MAXIMUM_FILE_SIZE = 4.megabytes | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where this this restriction comes from? I don't see it as part of the ticket. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not the requirement from the ticket, but I am following this as per the previous other upload processes. |
||
VALID_CONTENT_TYPES = %w[text/csv].freeze | ||
|
||
def initialize(uploaded_file) | ||
@uploaded_file = uploaded_file | ||
end | ||
|
||
def validate! | ||
ensure_present | ||
ensure_valid_content_type | ||
ensure_not_too_big | ||
end | ||
|
||
private | ||
|
||
def ensure_valid_content_type | ||
return if VALID_CONTENT_TYPES.include?(@uploaded_file.content_type) | ||
|
||
error_message = "Files of type #{@uploaded_file.content_type} are not supported." | ||
raise(BulkZombieUrlUploader::Error, error_message) | ||
end | ||
|
||
def ensure_present | ||
return if @uploaded_file.present? | ||
|
||
error_message = 'Please choose a file to upload.' | ||
raise(BulkZombieUrlUploader::Error, error_message) | ||
end | ||
|
||
def ensure_not_too_big | ||
return if @uploaded_file.size <= MAXIMUM_FILE_SIZE | ||
|
||
error_message = "#{@uploaded_file.original_filename} is too big; please split it." | ||
raise(BulkZombieUrlUploader::Error, error_message) | ||
end | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This line is redundant since it was already initialized in the
initialize_results
method. Theupload
method is callingBulkZombieUrls::Results.new
twice.