Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SRCH-5154 Bulk delete zombie records from Elastic Search #1743

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 26 additions & 32 deletions app/controllers/admin/bulk_affiliate_styles_upload_controller.rb
Original file line number Diff line number Diff line change
@@ -1,40 +1,34 @@
# frozen_string_literal: true

module Admin
class BulkAffiliateStylesUploadController < AdminController
def index
@page_title = 'Bulk Affiliate Styles Upload'
end

def upload
begin
@file = params[:bulk_upload_affiliate_styles]
BulkAffiliateStyles::FileValidator.new(@file).validate!
enqueue_job
flash[:success] = success_message(@file.original_filename)
rescue BulkAffiliateStylesUploader::Error => e
Rails.logger.error e
flash[:error] = e.message
end
class Admin::BulkAffiliateStylesUploadController < Admin::BulkUploadController
def upload
handle_bulk_upload(
params_key: :bulk_upload_affiliate_styles,
validator_class: BulkAffiliateStyles::FileValidator,
error_class: BulkAffiliateStylesUploader::Error,
success_path: admin_bulk_affiliate_styles_upload_index_path,
logger_message: 'Bulk Affiliate Styles upload failed'
)
end

redirect_to admin_bulk_affiliate_styles_upload_index_path
end
private

private
def set_page_title
@page_title = 'Bulk Affiliate Styles Upload'
end

def success_message(filename)
<<~SUCCESS_MESSAGE
Successfully uploaded #{filename} for processing.
The results will be emailed to you.
SUCCESS_MESSAGE
end
def success_message(filename)
<<~SUCCESS_MESSAGE
Successfully uploaded #{filename} for processing.
The results will be emailed to you.
SUCCESS_MESSAGE
end

def enqueue_job
BulkAffiliateStylesUploaderJob.perform_later(
current_user,
@file.original_filename,
@file.tempfile.path
)
end
def enqueue_job
BulkAffiliateStylesUploaderJob.perform_later(
current_user,
@file.original_filename,
@file.tempfile.path
)
end
end
8 changes: 8 additions & 0 deletions app/controllers/admin/bulk_upload_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# frozen_string_literal: true

# Common parent for the admin bulk-upload screens.
#
# Mixes in BulkUploadHandler and registers set_page_title as a before_action.
# set_page_title is not defined in this class; subclasses are expected to
# provide it.
class Admin::BulkUploadController < Admin::AdminController
  include BulkUploadHandler

  before_action :set_page_title

  # No action-level work is done here.
  def index
  end
end
29 changes: 15 additions & 14 deletions app/controllers/admin/bulk_url_upload_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,27 @@

module Admin
class BulkUrlUploadController < AdminController
def index
@page_title = 'Bulk URL Upload'
end
include BulkUploadHandler
before_action :set_page_title

def upload
begin
@file = params[:bulk_upload_urls]
BulkUrlUploader::UrlFileValidator.new(@file).validate!
enqueue_job
flash[:success] = success_message(@file.original_filename)
rescue BulkUrlUploader::Error => e
Rails.logger.error 'Url upload failed', e
flash[:error] = e.message
end
def index; end

redirect_to admin_bulk_url_upload_index_path
def upload
handle_bulk_upload(
params_key: :bulk_upload_urls,
validator_class: BulkUrlUploader::UrlFileValidator,
error_class: BulkUrlUploader::Error,
success_path: admin_bulk_url_upload_index_path,
logger_message: 'Url upload failed'
)
end

private

def set_page_title
@page_title = 'Bulk URL Upload'
end

def success_message(filename)
<<~SUCCESS_MESSAGE
Successfully uploaded #{filename} for processing.
Expand Down
46 changes: 46 additions & 0 deletions app/controllers/admin/bulk_zombie_url_upload_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# frozen_string_literal: true

# Admin screen for uploading a CSV of zombie URLs / document ids to delete.
#
# BulkUploadHandler and the set_page_title before_action are inherited from
# Admin::BulkUploadController, so they are not repeated here.
class Admin::BulkZombieUrlUploadController < Admin::BulkUploadController
  # Validates the uploaded file, stages it in S3, enqueues the processing job
  # and redirects back to the index page (all driven by handle_bulk_upload).
  def upload
    handle_bulk_upload(
      params_key: :bulk_upload_zombie_urls,
      validator_class: BulkZombieUrls::FileValidator,
      error_class: BulkZombieUrlUploader::Error,
      success_path: admin_bulk_zombie_url_upload_index_path,
      logger_message: 'Zombie Url upload failed'
    )
  end

  private

  def set_page_title
    @page_title = 'Bulk Zombie Url Upload'
  end

  # Flash text shown after the file has been accepted.
  def success_message(filename)
    <<~SUCCESS_MESSAGE
      Successfully uploaded #{filename} for processing.
      The results will be emailed to you.
    SUCCESS_MESSAGE
  end

  # Stages the upload in S3 and hands the S3 key to the background job.
  #
  # The job is enqueued with perform_later rather than run inline: the file is
  # staged in S3 so the job can fetch it, the success message promises emailed
  # results, and running the deletions inside the request would block the
  # response until every row has been processed.
  def enqueue_job
    s3_key = store_upload_in_s3

    BulkZombieUrlUploaderJob.perform_later(
      current_user,
      @file.original_filename,
      s3_key
    )
  end

  # Writes the uploaded tempfile to the configured bucket under a
  # per-upload UUID prefix and returns the object key.
  def store_upload_in_s3
    s3_key = "#{Rails.env}/file_uploads/#{SecureRandom.uuid}/#{@file.original_filename}"

    Aws::S3::Client.new(region: ENV['AWS_REGION']).put_object(
      bucket: ENV['AWS_BUCKET'],
      key: s3_key,
      body: @file.tempfile.set_encoding('UTF-8')
    )

    s3_key
  end
end
29 changes: 29 additions & 0 deletions app/controllers/concerns/bulk_upload_handler.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# frozen_string_literal: true

# Shared upload flow for the admin bulk-upload controllers.
#
# The including controller must provide:
#   * enqueue_job              - starts processing of @file
#   * success_message(name)    - flash text for a successful upload
# plus the usual controller helpers used below (params, flash, redirect_to).
module BulkUploadHandler
  # Validates the uploaded file, enqueues its processing job and redirects.
  #
  # params_key      - key under which the uploaded file is found in params
  # validator_class - class whose instances respond to validate!
  # error_class     - exception class treated as a user-facing upload error
  # success_path    - path redirected to after success or a handled error
  # logger_message  - message logged alongside a handled error
  #
  # Only error_class is rescued. Any other exception propagates to the caller
  # untouched; the redirect is deliberately NOT in an `ensure`, so it is not
  # issued while an unexpected exception is unwinding the action.
  def handle_bulk_upload(params_key:, validator_class:, error_class:, success_path:, logger_message:)
    @file = params[params_key]

    begin
      validate_file(@file, validator_class)
      perform_upload(@file)
    rescue error_class => e
      handle_upload_error(logger_message, e)
    end

    redirect_to success_path
  end

  private

  # Runs the validator; it is expected to raise on an invalid file.
  def validate_file(file, validator_class)
    validator_class.new(file).validate!
  end

  # Enqueues the job (controller-provided) and records the success flash.
  def perform_upload(file)
    enqueue_job
    flash[:success] = success_message(file.original_filename)
  end

  # Logs the handled error and surfaces its message to the user.
  def handle_upload_error(logger_message, error)
    Rails.logger.error logger_message, error
    flash[:error] = error.message
  end
end
42 changes: 42 additions & 0 deletions app/jobs/bulk_zombie_url_uploader_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# frozen_string_literal: true

# Downloads a staged zombie-URL CSV from S3, runs BulkZombieUrlUploader over
# it, then logs and emails the results to the uploading user.
class BulkZombieUrlUploaderJob < ApplicationJob
  queue_as :searchgov

  delegate :upload, to: :@uploader

  # user     - recipient of the results email
  # filename - original name of the uploaded file (used for reporting)
  # filepath - S3 object key the file was staged under
  def perform(user, filename, filepath)
    @user = user
    @filename = filename

    # The local name is prefixed with this job's id so two jobs processing
    # files with the same name cannot overwrite each other, and reduced to its
    # basename so a client-supplied name can never escape tmp/.
    local_filepath = Rails.root.join('tmp', "#{job_id}-#{File.basename(filename)}")

    begin
      download_from_s3(filepath, local_filepath)
      @uploader = BulkZombieUrlUploader.new(filename, local_filepath)
      upload
    ensure
      # Clean up even when the download or the upload raises.
      File.delete(local_filepath) if File.exist?(local_filepath)
    end

    report_results
  end

  private

  # Copies the S3 object stored under +key+ to +destination+ on local disk.
  def download_from_s3(key, destination)
    s3_client = Aws::S3::Client.new(region: ENV['AWS_REGION'])
    response = s3_client.get_object(bucket: ENV['AWS_BUCKET'], key:)
    File.binwrite(destination, response.body.read)
  end

  def report_results
    log_results
    send_results_email
  end

  def log_results
    results = @uploader.results
    Rails.logger.info(BulkZombieUrlUploaderJob: results.file_name, total_urls: results.total_count, errors: results.error_count)
  end

  def send_results_email
    results = @uploader.results
    email = BulkZombieUrlUploadResultsMailer.with(user: @user, results:).results_email
    email.deliver_now!
  end
end
8 changes: 8 additions & 0 deletions app/mailers/bulk_zombie_url_upload_results_mailer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# frozen_string_literal: true

# Emails the outcome of a bulk zombie URL upload to the user who started it.
class BulkZombieUrlUploadResultsMailer < ApplicationMailer
  # Expects params[:user] (responds to #email) and params[:results]
  # (responds to #file_name). @results is assigned to an instance variable,
  # presumably for use by the mail template.
  def results_email
    @results = params[:results]
    recipient = params[:user].email
    subject_line = "Bulk Zombie URL upload results for #{@results.file_name}"

    mail(to: recipient, subject: subject_line)
  end
end
80 changes: 80 additions & 0 deletions app/services/bulk_zombie_url_uploader.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# frozen_string_literal: true

class BulkZombieUrlUploader
attr_reader :results

class Error < StandardError; end

# filename - display name of the uploaded file (passed to the results object)
# filepath - location of the CSV on local disk
#
# @results starts out nil; it is populated once #upload runs.
def initialize(filename, filepath)
  @results = nil
  @file_path = filepath
  @file_name = filename
end

def upload
initialize_results
process_upload
rescue StandardError => e
Rails.logger.error("Failed to process bulk zombie URL document", e, filename: @file_name)
ensure
@results ||= BulkZombieUrls::Results.new(@file_name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is redundant since it was already initialized in the initialize_results method. The upload method is calling BulkZombieUrls::Results.new twice.

end

private

def initialize_results
@results = BulkZombieUrls::Results.new(@file_name)
raise Error, 'Results object not initialized' unless @results
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to raise an error here. It might fail if the file doesn't exist, but if that's the case the error should say so.

end

# Reads the whole CSV at @file_path (first line is the header row) and hands
# every data row to process_row. The file is parsed in full before the first
# row is processed, so a malformed CSV fails before any row is handled.
def process_upload
  csv_text = File.read(@file_path)
  table = CSV.parse(csv_text, headers: true)

  table.each do |row|
    process_row(row)
  end
end

def process_row(row)
raise Error, 'Results object not initialized' unless @results
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no need to raise Error everywhere since the uploader is being initialized correctly every time.


url = row['URL']&.strip
document_id = row['DOC_ID']&.strip

return log_missing_document_id(row, url) if document_id.blank?

handle_url_processing(url, document_id, row)
end

# Processes a single row and records it as a success. Any StandardError
# raised while doing so is routed to handle_processing_error instead of
# aborting the remaining rows.
def handle_url_processing(url, document_id, row)
  process_url(url, document_id)
  update_results
rescue StandardError => processing_error
  handle_processing_error(processing_error, url, document_id, row)
end

# Records one successfully processed row on the results object:
# first delete_ok, then increment_updated.
def update_results
  results = @results
  results.delete_ok
  results.increment_updated
end

# Records and logs a row that has no document id. The error is keyed by the
# row's URL, or by the literal 'Unknown' when the URL is nil.
def log_missing_document_id(row, url)
  error_key = url || 'Unknown'

  @results.add_error('Document ID is missing', error_key)
  Rails.logger.error("Skipping row: #{row.inspect}. Document ID is mandatory.")
end

# Records a failed row on the results object (when one exists) and logs it.
# The error is keyed by the URL when one is present, otherwise by the
# document id. The row argument is accepted but not used here.
def handle_processing_error(error, url, document_id, row)
  error_key = url.present? ? url : document_id

  @results&.add_error(error.message, error_key)
  Rails.logger.error('Failure to process bulk upload zombie URL row:', error, url:, document_id:)
end

# Chooses the deletion strategy for a row: rows with a URL go through
# process_url_with_searchgov; rows without one delete the 'searchgov'
# I14y document directly by its id.
def process_url(url, document_id)
  return process_url_with_searchgov(url, document_id) if url.present?

  I14yDocument.delete(handle: 'searchgov', document_id:)
end

# Deletes the record behind a row that carries a URL.
#
# When a SearchgovUrl exists for the URL it is destroyed. When none exists,
# the 'searchgov' I14y document is deleted directly by document_id; without
# this fallback the row would be counted as a success although nothing had
# been removed.
def process_url_with_searchgov(url, document_id)
  searchgov_url = SearchgovUrl.find_by(url:)
  return searchgov_url.destroy if searchgov_url

  I14yDocument.delete(handle: 'searchgov', document_id:)
end
end
39 changes: 39 additions & 0 deletions app/services/bulk_zombie_urls/file_validator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# frozen_string_literal: true

class BulkZombieUrls::FileValidator
MAXIMUM_FILE_SIZE = 4.megabytes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where does this restriction come from? I don't see it as part of the ticket.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not a requirement from the ticket, but I am following the convention used by the other existing upload processes.

VALID_CONTENT_TYPES = %w[text/csv].freeze

# uploaded_file - the object to validate; the checks in this class call
# present?, content_type, size and original_filename on it.
def initialize(uploaded_file)
@uploaded_file = uploaded_file
end

# Runs every check in turn; each one raises BulkZombieUrlUploader::Error on
# failure. ensure_present must stay first: the later checks call methods on
# @uploaded_file and would fail on a missing upload.
def validate!
ensure_present
ensure_valid_content_type
ensure_not_too_big
end

private

# Raises BulkZombieUrlUploader::Error unless the upload's content type is
# listed in VALID_CONTENT_TYPES.
def ensure_valid_content_type
  content_type = @uploaded_file.content_type
  return if VALID_CONTENT_TYPES.include?(content_type)

  raise BulkZombieUrlUploader::Error, "Files of type #{content_type} are not supported."
end

# Raises BulkZombieUrlUploader::Error when no file was submitted.
def ensure_present
  raise BulkZombieUrlUploader::Error, 'Please choose a file to upload.' unless @uploaded_file.present?
end

# Raises BulkZombieUrlUploader::Error when the upload is larger than
# MAXIMUM_FILE_SIZE; a file of exactly that size is accepted.
def ensure_not_too_big
  return unless @uploaded_file.size > MAXIMUM_FILE_SIZE

  raise BulkZombieUrlUploader::Error, "#{@uploaded_file.original_filename} is too big; please split it."
end
end
Loading