-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprediction_Validation_Insertion.py
68 lines (51 loc) · 3.5 KB
/
prediction_Validation_Insertion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from Prediction_Raw_Data_Validation.predictionDataValidation import Prediction_Data_validation
from DataTypeValidation_Insertion_Prediction.DataTypeValidationPrediction import dBOperation
from DataTransformation_Prediction.DataTransformationPrediction import dataTransformPredict
from application_logging import logger
class pred_validation:
    """Drive validation, transformation and database loading of prediction files.

    The class wires together three collaborators (raw-data validator, data
    transformer, database operations) and a logger, then runs them in a
    fixed order from :meth:`prediction_validation`. Every step is reported
    to a shared append-mode log file.
    """

    # Log file that every step of the pipeline appends to.
    LOG_FILE_PATH = "Prediction_Logs/Prediction_Log.txt"

    def __init__(self, path):
        """Create the collaborators and open the prediction log file.

        Args:
            path: Forwarded unchanged to ``Prediction_Data_validation``.
                Presumably the location of the raw prediction files;
                confirm against that class.
        """
        self.raw_data = Prediction_Data_validation(path)
        self.dataTransform = dataTransformPredict()
        self.dBOperation = dBOperation()
        self.file_object = open(self.LOG_FILE_PATH, 'a+')
        self.log_writer = logger.App_Logger()

    def prediction_validation(self):
        """Run the full validation and insertion pipeline once.

        Steps, in order: read the schema values, build the filename regex,
        validate file names / column count / wholly-missing columns,
        transform the data, create the ``'Prediction'`` table, insert the
        good data, delete the good-data folder, archive the bad files and
        finally export the table to CSV.

        The log file is always closed when the run ends, whether it
        succeeded or failed. If a previous run already closed it, it is
        reopened first so the same instance can be used again.

        Raises:
            Exception: Any exception raised by a pipeline step is logged
                and then re-raised unchanged.
        """
        # A previous run closes the handle; reopen so the instance is reusable.
        if self.file_object.closed:
            self.file_object = open(self.LOG_FILE_PATH, 'a+')

        log = self.log_writer.log
        log_file = self.file_object
        try:
            log(log_file, 'Start of Validation on files for prediction!!')

            # Extract the values the later validation steps need from the schema.
            (LengthOfDateStampInFile,
             LengthOfTimeStampInFile,
             column_names,
             noofcolumns) = self.raw_data.valuesFromSchema()

            # Regex used to validate the prediction file names.
            regex = self.raw_data.manualRegexCreation()

            # Raw-data validation: file name, column count, all-missing columns.
            self.raw_data.validationFileNameRaw(
                regex, LengthOfDateStampInFile, LengthOfTimeStampInFile)
            self.raw_data.validateColumnLength(noofcolumns)
            self.raw_data.validateMissingValuesInWholeColumn()
            log(log_file, "Raw Data Validation Complete!!")

            # Transformation step that prepares the files for table insertion.
            log(log_file, ("Starting Data Transforamtion!!"))
            self.dataTransform.addQuotesToStringValuesInColumn()
            log(log_file, "DataTransformation Completed!!!")

            # Create (or open) the database and the table described by the schema.
            log(log_file, "Creating Prediction_Database and tables on the basis of given schema!!!")
            self.dBOperation.createTableDb('Prediction', column_names)
            log(log_file, "Table creation Completed!!")

            # Load the validated files into the table.
            log(log_file, "Insertion of Data into Table started!!!!")
            self.dBOperation.insertIntoTableGoodData('Prediction')
            log(log_file, "Insertion in Table completed!!!")

            # The good-data folder is no longer needed once its files are loaded.
            log(log_file, "Deleting Good Data Folder!!!")
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            log(log_file, "Good_Data folder deleted!!!")

            # Move rejected files to the archive.
            log(log_file, "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchiveBad()
            log(log_file, "Bad files moved to archive!! Bad folder Deleted!!")
            log(log_file, "Validation Operation completed!!")

            # Export the table contents to a CSV file.
            log(log_file, "Extracting csv file from table")
            self.dBOperation.selectingDatafromtableintocsv('Prediction')
        except Exception as e:
            # Record the failure before propagating; a bare ``raise`` keeps
            # the original traceback intact.
            log(log_file, "Error occurred during prediction validation: %s" % e)
            raise
        finally:
            # Release the log file handle on both success and failure.
            log_file.close()