-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfats_feature_extraction.py
56 lines (49 loc) · 2.02 KB
/
fats_feature_extraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import numpy as np
import pandas as pd
import csv
import importlib
import feature_functions as f
from utils import feature_list
from load_lc import load_lc, get_file_name
import sys
from os.path import dirname, abspath
from pipeline_utils import unified_fieldnames, data_dir
# Feature callables, looked up by name on the feature_functions module (alias
# ``f``) in the order given by ``feature_list``.
function_list = [getattr(f, feature_name) for feature_name in feature_list]

# Module-level accumulators: get_features() appends one row per processed
# object to ``featuresdf``; fats_feature_extraction() appends one row per
# failure to ``errorsdf``.
featuresdf = pd.DataFrame(columns=feature_list)
errorsdf = pd.DataFrame(columns=["filename", "error"])
def get_features(lc, tag1, tag2, objid):
    """Compute every configured feature for one object and store the row.

    Each callable in the module-level ``function_list`` is applied to ``lc``.
    The results form a single row (columns ``feature_list``) that is labelled
    with ``ID``, ``Type`` and ``SubType`` and appended to the module-level
    ``featuresdf`` accumulator.

    Args:
        lc: Object returned by ``load_lc``; passed unchanged to each feature
            function.
        tag1: Value stored in the ``Type`` column.
        tag2: Value stored in the ``SubType`` column.
        objid: Value stored in the ``ID`` column.

    Raises:
        Exception: Anything raised by a feature function propagates to the
            caller; ``featuresdf`` is not modified in that case.
    """
    global featuresdf

    # A comprehension instead of ``map(lambda f: ...)``: the lambda parameter
    # used to shadow the ``feature_functions`` module alias ``f``.
    values = [feature_fn(lc) for feature_fn in function_list]

    row = pd.DataFrame([values], columns=feature_list)
    row["ID"] = objid
    row["Type"] = tag1
    row["SubType"] = tag2

    if featuresdf.empty:
        # Concatenating with a zero-row frame is deprecated in pandas >= 2.1
        # (FutureWarning; the empty object-dtype columns will start to affect
        # the result dtypes). The accumulator is created with the same feature
        # columns as ``row``, so the row itself is already the full result.
        featuresdf = row
    else:
        featuresdf = pd.concat([featuresdf, row], ignore_index=True)
def fats_feature_extraction(ffilename, errorfilename):
    """Extract features for every catalogued object and pickle the results.

    Reads ``metadata/lc_metadata.pkl`` under ``data_dir``. For each metadata
    row the file is located with ``get_file_name``, loaded with ``load_lc``
    and handed to ``get_features`` together with the row's ``Type``,
    ``SubType`` and ``ID``. Exceptions raised by ``get_features`` are printed
    and recorded in the module-level ``errorsdf`` instead of aborting the run.
    Finally the module-level ``featuresdf`` and ``errorsdf`` are pickled.

    Args:
        ffilename: Destination path for the pickled ``featuresdf``.
        errorfilename: Destination path for the pickled ``errorsdf``.
    """
    global errorsdf

    # NOTE(review): unpickling can execute arbitrary code; only point
    # ``data_dir`` at metadata files you trust.
    tagged_metadata = pd.read_pickle(data_dir+"metadata/lc_metadata.pkl")

    for _, row in tagged_metadata.iterrows():
        # Named ``lc_dir`` rather than ``dirname`` so the imported
        # ``os.path.dirname`` is not shadowed inside this function.
        lc_dir, filename = get_file_name(row)
        lc = load_lc(lc_dir, filename)

        # The original test was ``lc != ""``; presumably ``load_lc`` returns
        # "" when nothing could be loaded (TODO confirm). Only compare when
        # ``lc`` is a string: ``!=`` on an array-like (NumPy array, DataFrame)
        # is element-wise and cannot be used as an ``if`` condition.
        if isinstance(lc, str) and lc == "":
            continue

        tag1 = row["Type"]
        tag2 = row["SubType"]
        objid = row["ID"]
        try:
            print("Trying to get features for: ", filename)
            get_features(lc, tag1, tag2, objid)
        except Exception as e:
            # Deliberate best-effort: one bad file must not stop the batch,
            # so the failure is reported and logged to ``errorsdf``.
            print('error: ', str(e))
            print("something went wrong when trying to process file: ", filename)
            error_row = pd.DataFrame(
                data={'filename': [filename], 'error': [str(e)]}
            )
            if errorsdf.empty:
                # Avoid concatenating with a zero-row frame (deprecated in
                # pandas >= 2.1); ``error_row`` has the same two columns.
                errorsdf = error_row
            else:
                errorsdf = pd.concat([errorsdf, error_row], ignore_index=True)

    # Save features + errors.
    featuresdf.to_pickle(ffilename)
    errorsdf.to_pickle(errorfilename)