Commit 6011945d authored by Jean-Didier
Browse files

application of interpolation on dataset

parent 46464136
Pipeline #17535 passed with stage
in 1 minute and 11 seconds
No preview for this file type
......@@ -322,11 +322,7 @@ class Train():
###########
_start = time.time()
data = data.round(decimals=2)
data = missing_data_handling(data, rolling_mean=True)
missing_data = percent_missing(data)
print("---Missing resume---")
print(missing_data)
print("---End resume---")
data = missing_data_handling(data, slinear_interpolation=True, drop_all_nan=True)
data = datetime_conversion(data, self.time_column_name)
data = important_data(data, self.features)
......
......@@ -33,7 +33,7 @@ def percent_missing(data):
# TODO: a function for handling missing data has to be placed here
def missing_data_handling(data ,drop_all_nan = False, fill_with_mean = False,
fill_with_median = False, rolling_mean = False, rolling_median = False):
fill_with_median = False, rolling_mean = False, rolling_median = False, slinear_interpolation=False):
def drop_all_nan(data):
data = data.dropna()
......@@ -103,10 +103,7 @@ def missing_data_handling(data ,drop_all_nan = False, fill_with_mean = False,
data.assign(InterpolateSpline4=data.target.interpolate(method='spline', order=5))
return data
if drop_all_nan == True:
data = drop_all_nan(data)
elif fill_with_mean == True:
if fill_with_mean == True:
data = fill_with_mean(data)
elif fill_with_median == True:
data = fill_with_median(data)
......@@ -136,6 +133,8 @@ def missing_data_handling(data ,drop_all_nan = False, fill_with_mean = False,
data = spline_interpolate4(data)
elif spline_interpolate5 == True:
data = spline_interpolate5(data)
elif drop_all_nan == True:
data = drop_all_nan(data)
else:
pass
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment