Skip to content

Commit

Permalink
Nowcasting Feature at Scale
Browse files Browse the repository at this point in the history
Summary: See this diff of d6u Li.D28616195 (4edbd6d) Extending that to Nowcasting.

Reviewed By: thomashydeli

Differential Revision: D28621067

fbshipit-source-id: 5cac57f9a1c47a4343e1466c94367770ec1c63ec
  • Loading branch information
Jun Gao authored and facebook-github-bot committed Jun 9, 2021
1 parent 4884647 commit b84c91f
Showing 1 changed file with 114 additions and 0 deletions.
114 changes: 114 additions & 0 deletions kats/models/nowcasting/feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
"""

import pandas as pd
import numpy as np
from typing import Dict, Optional

def ROC(df: pd.DataFrame, n: int, column: str = 'y') -> pd.DataFrame:
"""Adds another column indicating return comparing to step n back.
Expand Down Expand Up @@ -126,3 +128,115 @@ def MACD(df: pd.DataFrame, n_fast: int =12, n_slow: int =21, column: str = 'y')
df = df.join(MACDsign)
df = df.join(MACDdiff)
return df



def _ewma(
arr: np.ndarray,
span: int,
min_periods: int
):
"""
Exponentialy weighted moving average specified by a decay ``window``
to provide better adjustments for small windows via:
y[t] = (x[t] + (1-a)*x[t-1] + (1-a)^2*x[t-2] + ... + (1-a)^n*x[t-n]) /
(1 + (1-a) + (1-a)^2 + ... + (1-a)^n).
Args:
arr : np.ndarray; A single dimenisional numpy array.
span : int; The decay window, or 'span'.
min_periods: int; Minimum amount of data points we'd like to include in the output.
Returns:
A np.ndarray. The exponentially weighted moving average of the array.
"""
output_array = np.empty(arr.shape[0], dtype=np.float64)
output_array[:] = np.NaN

arr = arr[~np.isnan(arr)]
n = arr.shape[0]
ewma = np.empty(n, dtype=np.float64)
alpha = 2 / float(span + 1)
w = 1
ewma_old = arr[0]
ewma[0] = ewma_old
for i in range(1, n):
w += (1-alpha)**i
ewma_old = ewma_old*(1-alpha) + arr[i]
ewma[i] = ewma_old / w

output_subset = ewma[(min_periods-1):]
output_array[-len(output_subset):] = output_subset
return output_array

def _get_nowcasting_np(
x: np.ndarray,
window: int = 5,
n_fast: int = 12,
n_slow: int = 21,
extra_args: Optional[Dict[str, bool]] = None,
default_status: bool = True,
):
"""
Internal function for actually performing feature engineering using the same procedure as
nowcasting feature_extraction under kats/models.
Args:
x: The univariate time series array in the form of 1d numpy array.
window: int; Length of window size for all Nowcasting features. Default value is 5.
n_fast: int; length of "fast" or short period exponential moving average in the MACD
algorithm in the nowcasting features. Default value is 12.
n_slow: int; length of "slow" or long period exponential moving average in the MACD
algorithm in the nowcasting features. Default value is 21.
extra_args: A dictionary containing information for disabling calculation
of a certain feature. Default value is None, i.e. no feature is disabled.
default_status: Default status of the switch for calculate the features or not.
Default value is True.
Returns:
A list containing extracted nowcast features.
"""

# initializing the outputs
nowcasting_features = [np.nan for _ in range(7)]

# ROC: indicating return comparing to step n back.
if extra_args is not None and extra_args.get("nowcast_roc", default_status):
M = x[(window-1):] - x[:-(window-1)]
N = x[:-(window-1)]
arr = M / N
nowcasting_features[0] = np.nan_to_num(arr, nan=0.0, posinf=0.0, neginf=0.0).mean()

# MOM: indicating momentum: difference of current value and n steps back.
if extra_args is not None and extra_args.get("nowcast_mom", default_status):
M = x[window:] - x[:-window]
nowcasting_features[1] = np.nan_to_num(M, nan=0.0, posinf = 0.0, neginf=0.0).mean()

# MA: indicating moving average in the past n steps.
if extra_args is not None and extra_args.get("nowcast_ma", default_status):
ret = np.cumsum(x, dtype=float)
ret[window:] = ret[window:] - ret[:-window]
ma = ret[window - 1:] / window
nowcasting_features[2] = np.nan_to_num(ma, nan=0.0, posinf=0.0, neginf=0.0).mean()

# LAG: indicating lagged value at the past n steps.
if extra_args is not None and extra_args.get("nowcast_lag", default_status):
N = x[:-window]
nowcasting_features[3] = np.nan_to_num(N, nan=0.0, posinf=0.0, neginf=0.0).mean()

# MACD: https://www.investopedia.com/terms/m/macd.asp.
ema_fast = _ewma(x, n_fast, n_slow-1)
ema_slow = _ewma(x, n_slow, n_slow-1)
MACD = ema_fast - ema_slow
if extra_args is not None and extra_args.get("nowcast_macd", default_status):
nowcasting_features[4] = np.nan_to_num(np.nanmean(MACD), nan=0.0, posinf=0.0, neginf=0.0)

MACDsign = _ewma(MACD, 9, 8)
if extra_args is not None and extra_args.get("nowcast_macdsign", default_status):
nowcasting_features[5] = np.nan_to_num(np.nanmean(MACDsign), nan=0.0, posinf=0.0, neginf=0.0)

MACDdiff = MACD - MACDsign
if extra_args is not None and extra_args.get("nowcast_macddiff", default_status):
nowcasting_features[6] = np.nan_to_num(np.nanmean(MACDdiff), nan=0.0, posinf=0.0, neginf=0.0)

return nowcasting_features

0 comments on commit b84c91f

Please sign in to comment.