test fixes for fbcode/kats
Summary:
Some PSS1-pinned directories have issues that trace back to fbcode/kats, so we're trying to upgrade it first. This diff makes backwards-compatible fixes for several test failures seen on D63678019:

**bocpd_model.py**
- [HoltWintersResults.slope deprecated in favor of .trend](https://www.statsmodels.org/v0.12.2/generated/statsmodels.tsa.holtwinters.HoltWintersResults.slope.html#statsmodels.tsa.holtwinters.HoltWintersResults.slope)
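  A minimal sketch of the rename, using a toy `ExponentialSmoothing` fit rather than the actual Kats model setup:

  ```python
  import numpy as np
  import pandas as pd
  from statsmodels.tsa.holtwinters import ExponentialSmoothing

  series = pd.Series(np.arange(30, dtype=float))  # placeholder data
  fit1 = ExponentialSmoothing(series, trend="add").fit()
  level_arr = fit1.level
  trend_arr = fit1.trend  # formerly fit1.slope, removed in newer statsmodels
  ```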

**special_events_base.py**
- assuming `self.original_fcst` is supposed to be a copy, ignore the chained_assignment warning
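  A sketch of the suppression pattern, assuming the write really does land on an independent copy (the `original_fcst` frame and `fcst` column here are illustrative, not the Kats internals):

  ```python
  import pandas as pd

  df = pd.DataFrame({"fcst": [1.0, 2.0, 3.0]})
  original_fcst = df[["fcst"]]  # downstream code treats this slice as a copy

  # Silence pandas' SettingWithCopyWarning for this block only:
  with pd.option_context("mode.chained_assignment", None):
      original_fcst["fcst"] = original_fcst["fcst"] * 2
  ```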

**test_dtw.py**
- the `freq` field on `pandas.Timestamp` is deprecated and doesn't do anything here anyway
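  The fix is just to drop the argument, e.g.:

  ```python
  import pandas as pd

  # Before (freq is deprecated and ignored):
  # t = pd.Timestamp("2021-03-02 00:00:00", freq="D")
  # After:
  t = pd.Timestamp("2021-03-02 00:00:00")
  ```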

**decomposition.py**
- STL's first argument is expected to be 1D, achievable with `.squeeze()`
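  A minimal sketch with synthetic monthly data (not the Kats decomposition internals):

  ```python
  import pandas as pd
  from statsmodels.tsa.seasonal import STL

  df = pd.DataFrame(
      {"y": range(48)},
      index=pd.date_range("2020-01-01", periods=48, freq="MS"),
  )
  # STL wants a 1D endog; .squeeze() turns the one-column DataFrame into a Series.
  result = STL(df.squeeze(), period=12).fit()
  ```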

Changes copied from proof-by-accident D58854294:
- changepoint_evaluator.py
- stl_intraday_model.py
- test_feature_engineering.py
- feature_engineering.py

Everything else is `.append` -> `pandas.concat`.
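The general recipe, since `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
row = pd.DataFrame([{"a": 3}])

# Before: df = df.append(row, ignore_index=True)
combined = pd.concat([df, row], ignore_index=True)
```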

Reviewed By: igorsugak

Differential Revision: D63737460

fbshipit-source-id: 66b3cebc7d3a9a912de6f6674b4c587b462217b5
Natasha Badami authored and facebook-github-bot committed Oct 4, 2024
1 parent 257e029 commit 2c6e236
Showing 12 changed files with 49 additions and 40 deletions.
2 changes: 1 addition & 1 deletion kats/detectors/bocpd_model.py
@@ -223,7 +223,7 @@ def _holt_winter_fit(
         )

         level_arr = fit1.level
-        trend_arr = fit1.slope
+        trend_arr = fit1.trend
         fit_arr = [x + y for x, y in zip(level_arr, trend_arr)]
         fit_diff = np.diff(fit_arr)
         fit_diff = np.concatenate(([fit_diff[0]], fit_diff))
18 changes: 10 additions & 8 deletions kats/detectors/detector_consts.py
@@ -577,27 +577,29 @@ def update(
     def _update_ts_slice(
         self, ts: TimeSeriesData, time: datetime, value: Union[float, ArrayLike]
     ) -> TimeSeriesData:
-        time = ts.time.iloc[1:].append(pd.Series(time, copy=False))
-        time.reset_index(drop=True, inplace=True)
+        time_df = pd.concat([ts.time.iloc[1:], pd.Series(time, copy=False)])
+        time_df.reset_index(drop=True, inplace=True)
         if self.num_series == 1:
-            value = ts.value.iloc[1:].append(pd.Series(value, copy=False))
-            value.reset_index(drop=True, inplace=True)
-            return TimeSeriesData(time=time, value=value)
+            value_df = pd.concat([ts.value.iloc[1:], pd.Series(value, copy=False)])
+            value_df.reset_index(drop=True, inplace=True)
+            # pyre-fixme[6]: For 1st argument expected `Union[None, DatetimeIndex,
+            #  Series]` but got `DataFrame`.
+            return TimeSeriesData(time=time_df, value=value_df)
         else:
             if isinstance(value, float):
                 raise ValueError(
                     f"num_series = {self.num_series} so value should have type ArrayLike."
                 )
             value_dict = {}
             for i, value_col in enumerate(self.key_mapping):
-                value_dict[value_col] = (
-                    ts.value[value_col].iloc[1:].append(pd.Series(value[i], copy=False))
+                value_dict[value_col] = pd.concat(
+                    [ts.value[value_col].iloc[1:], pd.Series(value[i], copy=False)]
                 )
                 value_dict[value_col].reset_index(drop=True, inplace=True)
             return TimeSeriesData(
                 pd.DataFrame(
                     {
-                        **{"time": time},
+                        **{"time": time_df},
                         **{
                             value_col: value_dict[value_col]
                             for value_col in self.key_mapping
2 changes: 1 addition & 1 deletion kats/detectors/distribution_distance_model.py
@@ -285,7 +285,7 @@ def fit_predict(
                 total_data_df_group0.loc[start_time_index:],
                 total_data_df.loc[start_time_index:],
             ],
-            1,
+            axis=1,
             copy=False,
         )
         scores = total_df.apply(self._js_div_func, axis=1)
8 changes: 4 additions & 4 deletions kats/detectors/trend_mk.py
@@ -442,8 +442,8 @@ def detector(
         ts_deseas = self._remove_seasonality(ts, freq=self.freq)
         ts_smoothed = self._smoothing(ts_deseas)  # smoothing
         # append MK statistics to MK_statistics dataframe
-        MK_statistics = MK_statistics.append(
-            self.runDetector(ts=ts_smoothed),
+        MK_statistics = pd.concat(
+            [MK_statistics, pd.DataFrame([self.runDetector(ts=ts_smoothed)])],
             ignore_index=True,
         )

@@ -458,8 +458,8 @@ def detector(
             # look back window_size day for trend detection
             ts_tmp = ts_smoothed.loc[:t, :]
             # append MK statistics to MK_statistics dataframe
-            MK_statistics = MK_statistics.append(
-                self.runDetector(ts=ts_tmp),
+            MK_statistics = pd.concat(
+                [MK_statistics, pd.DataFrame([self.runDetector(ts=ts_tmp)])],
                 ignore_index=True,
             )

9 changes: 7 additions & 2 deletions kats/models/globalmodel/utils.py
@@ -1686,8 +1686,13 @@ def pad_ts(ts: TimeSeriesData, n: int, freq: pd.Timedelta) -> TimeSeriesData:
     val_col = [col for col in df.columns if col != time_col][0]
     pad_val = df[val_col].values[df[val_col].first_valid_index()]
     # add the padding value
-    df = df.append(
-        {time_col: df[time_col].iloc[0] - n * freq, val_col: pad_val},
+    df = pd.concat(
+        [
+            df,
+            pd.DataFrame(
+                [{time_col: df[time_col].iloc[0] - n * freq, val_col: pad_val}]
+            ),
+        ],
         ignore_index=True,
     )
     return TimeSeriesData(df)
2 changes: 1 addition & 1 deletion kats/tests/data/test_data_validation.py
@@ -23,7 +23,7 @@ def test_data_validation(self) -> None:
             [["1900-01-01", 2], ["2020-01-01", 2]], columns=["time", "y"]
         )
         DATA = self.TSData.to_dataframe()
-        data_with_extra_point = DATA.copy().append(extra_point)
+        data_with_extra_point = pd.concat([DATA.copy(), extra_point])

         tsData_with_missing_point = TimeSeriesData(data_with_extra_point)

4 changes: 2 additions & 2 deletions kats/tests/detectors/test_distribution_distance_model.py
@@ -45,7 +45,7 @@ def generate_multi_ts_data(

     multi_ts_df = pd.DataFrame(multi_ts_val)
     multi_ts_df.columns = ["val_" + str(i) for i in range(10)]
-    multi_ts_df = pd.concat([pd.Series(ts_time, name="time"), multi_ts_df], 1)
+    multi_ts_df = pd.concat([pd.Series(ts_time, name="time"), multi_ts_df], axis=1)
     # pyre-fixme[6]: For 1st argument expected `Optional[DataFrame]` but got
     #  `Union[DataFrame, Series]`.
     ts = TimeSeriesData(multi_ts_df)
@@ -134,7 +134,7 @@ def generate_irregular_granularity_data(
         start="2020-01-01", freq=str(granularities_sec) + "s", periods=length
     )[np.random.choice(list(range(length)), n, replace=False)]

-    multi_ts_df = pd.concat([pd.Series(ts_time, name="time"), multi_ts_df], 1)
+    multi_ts_df = pd.concat([pd.Series(ts_time, name="time"), multi_ts_df], axis=1)
     # pyre-fixme[6]: For 1st argument expected `Optional[DataFrame]` but got
     #  `Union[DataFrame, Series]`.
     ts = TimeSeriesData(multi_ts_df)
8 changes: 4 additions & 4 deletions kats/tests/detectors/test_dtw.py
@@ -45,8 +45,8 @@ def test_clear_spike_in_zero_data_yields_cp(self) -> None:

         expected_result = [
             DTWCPDChangePoint(
-                start_time=pd.Timestamp("2021-03-02 00:00:00", freq="D"),
-                end_time=pd.Timestamp("2021-04-20 00:00:00", freq="D"),
+                start_time=pd.Timestamp("2021-03-02 00:00:00"),
+                end_time=pd.Timestamp("2021-04-20 00:00:00"),
                 confidence=1e9,
                 ts_name="ts3",
             )
@@ -99,8 +99,8 @@ def test_two_similar_spikes_in_zero_data_yields_cp(self) -> None:

         expected_result = [
             DTWCPDChangePoint(
-                start_time=pd.Timestamp("2021-03-02 00:00:00", freq="D"),
-                end_time=pd.Timestamp("2021-04-20 00:00:00", freq="D"),
+                start_time=pd.Timestamp("2021-03-02 00:00:00"),
+                end_time=pd.Timestamp("2021-04-20 00:00:00"),
                 confidence=mock.ANY,
                 ts_name="ts3",
             )
4 changes: 3 additions & 1 deletion kats/tests/detectors/test_outlier.py
@@ -131,7 +131,9 @@ class MultivariateVARDetectorTest(TestCase):
     def setUp(self) -> None:
         DATA_multi = load_data("multivariate_anomaly_simulated_data.csv")
         self.TSData_multi = TimeSeriesData(DATA_multi)
-        DATA_multi2 = pd.concat([DATA_multi.iloc[:10, :], DATA_multi.iloc[12:, :]], 0)
+        DATA_multi2 = pd.concat(
+            [DATA_multi.iloc[:10, :], DATA_multi.iloc[12:, :]], axis=0
+        )
         self.TSData_multi2 = TimeSeriesData(DATA_multi2)
         DATA_multi3 = pd.merge(
             DATA_multi, DATA_multi, how="inner", on="time", suffixes=("_1", "_2")
24 changes: 12 additions & 12 deletions kats/tests/test_consts.py
@@ -1170,15 +1170,15 @@ def setUp(self) -> None:
         transformed_df_date.ds = transformed_df_date.ds.apply(
             lambda x: x + relativedelta(years=NUM_YEARS_OFFSET)
         )
-        transformed_df_date_concat = self.AIR_DF.append(
-            transformed_df_date, ignore_index=True
+        transformed_df_date_concat = pd.concat(
+            [self.AIR_DF, transformed_df_date], ignore_index=True
         )
         transformed_df_date_double = self.AIR_DF_DATETIME.copy(deep=True)
         transformed_df_date_double.ds = transformed_df_date.ds.apply(
             lambda x: x + relativedelta(years=NUM_YEARS_OFFSET * 2)
         )
-        transformed_df_date_concat_double = self.AIR_DF.append(
-            transformed_df_date_double, ignore_index=True
+        transformed_df_date_concat_double = pd.concat(
+            [self.AIR_DF, transformed_df_date_double], ignore_index=True
         )
         # DataFrames with value offset
         transformed_df_value = self.AIR_DF.copy(deep=True)
@@ -1195,21 +1195,21 @@ def setUp(self) -> None:
         transformed_df_date_multi[VALUE_COL_NAME + "_1"] = (
             transformed_df_date_multi.y * 2
         )
-        transformed_df_date_concat_multi = self.MULTIVAR_AIR_DF.append(
-            transformed_df_date_multi, ignore_index=True
+        transformed_df_date_concat_multi = pd.concat(
+            [self.MULTIVAR_AIR_DF, transformed_df_date_multi], ignore_index=True
         )
-        transformed_df_date_concat_mixed = self.MULTIVAR_AIR_DF_DATETIME.append(
-            transformed_df_date
+        transformed_df_date_concat_mixed = pd.concat(
+            [self.MULTIVAR_AIR_DF_DATETIME, transformed_df_date]
         )
         transformed_df_date_double_multi = transformed_df_date_double.copy(deep=True)
         transformed_df_date_double_multi[VALUE_COL_NAME + "_1"] = (
             transformed_df_date_double_multi.y * 2
         )
-        transformed_df_date_concat_double_multi = self.MULTIVAR_AIR_DF.append(
-            transformed_df_date_double_multi, ignore_index=True
+        transformed_df_date_concat_double_multi = pd.concat(
+            [self.MULTIVAR_AIR_DF, transformed_df_date_double_multi], ignore_index=True
        )
-        transformed_df_date_concat_double_mixed = self.MULTIVAR_AIR_DF_DATETIME.append(
-            transformed_df_date_double
+        transformed_df_date_concat_double_mixed = pd.concat(
+            [self.MULTIVAR_AIR_DF_DATETIME, transformed_df_date_double]
         )
         # DataFrame with value offset (multivariate)
         transformed_df_value_none_multi = self.MULTIVAR_AIR_DF.copy(deep=True)
6 changes: 3 additions & 3 deletions kats/tests/utils/test_feature_engineering.py
@@ -78,7 +78,7 @@ def test_datetime_features(self) -> None:
             index=dates,
         )
         result = fe.datetime_features(pd.Series(values, index=dates, name="val"))
-        assert_frame_equal(expected, result)
+        assert_frame_equal(expected, result, check_like=True, check_dtype=False)

     def test_date_features(self) -> None:
         dates = pd.date_range("2021-01-22", "2021-01-31", tz="US/Pacific").tolist()
@@ -109,7 +109,7 @@ def test_date_features(self) -> None:
             index=dates,
         )
         result = fe.date_features(pd.Series(values, index=dates, name="val"))
-        assert_frame_equal(expected, result)
+        assert_frame_equal(expected, result, check_like=True, check_dtype=False)

     def test_time_features(self) -> None:
         dates = pd.date_range("2021-01-22", "2021-01-31", tz="US/Pacific").tolist()
@@ -134,7 +134,7 @@ def test_time_features(self) -> None:
             index=dates,
         )
         result = fe.time_features(pd.Series(values, index=dates, name="val"))
-        assert_frame_equal(expected, result)
+        assert_frame_equal(expected, result, check_like=True, check_dtype=False)

     def test_timestamp_time_features(self) -> None:
         t = pd.Timestamp("2021-01-01T02:03:04.5678", tz="US/Pacific")
2 changes: 1 addition & 1 deletion kats/utils/decomposition.py
@@ -188,7 +188,7 @@ def __decompose_STL(self, original: pd.DataFrame) -> Dict[str, pd.DataFrame]:

         # pyre-fixme[16]: Module `seasonal` has no attribute `STL`.
         result = STL(
-            data,
+            data.squeeze(),
             period=period,
             seasonal=self.seasonal,
             trend=self.trend,
