resolve dependencies; re-run topic 9 notebooks (#787)

Co-authored-by: Yury Kashnitsky <[email protected]>
Yorko · Jan 6, 2025 · 5f72a0f
1 parent fd89891
commit 5f72a0f
Show file tree

Hide file tree

Showing 7 changed files with 830 additions and 175 deletions.
diff --git a/jupyter_english/topic09_time_series/topic9_part1_time_series_python.ipynb b/jupyter_english/topic09_time_series/topic9_part1_time_series_python.ipynb
diff --git a/mlcourse_ai_jupyter_book/book/topic09/assignment09_time_series.md b/mlcourse_ai_jupyter_book/book/topic09/assignment09_time_series.md
@@ -77,6 +77,7 @@ DATA_PATH = "https://raw.githubusercontent.com/Yorko/mlcourse.ai/main/data/"
 df = pd.read_csv(DATA_PATH + "wiki_machine_learning.csv", sep=" ")
 df = df[df["count"] != 0]
 df.head()
+
 ```
 
 

diff --git a/mlcourse_ai_jupyter_book/book/topic09/assignment09_time_series_solution.md b/mlcourse_ai_jupyter_book/book/topic09/assignment09_time_series_solution.md
@@ -119,7 +119,7 @@ train_df = df[:-predictions].copy()
 
 ```{code-cell} ipython3
 m = Prophet()
-m.fit(train_df);
+m.fit(train_df)
 ```
 
 

diff --git a/mlcourse_ai_jupyter_book/book/topic09/topic9_part1_time_series_python.md b/mlcourse_ai_jupyter_book/book/topic09/topic9_part1_time_series_python.md
@@ -87,7 +87,7 @@ currency = pd.read_csv(
 ```
 
 ```{code-cell} ipython3
-plt.figure(figsize=(12, 6))
+plt.figure(figsize=(12, 8))
 plt.plot(ads.Ads)
 plt.title("Ads watched (hourly data)")
 plt.grid(True)
@@ -1099,7 +1099,6 @@ def plotSARIMA(series, model, n_steps):
 
     # forecasting on n_steps forward
     forecast = model.predict(start=data.shape[0], end=data.shape[0] + n_steps)
-    forecast = data.arima_model.append(forecast)
     # calculate error, again having shifted on s+d steps from the beginning
     error = mean_absolute_percentage_error(
         data["actual"][s + d :], data["arima_model"][s + d :]
@@ -1530,7 +1529,7 @@ plotModelResults(
     X_train=X_train_scaled,
     X_test=X_test_scaled,
     plot_intervals=True,
-    plot_anomalies=True,
+    plot_anomalies=True
 )
 ```
 

diff --git a/mlcourse_ai_jupyter_book/book/topic09/topic9_part2_facebook_prophet.md b/mlcourse_ai_jupyter_book/book/topic09/topic9_part2_facebook_prophet.md
@@ -216,7 +216,7 @@ Let's sort the dataframe by time and take a look at what we've got:
 
 
 ```{code-cell} ipython3
-df.sort_values(by=["published"]).head(n=3)
+df.sort_values(by=["published"]).head(n=2)
 ```
 
 Medium's public release date was August 15, 2012. However, as you can see from the data above, there are at least several rows with much earlier publication dates. They have somehow ended up in our dataset, but they are hardly legitimate. We will simply trim our time series to keep only the rows that fall within the period from August 15, 2012 to June 25, 2017:

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -23,13 +23,13 @@ pydotplus = "^2.0.2"
 pytesseract = "^0.3.10"
 reverse-geocoder = "^1.5.1"
 seaborn = "^0.13.2"
-scikit-learn = "^1.6.0"
+scikit-learn = "1.5.2"
 scipy = "^1.15.0"
 statsmodels = "^0.14.4"
 ua-parser = "^0.16.1"
 user_agents = "^2.2.0"
 wordcloud = "^1.9.1"
-xgboost = "^2.1.3"
+xgboost = "2.1.0"
 
 [tool.poetry.dev-dependencies]
 black = {version = "^21.12b0", allow-prereleases = true}
-Original file line number
+Diff line change
@@ Expand Up @@
     df = pd.read_csv(DATA_PATH + "wiki_machine_learning.csv", sep=" ")
     df = df[df["count"] != 0]
     df.head()
     ```
@@ Expand Down @@