"""Train a random-forest up/down classifier on daily stock data and backtest it.

The script downloads daily bars with yfinance, derives per-ticker features,
fits a RandomForestClassifier on the rows up to TRAIN_END, evaluates it on the
later rows, and saves a cumulative-return chart.
"""
# ============================================
# 1. Imports
# ============================================
import numpy as np
import pandas as pd
import yfinance as yf

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt

# ============================================
# 2. Parameters
# ============================================
TICKERS = ["AAPL", "MSFT", "GOOGL", "AMZN", "META"]
START_DATE = "2015-01-01"
END_DATE = "2024-01-01"
TRAIN_END = "2020-12-31"  # rows dated on or before this go to the training set

TRANSACTION_COST = 0.001  # 0.1%


# ============================================
# 3. Download Data
# ============================================
def download_data(tickers, start=START_DATE, end=END_DATE):
    """Download daily bars for *tickers* and stack them into one long frame.

    Args:
        tickers: Iterable of ticker symbols. A single symbol given as a plain
            string is accepted and treated as a one-element list.
        start: First date passed to ``yf.download``. Defaults to START_DATE.
        end: End date passed to ``yf.download``. Defaults to END_DATE.

    Returns:
        A DataFrame with a ``date`` column, the price/volume columns returned
        by yfinance, and a ``ticker`` column; one row per ticker per date,
        tickers stacked in the order given.

    Raises:
        ValueError: If *tickers* is empty.
    """
    # Iterating a bare string would yield single characters, not symbols.
    if isinstance(tickers, str):
        tickers = [tickers]
    tickers = list(tickers)
    if not tickers:
        raise ValueError("download_data() needs at least one ticker")

    # NOTE(review): assumes yfinance returns columns grouped by ticker
    # (MultiIndex) for this call, including for a one-element list - confirm
    # against the pinned yfinance version.
    data = yf.download(tickers, start=start, end=end, group_by="ticker")

    frames = []
    for ticker in tickers:
        # Rows where every field is NaN carry no bar for this ticker (for
        # example a failed download, or a date only other tickers traded on),
        # so drop them before tagging the rows with the symbol.
        frame = data[ticker].dropna(how="all").copy()
        frame["ticker"] = ticker
        frames.append(frame)

    combined = pd.concat(frames)
    combined.index.name = "date"
    return combined.reset_index()


df = download_data(TICKERS)

# ============================================
# 4. Sort (IMPORTANT)
# ============================================
# The grouped pct_change/rolling/shift calls further down work on row order,
# so each ticker's rows must be in chronological order.
df = df.sort_values(["ticker", "date"])

# ============================================
# 5.
# Feature Engineering (NO APPLY)
# ============================================
# Every feature is computed inside a groupby on "ticker" so that no lag or
# rolling window reaches across the boundary between two stacked tickers.
df["return_1d"] = df.groupby("ticker")["Close"].pct_change()
df["return_5d"] = df.groupby("ticker")["Close"].pct_change(5)

df["ma_5"] = df.groupby("ticker")["Close"].transform(lambda x: x.rolling(5).mean())
df["ma_10"] = df.groupby("ticker")["Close"].transform(lambda x: x.rolling(10).mean())

df["volatility_5d"] = (
    df.groupby("ticker")["return_1d"]
    .transform(lambda x: x.rolling(5).std())
)

df["volume_change"] = df.groupby("ticker")["Volume"].pct_change()

df["price_ma5_ratio"] = df["Close"] / df["ma_5"]

# ============================================
# 6. Labels (SAFE)
# ============================================
# Next row's 1-day return for the same ticker. Shifting inside the groupby
# makes the last row of every ticker NaN by construction, instead of relying
# on the first row of the following ticker happening to be NaN.
df["future_return"] = df.groupby("ticker")["return_1d"].shift(-1)
# Rows whose future_return is NaN get a 0 here but are removed in step 7.
df["target"] = (df["future_return"] > 0).astype(int)

# ============================================
# 7. Clean Data
# ============================================
# pct_change produces +/-inf when the previous value is 0 (e.g. a zero-volume
# bar). dropna() keeps infinities and StandardScaler refuses them, so turn
# them into NaN first and let dropna() discard those rows too.
numeric_cols = df.select_dtypes(include="number").columns
df[numeric_cols] = df[numeric_cols].replace([np.inf, -np.inf], np.nan)
df = df.dropna().reset_index(drop=True)

# ============================================
# 8. Train/Test Split
# ============================================
# Chronological split: everything up to TRAIN_END trains, the rest tests.
train = df[df["date"] <= TRAIN_END]
test = df[df["date"] > TRAIN_END]

if train.empty or test.empty:
    raise ValueError(
        f"Train/test split at {TRAIN_END} left an empty set "
        f"(train={len(train)} rows, test={len(test)} rows)"
    )

FEATURES = [
    "return_1d",
    "return_5d",
    "ma_5",
    "ma_10",
    "volatility_5d",
    "volume_change",
    "price_ma5_ratio",
]

X_train = train[FEATURES]
y_train = train["target"]

X_test = test[FEATURES]
y_test = test["target"]

# ============================================
# 9. Scaling
# ============================================
# Fit the scaler on the training rows only, then apply the same transform to
# the test rows so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# ============================================
# 10. Train Model
# ============================================
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=42,  # fixed seed for reproducible fits
)

model.fit(X_train, y_train)

# ============================================
# 11.
# Predictions
# ============================================
preds = model.predict(X_test)

accuracy = accuracy_score(y_test, preds)
print(f"Test Accuracy: {accuracy:.4f}")

# ============================================
# 12. Backtest
# ============================================
test = test.copy()
test["prediction"] = preds

# Lag the signal by one bar within each ticker: the position held on a row is
# the prediction made on the previous row.
# NOTE(review): future_return is already the return realised after the row,
# so this lag is an extra, conservative execution delay rather than something
# required to avoid lookahead - confirm that this is the intended timing.
test["prediction"] = test.groupby("ticker")["prediction"].shift(1)

# Strategy returns: long (1) or flat (0) on the following return.
test["strategy_return"] = test["future_return"] * test["prediction"]

# Transaction costs: charged whenever the position flips between 0 and 1.
test["position_change"] = (
    test.groupby("ticker")["prediction"].diff().abs()
)
test["transaction_cost"] = test["position_change"] * TRANSACTION_COST

test["strategy_return"] -= test["transaction_cost"]

# The shift and the diff leave NaN in the first rows of every ticker.
test = test.dropna()

# ============================================
# 13. Performance
# ============================================
# `test` stacks the tickers one after another, so compounding it row by row
# would chain one ticker's whole history into the next. Collapse it to one
# equal-weighted return per date first, then compound along time.
trades = test  # per-ticker rows, kept available under their own name
daily = (
    trades.groupby("date")[["future_return", "strategy_return"]]
    .mean()
    .sort_index()
)
daily["cum_market"] = (1 + daily["future_return"]).cumprod()
daily["cum_strategy"] = (1 + daily["strategy_return"]).cumprod()

# Annualise with sqrt(252) on the daily portfolio series; a missing or zero
# standard deviation (fewer than two days, or a constant series) has no
# meaningful Sharpe ratio, so report NaN instead of dividing by it.
daily_std = daily["strategy_return"].std()
if np.isfinite(daily_std) and daily_std > 0:
    sharpe = np.sqrt(252) * daily["strategy_return"].mean() / daily_std
else:
    sharpe = float("nan")
print(f"Sharpe Ratio: {sharpe:.2f}")

# The plotting section reads test["date"], test["cum_market"] and
# test["cum_strategy"]; hand it the per-date frame so each date appears once.
test = daily.reset_index()

# ============================================
# 14.
# Plot
# ============================================
# Draw both cumulative-return curves on one axis and write the chart to disk.
fig, ax = plt.subplots(figsize=(10, 6))
for column, label in (("cum_market", "Market"), ("cum_strategy", "Strategy")):
    ax.plot(test["date"], test[column], label=label)
ax.legend()
ax.set_title("Strategy vs Market")
ax.set_xlabel("Date")
ax.set_ylabel("Cumulative Return")
ax.grid()
# Saved to a file rather than shown in a window.
fig.savefig("strategy.png", dpi=150)
print("Plot saved as strategy.png")

# --------------------------------------------
# requirements.txt (second file in the same patch), pins kept verbatim:
# --------------------------------------------
#   beautifulsoup4==4.14.3
#   certifi==2026.2.25
#   cffi==2.0.0
#   charset-normalizer==3.4.6
#   contourpy==1.3.3
#   curl_cffi==0.13.0
#   cycler==0.12.1
#   fonttools==4.62.1
#   frozendict==2.4.7
#   idna==3.11
#   joblib==1.5.3
#   kiwisolver==1.5.0
#   lxml==6.0.2
#   matplotlib==3.10.8
#   multitasking==0.0.12
#   numpy==2.4.3
#   packaging==26.0
#   pandas==3.0.1
#   peewee==4.0.2
#   pillow==12.1.1
#   platformdirs==4.9.4
#   protobuf==7.34.1
#   pycparser==3.0
#   pyparsing==3.3.2
#   python-dateutil==2.9.0.post0
#   pytz==2026.1.post1
#   requests==2.32.5
#   scikit-learn==1.8.0
#   scipy==1.17.1
#   six==1.17.0
#   soupsieve==2.8.3
#   threadpoolctl==3.6.0
#   typing_extensions==4.15.0
#   urllib3==2.6.3
#   websockets==16.0
#   yfinance==1.2.0