# ============================================
# 1. Imports
# ============================================
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
# ============================================
# 2. Parameters
# ============================================
# Symbols requested from yfinance.
TICKERS = ["AAPL", "MSFT", "GOOGL", "AMZN", "META"]

# Date window passed to yf.download(start=..., end=...).
START_DATE = "2015-01-01"
END_DATE = "2024-01-01"

# Rows dated on or before TRAIN_END are used for training; later rows for testing.
TRAIN_END = "2020-12-31"

# Cost charged per unit of position change in the backtest.
TRANSACTION_COST = 0.001  # 0.1%

# ============================================
# 3. Download Data
# ============================================
def download_data(tickers, start=None, end=None):
    """Download price data with yfinance and stack it into one long DataFrame.

    Args:
        tickers: A single ticker symbol or an iterable of ticker symbols.
        start: Start date passed to ``yf.download``; defaults to ``START_DATE``.
        end: End date passed to ``yf.download``; defaults to ``END_DATE``.

    Returns:
        A DataFrame with a ``date`` column, whatever per-ticker columns
        yfinance returned, and a ``ticker`` column; one row per
        (ticker, date), concatenated ticker after ticker.

    Raises:
        ValueError: If no rows were obtained for any requested ticker.
    """
    import warnings

    start = START_DATE if start is None else start
    end = END_DATE if end is None else end
    # A bare string would otherwise be iterated character by character below.
    symbols = [tickers] if isinstance(tickers, str) else list(tickers)

    data = yf.download(symbols, start=start, end=end, group_by="ticker")

    frames = []
    missing = []
    for symbol in symbols:
        if isinstance(data.columns, pd.MultiIndex):
            if symbol not in data.columns.get_level_values(0):
                missing.append(symbol)
                continue
            frame = data[symbol]
        else:
            # NOTE(review): some yfinance versions appear to return flat
            # (non-MultiIndex) columns for a single ticker -- confirm against
            # the installed version.
            frame = data

        # Rows that are NaN in every column hold no quote for this ticker.
        frame = frame.dropna(how="all").copy()
        if frame.empty:
            missing.append(symbol)
            continue

        frame["ticker"] = symbol
        frames.append(frame)

    if not frames:
        raise ValueError(f"No price data downloaded for {symbols!r}")
    if missing:
        warnings.warn(f"No price data for tickers: {missing!r}")

    stacked = pd.concat(frames)
    stacked.index.name = "date"
    return stacked.reset_index()


df = download_data(TICKERS)

# ============================================
# 4. Sort (IMPORTANT)
# ============================================
# The per-ticker pct_change / rolling / shift steps that follow operate on
# row order, so rows must be chronological within each ticker.
df = df.sort_values(["ticker", "date"])

# ============================================
# 5. Feature Engineering (NO APPLY)
# ============================================
# All features are computed per ticker so values never mix across symbols.
close_by_ticker = df.groupby("ticker")["Close"]

# Trailing 1-day and 5-day percentage change of the close.
df["return_1d"] = close_by_ticker.pct_change()
df["return_5d"] = close_by_ticker.pct_change(5)

# Trailing 5- and 10-row moving averages of the close.
df["ma_5"] = close_by_ticker.transform(lambda x: x.rolling(5).mean())
df["ma_10"] = close_by_ticker.transform(lambda x: x.rolling(10).mean())

# Rolling 5-row standard deviation of the 1-day return.
df["volatility_5d"] = (
    df.groupby("ticker")["return_1d"]
    .transform(lambda x: x.rolling(5).std())
)

# Row-over-row percentage change in volume.
df["volume_change"] = df.groupby("ticker")["Volume"].pct_change()

# Close relative to its own 5-row moving average.
df["price_ma5_ratio"] = df["Close"] / df["ma_5"]

# ============================================
# 6. Labels (SAFE)
# ============================================
# Next row's close *within the same ticker*. Shifting inside the groupby
# guarantees the last row of each ticker gets NaN instead of borrowing a
# value from the first row of the following ticker.
next_close = df.groupby("ticker")["Close"].shift(-1)

# Return from this row's close to the next row's close.
df["future_return"] = next_close / df["Close"] - 1

# 1 when the next-row return is positive, else 0. Rows whose future_return is
# NaN evaluate to 0 here; they still carry the NaN in future_return.
df["target"] = (df["future_return"] > 0).astype(int)

# ============================================
# 7. Clean Data
# ============================================
# pct_change divides by the previous value, so a zero (e.g. zero volume)
# produces +/-inf. dropna() does not remove infinities, so convert them to
# NaN first and let the same dropna() discard those rows.
numeric_cols = df.select_dtypes(include="number").columns
df[numeric_cols] = df[numeric_cols].replace([np.inf, -np.inf], np.nan)

# Drop every row with a missing value in any column and renumber the index.
df = df.dropna().reset_index(drop=True)

# ============================================
# 8. Train/Test Split
# ============================================
# Chronological split: nothing dated after TRAIN_END is used for fitting.
train = df[df["date"] <= TRAIN_END]
test = df[df["date"] > TRAIN_END]

if train.empty or test.empty:
    raise ValueError(
        f"Empty split around TRAIN_END={TRAIN_END!r}: "
        f"{len(train)} training rows, {len(test)} test rows"
    )

# NOTE(review): ma_5 and ma_10 are rolling means of the raw Close, so their
# scale differs from ticker to ticker -- confirm price-level inputs are intended.
FEATURES = [
    "return_1d",
    "return_5d",
    "ma_5",
    "ma_10",
    "volatility_5d",
    "volume_change",
    "price_ma5_ratio",
]

X_train = train[FEATURES]
y_train = train["target"]

X_test = test[FEATURES]
y_test = test["target"]

# ============================================
# 9. Scaling
# ============================================
# The scaler is fitted on the training rows only and then applied unchanged
# to the test rows, so no test-set statistics enter the transformation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# ============================================
# 10. Train Model
# ============================================
# Fixed random_state keeps the fitted forest reproducible between runs.
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=42,
)

model.fit(X_train, y_train)

# ============================================
# 11. Predictions
# ============================================
# One class prediction per test row, in the same row order as `test`.
preds = model.predict(X_test)

accuracy = accuracy_score(y_test, preds)
print(f"Test Accuracy: {accuracy:.4f}")

# ============================================
# 12. Backtest
# ============================================
test = test.copy()
test["prediction"] = preds

# Avoid lookahead bias: lag the signal by one row within each ticker, so the
# position applied to a row's future_return comes from the previous row's
# prediction.
test["prediction"] = test.groupby("ticker")["prediction"].shift(1)

# The first row of each ticker has no lagged signal and cannot be traded.
test = test.dropna(subset=["prediction"])

# Strategy returns: earn the next-row return only while holding a position.
test["strategy_return"] = test["future_return"] * test["prediction"]

# Transaction costs: charged on every change in position. The position before
# the first tradable row is taken as flat (0), so the initial entry is charged
# too instead of becoming NaN and being dropped.
previous_position = test.groupby("ticker")["prediction"].shift(1).fillna(0.0)
test["position_change"] = (test["prediction"] - previous_position).abs()
test["transaction_cost"] = test["position_change"] * TRANSACTION_COST

test["strategy_return"] -= test["transaction_cost"]

# Drop any remaining rows with missing values.
test = test.dropna()

# ============================================
# 13. Performance
# ============================================
# `test` holds one row per (ticker, date), stacked ticker after ticker.
# Compounding those rows directly would chain one ticker's whole history onto
# the next, so first average across tickers per date (equal-weight portfolio)
# and compound the resulting daily series.
daily_returns = test.groupby("date")[["future_return", "strategy_return"]].mean()
equity_curves = (1 + daily_returns).cumprod()

# Keep the per-row columns available, now holding the portfolio curve value
# for each row's date.
test["cum_market"] = test["date"].map(equity_curves["future_return"])
test["cum_strategy"] = test["date"].map(equity_curves["strategy_return"])

# Annualised Sharpe ratio of the daily portfolio strategy return
# (252 periods per year, no risk-free rate subtracted).
strategy_daily = daily_returns["strategy_return"]
strategy_std = strategy_daily.std()
if strategy_std > 0:
    sharpe = np.sqrt(252) * strategy_daily.mean() / strategy_std
else:
    # Zero or undefined dispersion (e.g. the strategy never traded).
    sharpe = float("nan")
print(f"Sharpe Ratio: {sharpe:.2f}")

# ============================================
# 14. Plot
# ============================================
plt.figure(figsize=(10, 6))
plt.plot(equity_curves.index, equity_curves["future_return"], label="Market")
plt.plot(equity_curves.index, equity_curves["strategy_return"], label="Strategy")
plt.legend()
plt.title("Strategy vs Market")
plt.xlabel("Date")
plt.ylabel("Cumulative Return")
plt.grid()

# The figure is written to disk rather than shown interactively.
plt.savefig("strategy.png", dpi=150)
print("Plot saved as strategy.png")