555 lines
20 KiB
Python
555 lines
20 KiB
Python
"""Historical backtester for the temporal arbitrage strategy.

Supports two modes:
  1. Synthetic: Random walk price simulation for quick parameter testing.
  2. Historical: Real Binance kline data for realistic backtesting.

Usage:
    python backtest.py --mode synthetic --windows 2000
    python backtest.py --mode historical --asset BTC --days 7
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import asyncio
|
|
import json
|
|
import math
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timezone, timedelta
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
from src.config import load_config
|
|
from src.data.models import Asset, Direction, Signal, Timeframe
|
|
from src.risk.fee_calculator import FeeCalculator
|
|
from src.strategy.temporal_arb import TemporalArbStrategy
|
|
|
|
import structlog
|
|
|
|
# Module-level structlog logger; used below to report kline fetch errors and counts.
log = structlog.get_logger()
|
|
|
|
|
|
@dataclass
class BacktestTrade:
    """Record of one simulated trade and how it settled."""

    # --- where the trade was taken ---
    window_idx: int
    asset: str
    timeframe: str

    # --- what was bought ---
    direction: str
    entry_price: float
    size: int

    # --- estimates copied from the strategy signal ---
    edge: float
    estimated_prob: float

    # --- settlement ---
    won: bool
    pnl: float
    fee: float

    # Stays at 0.0 unless the producer of the trade supplies a value.
    timestamp: float = 0.0
|
|
|
|
|
|
@dataclass
class BacktestResult:
    """Aggregated statistics for a single backtest run.

    The plain fields are counters/totals that the runner updates as trades
    are recorded. The properties derive summary metrics from those fields
    and from the per-trade ``trades`` list.
    """

    mode: str = "synthetic"
    asset: str = ""
    timeframe: str = ""
    total_windows: int = 0
    total_trades: int = 0
    wins: int = 0
    losses: int = 0
    total_pnl: float = 0.0
    total_fees: float = 0.0
    total_volume: float = 0.0
    max_drawdown: float = 0.0
    best_trade: float = 0.0
    worst_trade: float = 0.0
    peak_balance: float = 0.0
    trades: list[BacktestTrade] = field(default_factory=list)

    @property
    def win_rate(self) -> float:
        """Winning trades as a percentage (0-100); 0.0 when no trades exist."""
        if self.total_trades <= 0:
            return 0.0
        return self.wins / self.total_trades * 100

    @property
    def avg_pnl(self) -> float:
        """Mean PnL per trade; 0.0 when no trades exist."""
        if self.total_trades <= 0:
            return 0.0
        return self.total_pnl / self.total_trades

    @property
    def profit_factor(self) -> float:
        """Gross winning PnL divided by gross losing PnL.

        Returns ``float("inf")`` when there is no losing PnL, which includes
        the case of an empty trade list.
        """
        gross_wins = sum(t.pnl for t in self.trades if t.pnl > 0)
        gross_losses = abs(sum(t.pnl for t in self.trades if t.pnl < 0))
        return gross_wins / gross_losses if gross_losses > 0 else float("inf")

    @property
    def sharpe_ratio(self) -> float:
        """Sharpe-like ratio computed from per-trade PnLs.

        This is mean PnL over the population standard deviation of PnL,
        scaled by ``sqrt(number of trades)``. No time basis is involved, so
        the figure is not annualized. Returns 0.0 with fewer than two
        trades.
        """
        if len(self.trades) < 2:
            return 0.0
        pnls = [t.pnl for t in self.trades]
        avg = sum(pnls) / len(pnls)
        variance = sum((p - avg) ** 2 for p in pnls) / len(pnls)
        # Floor the deviation so identical PnLs do not divide by zero.
        std = math.sqrt(variance) if variance > 0 else 1e-9
        return avg / std * math.sqrt(len(pnls))

    @property
    def max_consecutive_losses(self) -> int:
        """Length of the longest run of losing trades, in recorded order."""
        max_streak = 0
        current = 0
        for t in self.trades:
            if not t.won:
                current += 1
                max_streak = max(max_streak, current)
            else:
                current = 0
        return max_streak

    @staticmethod
    def _rounded_or_none(value: float) -> Optional[float]:
        """Round to two decimals, mapping inf/NaN to ``None``.

        ``json.dumps`` would otherwise write ``Infinity``/``NaN``, which are
        not valid JSON tokens.
        """
        return round(value, 2) if math.isfinite(value) else None

    def to_dict(self) -> dict:
        """Return the summary metrics as a JSON-serializable dict.

        Monetary and ratio values are rounded to two decimals. The ratio
        metrics ``profit_factor`` and ``sharpe_ratio`` become ``None`` when
        they are not finite (e.g. profit factor with no losing trades). The
        per-trade list is not included.
        """
        return {
            "mode": self.mode,
            "asset": self.asset,
            "timeframe": self.timeframe,
            "total_windows": self.total_windows,
            "total_trades": self.total_trades,
            "wins": self.wins,
            "losses": self.losses,
            "win_rate": round(self.win_rate, 2),
            "total_pnl": round(self.total_pnl, 2),
            "avg_pnl": round(self.avg_pnl, 2),
            "total_fees": round(self.total_fees, 2),
            "total_volume": round(self.total_volume, 2),
            "profit_factor": self._rounded_or_none(self.profit_factor),
            "sharpe_ratio": self._rounded_or_none(self.sharpe_ratio),
            "max_drawdown": round(self.max_drawdown, 2),
            "best_trade": round(self.best_trade, 2),
            "worst_trade": round(self.worst_trade, 2),
            "max_consecutive_losses": self.max_consecutive_losses,
        }
|
|
|
|
|
|
class Backtester:
    """Replay historical or synthetic data through the temporal arb strategy.

    Both runners walk fixed-length windows, ask the strategy for a signal
    against simulated Polymarket ask prices, settle any resulting trade via
    the fee calculator and accumulate statistics into a ``BacktestResult``.
    """

    def __init__(self, config_path: str = "config.toml", balance: float = 10000.0) -> None:
        """Load configuration and build the fee calculator.

        Args:
            config_path: Path passed to ``load_config``.
            balance: Starting balance used for every run of this instance.
        """
        self.config = load_config(config_path)
        self.initial_balance = balance
        self.fee_calc = FeeCalculator(self.config.fees)

    def _make_strategy(self, balance: float) -> TemporalArbStrategy:
        """Build a fresh strategy instance from the loaded config sections."""
        return TemporalArbStrategy(
            arb_config=self.config.temporal_arb,
            risk_config=self.config.risk,
            fees_config=self.config.fees,
            balance=balance,
        )

    @staticmethod
    def _window_seconds(timeframe: str) -> int:
        """Return the window length in seconds: 300 for "5M", otherwise 900."""
        return 300 if timeframe == "5M" else 900

    @staticmethod
    def _lagged_asks(
        change_pct: float,
        lag: float,
        cap: float,
        floor: float,
        spread: float,
    ) -> tuple[float, float]:
        """Return simulated ``(up_ask, down_ask)`` for a market that has moved.

        The side matching the sign of ``change_pct`` (a percentage) is priced
        at ``0.50 + |change_pct| * lag`` capped at ``cap``; the other side is
        the complement less ``spread``, floored at ``floor``.
        """
        leading = min(0.50 + abs(change_pct) * lag, cap)
        trailing = max(floor, 1.0 - leading - spread)
        return (leading, trailing) if change_pct > 0 else (trailing, leading)

    def _record_trade(
        self,
        result: BacktestResult,
        signal: Signal,
        actual_direction: str,
        balance: float,
        peak_balance: float,
        window_idx: int,
        timestamp: float = 0.0,
    ) -> tuple[float, float]:
        """Settle one signal against the realized direction and update ``result``.

        Args:
            result: Accumulator to update; its ``asset`` and ``timeframe``
                are copied onto the trade and used for fee lookup.
            signal: Strategy signal that triggered the trade.
            actual_direction: "UP" or "DOWN", the realized window outcome.
            balance: Account balance before this trade.
            peak_balance: Highest balance seen so far in this run.
            window_idx: Index of the window the trade belongs to.
            timestamp: Value stored on the trade record.

        Returns:
            ``(balance, peak_balance)`` after applying the trade's PnL.
        """
        won = (signal.direction == Direction.UP and actual_direction == "UP") or (
            signal.direction == Direction.DOWN and actual_direction == "DOWN"
        )

        pnl = self.fee_calc.net_payout(
            timeframe=result.timeframe,
            entry_price=signal.price,
            size=signal.size,
            won=won,
        )
        # NOTE(review): the fee is only reported for winning trades; confirm
        # this matches how FeeCalculator.net_payout charges losing trades.
        fee = self.fee_calc.taker_fee(result.timeframe, signal.price, signal.size) if won else 0

        balance += pnl
        peak_balance = max(peak_balance, balance)
        drawdown = peak_balance - balance

        result.trades.append(
            BacktestTrade(
                window_idx=window_idx,
                asset=result.asset,
                timeframe=result.timeframe,
                direction=signal.direction.value,
                entry_price=signal.price,
                size=signal.size,
                edge=signal.edge,
                estimated_prob=signal.estimated_prob,
                won=won,
                pnl=pnl,
                fee=fee,
                timestamp=timestamp,
            )
        )
        result.total_trades += 1
        result.total_pnl += pnl
        result.total_fees += fee
        result.total_volume += signal.price * signal.size
        if won:
            result.wins += 1
        else:
            result.losses += 1

        if result.total_trades == 1:
            # Seed from the first real trade: starting from the 0.0 defaults
            # would report a $0 best trade for an all-losing run (and a $0
            # worst trade for an all-winning one).
            result.best_trade = pnl
            result.worst_trade = pnl
        else:
            result.best_trade = max(result.best_trade, pnl)
            result.worst_trade = min(result.worst_trade, pnl)
        result.max_drawdown = max(result.max_drawdown, drawdown)
        result.peak_balance = peak_balance

        return balance, peak_balance

    # ------------------------------------------------------------------
    # Synthetic backtest
    # ------------------------------------------------------------------

    async def run_synthetic(
        self,
        asset: str = "BTC",
        timeframe: str = "15M",
        num_windows: int = 1000,
        avg_volatility_pct: float = 0.3,
    ) -> BacktestResult:
        """Run a synthetic backtest using simulated price movements.

        Each window is a 100-tick random walk. The strategy is evaluated at
        30%, 50% and 70% of the window and at most one trade is taken per
        window. The random generator is seeded with 42, so repeated runs
        with the same arguments draw the same price paths.

        Args:
            asset: Symbol passed to the strategy; also selects the base price.
            timeframe: "5M" for 300-second windows; any other value is
                treated as 900-second windows.
            num_windows: Number of windows to simulate.
            avg_volatility_pct: Per-window volatility in percent, spread
                across the ticks of the walk.

        Returns:
            The populated ``BacktestResult``.
        """
        import numpy as np

        strategy = self._make_strategy(self.initial_balance)
        result = BacktestResult(
            mode="synthetic",
            asset=asset,
            timeframe=timeframe,
            total_windows=num_windows,
        )
        balance = self.initial_balance
        peak_balance = balance
        window_sec = self._window_seconds(timeframe)

        rng = np.random.default_rng(42)

        base_prices = {"BTC": 84000, "ETH": 2300, "SOL": 135}
        base_price = base_prices.get(asset, 50000)

        num_ticks = 100
        tick_sigma = avg_volatility_pct / 100 / math.sqrt(num_ticks)

        for i in range(num_windows):
            # The order of RNG draws below is part of the seeded behavior;
            # keep it stable so runs stay reproducible.
            start_price = base_price * (1 + rng.normal(0, 0.02))
            returns = rng.normal(0, tick_sigma, num_ticks)
            prices = [start_price]
            for r in returns:
                prices.append(prices[-1] * (1 + r))

            end_price = prices[-1]
            actual_direction = "UP" if end_price > start_price else "DOWN"

            # Evaluate at multiple points in the window
            for eval_frac in [0.3, 0.5, 0.7]:
                eval_price = prices[int(num_ticks * eval_frac)]
                time_remaining = window_sec * (1 - eval_frac)
                change_pct = (eval_price - start_price) / start_price * 100

                # Simulate Polymarket price (lagging behind reality)
                if abs(change_pct) > 0.05:
                    # Polymarket adjusts slowly behind CEX
                    sim_poly_up, sim_poly_down = self._lagged_asks(
                        change_pct,
                        lag=0.8,
                        cap=0.70,
                        floor=0.30,
                        spread=rng.uniform(0, 0.04),
                    )
                else:
                    sim_poly_up = 0.50 + rng.uniform(-0.02, 0.02)
                    sim_poly_down = 0.50 + rng.uniform(-0.02, 0.02)

                strategy.update_balance(balance)
                signal = await strategy.evaluate(
                    symbol=asset,
                    cex_price=eval_price,
                    window_start_price=start_price,
                    window_end_time=time.time() + time_remaining,
                    poly_up_ask=sim_poly_up,
                    poly_down_ask=sim_poly_down,
                    up_token_id=f"up_{i}",
                    down_token_id=f"down_{i}",
                    timeframe=timeframe,
                )
                if signal is None:
                    continue

                balance, peak_balance = self._record_trade(
                    result,
                    signal,
                    actual_direction,
                    balance,
                    peak_balance,
                    window_idx=i,
                )
                break  # Only take one trade per window

        return result

    # ------------------------------------------------------------------
    # Historical backtest (Binance klines)
    # ------------------------------------------------------------------

    async def fetch_binance_klines(
        self,
        symbol: str,
        interval: str = "1m",
        days: int = 7,
    ) -> list[dict]:
        """Fetch historical kline data from the Binance REST API.

        Pages through ``/api/v3/klines`` for ``{symbol}USDT`` in batches of
        up to 1000, from ``days`` ago until now. A non-200 response is
        logged and stops paging; whatever was collected so far is returned.

        Returns:
            A list of dicts with keys ``open_time``, ``open``, ``high``,
            ``low``, ``close``, ``volume`` and ``close_time``. Prices and
            volume are floats; the two time fields are passed through as
            received.
        """
        import aiohttp

        pair = f"{symbol}USDT"
        url = "https://api.binance.com/api/v3/klines"
        end_time = int(time.time() * 1000)
        start_time = end_time - (days * 24 * 60 * 60 * 1000)

        all_klines: list[dict] = []
        current_start = start_time

        async with aiohttp.ClientSession() as session:
            while current_start < end_time:
                params = {
                    "symbol": pair,
                    "interval": interval,
                    "startTime": current_start,
                    "endTime": end_time,
                    "limit": 1000,
                }
                async with session.get(url, params=params) as resp:
                    if resp.status != 200:
                        log.error("binance_klines_error", status=resp.status)
                        break
                    data = await resp.json()

                if not data:
                    break

                all_klines.extend(
                    {
                        "open_time": k[0],
                        "open": float(k[1]),
                        "high": float(k[2]),
                        "low": float(k[3]),
                        "close": float(k[4]),
                        "volume": float(k[5]),
                        "close_time": k[6],
                    }
                    for k in data
                )

                current_start = data[-1][6] + 1  # Next ms after last close
                await asyncio.sleep(0.1)  # Rate limiting

        log.info("klines_fetched", symbol=symbol, count=len(all_klines), days=days)
        return all_klines

    async def run_historical(
        self,
        asset: str = "BTC",
        timeframe: str = "15M",
        days: int = 7,
    ) -> BacktestResult:
        """Run a backtest using real Binance historical data.

        One-minute klines are grouped into consecutive, non-overlapping
        windows of 5 candles for "5M" and 15 candles for any other
        timeframe. Each window is evaluated once, at the close of its
        middle candle, against deterministically simulated Polymarket asks.

        Args:
            asset: Symbol to fetch (paired with USDT) and pass to the strategy.
            timeframe: "5M" or "15M".
            days: Days of history to fetch.

        Returns:
            The populated ``BacktestResult``; empty when no klines were
            fetched.
        """
        strategy = self._make_strategy(self.initial_balance)
        result = BacktestResult(
            mode="historical",
            asset=asset,
            timeframe=timeframe,
        )
        balance = self.initial_balance
        peak_balance = balance

        # Fetch 1-minute klines
        klines = await self.fetch_binance_klines(asset, interval="1m", days=days)
        if not klines:
            log.error("no_klines_data", asset=asset)
            return result

        window_sec = self._window_seconds(timeframe)
        window_minutes = window_sec // 60

        # Simulate evaluation at mid-point. The price used is the close of
        # candle ``mid_idx``, which is observed ``mid_idx + 1`` minutes into
        # the window, so the remaining time is measured from that instant.
        mid_idx = window_minutes // 2
        time_remaining = window_sec - (mid_idx + 1) * 60

        # Group klines into windows
        window_starts = range(0, len(klines) - window_minutes + 1, window_minutes)
        for window_idx, i in enumerate(window_starts):
            window_klines = klines[i:i + window_minutes]
            start_price = window_klines[0]["open"]
            end_price = window_klines[-1]["close"]
            actual_direction = "UP" if end_price > start_price else "DOWN"

            result.total_windows += 1

            mid_price = window_klines[mid_idx]["close"]
            change_pct = (mid_price - start_price) / start_price * 100

            # Simulate Polymarket prices based on actual market behavior
            # Conservative lag simulation for historical data
            if abs(change_pct) > 0.05:
                # Market adjusts at ~60% speed
                poly_up, poly_down = self._lagged_asks(
                    change_pct,
                    lag=0.6,
                    cap=0.68,
                    floor=0.32,
                    spread=0.02,
                )
            else:
                poly_up = 0.50
                poly_down = 0.50

            strategy.update_balance(balance)
            signal = await strategy.evaluate(
                symbol=asset,
                cex_price=mid_price,
                window_start_price=start_price,
                window_end_time=time.time() + time_remaining,
                poly_up_ask=poly_up,
                poly_down_ask=poly_down,
                up_token_id=f"hist_up_{window_idx}",
                down_token_id=f"hist_down_{window_idx}",
                timeframe=timeframe,
            )

            if signal:
                balance, peak_balance = self._record_trade(
                    result,
                    signal,
                    actual_direction,
                    balance,
                    peak_balance,
                    window_idx=window_idx,
                    timestamp=window_klines[0]["open_time"] / 1000,
                )

        return result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Output
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def print_results(result: BacktestResult) -> None:
    """Write a human-readable summary of one backtest result to stdout."""
    rule = "=" * 65
    report = [
        "\n" + rule,
        f" BACKTEST RESULTS — {result.asset} {result.timeframe} ({result.mode})",
        rule,
        f" Windows Tested: {result.total_windows}",
        f" Total Trades: {result.total_trades}",
        f" Wins / Losses: {result.wins} / {result.losses}",
        f" Win Rate: {result.win_rate:.1f}%",
        f" Total PnL: ${result.total_pnl:+,.2f}",
        f" Average PnL: ${result.avg_pnl:+,.2f}",
        f" Total Fees: ${result.total_fees:,.2f}",
        f" Total Volume: ${result.total_volume:,.0f}",
        f" Profit Factor: {result.profit_factor:.2f}",
        f" Sharpe Ratio: {result.sharpe_ratio:.2f}",
        f" Max Drawdown: ${result.max_drawdown:,.2f}",
        f" Best Trade: ${result.best_trade:+,.2f}",
        f" Worst Trade: ${result.worst_trade:+,.2f}",
        f" Max Consec. Losses: {result.max_consecutive_losses}",
        rule,
    ]
    # A single print of the joined lines yields the same bytes as one
    # print call per line.
    print("\n".join(report))
|
|
|
|
|
|
def save_results(results: list[BacktestResult], output_path: str = "backtest_results.json") -> None:
    """Serialize each result's ``to_dict()`` summary into one JSON file.

    The file at ``output_path`` is written as a JSON array indented by two
    spaces, and a confirmation line is printed afterwards.
    """
    summaries = []
    for item in results:
        summaries.append(item.to_dict())
    payload = json.dumps(summaries, indent=2)
    target = Path(output_path)
    target.write_text(payload)
    print(f"\nResults saved to {output_path}")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Entry point
|
|
# ---------------------------------------------------------------------------
|
|
|
|
async def main() -> None:
    """Parse CLI arguments, run the requested backtests and report results.

    Every asset/timeframe combination is run for each selected mode. Each
    result is printed as it completes; afterwards all results are written
    to the ``--output`` JSON file and a combined summary is printed.
    """
    parser = argparse.ArgumentParser(description="Polymarket Arb Bot Backtester")
    parser.add_argument("--mode", choices=["synthetic", "historical", "both"],
                        default="synthetic", help="Backtest mode")
    parser.add_argument("--asset", nargs="+", default=["BTC", "ETH", "SOL"],
                        help="Assets to backtest")
    parser.add_argument("--timeframe", nargs="+", default=["5M", "15M"],
                        help="Timeframes")
    parser.add_argument("--windows", type=int, default=1000,
                        help="Number of windows for synthetic mode")
    parser.add_argument("--days", type=int, default=7,
                        help="Days of history for historical mode")
    parser.add_argument("--balance", type=float, default=10000.0,
                        help="Starting balance")
    parser.add_argument("--output", default="backtest_results.json",
                        help="Output JSON file")

    args = parser.parse_args()
    bt = Backtester(balance=args.balance)
    all_results = []

    if args.mode in ("synthetic", "both"):
        print("\n>>> SYNTHETIC BACKTEST <<<\n")
        # Per-asset volatility (percent) for the random walk; assets not
        # listed fall back to 0.3.
        vol_map = {"BTC": 0.3, "ETH": 0.4, "SOL": 0.6}
        for asset in args.asset:
            for tf in args.timeframe:
                result = await bt.run_synthetic(
                    asset=asset,
                    timeframe=tf,
                    num_windows=args.windows,
                    avg_volatility_pct=vol_map.get(asset, 0.3),
                )
                print_results(result)
                all_results.append(result)

    if args.mode in ("historical", "both"):
        print("\n>>> HISTORICAL BACKTEST <<<\n")
        for asset in args.asset:
            for tf in args.timeframe:
                print(f"Fetching {args.days} days of {asset} data...")
                result = await bt.run_historical(
                    asset=asset,
                    timeframe=tf,
                    days=args.days,
                )
                print_results(result)
                all_results.append(result)

    if all_results:
        save_results(all_results, args.output)

        # Summary
        print("\n" + "=" * 65)
        print(" COMBINED SUMMARY")
        print("=" * 65)
        total_pnl = sum(r.total_pnl for r in all_results)
        total_trades = sum(r.total_trades for r in all_results)
        total_wins = sum(r.wins for r in all_results)
        print(f" Total Trades: {total_trades}")
        print(f" Overall PnL: ${total_pnl:+,.2f}")
        if total_trades > 0:
            print(f" Overall WR: {total_wins / total_trades * 100:.1f}%")
        else:
            # Keep the label so the line is still identifiable without trades.
            print(" Overall WR: N/A")
        print("=" * 65)
|
|
|
|
|
|
# Script entry point: run the async CLI only when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|