update 03-22 09:28

2026-03-22 09:28:14 +09:00
commit 7f45211276
43 changed files with 9373 additions and 0 deletions
--- a/backtest.py
+++ b/backtest.py
@@ -0,0 +1,554 @@
+"""Historical backtester for the temporal arbitrage strategy.
+
+Supports two modes:
+1. Synthetic: Random walk price simulation for quick parameter testing.
+2. Historical: Real Binance kline data for realistic backtesting.
+
+Usage:
+    python backtest.py --mode synthetic --windows 2000
+    python backtest.py --mode historical --asset BTC --days 7
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import json
+import math
+import time
+from dataclasses import dataclass, field
+from datetime import datetime, timezone, timedelta
+from pathlib import Path
+from typing import Optional
+
+from src.config import load_config
+from src.data.models import Asset, Direction, Signal, Timeframe
+from src.risk.fee_calculator import FeeCalculator
+from src.strategy.temporal_arb import TemporalArbStrategy
+
+import structlog
+
+# Module-level structlog logger shared by the backtester for error/info events.
+log = structlog.get_logger()
+
+
+@dataclass
+class BacktestTrade:
+    """Single backtest trade result.
+
+    The backtester appends one instance to ``BacktestResult.trades`` for every
+    signal the strategy emits during a run.
+    """
+    # Index of the price window in which the trade was taken.
+    window_idx: int
+    # Asset symbol as passed to the backtest run (e.g. "BTC").
+    asset: str
+    # Timeframe label as passed to the backtest run (e.g. "5M" or "15M").
+    timeframe: str
+    # Traded side; populated from ``signal.direction.value``.
+    direction: str
+    # Entry price; populated from ``signal.price``.
+    entry_price: float
+    # Position size; populated from ``signal.size``.
+    size: int
+    # Edge reported by the strategy signal.
+    edge: float
+    # Probability estimate reported by the strategy signal.
+    estimated_prob: float
+    # True when the signal direction matched the direction the window closed in.
+    won: bool
+    # Net result of the trade as returned by ``FeeCalculator.net_payout``.
+    pnl: float
+    # Taker fee recorded for the trade (the backtester records 0 for losses).
+    fee: float
+    # Window open time in epoch seconds; only the historical backtest sets it,
+    # the synthetic backtest leaves the default.
+    timestamp: float = 0.0
+
+
+@dataclass
+class BacktestResult:
+    """Aggregated backtest results."""
+    mode: str = "synthetic"
+    asset: str = ""
+    timeframe: str = ""
+    total_windows: int = 0
+    total_trades: int = 0
+    wins: int = 0
+    losses: int = 0
+    total_pnl: float = 0.0
+    total_fees: float = 0.0
+    total_volume: float = 0.0
+    max_drawdown: float = 0.0
+    best_trade: float = 0.0
+    worst_trade: float = 0.0
+    peak_balance: float = 0.0
+    trades: list[BacktestTrade] = field(default_factory=list)
+
+    @property
+    def win_rate(self) -> float:
+        return self.wins / self.total_trades * 100 if self.total_trades > 0 else 0
+
+    @property
+    def avg_pnl(self) -> float:
+        return self.total_pnl / self.total_trades if self.total_trades > 0 else 0
+
+    @property
+    def profit_factor(self) -> float:
+        gross_wins = sum(t.pnl for t in self.trades if t.pnl > 0)
+        gross_losses = abs(sum(t.pnl for t in self.trades if t.pnl < 0))
+        return gross_wins / gross_losses if gross_losses > 0 else float("inf")
+
+    @property
+    def sharpe_ratio(self) -> float:
+        """Approximate Sharpe ratio from trade PnLs."""
+        if len(self.trades) < 2:
+            return 0.0
+        pnls = [t.pnl for t in self.trades]
+        avg = sum(pnls) / len(pnls)
+        variance = sum((p - avg) ** 2 for p in pnls) / len(pnls)
+        std = math.sqrt(variance) if variance > 0 else 1e-9
+        return avg / std * math.sqrt(len(pnls))  # Annualized approximation
+
+    @property
+    def max_consecutive_losses(self) -> int:
+        max_streak = 0
+        current = 0
+        for t in self.trades:
+            if not t.won:
+                current += 1
+                max_streak = max(max_streak, current)
+            else:
+                current = 0
+        return max_streak
+
+    def to_dict(self) -> dict:
+        return {
+            "mode": self.mode,
+            "asset": self.asset,
+            "timeframe": self.timeframe,
+            "total_windows": self.total_windows,
+            "total_trades": self.total_trades,
+            "wins": self.wins,
+            "losses": self.losses,
+            "win_rate": round(self.win_rate, 2),
+            "total_pnl": round(self.total_pnl, 2),
+            "avg_pnl": round(self.avg_pnl, 2),
+            "total_fees": round(self.total_fees, 2),
+            "total_volume": round(self.total_volume, 2),
+            "profit_factor": round(self.profit_factor, 2),
+            "sharpe_ratio": round(self.sharpe_ratio, 2),
+            "max_drawdown": round(self.max_drawdown, 2),
+            "best_trade": round(self.best_trade, 2),
+            "worst_trade": round(self.worst_trade, 2),
+            "max_consecutive_losses": self.max_consecutive_losses,
+        }
+
+
+class Backtester:
+    """Replay historical or synthetic data through the temporal arb strategy."""
+
+    def __init__(self, config_path: str = "config.toml", balance: float = 10000.0) -> None:
+        self.config = load_config(config_path)
+        self.initial_balance = balance
+        self.fee_calc = FeeCalculator(self.config.fees)
+
+    def _make_strategy(self, balance: float) -> TemporalArbStrategy:
+        return TemporalArbStrategy(
+            arb_config=self.config.temporal_arb,
+            risk_config=self.config.risk,
+            fees_config=self.config.fees,
+            balance=balance,
+        )
+
+    # ------------------------------------------------------------------
+    # Synthetic backtest
+    # ------------------------------------------------------------------
+
+    async def run_synthetic(
+        self,
+        asset: str = "BTC",
+        timeframe: str = "15M",
+        num_windows: int = 1000,
+        avg_volatility_pct: float = 0.3,
+    ) -> BacktestResult:
+        """Run a synthetic backtest using simulated price movements."""
+        import numpy as np
+
+        strategy = self._make_strategy(self.initial_balance)
+        result = BacktestResult(
+            mode="synthetic",
+            asset=asset,
+            timeframe=timeframe,
+            total_windows=num_windows,
+        )
+        balance = self.initial_balance
+        peak_balance = balance
+        window_sec = 300 if timeframe == "5M" else 900
+
+        rng = np.random.default_rng(42)
+
+        base_prices = {"BTC": 84000, "ETH": 2300, "SOL": 135}
+        base_price = base_prices.get(asset, 50000)
+
+        for i in range(num_windows):
+            start_price = base_price * (1 + rng.normal(0, 0.02))
+            num_ticks = 100
+            returns = rng.normal(0, avg_volatility_pct / 100 / math.sqrt(num_ticks), num_ticks)
+            prices = [start_price]
+            for r in returns:
+                prices.append(prices[-1] * (1 + r))
+
+            end_price = prices[-1]
+            actual_direction = "UP" if end_price > start_price else "DOWN"
+
+            # Evaluate at multiple points in the window
+            for eval_frac in [0.3, 0.5, 0.7]:
+                eval_idx = int(num_ticks * eval_frac)
+                eval_price = prices[eval_idx]
+                time_remaining = window_sec * (1 - eval_frac)
+
+                change_pct = (eval_price - start_price) / start_price * 100
+
+                # Simulate Polymarket price (lagging behind reality)
+                if abs(change_pct) > 0.05:
+                    lag_factor = 0.3  # Polymarket adjusts at 30% of actual
+                    if change_pct > 0:
+                        sim_poly_up = 0.50 + abs(change_pct) * lag_factor * 10
+                        sim_poly_up = min(sim_poly_up, 0.75)
+                        sim_poly_down = max(0.25, 1.0 - sim_poly_up - rng.uniform(0, 0.04))
+                    else:
+                        sim_poly_down = 0.50 + abs(change_pct) * lag_factor * 10
+                        sim_poly_down = min(sim_poly_down, 0.75)
+                        sim_poly_up = max(0.25, 1.0 - sim_poly_down - rng.uniform(0, 0.04))
+                else:
+                    sim_poly_up = 0.50 + rng.uniform(-0.02, 0.02)
+                    sim_poly_down = 0.50 + rng.uniform(-0.02, 0.02)
+
+                strategy.update_balance(balance)
+                signal = await strategy.evaluate(
+                    symbol=asset,
+                    cex_price=eval_price,
+                    window_start_price=start_price,
+                    window_end_time=time.time() + time_remaining,
+                    poly_up_ask=sim_poly_up,
+                    poly_down_ask=sim_poly_down,
+                    up_token_id=f"up_{i}",
+                    down_token_id=f"down_{i}",
+                    timeframe=timeframe,
+                )
+
+                if signal is None:
+                    continue
+
+                # Simulate outcome
+                won = (signal.direction == Direction.UP and actual_direction == "UP") or \
+                      (signal.direction == Direction.DOWN and actual_direction == "DOWN")
+
+                pnl = self.fee_calc.net_payout(
+                    timeframe=timeframe,
+                    entry_price=signal.price,
+                    size=signal.size,
+                    won=won,
+                )
+                fee = self.fee_calc.taker_fee(timeframe, signal.price, signal.size) if won else 0
+
+                balance += pnl
+                peak_balance = max(peak_balance, balance)
+                drawdown = peak_balance - balance
+
+                trade = BacktestTrade(
+                    window_idx=i,
+                    asset=asset,
+                    timeframe=timeframe,
+                    direction=signal.direction.value,
+                    entry_price=signal.price,
+                    size=signal.size,
+                    edge=signal.edge,
+                    estimated_prob=signal.estimated_prob,
+                    won=won,
+                    pnl=pnl,
+                    fee=fee,
+                )
+
+                result.trades.append(trade)
+                result.total_trades += 1
+                result.total_pnl += pnl
+                result.total_fees += fee
+                result.total_volume += signal.price * signal.size
+                if won:
+                    result.wins += 1
+                else:
+                    result.losses += 1
+                result.best_trade = max(result.best_trade, pnl)
+                result.worst_trade = min(result.worst_trade, pnl)
+                result.max_drawdown = max(result.max_drawdown, drawdown)
+                result.peak_balance = peak_balance
+
+                break  # Only take one trade per window
+
+        return result
+
+    # ------------------------------------------------------------------
+    # Historical backtest (Binance klines)
+    # ------------------------------------------------------------------
+
+    async def fetch_binance_klines(
+        self,
+        symbol: str,
+        interval: str = "1m",
+        days: int = 7,
+    ) -> list[dict]:
+        """Fetch historical kline data from Binance REST API."""
+        import aiohttp
+
+        pair = f"{symbol}USDT"
+        url = "https://api.binance.com/api/v3/klines"
+        end_time = int(time.time() * 1000)
+        start_time = end_time - (days * 24 * 60 * 60 * 1000)
+
+        all_klines = []
+        current_start = start_time
+
+        async with aiohttp.ClientSession() as session:
+            while current_start < end_time:
+                params = {
+                    "symbol": pair,
+                    "interval": interval,
+                    "startTime": current_start,
+                    "endTime": end_time,
+                    "limit": 1000,
+                }
+                async with session.get(url, params=params) as resp:
+                    if resp.status != 200:
+                        log.error("binance_klines_error", status=resp.status)
+                        break
+                    data = await resp.json()
+                    if not data:
+                        break
+
+                    for k in data:
+                        all_klines.append({
+                            "open_time": k[0],
+                            "open": float(k[1]),
+                            "high": float(k[2]),
+                            "low": float(k[3]),
+                            "close": float(k[4]),
+                            "volume": float(k[5]),
+                            "close_time": k[6],
+                        })
+
+                    current_start = data[-1][6] + 1  # Next ms after last close
+                    await asyncio.sleep(0.1)  # Rate limiting
+
+        log.info("klines_fetched", symbol=symbol, count=len(all_klines), days=days)
+        return all_klines
+
+    async def run_historical(
+        self,
+        asset: str = "BTC",
+        timeframe: str = "15M",
+        days: int = 7,
+    ) -> BacktestResult:
+        """Run backtest using real Binance historical data."""
+        strategy = self._make_strategy(self.initial_balance)
+        result = BacktestResult(
+            mode="historical",
+            asset=asset,
+            timeframe=timeframe,
+        )
+        balance = self.initial_balance
+        peak_balance = balance
+
+        # Fetch 1-minute klines
+        klines = await self.fetch_binance_klines(asset, interval="1m", days=days)
+        if not klines:
+            log.error("no_klines_data", asset=asset)
+            return result
+
+        window_minutes = 5 if timeframe == "5M" else 15
+        window_sec = window_minutes * 60
+
+        # Group klines into windows
+        window_idx = 0
+        i = 0
+        while i + window_minutes <= len(klines):
+            window_klines = klines[i:i + window_minutes]
+            start_price = window_klines[0]["open"]
+            end_price = window_klines[-1]["close"]
+            actual_direction = "UP" if end_price > start_price else "DOWN"
+
+            result.total_windows += 1
+
+            # Simulate evaluation at mid-point
+            mid_idx = window_minutes // 2
+            mid_price = window_klines[mid_idx]["close"]
+            time_remaining = window_sec * 0.5
+
+            change_pct = (mid_price - start_price) / start_price * 100
+
+            # Simulate Polymarket prices based on actual market behavior
+            # More conservative lag simulation for historical
+            if abs(change_pct) > 0.05:
+                lag = 0.25  # Market adjusts at ~25% speed
+                if change_pct > 0:
+                    poly_up = 0.50 + abs(change_pct) * lag * 8
+                    poly_up = min(poly_up, 0.72)
+                    poly_down = max(0.28, 1.0 - poly_up - 0.02)
+                else:
+                    poly_down = 0.50 + abs(change_pct) * lag * 8
+                    poly_down = min(poly_down, 0.72)
+                    poly_up = max(0.28, 1.0 - poly_down - 0.02)
+            else:
+                poly_up = 0.50
+                poly_down = 0.50
+
+            strategy.update_balance(balance)
+            signal = await strategy.evaluate(
+                symbol=asset,
+                cex_price=mid_price,
+                window_start_price=start_price,
+                window_end_time=time.time() + time_remaining,
+                poly_up_ask=poly_up,
+                poly_down_ask=poly_down,
+                up_token_id=f"hist_up_{window_idx}",
+                down_token_id=f"hist_down_{window_idx}",
+                timeframe=timeframe,
+            )
+
+            if signal:
+                won = (signal.direction == Direction.UP and actual_direction == "UP") or \
+                      (signal.direction == Direction.DOWN and actual_direction == "DOWN")
+
+                pnl = self.fee_calc.net_payout(
+                    timeframe=timeframe,
+                    entry_price=signal.price,
+                    size=signal.size,
+                    won=won,
+                )
+                fee = self.fee_calc.taker_fee(timeframe, signal.price, signal.size) if won else 0
+
+                balance += pnl
+                peak_balance = max(peak_balance, balance)
+                drawdown = peak_balance - balance
+
+                trade = BacktestTrade(
+                    window_idx=window_idx,
+                    asset=asset,
+                    timeframe=timeframe,
+                    direction=signal.direction.value,
+                    entry_price=signal.price,
+                    size=signal.size,
+                    edge=signal.edge,
+                    estimated_prob=signal.estimated_prob,
+                    won=won,
+                    pnl=pnl,
+                    fee=fee,
+                    timestamp=window_klines[0]["open_time"] / 1000,
+                )
+
+                result.trades.append(trade)
+                result.total_trades += 1
+                result.total_pnl += pnl
+                result.total_fees += fee
+                result.total_volume += signal.price * signal.size
+                if won:
+                    result.wins += 1
+                else:
+                    result.losses += 1
+                result.best_trade = max(result.best_trade, pnl)
+                result.worst_trade = min(result.worst_trade, pnl)
+                result.max_drawdown = max(result.max_drawdown, drawdown)
+                result.peak_balance = peak_balance
+
+            i += window_minutes
+            window_idx += 1
+
+        return result
+
+
+# ---------------------------------------------------------------------------
+# Output
+# ---------------------------------------------------------------------------
+
+def print_results(result: BacktestResult) -> None:
+    """Pretty-print backtest results."""
+    print("\n" + "=" * 65)
+    print(f"  BACKTEST RESULTS — {result.asset} {result.timeframe} ({result.mode})")
+    print("=" * 65)
+    print(f"  Windows Tested:        {result.total_windows}")
+    print(f"  Total Trades:          {result.total_trades}")
+    print(f"  Wins / Losses:         {result.wins} / {result.losses}")
+    print(f"  Win Rate:              {result.win_rate:.1f}%")
+    print(f"  Total PnL:             ${result.total_pnl:+,.2f}")
+    print(f"  Average PnL:           ${result.avg_pnl:+,.2f}")
+    print(f"  Total Fees:            ${result.total_fees:,.2f}")
+    print(f"  Total Volume:          ${result.total_volume:,.0f}")
+    print(f"  Profit Factor:         {result.profit_factor:.2f}")
+    print(f"  Sharpe Ratio:          {result.sharpe_ratio:.2f}")
+    print(f"  Max Drawdown:          ${result.max_drawdown:,.2f}")
+    print(f"  Best Trade:            ${result.best_trade:+,.2f}")
+    print(f"  Worst Trade:           ${result.worst_trade:+,.2f}")
+    print(f"  Max Consec. Losses:    {result.max_consecutive_losses}")
+    print("=" * 65)
+
+
+def save_results(results: list[BacktestResult], output_path: str = "backtest_results.json") -> None:
+    """Save all backtest results to JSON."""
+    data = [r.to_dict() for r in results]
+    Path(output_path).write_text(json.dumps(data, indent=2))
+    print(f"\nResults saved to {output_path}")
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+async def main() -> None:
+    parser = argparse.ArgumentParser(description="Polymarket Arb Bot Backtester")
+    parser.add_argument("--mode", choices=["synthetic", "historical", "both"],
+                        default="synthetic", help="Backtest mode")
+    parser.add_argument("--asset", nargs="+", default=["BTC", "ETH", "SOL"],
+                        help="Assets to backtest")
+    parser.add_argument("--timeframe", nargs="+", default=["5M", "15M"],
+                        help="Timeframes")
+    parser.add_argument("--windows", type=int, default=1000,
+                        help="Number of windows for synthetic mode")
+    parser.add_argument("--days", type=int, default=7,
+                        help="Days of history for historical mode")
+    parser.add_argument("--balance", type=float, default=10000.0,
+                        help="Starting balance")
+    parser.add_argument("--output", default="backtest_results.json",
+                        help="Output JSON file")
+
+    args = parser.parse_args()
+    bt = Backtester(balance=args.balance)
+    all_results = []
+
+    if args.mode in ("synthetic", "both"):
+        print("\n>>> SYNTHETIC BACKTEST <<<\n")
+        vol_map = {"BTC": 0.3, "ETH": 0.4, "SOL": 0.6}
+        for asset in args.asset:
+            for tf in args.timeframe:
+                result = await bt.run_synthetic(
+                    asset=asset,
+                    timeframe=tf,
+                    num_windows=args.windows,
+                    avg_volatility_pct=vol_map.get(asset, 0.3),
+                )
+                print_results(result)
+                all_results.append(result)
+
+    if args.mode in ("historical", "both"):
+        print("\n>>> HISTORICAL BACKTEST <<<\n")
+        for asset in args.asset:
+            for tf in args.timeframe:
+                print(f"Fetching {args.days} days of {asset} data...")
+                result = await bt.run_historical(
+                    asset=asset,
+                    timeframe=tf,
+                    days=args.days,
+                )
+                print_results(result)
+                all_results.append(result)
+
+    if all_results:
+        save_results(all_results, args.output)
+
+        # Summary
+        print("\n" + "=" * 65)
+        print("  COMBINED SUMMARY")
+        print("=" * 65)
+        total_pnl = sum(r.total_pnl for r in all_results)
+        total_trades = sum(r.total_trades for r in all_results)
+        total_wins = sum(r.wins for r in all_results)
+        print(f"  Total Trades:   {total_trades}")
+        print(f"  Overall PnL:    ${total_pnl:+,.2f}")
+        print(f"  Overall WR:     {total_wins / total_trades * 100:.1f}%" if total_trades > 0 else "  N/A")
+        print("=" * 65)
+
+
+# Script entry point: run the async CLI only when executed directly,
+# not when this module is imported.
+if __name__ == "__main__":
+    asyncio.run(main())