"""
Tests for Interactive Brokers FlexQuery interpreter.

Run with: pytest normalizator/sync/interactive_brokers/test_flexquery.py -v
"""

import hashlib
import json
from datetime import datetime

import polars as pl
import pytest

from pipeline.p01_normalize.brokers.interactive_brokers.detector import detect
from pipeline.p01_normalize.brokers.interactive_brokers.flexquery import IBFlexQueryInterpreter
from pipeline.p01_normalize.exceptions import UnknownFormatError


class TestIBFlexQueryInterpreter:
    """Normalization behaviour of IBFlexQueryInterpreter on parsed FlexQuery rows."""

    @staticmethod
    def _single_trade(**overrides):
        """Build a one-row trade frame.

        Every column defaults to a plain AAPL stock purchase; keyword
        arguments replace individual column values. Column order is fixed by
        the defaults, so overriding a value never moves its column.
        """
        trade = {
            "accountId": "U12345",
            "symbol": "AAPL",
            "buySell": "BUY",
            "quantity": "100",
            "tradePrice": "150.00",
            "dateTime": "20240515;093000",
            "ibCommission": "-1.00",
            "currency": "USD",
            "assetCategory": "STK",
            "multiplier": "1",
            "ibExecID": "exec001",
            "tradeID": "1001",
            "description": "",
            "exchange": "",
            "conid": "",
            "securityID": "",
            "putCall": "",
            "strike": "",
            "expiry": "",
            "underlyingSymbol": "",
            "fxRateToBase": "1.0",
            "taxes": "0",
        }
        trade.update(overrides)
        return pl.DataFrame({column: [value] for column, value in trade.items()})

    @pytest.fixture
    def interpreter(self):
        """Provide the interpreter under test."""
        return IBFlexQueryInterpreter()

    @pytest.fixture
    def sample_df(self):
        """Provide three stock trades shaped like parsed XML, including _row_index."""
        blank = [""] * 3
        return pl.DataFrame({
            "accountId": ["U12345"] * 3,
            "symbol": ["AAPL", "TSLA", "SPY"],
            "buySell": ["BUY", "SELL", "BUY"],
            "quantity": ["100", "50", "200"],
            "tradePrice": ["150.25", "245.50", "450.00"],
            "dateTime": ["20240515;093000", "20240515;100000", "20240515;110000"],
            "ibCommission": ["-1.50", "-0.75", "-2.00"],
            "currency": ["USD"] * 3,
            "assetCategory": ["STK"] * 3,
            "multiplier": ["1"] * 3,
            "ibExecID": ["exec001", "exec002", "exec003"],
            "tradeID": ["1001", "1002", "1003"],
            "description": ["Apple Inc", "Tesla Inc", "SPDR S&P 500"],
            "exchange": ["NASDAQ", "NASDAQ", "ARCA"],
            "conid": ["265598", "76792991", "756733"],
            "securityID": ["US0378331005", "US88160R1014", "US78462F1030"],
            "putCall": list(blank),
            "strike": list(blank),
            "expiry": list(blank),
            "underlyingSymbol": list(blank),
            "fxRateToBase": ["1.0"] * 3,
            "taxes": ["0"] * 3,
            "notes": list(blank),  # used for assignment trade detection
            "closePrice": list(blank),  # fallback price for assignments
            "openCloseIndicator": list(blank),  # used by TRADECANCEL reversal logic
            "_row_index": [0, 1, 2],  # the interpreter needs this for ordering
        })

    def test_can_handle_valid_data(self, interpreter, sample_df):
        """Test can_handle accepts a frame with the IB trade columns."""
        accepted = interpreter.can_handle(sample_df, {})
        assert accepted is True

    def test_can_handle_missing_columns(self, interpreter):
        """Test can_handle rejects a frame without the IB trade columns."""
        unrelated = pl.DataFrame({
            "symbol": ["AAPL"],
            "price": [150.0],  # not one of the expected column names
        })
        accepted = interpreter.can_handle(unrelated, {})
        assert accepted is False

    def test_normalize_basic(self, interpreter, sample_df):
        """Test normalization keeps every row and yields the grouping-ready schema."""
        normalized = interpreter.normalize(sample_df.lazy(), user_id=999).collect()

        assert len(normalized) == 3
        # The 19 columns the downstream pipeline relies on.
        expected_columns = (
            "user_id", "account_id", "execution_id", "symbol", "side",
            "quantity", "price", "timestamp", "commission", "fees", "swap",
            "currency", "asset", "option_strike", "option_expire",
            "multiplier", "pip_value", "original_file_row", "file_row",
        )
        for col in expected_columns:
            assert col in normalized.columns, f"Missing required column: {col}"

    def test_normalize_user_id(self, interpreter, sample_df):
        """Test every row carries the user_id passed to normalize."""
        normalized = interpreter.normalize(sample_df.lazy(), user_id=12345).collect()
        assert all(normalized["user_id"] == 12345)

    def test_normalize_account_id_from_xml(self, interpreter, sample_df):
        """Test account_id is taken from the accountId column."""
        normalized = interpreter.normalize(sample_df.lazy(), user_id=12345).collect()
        # Every sample row has accountId "U12345".
        assert all(normalized["account_id"] == "U12345")

    def test_normalize_symbol_uppercase(self, interpreter):
        """Test a lowercase symbol is upper-cased."""
        frame = self._single_trade(symbol="aapl")
        normalized = interpreter.normalize(frame.lazy(), user_id=1).collect()
        assert normalized["symbol"][0] == "AAPL"

    def test_normalize_side_mapping(self, interpreter, sample_df):
        """Test side values come through as BUY/SELL."""
        normalized = interpreter.normalize(sample_df.lazy(), user_id=1).collect()
        sides = normalized["side"].to_list()
        assert sides == ["BUY", "SELL", "BUY"]

    def test_normalize_quantity_absolute(self, interpreter):
        """Test a negative quantity is reported as its absolute value."""
        frame = self._single_trade(buySell="SELL", quantity="-100")
        normalized = interpreter.normalize(frame.lazy(), user_id=1).collect()
        assert normalized["quantity"][0] == 100.0

    def test_normalize_commission_absolute(self, interpreter, sample_df):
        """Test commission is non-negative even though IB reports it negative."""
        normalized = interpreter.normalize(sample_df.lazy(), user_id=1).collect()
        commissions = normalized["commission"]
        assert all(commissions >= 0)
        assert commissions[0] == 1.50

    def test_normalize_timestamp_parsing(self, interpreter, sample_df):
        """Test "20240515;093000" parses to 2024-05-15 09:30."""
        normalized = interpreter.normalize(sample_df.lazy(), user_id=1).collect()
        first = normalized["timestamp"][0]
        parsed = (first.year, first.month, first.day, first.hour, first.minute)
        assert parsed == (2024, 5, 15, 9, 30)

    def test_normalize_asset_stocks(self, interpreter, sample_df):
        """Test assetCategory STK maps to stocks."""
        normalized = interpreter.normalize(sample_df.lazy(), user_id=1).collect()
        assert all(normalized["asset"] == "stocks")

    def test_normalize_asset_forex(self, interpreter):
        """Test assetCategory CASH maps to forex."""
        frame = self._single_trade(
            symbol="EUR.USD",
            quantity="10000",
            tradePrice="1.0850",
            ibCommission="-2.00",
            assetCategory="CASH",
        )
        normalized = interpreter.normalize(frame.lazy(), user_id=1).collect()
        assert normalized["asset"][0] == "forex"

    def test_normalize_option_call(self, interpreter):
        """Test an OPT row with putCall=C maps to options with strike and expiry."""
        frame = self._single_trade(
            symbol="AAPL 240517C00150000",
            quantity="10",
            tradePrice="5.50",
            ibCommission="-0.65",
            assetCategory="OPT",
            multiplier="100",
            description="AAPL CALL 150",
            exchange="CBOE",
            conid="12345",
            putCall="C",
            strike="150",
            expiry="20240517",
            underlyingSymbol="AAPL",
        )
        normalized = interpreter.normalize(frame.lazy(), user_id=1).collect()
        assert normalized["asset"][0] == "options"
        assert normalized["option_strike"][0] == 150.0
        assert normalized["option_expire"][0] == "20240517"

    def test_normalize_option_put(self, interpreter):
        """Test an OPT row with putCall=P maps to options with its strike."""
        frame = self._single_trade(
            symbol="AAPL 240517P00140000",
            quantity="5",
            tradePrice="3.25",
            ibCommission="-0.33",
            assetCategory="OPT",
            multiplier="100",
            description="AAPL PUT 140",
            exchange="CBOE",
            conid="12346",
            putCall="P",
            strike="140",
            expiry="20240517",
            underlyingSymbol="AAPL",
        )
        normalized = interpreter.normalize(frame.lazy(), user_id=1).collect()
        assert normalized["asset"][0] == "options"
        assert normalized["option_strike"][0] == 140.0

    def test_original_file_row_valid_json(self, interpreter, sample_df):
        """Test each original_file_row decodes to a JSON object."""
        normalized = interpreter.normalize(sample_df.lazy(), user_id=1).collect()
        for raw in normalized["original_file_row"]:
            decoded = json.loads(raw)  # raises if the payload is not valid JSON
            assert isinstance(decoded, dict)

    def test_original_file_row_keys_lowercase(self, interpreter, sample_df):
        """Test every key in original_file_row is lowercase."""
        normalized = interpreter.normalize(sample_df.lazy(), user_id=1).collect()
        first_row = json.loads(normalized["original_file_row"][0])
        for key in first_row:
            assert key == key.lower(), f"Key '{key}' should be lowercase"

    def test_original_file_row_preserves_values(self, interpreter, sample_df):
        """Test original_file_row keeps the source values untouched (only keys change case)."""
        normalized = interpreter.normalize(sample_df.lazy(), user_id=1).collect()
        first_row = json.loads(normalized["original_file_row"][0])
        # Values must match the input exactly, including their upper case.
        assert first_row["symbol"] == "AAPL"
        assert first_row["buysell"] == "BUY"
        assert first_row["currency"] == "USD"

    def test_original_file_row_numeric_not_quoted(self, interpreter, sample_df):
        """Test numeric fields are serialized as JSON numbers, not strings."""
        normalized = interpreter.normalize(sample_df.lazy(), user_id=1).collect()
        first_row = json.loads(normalized["original_file_row"][0])
        numeric = (int, float)
        assert isinstance(first_row["quantity"], numeric)
        assert isinstance(first_row["tradeprice"], numeric)
        assert isinstance(first_row["ibcommission"], numeric)

    def test_original_file_row_deterministic(self, interpreter, sample_df):
        """Test two runs over the same input yield identical original_file_row values."""
        first_run = interpreter.normalize(sample_df.lazy(), user_id=1).collect()
        second_run = interpreter.normalize(sample_df.lazy(), user_id=1).collect()
        assert first_run["original_file_row"].to_list() == second_run["original_file_row"].to_list()


class TestDetector:
    """Checks that format detection selects the right interpreter."""

    def test_detect_flexquery(self):
        """Test a frame with the FlexQuery trade columns yields IBFlexQueryInterpreter."""
        trade = {
            "symbol": "AAPL",
            "buySell": "BUY",
            "quantity": "100",
            "tradePrice": "150.00",
            "dateTime": "20240515;093000",
            "ibCommission": "-1.00",
        }
        frame = pl.DataFrame({column: [value] for column, value in trade.items()})
        assert isinstance(detect(frame), IBFlexQueryInterpreter)

    def test_detect_unknown_format(self):
        """Test a frame with unrelated columns raises UnknownFormatError."""
        unrelated = pl.DataFrame({"col1": [1], "col2": [2]})
        with pytest.raises(UnknownFormatError):
            detect(unrelated)


class TestFileRowHash:
    """Tests for file_row hash formula (legacy compatibility)."""

    # Trade id and the hash the legacy system is known to have produced for it.
    _LEGACY_TRADE_ID = "49162775"
    _LEGACY_HASH = "1bf891a1c31e241bfa6d962fb256be16"

    @staticmethod
    def _trades(rows=1, **overrides):
        """Build a trade frame with ``rows`` rows.

        Columns default to a plain AAPL stock purchase. An override given as
        a list is used as the column as-is (it must hold ``rows`` values);
        any other override is repeated for every row. Column order is fixed
        by the defaults.
        """
        columns = {
            "accountId": "U12345",
            "symbol": "AAPL",
            "buySell": "BUY",
            "quantity": "100",
            "tradePrice": "150.00",
            "dateTime": "20240515;093000",
            "ibCommission": "-1.00",
            "currency": "USD",
            "assetCategory": "STK",
            "multiplier": "1",
            "ibExecID": "exec001",
            "tradeID": "1001",
            "description": "",
            "exchange": "",
            "conid": "",
            "securityID": "",
            "putCall": "",
            "strike": "",
            "expiry": "",
            "underlyingSymbol": "",
            "fxRateToBase": "1.0",
            "taxes": "0",
        }
        columns.update(overrides)
        return pl.DataFrame({
            name: value if isinstance(value, list) else [value] * rows
            for name, value in columns.items()
        })

    @staticmethod
    def _md5_of_json(value):
        """Return the MD5 hex digest of ``json.dumps(value)`` encoded as UTF-8."""
        return hashlib.md5(json.dumps(value).encode('utf-8')).hexdigest()

    @pytest.fixture
    def interpreter(self):
        """Provide the interpreter under test."""
        return IBFlexQueryInterpreter()

    def test_file_row_is_valid_md5(self, interpreter):
        """Test file_row is 32-char MD5 hex."""
        df = self._trades(
            tradeID=self._LEGACY_TRADE_ID,
            description="Apple Inc",
            exchange="NASDAQ",
            conid="265598",
            securityID="US0378331005",
        )
        result = interpreter.normalize(df.lazy(), user_id=1).collect()
        file_row = result["file_row"][0]
        # Exactly 32 lowercase hexadecimal characters.
        assert len(file_row) == 32
        assert all(c in "0123456789abcdef" for c in file_row)

    def test_file_row_deterministic(self, interpreter):
        """Test same input produces same hash."""
        df = self._trades(tradeID="12345")
        result1 = interpreter.normalize(df.lazy(), user_id=1).collect()
        result2 = interpreter.normalize(df.lazy(), user_id=1).collect()
        assert result1["file_row"][0] == result2["file_row"][0]

    def test_file_row_uses_tradeid_priority(self, interpreter):
        """Test file_row uses tradeid as first priority."""
        df = self._trades(tradeID=self._LEGACY_TRADE_ID)
        result = interpreter.normalize(df.lazy(), user_id=1).collect()

        # Hash should be MD5(json.dumps(str(tradeid)))
        assert result["file_row"][0] == self._md5_of_json(self._LEGACY_TRADE_ID)

    def test_file_row_matches_legacy_value(self, interpreter):
        """Test file_row matches known legacy value."""
        df = self._trades(tradeID=self._LEGACY_TRADE_ID)
        result = interpreter.normalize(df.lazy(), user_id=1).collect()

        assert result["file_row"][0] == self._LEGACY_HASH

    def test_file_row_different_tradeids_produce_different_hashes(self, interpreter):
        """Test different tradeids produce different hashes."""
        df = self._trades(
            rows=2,
            symbol=["AAPL", "TSLA"],
            buySell=["BUY", "SELL"],
            quantity=["100", "50"],
            tradePrice=["150.00", "200.00"],
            dateTime=["20240515;093000", "20240515;100000"],
            ibCommission=["-1.00", "-0.75"],
            ibExecID=["exec001", "exec002"],
            tradeID=["11111", "22222"],
        )
        result = interpreter.normalize(df.lazy(), user_id=1).collect()
        assert result["file_row"][0] != result["file_row"][1]

    def test_file_row_string_conversion(self):
        """Test that integer tradeid produces same hash as string tradeid."""
        # Exercise the static method directly with both representations.
        original_str = json.dumps({"tradeid": self._LEGACY_TRADE_ID})
        hash_str = IBFlexQueryInterpreter._compute_file_row_hash(original_str)

        original_int = json.dumps({"tradeid": int(self._LEGACY_TRADE_ID)})
        hash_int = IBFlexQueryInterpreter._compute_file_row_hash(original_int)

        # Both should produce the same hash
        assert hash_str == hash_int
        # And match the expected legacy value
        assert hash_str == self._LEGACY_HASH

    def test_file_row_fallback_to_orderid(self):
        """Test file_row falls back to orderid when tradeid is empty."""
        # No tradeid, has orderid (and iborderid, which must lose to orderid)
        original = json.dumps({"orderid": "98765", "iborderid": "11111"})
        hash_val = IBFlexQueryInterpreter._compute_file_row_hash(original)

        assert hash_val == self._md5_of_json("98765")

    def test_file_row_fallback_to_iborderid(self):
        """Test file_row falls back to iborderid when tradeid and orderid are empty."""
        # No tradeid, no orderid, has iborderid
        original = json.dumps({"iborderid": "55555"})
        hash_val = IBFlexQueryInterpreter._compute_file_row_hash(original)

        assert hash_val == self._md5_of_json("55555")

    def test_file_row_fallback_to_full_dict(self):
        """Test file_row falls back to full dict when no id fields present."""
        # No id fields at all
        order = {"symbol": "AAPL", "quantity": "100"}
        original = json.dumps(order)
        hash_val = IBFlexQueryInterpreter._compute_file_row_hash(original)

        assert hash_val == self._md5_of_json(order)


class TestCriticalValidations:
    """Tests for critical data integrity validations.

    Each test feeds two trades to the interpreter, the first of which is
    invalid in exactly one field, and expects only the second to survive.
    """

    @staticmethod
    def _two_trades(**overrides):
        """Build a two-row trade frame (AAPL then TSLA, both valid by default).

        Keyword arguments replace whole columns and must be two-element
        lists. Building every frame here guarantees all columns have the
        same length, which a hand-written column dict does not.

        Raises:
            ValueError: if an override does not hold exactly two values.
        """
        columns = {
            "accountId": ["U12345", "U12345"],
            "symbol": ["AAPL", "TSLA"],
            "buySell": ["BUY", "BUY"],
            "quantity": ["100", "100"],
            "tradePrice": ["150.00", "200.00"],
            "dateTime": ["20240515;093000", "20240515;100000"],
            "ibCommission": ["-1.00", "-1.00"],
            "currency": ["USD", "USD"],
            "assetCategory": ["STK", "STK"],
            "multiplier": ["1", "1"],
            "ibExecID": ["exec001", "exec002"],
            "tradeID": ["1001", "1002"],
            "description": ["", ""],
            "exchange": ["", ""],
            "conid": ["", ""],
            "securityID": ["", ""],
            "putCall": ["", ""],
            "strike": ["", ""],
            "expiry": ["", ""],
            "underlyingSymbol": ["", ""],
            "fxRateToBase": ["1.0", "1.0"],
            "taxes": ["0", "0"],
            "notes": ["", ""],
            "closePrice": ["", ""],
            "openCloseIndicator": ["", ""],
        }
        for name, values in overrides.items():
            if len(values) != 2:
                raise ValueError(f"column {name!r} needs exactly two values")
            columns[name] = values
        return pl.DataFrame(columns)

    @pytest.fixture
    def interpreter(self):
        """Provide the interpreter under test."""
        return IBFlexQueryInterpreter()

    def test_quantity_zero_rejected(self, interpreter):
        """Test trades with quantity=0 are filtered out."""
        df = self._two_trades(quantity=["0", "100"])  # First is zero
        result = interpreter.normalize(df.lazy(), user_id=1).collect()
        # Only one record should remain (quantity > 0)
        assert len(result) == 1
        assert result["symbol"][0] == "TSLA"

    def test_symbol_empty_rejected(self, interpreter):
        """Test trades with empty symbol are filtered out."""
        df = self._two_trades(symbol=["", "AAPL"])  # First is empty
        result = interpreter.normalize(df.lazy(), user_id=1).collect()
        # Only one record should remain (non-empty symbol)
        assert len(result) == 1
        assert result["symbol"][0] == "AAPL"

    def test_price_zero_rejected(self, interpreter):
        """Test trades with price=0 are filtered out."""
        df = self._two_trades(tradePrice=["0", "200.00"])  # First is zero
        result = interpreter.normalize(df.lazy(), user_id=1).collect()
        # Only one record should remain (price > 0)
        assert len(result) == 1
        assert result["symbol"][0] == "TSLA"

    def test_timestamp_null_rejected(self, interpreter):
        """Test trades with invalid timestamp are filtered out."""
        df = self._two_trades(dateTime=["", "20240515;100000"])  # First is empty/invalid
        result = interpreter.normalize(df.lazy(), user_id=1).collect()
        # Only one record should remain (valid timestamp)
        assert len(result) == 1
        assert result["symbol"][0] == "TSLA"


class TestSymbolTransformations:
    """Checks how symbols are rewritten for each asset category."""

    @staticmethod
    def _single_trade(**overrides):
        """Build a one-row trade frame.

        Columns default to a plain AAPL stock purchase; keyword arguments
        replace individual column values without changing column order.
        """
        trade = {
            "accountId": "U12345",
            "symbol": "AAPL",
            "buySell": "BUY",
            "quantity": "100",
            "tradePrice": "150.00",
            "dateTime": "20240515;093000",
            "ibCommission": "-1.00",
            "currency": "USD",
            "assetCategory": "STK",
            "multiplier": "1",
            "ibExecID": "exec001",
            "tradeID": "1001",
            "description": "",
            "exchange": "",
            "conid": "",
            "securityID": "",
            "putCall": "",
            "strike": "",
            "expiry": "",
            "underlyingSymbol": "",
            "fxRateToBase": "1.0",
            "taxes": "0",
            "notes": "",
            "closePrice": "",
            "openCloseIndicator": "",
        }
        trade.update(overrides)
        return pl.DataFrame({column: [value] for column, value in trade.items()})

    @pytest.fixture
    def interpreter(self):
        """Provide the interpreter under test."""
        return IBFlexQueryInterpreter()

    def test_bitcoin_symbol_cleanup(self, interpreter):
        """Test the BITCOIN prefix is stripped from a stock symbol."""
        frame = self._single_trade(
            symbol="BITCOINUSD",
            quantity="1",
            tradePrice="50000.00",
            ibCommission="-5.00",
        )
        normalized = interpreter.normalize(frame.lazy(), user_id=1).collect()
        # Only the part after the BITCOIN prefix is expected to remain.
        assert normalized["symbol"][0] == "USD"

    def test_hong_kong_stocks_formatting(self, interpreter):
        """Test a four-digit numeric HKD stock symbol gains the .HK suffix."""
        frame = self._single_trade(
            symbol="0700",
            tradePrice="350.00",
            ibCommission="-2.00",
            currency="HKD",
            fxRateToBase="0.13",
        )
        normalized = interpreter.normalize(frame.lazy(), user_id=1).collect()
        assert normalized["symbol"][0] == "0700.HK"

    def test_cfd_symbol_formatting(self, interpreter):
        """Test a CFD symbol gains the |CFD suffix."""
        frame = self._single_trade(assetCategory="CFD")
        normalized = interpreter.normalize(frame.lazy(), user_id=1).collect()
        assert normalized["symbol"][0] == "AAPL|CFD"

    def test_forex_symbol_formatting(self, interpreter):
        """Test a forex pair gains a $ prefix and loses its dot."""
        frame = self._single_trade(
            symbol="EUR.USD",
            quantity="10000",
            tradePrice="1.0850",
            ibCommission="-2.00",
            assetCategory="CASH",
        )
        normalized = interpreter.normalize(frame.lazy(), user_id=1).collect()
        assert normalized["symbol"][0] == "$EURUSD"

    def test_futures_symbol_formatting(self, interpreter):
        """Test a futures symbol gains a / prefix."""
        frame = self._single_trade(
            symbol="ESH24",
            quantity="1",
            tradePrice="4500.00",
            ibCommission="-2.50",
            assetCategory="FUT",
            multiplier="50",
        )
        normalized = interpreter.normalize(frame.lazy(), user_id=1).collect()
        assert normalized["symbol"][0] == "/ESH24"

    def test_spxw_to_spx_conversion(self, interpreter):
        """Test an SPXW option symbol is rewritten to SPX."""
        frame = self._single_trade(
            symbol="SPXW 240517C04500000",
            quantity="1",
            tradePrice="50.00",
            assetCategory="OPT",
            multiplier="100",
            putCall="C",
            strike="4500",
            expiry="20240517",
            underlyingSymbol="SPX",
        )
        normalized = interpreter.normalize(frame.lazy(), user_id=1).collect()
        assert normalized["symbol"][0] == "SPX 240517C04500000"


class TestSwapDetection:
    """Tests for swap detection (high commission reclassification)."""

    @pytest.fixture
    def interpreter(self):
        """Provide a fresh interpreter instance for each test."""
        return IBFlexQueryInterpreter()

    @staticmethod
    def _aapl_buy(quantity, ib_commission):
        """Build a one-row AAPL stock BUY frame.

        Only ``quantity`` and ``ibCommission`` vary between the tests in this
        class; every other column is fixed. Values are passed as strings,
        matching the other columns in the frame.
        """
        return pl.DataFrame({
            "accountId": ["U12345"],
            "symbol": ["AAPL"],
            "buySell": ["BUY"],
            "quantity": [quantity],
            "tradePrice": ["150.00"],
            "dateTime": ["20240515;093000"],
            "ibCommission": [ib_commission],
            "currency": ["USD"],
            "assetCategory": ["STK"],
            "multiplier": ["1"],
            "ibExecID": ["exec001"],
            "tradeID": ["1001"],
            "description": [""],
            "exchange": [""],
            "conid": [""],
            "securityID": [""],
            "putCall": [""],
            "strike": [""],
            "expiry": [""],
            "underlyingSymbol": [""],
            "fxRateToBase": ["1.0"],
            "taxes": ["0"],
            "notes": [""],
            "closePrice": [""],
            "openCloseIndicator": [""],
        })

    def test_high_commission_reclassified_as_swap(self, interpreter):
        """A commission above $1000 should be moved into the swap column."""
        trades = self._aapl_buy(quantity="1000", ib_commission="-1500.00")

        result = interpreter.normalize(trades.lazy(), user_id=1).collect()

        # The whole amount moves: nothing left as commission, 1500 as swap.
        assert result["commission"][0] == 0.0
        assert result["swap"][0] == 1500.0

    def test_normal_commission_not_reclassified(self, interpreter):
        """An ordinary commission should stay in the commission column."""
        trades = self._aapl_buy(quantity="100", ib_commission="-10.00")

        result = interpreter.normalize(trades.lazy(), user_id=1).collect()

        # Commission is reported as a positive amount; swap stays empty.
        assert result["commission"][0] == 10.0
        assert result["swap"][0] == 0.0


class TestAssetWhitelist:
    """Tests for asset type whitelist validation."""

    @pytest.fixture
    def interpreter(self):
        """Provide a fresh interpreter instance for each test."""
        return IBFlexQueryInterpreter()

    def test_unknown_asset_rejected(self, interpreter):
        """Rows with an unrecognised assetCategory should be dropped."""
        # Two otherwise valid BUY rows; only the second has a known
        # asset category ("STK"), the first uses "UNKNOWN".
        columns = {
            "accountId": ["U12345", "U12345"],
            "symbol": ["AAPL", "TSLA"],
            "buySell": ["BUY", "BUY"],
            "quantity": ["100", "100"],
            "tradePrice": ["150.00", "200.00"],
            "dateTime": ["20240515;093000", "20240515;100000"],
            "ibCommission": ["-1.00", "-1.00"],
            "currency": ["USD", "USD"],
            "assetCategory": ["UNKNOWN", "STK"],
            "multiplier": ["1", "1"],
            "ibExecID": ["exec001", "exec002"],
            "tradeID": ["1001", "1002"],
            "description": ["", ""],
            "exchange": ["", ""],
            "conid": ["", ""],
            "securityID": ["", ""],
            "putCall": ["", ""],
            "strike": ["", ""],
            "expiry": ["", ""],
            "underlyingSymbol": ["", ""],
            "fxRateToBase": ["1.0", "1.0"],
            "taxes": ["0", "0"],
            "notes": ["", ""],
            "closePrice": ["", ""],
            "openCloseIndicator": ["", ""],
        }
        mixed_trades = pl.DataFrame(columns)

        normalized = interpreter.normalize(mixed_trades.lazy(), user_id=1)
        result = normalized.collect()

        # Exactly one row survives, and it is the STK (TSLA) trade.
        assert len(result) == 1
        assert result["symbol"][0] == "TSLA"


if __name__ == "__main__":
    # Allow running this file directly. pytest.main() returns the exit code
    # instead of exiting, so propagate it; otherwise the process would exit 0
    # even when tests fail.
    raise SystemExit(pytest.main([__file__, "-v"]))
