"""
Tests for Binance interpreter.

Run with: pytest normalizator/sync/binance/test_binance.py -v
"""

import pytest
import polars as pl
import json

from pipeline.p01_normalize.brokers.binance.binance import BinanceInterpreter
from pipeline.p01_normalize.brokers.binance.detector import detect
from pipeline.p01_normalize.exceptions import UnknownFormatError


class TestBinanceInterpreter:
    """Tests for BinanceInterpreter class.

    Exercises ``parse_json_content`` and ``normalize`` with the sample futures
    and spot payloads built by the fixtures below.
    """

    @staticmethod
    def _normalize_content(interpreter, content, user_id=1, account_id=""):
        """Parse a JSON payload and return the collected ``normalize`` output."""
        df = interpreter.parse_json_content(content)
        return interpreter.normalize(
            df.lazy(), user_id=user_id, account_id=account_id
        ).collect()

    @classmethod
    def _normalize_orders(cls, interpreter, category, orders):
        """Wrap ``orders`` in a minimal ``{category, orders}`` payload and normalize it."""
        content = json.dumps({"category": category, "orders": orders})
        return cls._normalize_content(interpreter, content)

    @pytest.fixture
    def interpreter(self):
        """Create interpreter instance."""
        return BinanceInterpreter()

    @pytest.fixture
    def sample_futures_order(self):
        """Create sample Binance futures order (explicit ``side`` field)."""
        return {
            "symbol": "BTCUSDT",
            "id": 6362122492,
            "orderId": 696274174251,
            "side": "BUY",
            "price": "105029.40",
            "qty": "0.131",
            "realizedPnl": "0",
            "quoteQty": "13758.85140",
            "commission": "2.75177028",
            "commissionAsset": "USDT",
            "time": 1748806357785,
            "positionSide": "LONG",
            "buyer": True,
            "maker": True
        }

    @pytest.fixture
    def sample_spot_order(self):
        """Create sample Binance spot buy order (``isBuyer`` is True, no ``side``)."""
        return {
            "symbol": "BTCUSDT",
            "id": 3409728443,
            "orderId": 24786008110,
            "orderListId": -1,
            "price": "47188.00000000",
            "qty": "0.00106000",
            "quoteQty": "50.01928000",
            "commission": "0.00011617",
            "commissionAsset": "BNB",
            "time": 1707554232375,
            "isBuyer": True,
            "isMaker": False,
            "isBestMatch": True
        }

    @pytest.fixture
    def sample_spot_sell_order(self):
        """Create sample Binance spot sell order (``isBuyer`` is False)."""
        return {
            "symbol": "ETHUSDT",
            "id": 3410446035,
            "orderId": 24793654667,
            "orderListId": -1,
            "price": "3000.00000000",
            "qty": "1.5",
            "quoteQty": "4500.00000000",
            "commission": "0.00225",
            "commissionAsset": "BNB",
            "time": 1707598152150,
            "isBuyer": False,
            "isMaker": False,
            "isBestMatch": True
        }

    @pytest.fixture
    def sample_futures_json(self, sample_futures_order):
        """Create sample futures JSON content (full envelope with one order)."""
        return json.dumps({
            "user_id": "test_user",
            "execution_id": "test123",
            "category": "futures",
            "symbol": "ALL_FUTURES",
            "total_orders": 1,
            "orders": [sample_futures_order],
            "processed_at": "2025-01-01T00:00:00"
        })

    @pytest.fixture
    def sample_spot_json(self, sample_spot_order, sample_spot_sell_order):
        """Create sample spot JSON content (full envelope with two orders)."""
        return json.dumps({
            "user_id": "test_user",
            "execution_id": "test456",
            "category": "spot",
            "symbol": "BTCUSDT",
            "total_orders": 2,
            "orders": [sample_spot_order, sample_spot_sell_order],
            "processed_at": "2025-01-01T00:00:00"
        })

    def test_parse_futures_json(self, interpreter, sample_futures_json):
        """Test parsing futures JSON produces correct DataFrame."""
        df = interpreter.parse_json_content(sample_futures_json)

        assert len(df) == 1
        for column in ("id", "symbol", "side", "qty", "price"):
            assert column in df.columns
        assert df["category"][0] == "futures"

    def test_parse_spot_json(self, interpreter, sample_spot_json):
        """Test parsing spot JSON produces correct DataFrame."""
        df = interpreter.parse_json_content(sample_spot_json)

        assert len(df) == 2
        assert df["category"][0] == "spot"

    def test_normalize_basic(self, interpreter, sample_futures_json):
        """Test basic normalization produces correct schema."""
        result = self._normalize_content(
            interpreter, sample_futures_json, user_id=999, account_id="test123"
        )

        assert len(result) == 1
        # Check core normalizer columns are present
        core_columns = [
            "user_id", "account_id", "execution_id", "symbol", "side",
            "quantity", "price", "timestamp", "commission", "fees", "swap",
            "currency", "asset", "option_strike", "option_expire",
            "multiplier", "pip_value", "original_file_row", "file_row"
        ]
        for col in core_columns:
            assert col in result.columns, f"Missing required column: {col}"

    def test_normalize_user_id(self, interpreter, sample_futures_json):
        """Test user_id is set correctly."""
        result = self._normalize_content(interpreter, sample_futures_json, user_id=12345)
        # Guard: all() over an empty column is vacuously true.
        assert len(result) == 1
        assert all(result["user_id"] == 12345)

    def test_normalize_account_id(self, interpreter, sample_futures_json):
        """Test account_id is set from input parameter."""
        result = self._normalize_content(
            interpreter, sample_futures_json, user_id=1, account_id="my_account"
        )
        # Guard: all() over an empty column is vacuously true.
        assert len(result) == 1
        assert all(result["account_id"] == "my_account")

    def test_normalize_symbol_uppercase(self, interpreter):
        """Test symbol is uppercase."""
        lowercase_order = {
            "id": 1,
            "symbol": "btcusdt",
            "price": "50000.0",
            "qty": "1.0",
            "commission": "0.5",
            "commissionAsset": "USDT",
            "time": 1707554232375,
            "isBuyer": True
        }
        result = self._normalize_orders(interpreter, "spot", [lowercase_order])
        assert result["symbol"][0] == "BTCUSDT"

    def test_normalize_futures_side_buy(self, interpreter, sample_futures_order):
        """Test futures BUY side maps to side="BUY"."""
        result = self._normalize_orders(interpreter, "futures", [sample_futures_order])
        assert result["side"][0] == "BUY"

    def test_normalize_futures_side_sell(self, interpreter, sample_futures_order):
        """Test futures SELL side maps to side="SELL"."""
        # Copy so the fixture dict itself is not mutated.
        order = sample_futures_order.copy()
        order["side"] = "SELL"
        result = self._normalize_orders(interpreter, "futures", [order])
        assert result["side"][0] == "SELL"

    def test_normalize_spot_side_buy(self, interpreter, sample_spot_order):
        """Test spot isBuyer=True maps to side="BUY"."""
        result = self._normalize_orders(interpreter, "spot", [sample_spot_order])
        assert result["side"][0] == "BUY"

    def test_normalize_spot_side_sell(self, interpreter, sample_spot_sell_order):
        """Test spot isBuyer=False maps to side="SELL"."""
        result = self._normalize_orders(interpreter, "spot", [sample_spot_sell_order])
        assert result["side"][0] == "SELL"

    def test_normalize_quantity(self, interpreter, sample_futures_order):
        """Test quantity is parsed correctly."""
        result = self._normalize_orders(interpreter, "futures", [sample_futures_order])
        assert result["quantity"][0] == 0.131

    def test_normalize_price(self, interpreter, sample_futures_order):
        """Test price is parsed correctly."""
        result = self._normalize_orders(interpreter, "futures", [sample_futures_order])
        assert result["price"][0] == 105029.40

    def test_normalize_commission(self, interpreter, sample_futures_order):
        """Test commission is extracted correctly."""
        result = self._normalize_orders(interpreter, "futures", [sample_futures_order])
        assert abs(result["commission"][0] - 2.75177028) < 0.0001

    def test_normalize_currency(self, interpreter, sample_spot_order):
        """Test currency from commissionAsset."""
        result = self._normalize_orders(interpreter, "spot", [sample_spot_order])
        assert result["currency"][0] == "BNB"

    def test_normalize_timestamp(self, interpreter, sample_futures_order):
        """Test timestamp parsing from milliseconds."""
        result = self._normalize_orders(interpreter, "futures", [sample_futures_order])
        ts = result["timestamp"][0]
        # The fixture's "time" (1748806357785 ms since the epoch) falls in 2025.
        assert ts.year == 2025

    def test_normalize_asset_spot(self, interpreter, sample_spot_order):
        """Test spot category maps to crypto."""
        result = self._normalize_orders(interpreter, "spot", [sample_spot_order])
        assert result["asset"][0] == "crypto"

    def test_normalize_asset_futures(self, interpreter, sample_futures_order):
        """Test futures category maps to crypto."""
        result = self._normalize_orders(interpreter, "futures", [sample_futures_order])
        assert result["asset"][0] == "crypto"

    def test_original_file_row_valid_json(self, interpreter, sample_futures_json):
        """Test original_file_row is valid JSON."""
        result = self._normalize_content(interpreter, sample_futures_json)
        # Guard: the loop below would silently check nothing on an empty result.
        assert len(result) == 1
        for row in result["original_file_row"]:
            parsed = json.loads(row)
            assert isinstance(parsed, dict)

    def test_original_file_row_preserves_original_case(self, interpreter, sample_futures_json):
        """Test original_file_row decodes to a JSON object.

        NOTE(review): despite the name, no key or value casing is asserted here;
        only that the first row parses to a dict. Strengthen once the expected
        contents of original_file_row are confirmed.
        """
        result = self._normalize_content(interpreter, sample_futures_json)
        parsed = json.loads(result["original_file_row"][0])
        assert isinstance(parsed, dict)

    def test_original_file_row_deterministic(self, interpreter, sample_futures_json):
        """Test original_file_row is deterministic."""
        # Parse once and normalize the same frame twice.
        df = interpreter.parse_json_content(sample_futures_json)
        result1 = interpreter.normalize(df.lazy(), user_id=1, account_id="").collect()
        result2 = interpreter.normalize(df.lazy(), user_id=1, account_id="").collect()
        assert result1["original_file_row"].to_list() == result2["original_file_row"].to_list()

    def test_execution_id_from_id(self, interpreter, sample_futures_order):
        """Test execution_id is set from order id (compared as a string)."""
        result = self._normalize_orders(interpreter, "futures", [sample_futures_order])
        assert result["execution_id"][0] == str(sample_futures_order["id"])


class TestDetector:
    """Checks for the Binance format detector."""

    def test_detect_binance(self):
        """A frame with the Binance trade columns resolves to BinanceInterpreter."""
        binance_columns = {
            "id": ["1"],
            "symbol": ["BTCUSDT"],
            "price": [50000.0],
            "qty": [1.0],
            "time": ["1707554232375"],
            "commission": [0.5],
        }
        detected = detect(pl.DataFrame(binance_columns))
        assert isinstance(detected, BinanceInterpreter)

    def test_detect_unknown_format(self):
        """A frame with unrelated columns makes the detector raise UnknownFormatError."""
        unrelated_frame = pl.DataFrame({
            "col1": [1],
            "col2": [2],
        })
        with pytest.raises(UnknownFormatError):
            detect(unrelated_frame)


class TestMixedCategories:
    """Checks the asset reported for spot and futures payloads."""

    @staticmethod
    def _first_asset(category, orders):
        """Normalize ``orders`` under ``category`` and return the first row's asset."""
        normalizer = BinanceInterpreter()
        payload = json.dumps({"category": category, "orders": orders})
        frame = normalizer.parse_json_content(payload)
        normalized = normalizer.normalize(frame.lazy(), user_id=1, account_id="").collect()
        return normalized["asset"][0]

    @pytest.fixture
    def spot_orders(self):
        """Single-element list holding one spot buy order."""
        spot_buy = {
            "id": 1,
            "symbol": "ETHUSDT",
            "price": "3000.0",
            "qty": "2.0",
            "commission": "0.003",
            "commissionAsset": "BNB",
            "time": 1707554232375,
            "isBuyer": True
        }
        return [spot_buy]

    @pytest.fixture
    def futures_orders(self):
        """Single-element list holding one futures sell order."""
        futures_sell = {
            "id": 2,
            "symbol": "BTCUSDT",
            "side": "SELL",
            "price": "65000.0",
            "qty": "0.5",
            "commission": "16.25",
            "commissionAsset": "USDT",
            "time": 1707600000000,
            "positionSide": "SHORT"
        }
        return [futures_sell]

    def test_spot_category_preserved(self, spot_orders):
        """Spot orders are reported with the crypto asset."""
        assert self._first_asset("spot", spot_orders) == "crypto"

    def test_futures_category_preserved(self, futures_orders):
        """Futures orders are reported with the crypto asset."""
        assert self._first_asset("futures", futures_orders) == "crypto"


class TestFileRowHash:
    """Tests for file_row hash computation."""

    @staticmethod
    def _expected_legacy_hash(order, binance_type, category):
        """Compute the file_row hash these tests expect for ``order``.

        Formula: MD5(json.dumps(order_with_pre_hash_fields)), where the
        pre-hash fields are appended to the order in this order:
        - binance_type (e.g. 'FUTURE' or 'SPOT')
        - category (e.g. 'Future' or 'Spot')
        - created_at = time / 1000 (seconds as float)
        - created_at_formated = created_at rendered in UTC as '%Y-%m-%d %H:%M:%S'

        Key insertion order matters because it determines the JSON text that
        gets hashed.
        """
        import hashlib
        from datetime import datetime, timezone

        created_at = int(order["time"]) / 1000
        # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp;
        # the format has no offset directive, so the rendered text is identical.
        created_at_formated = datetime.fromtimestamp(
            created_at, tz=timezone.utc
        ).strftime('%Y-%m-%d %H:%M:%S')

        order_for_hash = dict(order)
        order_for_hash["binance_type"] = binance_type
        order_for_hash["category"] = category
        order_for_hash["created_at"] = created_at
        order_for_hash["created_at_formated"] = created_at_formated

        return hashlib.md5(json.dumps(order_for_hash).encode('utf-8')).hexdigest()

    def test_file_row_is_md5_hash(self):
        """Test file_row is a valid MD5 hash (32 hex chars)."""
        interpreter = BinanceInterpreter()
        content = json.dumps({
            "category": "futures",
            "orders": [{
                "id": 1,
                "symbol": "BTCUSDT",
                "side": "BUY",
                "price": "50000.0",
                "qty": "1.0",
                "commission": "0.5",
                "commissionAsset": "USDT",
                "time": 1707554232375,
            }]
        })
        df = interpreter.parse_json_content(content)
        result = interpreter.normalize(df.lazy(), user_id=1, account_id="").collect()

        file_row = result["file_row"][0]
        assert len(file_row) == 32
        assert all(c in "0123456789abcdef" for c in file_row)

    def test_file_row_deterministic(self):
        """Test file_row hash is deterministic for same input."""
        interpreter = BinanceInterpreter()
        content = json.dumps({
            "category": "futures",
            "orders": [{
                "id": 1,
                "symbol": "BTCUSDT",
                "side": "BUY",
                "price": "50000.0",
                "qty": "1.0",
                "commission": "0.5",
                "commissionAsset": "USDT",
                "time": 1707554232375,
            }]
        })
        df = interpreter.parse_json_content(content)
        result1 = interpreter.normalize(df.lazy(), user_id=1, account_id="").collect()
        result2 = interpreter.normalize(df.lazy(), user_id=1, account_id="").collect()

        assert result1["file_row"][0] == result2["file_row"][0]

    def test_file_row_legacy_formula(self):
        """Test file_row matches expected legacy formula.

        Formula: MD5(json.dumps(order_with_pre_hash_fields))
        Pre-hash fields:
        - binance_type = 'FUTURE' or 'SPOT' (uppercase)
        - category = 'Future' or 'Spot' (title case)
        - created_at = time / 1000 (seconds as float)
        - created_at_formated = created_at in UTC formatted as '%Y-%m-%d %H:%M:%S'
        """
        # Sample order
        order = {
            "symbol": "BTCUSDT",
            "side": "BUY",
            "price": "50000.0",
            "qty": "1.0",
            "commission": "0.5",
            "commissionAsset": "USDT",
            "time": 1707554232375,
            "id": 123456,
        }

        # Compute expected hash manually (futures category)
        expected_hash = self._expected_legacy_hash(order, "FUTURE", "Future")

        # Compute via interpreter
        interpreter = BinanceInterpreter()
        content = json.dumps({"category": "futures", "orders": [order]})
        df = interpreter.parse_json_content(content)
        result = interpreter.normalize(df.lazy(), user_id=1, account_id="").collect()

        assert result["file_row"][0] == expected_hash

    def test_file_row_spot_category(self):
        """Test file_row uses 'SPOT'/'Spot' category for spot trades."""
        order = {
            "id": 1,
            "symbol": "BTCUSDT",
            "price": "50000.0",
            "qty": "1.0",
            "commission": "0.5",
            "commissionAsset": "USDT",
            "time": 1707554232375,
            "isBuyer": True,
        }

        # Compute expected hash with Spot category
        expected_hash = self._expected_legacy_hash(order, "SPOT", "Spot")

        # Compute via interpreter with spot category
        interpreter = BinanceInterpreter()
        content = json.dumps({"category": "spot", "orders": [order]})
        df = interpreter.parse_json_content(content)
        result = interpreter.normalize(df.lazy(), user_id=1, account_id="").collect()

        assert result["file_row"][0] == expected_hash

    def test_file_row_different_for_different_orders(self):
        """Test different orders produce different hashes."""
        interpreter = BinanceInterpreter()
        content = json.dumps({
            "category": "futures",
            "orders": [
                {
                    "id": 1,
                    "symbol": "BTCUSDT",
                    "side": "BUY",
                    "price": "50000.0",
                    "qty": "1.0",
                    "commission": "0.5",
                    "commissionAsset": "USDT",
                    "time": 1707554232375,
                },
                {
                    "id": 2,
                    "symbol": "ETHUSDT",
                    "side": "SELL",
                    "price": "3000.0",
                    "qty": "10.0",
                    "commission": "3.0",
                    "commissionAsset": "USDT",
                    "time": 1707554300000,
                }
            ]
        })
        df = interpreter.parse_json_content(content)
        result = interpreter.normalize(df.lazy(), user_id=1, account_id="").collect()

        assert result["file_row"][0] != result["file_row"][1]


class TestEdgeCases:
    """Boundary-condition checks for parsing and normalization."""

    @staticmethod
    def _normalized(category, orders):
        """Parse ``orders`` under ``category`` and return the normalized rows."""
        normalizer = BinanceInterpreter()
        payload = json.dumps({"category": category, "orders": orders})
        frame = normalizer.parse_json_content(payload)
        return normalizer.normalize(frame.lazy(), user_id=1, account_id="").collect()

    def test_empty_orders(self):
        """An empty orders array parses to a zero-row frame."""
        payload = json.dumps({"category": "spot", "orders": []})
        frame = BinanceInterpreter().parse_json_content(payload)
        assert len(frame) == 0

    def test_zero_commission(self):
        """A "0" commission string is normalized to 0.0."""
        free_trade = {
            "id": 1,
            "symbol": "BTCUSDT",
            "price": "50000.0",
            "qty": "1.0",
            "commission": "0",
            "commissionAsset": "USDT",
            "time": 1707554232375,
            "isBuyer": True
        }
        rows = self._normalized("spot", [free_trade])
        assert rows["commission"][0] == 0.0

    def test_missing_commission_asset(self):
        """An order without commissionAsset gets USDT as its currency."""
        trade_without_asset = {
            "id": 1,
            "symbol": "BTCUSDT",
            "price": "50000.0",
            "qty": "1.0",
            "commission": "0.5",
            "time": 1707554232375,
            "isBuyer": True
        }
        rows = self._normalized("spot", [trade_without_asset])
        assert rows["currency"][0] == "USDT"

    def test_buyer_boolean_fallback(self):
        """With neither side nor isBuyer present, buyer=False yields SELL."""
        trade_with_buyer_flag = {
            "id": 1,
            "symbol": "BTCUSDT",
            "price": "50000.0",
            "qty": "1.0",
            "commission": "0.5",
            "commissionAsset": "USDT",
            "time": 1707554232375,
            # Only the "buyer" flag is supplied (no "side", no "isBuyer").
            "buyer": False
        }
        rows = self._normalized("futures", [trade_with_buyer_flag])
        assert rows["side"][0] == "SELL"


class TestCriticalValidations:
    """Data-integrity checks: rows that must be dropped by normalization."""

    @pytest.fixture
    def interpreter(self):
        """Provide a fresh BinanceInterpreter."""
        return BinanceInterpreter()

    @staticmethod
    def _spot_order(order_id, symbol, price, qty, commission, time_ms, status="FILLED"):
        """Build a spot buy order dict with the key order used throughout this class."""
        return {
            "id": order_id,
            "symbol": symbol,
            "price": price,
            "qty": qty,
            "commission": commission,
            "commissionAsset": "USDT",
            "time": time_ms,
            "isBuyer": True,
            "status": status
        }

    @staticmethod
    def _surviving_rows(interpreter, orders):
        """Normalize ``orders`` as a spot payload and return the collected rows."""
        payload = json.dumps({"category": "spot", "orders": orders})
        frame = interpreter.parse_json_content(payload)
        return interpreter.normalize(frame.lazy(), user_id=1, account_id="test").collect()

    def test_status_filled_filter(self, interpreter):
        """Only the FILLED order survives; the CANCELLED one is dropped."""
        filled = self._spot_order(1, "BTCUSDT", "50000.0", "1.0", "0.5", 1707554232375)
        # Expected to be filtered out because of its status.
        cancelled = self._spot_order(
            2, "ETHUSDT", "3000.0", "2.0", "0.3", 1707554232376, status="CANCELLED"
        )

        rows = self._surviving_rows(interpreter, [filled, cancelled])

        assert len(rows) == 1
        assert rows["symbol"][0] == "BTCUSDT"

    def test_quantity_zero_rejected(self, interpreter):
        """A trade whose qty is "0" is dropped; the valid one remains."""
        zero_qty = self._spot_order(1, "BTCUSDT", "50000.0", "0", "0.5", 1707554232375)
        valid = self._spot_order(2, "ETHUSDT", "3000.0", "2.0", "0.3", 1707554232376)

        rows = self._surviving_rows(interpreter, [zero_qty, valid])

        assert len(rows) == 1
        assert rows["symbol"][0] == "ETHUSDT"

    def test_price_zero_rejected(self, interpreter):
        """A trade whose price is "0" is dropped; the valid one remains."""
        zero_price = self._spot_order(1, "BTCUSDT", "0", "1.0", "0.5", 1707554232375)
        valid = self._spot_order(2, "ETHUSDT", "3000.0", "2.0", "0.3", 1707554232376)

        rows = self._surviving_rows(interpreter, [zero_price, valid])

        assert len(rows) == 1
        assert rows["symbol"][0] == "ETHUSDT"

    def test_symbol_empty_rejected(self, interpreter):
        """A trade with an empty symbol is dropped; the valid one remains."""
        blank_symbol = self._spot_order(1, "", "50000.0", "1.0", "0.5", 1707554232375)
        valid = self._spot_order(2, "ETHUSDT", "3000.0", "2.0", "0.3", 1707554232376)

        rows = self._surviving_rows(interpreter, [blank_symbol, valid])

        assert len(rows) == 1
        assert rows["symbol"][0] == "ETHUSDT"


class TestSymbolTransformations:
    """Checks for symbol-based filtering and side remapping."""

    @pytest.fixture
    def interpreter(self):
        """Provide a fresh BinanceInterpreter."""
        return BinanceInterpreter()

    def test_chinese_character_filtering(self, interpreter):
        """A trade whose symbol is written in Chinese characters is dropped."""
        promotional_trade = {
            "id": 1,
            "symbol": "币安人生",  # Chinese characters (promotional)
            "price": "1.0",
            "qty": "100.0",
            "commission": "0.1",
            "commissionAsset": "USDT",
            "time": 1707554232375,
            "isBuyer": True,
            "status": "FILLED"
        }
        regular_trade = {
            "id": 2,
            "symbol": "BTCUSDT",
            "price": "50000.0",
            "qty": "1.0",
            "commission": "0.5",
            "commissionAsset": "USDT",
            "time": 1707554232376,
            "isBuyer": True,
            "status": "FILLED"
        }
        payload = json.dumps({
            "category": "spot",
            "orders": [promotional_trade, regular_trade]
        })

        frame = interpreter.parse_json_content(payload)
        rows = interpreter.normalize(frame.lazy(), user_id=1, account_id="test").collect()

        # Only the trade without Chinese characters is expected to remain.
        assert len(rows) == 1
        assert rows["symbol"][0] == "BTCUSDT"

    def test_cover_short_mapping(self, interpreter):
        """COVER is reported as BUY and SHORT as SELL."""
        cover_trade = {
            "id": 1,
            "symbol": "BTCUSDT",
            "price": "50000.0",
            "qty": "1.0",
            "commission": "0.5",
            "commissionAsset": "USDT",
            "time": 1707554232375,
            "side": "COVER",  # Should map to BUY
            "status": "FILLED"
        }
        short_trade = {
            "id": 2,
            "symbol": "ETHUSDT",
            "price": "3000.0",
            "qty": "2.0",
            "commission": "0.3",
            "commissionAsset": "USDT",
            "time": 1707554232376,
            "side": "SHORT",  # Should map to SELL
            "status": "FILLED"
        }
        payload = json.dumps({
            "category": "futures",
            "orders": [cover_trade, short_trade]
        })

        frame = interpreter.parse_json_content(payload)
        rows = interpreter.normalize(frame.lazy(), user_id=1, account_id="test").collect()

        assert len(rows) == 2
        assert rows["side"][0] == "BUY"  # COVER → BUY
        assert rows["side"][1] == "SELL"  # SHORT → SELL


class TestFeeScientificNotation:
    """Checks commission values supplied as very small numbers."""

    @pytest.fixture
    def interpreter(self):
        """Provide a fresh BinanceInterpreter."""
        return BinanceInterpreter()

    def test_fee_scientific_notation_conversion(self, interpreter):
        """A numeric 1e-8 commission comes back as approximately 1e-8."""
        tiny_fee_trade = {
            "id": 1,
            "symbol": "BTCUSDT",
            "price": "50000.0",
            "qty": "0.0001",
            # Numeric (not string) value, written in scientific notation.
            "commission": 1e-8,
            "commissionAsset": "USDT",
            "time": 1707554232375,
            "isBuyer": True,
            "status": "FILLED"
        }
        payload = json.dumps({"category": "spot", "orders": [tiny_fee_trade]})

        frame = interpreter.parse_json_content(payload)
        rows = interpreter.normalize(frame.lazy(), user_id=1, account_id="test").collect()

        # Expected value is 0.00000001, compared with a tight relative tolerance.
        assert rows["commission"][0] == pytest.approx(1e-8, rel=1e-9)


class TestSideValidation:
    """Checks on the side values present after normalization."""

    @pytest.fixture
    def interpreter(self):
        """Provide a fresh BinanceInterpreter."""
        return BinanceInterpreter()

    def test_invalid_side_rejected(self, interpreter):
        """Every normalized row carries a side of BUY or SELL.

        NOTE(review): both input orders already use valid sides, so this acts
        as an integration check on the output rather than exercising rejection
        of an invalid side. The original author's note says parse_json_content
        always normalizes to BUY/SELL — confirm against the interpreter.
        """
        buy_trade = {
            "id": 1,
            "symbol": "BTCUSDT",
            "price": "50000.0",
            "qty": "1.0",
            "commission": "0.5",
            "commissionAsset": "USDT",
            "time": 1707554232375,
            "side": "BUY",
            "status": "FILLED"
        }
        sell_trade = {
            "id": 2,
            "symbol": "ETHUSDT",
            "price": "3000.0",
            "qty": "2.0",
            "commission": "0.3",
            "commissionAsset": "USDT",
            "time": 1707554232376,
            "side": "SELL",
            "status": "FILLED"
        }
        payload = json.dumps({
            "category": "futures",
            "orders": [buy_trade, sell_trade]
        })

        frame = interpreter.parse_json_content(payload)
        rows = interpreter.normalize(frame.lazy(), user_id=1, account_id="test").collect()

        accepted_sides = ["BUY", "SELL"]
        assert all(rows["side"].is_in(accepted_sides))


if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file directly exits
    # non-zero when any test fails (pytest.main only returns the code).
    raise SystemExit(pytest.main([__file__, "-v"]))
