"""Base classes for broker interpreters."""

import polars as pl
from abc import ABC, abstractmethod
from typing import ClassVar, Set, List, Dict, Any


class BaseInterpreter(ABC):
    """
    Common contract for broker data interpreters.

    Concrete interpreters are expected to provide:
    - can_handle(): decide whether the interpreter applies to some input
    - normalize(): convert the input into the normalized schema

    Class attributes:
    - BROKER_ID: broker identifier (empty string on the base class)
    - FORMAT_VERSION: format version string (defaults to "1.0")
    - SUPPORTED_ASSETS: asset names the interpreter supports
    - SIDE_MAP: lowercase side tokens mapped to "BUY" / "SELL"
    """

    BROKER_ID: ClassVar[str] = ""
    FORMAT_VERSION: ClassVar[str] = "1.0"
    SUPPORTED_ASSETS: ClassVar[Set[str]] = {"stocks"}

    SIDE_MAP: ClassVar[dict] = {
        # Buy-side spellings
        "b": "BUY",
        "buy": "BUY",
        "bot": "BUY",
        "bought": "BUY",
        "bto": "BUY",
        # Sell-side spellings
        "s": "SELL",
        "sell": "SELL",
        "sld": "SELL",
        "sold": "SELL",
        "sto": "SELL",
    }

    @classmethod
    @abstractmethod
    def can_handle(cls, df: pl.DataFrame, metadata: dict) -> bool:
        """
        Report whether this interpreter applies to the supplied data.

        Args:
            df: DataFrame to inspect
            metadata: Extra information about where the data came from

        Returns:
            True when the interpreter can process the data, else False
        """

    @classmethod
    def get_priority(cls) -> int:
        """
        Give the interpreter's selection priority.

        Interpreters with higher values are meant to be checked first.

        Returns:
            The priority; the base implementation returns 100
        """
        return 100

    def _build_file_row_expr(self, columns: List[str], schema: Dict[str, Any]) -> pl.Expr:
        """
        Assemble the original_file_row value as a JSON-style string.

        CRITICAL: This field is used for deduplication, so the exact text
        produced here must stay stable. See references/file-row.md for rules.

        Each column becomes a `"<lowercased name>": <value>` pair; pairs are
        joined with ", " and wrapped in braces. Columns whose dtype is one of
        Float64 / Int64 / Int32 / Decimal are written unquoted with nulls
        rendered as `null`; every other column (including columns missing
        from `schema`) is written as a quoted string with nulls rendered as
        an empty string.

        Args:
            columns: Column names to include, in output order
            schema: Mapping of column name to dtype

        Returns:
            Polars expression evaluating to the row string
        """
        unquoted_dtypes = (pl.Float64, pl.Int64, pl.Int32, pl.Decimal)

        def render_pair(name: str) -> pl.Expr:
            # Only the key is lowercased; the value text is left untouched.
            key = name.lower()
            as_text = pl.col(name).cast(pl.Utf8)

            if schema.get(name) in unquoted_dtypes:
                return pl.lit(f'"{key}": ') + as_text.fill_null("null")

            # NOTE: Values keep their original case to match broker data
            # exactly. This affects the deduplication hash for new imports
            # vs historical data.
            # NOTE(review): values are not escaped, so an embedded double
            # quote would presumably yield invalid JSON - confirm whether
            # that matters before changing, since it alters the dedup key.
            return pl.lit(f'"{key}": "') + as_text.fill_null("") + pl.lit('"')

        pairs = [render_pair(name) for name in columns]
        body = pl.concat_str(pairs, separator=", ")
        return pl.lit("{") + body + pl.lit("}")

    @abstractmethod
    def normalize(self, df: pl.LazyFrame, account_id: int) -> pl.LazyFrame:
        """
        Convert broker data into the normalized schema.

        Args:
            df: Broker data as a LazyFrame
            account_id: TraderSync account ID

        Returns:
            LazyFrame conforming to the NormalizedExecution schema
        """
