# Ejemplos de Cambios de Código - KuCoin Normalizer

Este documento muestra ejemplos concretos de código BEFORE/AFTER para cada validación, incluyendo tests.

---

## FASE 1: Validaciones Críticas

### 1. Side Validation Estricta

#### BEFORE (Código Actual - PROBLEMA)
**Ubicación:** `brokers/kucoin/kucoin.py:124-125`

```python
side_raw = order.get("side", "").lower()
side = "BUY" if side_raw == "buy" else "SELL"  # ❌ Default silencioso!
```

**Problema:**
- Sides inválidos ("INVALID", "FOO", "") se convierten a "SELL" sin warning
- No hay trazabilidad de datos corruptos
- Posiciones pueden calcularse incorrectamente

#### AFTER (Código Propuesto - SOLUCIÓN)
**Ubicación:** `brokers/kucoin/kucoin.py:123-130`

```python
# Validación estricta de side
side_raw = order.get("side", "").lower()

if not side_raw or side_raw not in ("buy", "sell"):
    logger.warning(
        f"Skipping order {order.get('tradeId', 'unknown')}: "
        f"invalid side '{side_raw}'"
    )
    continue  # Skip este order completamente

side = "BUY" if side_raw == "buy" else "SELL"
```

**Beneficios:**
- ✅ Rechaza sides inválidos explícitamente
- ✅ Log warning con tradeId para trazabilidad
- ✅ Previene datos corruptos en pipeline

#### Tests Requeridos

```python
def test_side_validation_rejects_invalid():
    """An unrecognised side value must cause the order to be dropped."""
    bad_order = {
        "tradeId": "test-invalid-side",
        "symbol": "XBTUSDTM",
        "side": "INVALID",  # ❌ not a valid side
        "price": "50000",
        "size": "1",
        "value": "50000",
        "fee": "25",
        "createdAt": "1736276232000",
    }
    payload = json.dumps({"orders": [bad_order]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 0  # the order must be rejected


def test_side_validation_rejects_empty():
    """An empty side string must cause the order to be dropped."""
    bad_order = {
        "tradeId": "test-empty-side",
        "symbol": "XBTUSDTM",
        "side": "",  # ❌ empty side
        "price": "50000",
        "size": "1",
        "value": "50000",
        "fee": "25",
        "createdAt": "1736276232000",
    }
    payload = json.dumps({"orders": [bad_order]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 0  # the order must be rejected


def test_side_validation_accepts_valid_buy():
    """A lowercase 'buy' side is accepted and normalized to 'BUY'."""
    buy_order = {
        "tradeId": "test-valid-buy",
        "symbol": "XBTUSDTM",
        "side": "buy",  # ✅ valid
        "price": "50000",
        "size": "1",
        "value": "50000",
        "fee": "25",
        "createdAt": "1736276232000",
    }
    parser = KucoinInterpreter()
    parsed = parser.parse_json_content(json.dumps({"orders": [buy_order]}))
    normalized = parser.normalize(
        parsed.lazy(), user_id=1, account_id="test"
    ).collect()
    assert len(normalized) == 1
    assert normalized["side"][0] == "BUY"


def test_side_validation_accepts_valid_sell():
    """A lowercase 'sell' side is accepted and normalized to 'SELL'."""
    sell_order = {
        "tradeId": "test-valid-sell",
        "symbol": "XBTUSDTM",
        "side": "sell",  # ✅ valid
        "price": "50000",
        "size": "1",
        "value": "50000",
        "fee": "25",
        "createdAt": "1736276232000",
    }
    parser = KucoinInterpreter()
    parsed = parser.parse_json_content(json.dumps({"orders": [sell_order]}))
    normalized = parser.normalize(
        parsed.lazy(), user_id=1, account_id="test"
    ).collect()
    assert len(normalized) == 1
    assert normalized["side"][0] == "SELL"
```

---

### 2. Required Fields Validation

#### BEFORE (Código Actual - PROBLEMA)
**Ubicación:** `brokers/kucoin/kucoin.py:122+`

```python
# ❌ Sin validación explícita de campos requeridos
for row_idx, order in enumerate(orders):
    # Procesa directamente sin validar
    trade_id = str(order.get("tradeId", ""))
    symbol = str(order.get("symbol", ""))
    # ... continúa procesando
```

**Problema:**
- tradeId vacíos causan hash collisions
- symbol vacíos causan grouping errors
- price/size zero/missing causan calculation errors

#### AFTER (Código Propuesto - SOLUCIÓN)
**Ubicación:** `brokers/kucoin/kucoin.py:122+`

```python
for row_idx, order in enumerate(orders):
    # Pull the required fields up front; the raw values stay available for
    # the processing that follows the validation.
    trade_id = order.get("tradeId", "")
    symbol = order.get("symbol", "")
    side = order.get("side", "")
    price = order.get("price", 0)
    size = order.get("size", 0)

    # ✅ tradeId must be present
    if not trade_id:
        logger.warning("Skipping order: missing tradeId")
        continue

    # ✅ symbol must be present
    if not symbol:
        logger.warning(f"Skipping order {trade_id}: missing symbol")
        continue

    # ✅ side must be present
    if not side:
        logger.warning(f"Skipping order {trade_id}: missing side")
        continue

    # ✅ price must be numeric, finite and > 0.
    # Only the conversion lives inside the try so that nothing else is
    # accidentally swallowed by the except clause.
    try:
        price_float = float(price or 0)
    except (ValueError, TypeError):
        logger.warning(f"Skipping order {trade_id}: non-numeric price")
        continue
    # The chained comparison is False for NaN and for +inf; float() parses
    # both ("nan", "inf") and a plain `<= 0` check would let them through.
    if not 0 < price_float < float("inf"):
        logger.warning(
            f"Skipping order {trade_id}: invalid price {price_float}"
        )
        continue

    # ✅ size must be numeric, finite and > 0 (same rules as price)
    try:
        size_float = float(size or 0)
    except (ValueError, TypeError):
        logger.warning(f"Skipping order {trade_id}: non-numeric size")
        continue
    if not 0 < size_float < float("inf"):
        logger.warning(
            f"Skipping order {trade_id}: invalid size {size_float}"
        )
        continue

    # Continue with normal processing...
```

#### Tests Requeridos

```python
def test_required_fields_missing_trade_id():
    """Orders with an empty tradeId are rejected."""
    bad_order = {
        "tradeId": "",  # ❌ empty
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "50000",
        "size": "1",
    }
    payload = json.dumps({"orders": [bad_order]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 0


def test_required_fields_missing_symbol():
    """Orders with an empty symbol are rejected."""
    bad_order = {
        "tradeId": "test1",
        "symbol": "",  # ❌ empty
        "side": "buy",
        "price": "50000",
        "size": "1",
    }
    payload = json.dumps({"orders": [bad_order]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 0


def test_required_fields_zero_price():
    """Orders whose price is 0 are rejected."""
    bad_order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": 0,  # ❌ zero
        "size": "1",
    }
    payload = json.dumps({"orders": [bad_order]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 0


def test_required_fields_negative_price():
    """Orders with a negative price are rejected."""
    bad_order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "-50000",  # ❌ negative
        "size": "1",
    }
    payload = json.dumps({"orders": [bad_order]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 0


def test_required_fields_zero_size():
    """Orders whose size is 0 are rejected."""
    bad_order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "50000",
        "size": 0,  # ❌ zero
    }
    payload = json.dumps({"orders": [bad_order]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 0


def test_required_fields_non_numeric_price():
    """Orders with a non-numeric price are rejected."""
    bad_order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "invalid",  # ❌ not numeric
        "size": "1",
    }
    payload = json.dumps({"orders": [bad_order]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 0


def test_required_fields_all_valid():
    """An order with every required field valid is kept by the parser."""
    good_order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "50000",
        "size": "1",
        "value": "50000",
        "fee": "25",
        "createdAt": "1736276232000",
    }
    payload = json.dumps({"orders": [good_order]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 1
```

---

### 3. Type Filter Validation

#### BEFORE (Código Actual - PROBLEMA)
**Ubicación:** `brokers/kucoin/kucoin.py:122+`

```python
# ❌ Sin filtro de tipo de orden
for row_idx, order in enumerate(orders):
    # Procesa todos los tipos sin discriminar
    # TRANSFER, LIQUIDATION, FUNDING, etc. pasan sin filtro
```

**Problema:**
- Órdenes no-trade contaminan reportes
- Cálculos P&L incorrectos
- Datos irrelevantes en análisis

#### AFTER (Código Propuesto - SOLUCIÓN)
**Ubicación:** `brokers/kucoin/kucoin.py` (después de validación de campos)

```python
# ✅ Filtrar por tipo de orden
trade_type = order.get("tradeType", "").upper()
if trade_type and trade_type not in ("TRADE", "BUSTTRADE"):
    logger.debug(
        f"Skipping order {trade_id}: non-trade type '{trade_type}'"
    )
    continue
```

#### Tests Requeridos

```python
def test_type_filter_rejects_transfer():
    """TRANSFER events are rejected by the parser."""
    transfer_event = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "50000",
        "size": "1",
        "tradeType": "TRANSFER",  # ❌ not a trade
        "value": "50000",
        "fee": "0",
        "createdAt": "1736276232000",
    }
    payload = json.dumps({"orders": [transfer_event]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 0


def test_type_filter_rejects_liquidation():
    """LIQUIDATION events are rejected by the parser."""
    liquidation_event = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "sell",
        "price": "50000",
        "size": "1",
        "tradeType": "LIQUIDATION",  # ❌ not a trade
        "value": "50000",
        "fee": "0",
        "createdAt": "1736276232000",
    }
    payload = json.dumps({"orders": [liquidation_event]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 0


def test_type_filter_accepts_trade():
    """TRADE events are kept by the parser."""
    trade_event = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "50000",
        "size": "1",
        "tradeType": "TRADE",  # ✅ valid trade
        "value": "50000",
        "fee": "25",
        "createdAt": "1736276232000",
    }
    payload = json.dumps({"orders": [trade_event]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 1


def test_type_filter_accepts_busttrade():
    """BUSTTRADE events are kept by the parser."""
    bust_event = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "sell",
        "price": "50000",
        "size": "1",
        "tradeType": "BUSTTRADE",  # ✅ valid trade (liquidation trade)
        "value": "50000",
        "fee": "25",
        "createdAt": "1736276232000",
    }
    payload = json.dumps({"orders": [bust_event]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 1


def test_type_filter_accepts_missing_type():
    """Orders without a tradeType key are kept (treated as TRADE)."""
    untyped_order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "50000",
        "size": "1",
        # no tradeType key - assumed valid
        "value": "50000",
        "fee": "25",
        "createdAt": "1736276232000",
    }
    payload = json.dumps({"orders": [untyped_order]})
    parsed = KucoinInterpreter().parse_json_content(payload)
    assert len(parsed) == 1
```

---

## FASE 2: Calidad de Datos

### 5. Multiplier Validation

#### BEFORE (Código Actual - PROBLEMA)
**Ubicación:** `brokers/kucoin/kucoin.py:300-306`

```python
# ⚠️ Solo valida divisores zero, no rango de resultado
pl.when((pl.col("price") > 0) & (pl.col("size") > 0))
.then(pl.col("value") / (pl.col("price") * pl.col("size")))
.otherwise(pl.lit(1.0))
.alias("multiplier"),
```

**Problema:**
- Multipliers anómalos (0.00001, 999999) pasan sin warning
- No valida que value > 0
- Cálculos posteriores pueden fallar

#### AFTER (Código Propuesto - SOLUCIÓN)
**Ubicación:** `brokers/kucoin/kucoin.py:300-306` (reemplazar)

```python
# ✅ Robust validation with a range check: the multiplier is
# value / (price * size) only when all three inputs are > 0 and the ratio
# falls inside the accepted range; every other case yields 1.0.
# NOTE(review): out-of-range ratios are replaced by 1.0 without any log
# entry — confirm that a silent fallback is intended here.
pl.when(
    (pl.col("price") > 0) &
    (pl.col("size") > 0) &
    (pl.col("value") > 0)
)
.then(
    pl.when(
        (pl.col("value") / (pl.col("price") * pl.col("size")))
        .is_between(0.01, 1000000)  # accepted multiplier range
    )
    .then(pl.col("value") / (pl.col("price") * pl.col("size")))
    .otherwise(pl.lit(1.0))  # fallback for out-of-range multipliers
)
.otherwise(pl.lit(1.0))
.alias("multiplier"),
```

#### Tests Requeridos

```python
def test_multiplier_validation_normal():
    """A multiplier inside the accepted range is passed through unchanged."""
    order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "50000",
        "size": "1",
        "value": "50000",  # multiplier = 50000/(50000*1) = 1
        "fee": "25",
        "createdAt": "1736276232000",
    }
    parser = KucoinInterpreter()
    parsed = parser.parse_json_content(json.dumps({"orders": [order]}))
    normalized = parser.normalize(
        parsed.lazy(), user_id=1, account_id="test"
    ).collect()
    assert normalized["multiplier"][0] == 1.0


def test_multiplier_validation_anomalous():
    """An out-of-range multiplier falls back to 1.0."""
    order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "1",
        "size": "1",
        "value": "999999999",  # multiplier = 999999999 (out of range)
        "fee": "25",
        "createdAt": "1736276232000",
    }
    parser = KucoinInterpreter()
    parsed = parser.parse_json_content(json.dumps({"orders": [order]}))
    normalized = parser.normalize(
        parsed.lazy(), user_id=1, account_id="test"
    ).collect()
    assert normalized["multiplier"][0] == 1.0  # fallback


def test_multiplier_validation_zero_value():
    """A value of 0 yields the fallback multiplier 1.0."""
    order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "50000",
        "size": "1",
        "value": "0",  # ❌ zero value
        "fee": "25",
        "createdAt": "1736276232000",
    }
    parser = KucoinInterpreter()
    parsed = parser.parse_json_content(json.dumps({"orders": [order]}))
    normalized = parser.normalize(
        parsed.lazy(), user_id=1, account_id="test"
    ).collect()
    assert normalized["multiplier"][0] == 1.0  # fallback


def test_multiplier_validation_futures_contract():
    """A futures order with a typical multiplier (100-1000) keeps it."""
    order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "50000",
        "size": "1",
        "value": "5000000",  # multiplier = 100
        "fee": "25",
        "createdAt": "1736276232000",
    }
    parser = KucoinInterpreter()
    parsed = parser.parse_json_content(json.dumps({"orders": [order]}))
    normalized = parser.normalize(
        parsed.lazy(), user_id=1, account_id="test"
    ).collect()
    assert normalized["multiplier"][0] == 100.0
```

---

### 7. Quantity Absolute Value + Decimal Limit

#### BEFORE (Código Actual - PROBLEMA)
**Ubicación:** `brokers/kucoin/kucoin.py:262`

```python
# ❌ Sin abs() ni límite de decimales
pl.col("size").alias("quantity"),
```

**Problema:**
- Quantities negativas pueden causar confusión
- Exceso de decimales puede causar problemas de precisión

#### AFTER (Código Propuesto - SOLUCIÓN)
**Ubicación:** `brokers/kucoin/kucoin.py:262`

```python
# ✅ Quantity is the absolute value of size, rounded to 8 decimal places
pl.col("size").abs().round(8).alias("quantity"),  # 8 decimals for crypto
```

#### Tests Requeridos

```python
def test_quantity_absolute_value_negative():
    """A negative size is expected to come out as a positive quantity.

    NOTE(review): the required-fields validation proposed earlier in this
    document skips orders with size <= 0 while parsing. If that check lives
    in parse_json_content, this order never reaches normalize() and the
    indexing below fails on an empty result — confirm which stage is meant
    to handle negative sizes.
    """
    interpreter = KucoinInterpreter()
    content = json.dumps({
        "orders": [{
            "tradeId": "test1",
            "symbol": "XBTUSDTM",
            "side": "sell",
            "price": "50000",
            "size": "-1.5",  # ❌ negative
            "value": "75000",
            "fee": "25",
            "createdAt": "1736276232000",
        }]
    })
    df = interpreter.parse_json_content(content)
    result = interpreter.normalize(df.lazy(), user_id=1, account_id="test").collect()
    assert result["quantity"][0] == 1.5  # converted to positive


def test_quantity_decimal_limit():
    """A quantity with excess decimals is rounded to 8 decimal places."""
    interpreter = KucoinInterpreter()
    content = json.dumps({
        "orders": [{
            "tradeId": "test1",
            "symbol": "XBTUSDTM",
            "side": "buy",
            "price": "50000",
            "size": "1.123456789012345",  # many decimals
            "value": "56172.84",
            "fee": "25",
            "createdAt": "1736276232000",
        }]
    })
    df = interpreter.parse_json_content(content)
    result = interpreter.normalize(df.lazy(), user_id=1, account_id="test").collect()
    # Rounded to 8 decimals. The unrounded input is only ~9.9e-10 away from
    # the rounded value, so the tolerance must be tighter than that or the
    # assertion would also pass when no rounding happens at all.
    assert abs(result["quantity"][0] - 1.12345679) < 1e-10


def test_quantity_preserves_valid_values():
    """A valid quantity comes through normalization unchanged."""
    order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "50000",
        "size": "1.5",
        "value": "75000",
        "fee": "25",
        "createdAt": "1736276232000",
    }
    parser = KucoinInterpreter()
    parsed = parser.parse_json_content(json.dumps({"orders": [order]}))
    normalized = parser.normalize(
        parsed.lazy(), user_id=1, account_id="test"
    ).collect()
    assert normalized["quantity"][0] == 1.5
```

---

### 8. Price Rounding

#### BEFORE (Código Actual - PROBLEMA)
**Ubicación:** `brokers/kucoin/kucoin.py:265`

```python
# ❌ Sin rounding
pl.col("price").alias("price"),
```

**Problema:**
- Precisión excesiva puede causar problemas de storage
- Inconsistencia con legacy (6 decimales)

#### AFTER (Código Propuesto - SOLUCIÓN)
**Ubicación:** `brokers/kucoin/kucoin.py:265`

```python
# ✅ Price is rounded to 8 decimal places.
# NOTE(review): the problem statement for this change cites legacy behaviour
# of 6 decimals — confirm that 8 is the intended precision.
pl.col("price").round(8).alias("price"),  # 8 decimals for crypto
```

#### Tests Requeridos

```python
def test_price_rounding():
    """A price with excess decimals is rounded to 8 decimal places."""
    interpreter = KucoinInterpreter()
    content = json.dumps({
        "orders": [{
            "tradeId": "test1",
            "symbol": "XBTUSDTM",
            "side": "buy",
            "price": "50000.123456789012345",  # many decimals
            "size": "1",
            "value": "50000.12",
            "fee": "25",
            "createdAt": "1736276232000",
        }]
    })
    df = interpreter.parse_json_content(content)
    result = interpreter.normalize(df.lazy(), user_id=1, account_id="test").collect()
    # Rounded to 8 decimals. The unrounded input is only ~9.9e-10 away from
    # the rounded value, so the tolerance must be tighter than that or the
    # assertion would also pass when no rounding happens at all.
    assert abs(result["price"][0] - 50000.12345679) < 1e-10


def test_price_preserves_valid_values():
    """A valid price comes through normalization unchanged."""
    order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "50000.5",
        "size": "1",
        "value": "50000.5",
        "fee": "25",
        "createdAt": "1736276232000",
    }
    parser = KucoinInterpreter()
    parsed = parser.parse_json_content(json.dumps({"orders": [order]}))
    normalized = parser.normalize(
        parsed.lazy(), user_id=1, account_id="test"
    ).collect()
    assert normalized["price"][0] == 50000.5


def test_price_integer():
    """An integer price comes through normalization unchanged."""
    order = {
        "tradeId": "test1",
        "symbol": "XBTUSDTM",
        "side": "buy",
        "price": "50000",
        "size": "1",
        "value": "50000",
        "fee": "25",
        "createdAt": "1736276232000",
    }
    parser = KucoinInterpreter()
    parsed = parser.parse_json_content(json.dumps({"orders": [order]}))
    normalized = parser.normalize(
        parsed.lazy(), user_id=1, account_id="test"
    ).collect()
    assert normalized["price"][0] == 50000.0
```

---

## Resumen de Cambios por Archivo

### `brokers/kucoin/kucoin.py`

**Líneas modificadas:**
- 123-130: Side validation estricta
- 122+: Required fields validation (30-40 líneas nuevas)
- 122+ (después de la validación de campos): Type filter validation (~7 líneas nuevas)
- 262: Quantity absolute + rounding
- 265: Price rounding
- 300-306: Multiplier validation robusta

**Estimado total:** +60-80 líneas

### `tests/brokers/test_kucoin.py`

**Tests nuevos:**
- Side validation: 4 tests (+15 líneas)
- Required fields: 7 tests (+40 líneas)
- Type filter: 5 tests (+25 líneas)
- Multiplier validation: 4 tests (+30 líneas)
- Quantity validation: 3 tests (+20 líneas)
- Price validation: 3 tests (+15 líneas)

**Estimado total:** +145-160 líneas

---

## Patrones de Testing

### Pattern 1: Test de Rechazo
```python
def test_validation_rejects_invalid():
    """Template: invalid input must produce no parsed rows."""
    payload = json.dumps({"orders": [invalid_order]})
    parsed = interpreter.parse_json_content(payload)
    assert len(parsed) == 0  # must be rejected
```

### Pattern 2: Test de Aceptación
```python
def test_validation_accepts_valid():
    """Template: valid input must survive parsing and normalization."""
    payload = json.dumps({"orders": [valid_order]})
    parsed = interpreter.parse_json_content(payload)
    normalized = interpreter.normalize(
        parsed.lazy(), user_id=1, account_id="test"
    ).collect()
    assert len(normalized) == 1
    # Check the expected value here
```

### Pattern 3: Test de Transformación
```python
def test_validation_transforms_correctly():
    """Template: the normalized field must equal the expected transformed value."""
    payload = json.dumps({"orders": [order_with_issue]})
    parsed = interpreter.parse_json_content(payload)
    normalized = interpreter.normalize(
        parsed.lazy(), user_id=1, account_id="test"
    ).collect()
    assert normalized["field"][0] == expected_transformed_value
```

---

## Verificación de Regresión

### Test Suite Completo
```bash
# Run every test
pytest tests/brokers/test_kucoin.py -v

# Run only the new tests. The keyword expression has to cover every name
# prefix used in this document; "validation" alone would miss the
# required_fields, type_filter, quantity, price and hash tests.
pytest tests/brokers/test_kucoin.py -v \
    -k "validation or required_fields or type_filter or test_quantity or test_price or hash_compatibility"

# Run with coverage
pytest tests/brokers/test_kucoin.py --cov=pipeline.p01_normalize.brokers.kucoin
```

### Hash Compatibility Check
```python
def test_hash_compatibility_maintained():
    """The file_row hash must still equal the MD5 of the JSON-encoded tradeId."""
    # Both imports sit at the top of the function: an `import json` placed
    # after the first json.dumps() call makes `json` a function-local name
    # and the earlier call raises UnboundLocalError.
    import hashlib
    import json

    interpreter = KucoinInterpreter()
    content = json.dumps({
        "orders": [{
            "tradeId": "known-trade-id-123",
            "symbol": "XBTUSDTM",
            "side": "buy",
            "price": "50000",
            "size": "1",
            "value": "50000",
            "fee": "25",
            "createdAt": "1736276232000",
        }]
    })
    df = interpreter.parse_json_content(content)
    result = interpreter.normalize(df.lazy(), user_id=1, account_id="test").collect()

    # Expected hash (legacy formula)
    expected_hash = hashlib.md5(
        json.dumps("known-trade-id-123").encode('utf-8')
    ).hexdigest()

    assert result["file_row"][0] == expected_hash
```

---

**Fecha de creación:** 2026-01-14
**Última actualización:** 2026-01-14
