#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import csv, re
from collections import defaultdict
from datetime import date, timedelta

# Load the whole journal export into memory. The context manager guarantees the
# file handle is closed, and newline="" lets the csv module handle line breaks
# embedded in quoted fields itself (as the csv documentation requires).
with open("diario.csv", encoding="utf-8-sig", newline="") as fh:
    rows = list(csv.reader(fh))
# The first row is skipped; presumably it is the column header -- TODO confirm.
data = rows[1:]
# Real order code: 26MM-NNNN (MM in 01-12, NNNN four digits), not preceded or
# followed by another digit. NOTE(review): the leading "26" looks like a
# two-digit year -- confirm.
PED = re.compile(r"(?<!\d)(26(?:0[1-9]|1[0-2])-\d{4})(?!\d)")

def D(s):
    """Turn a serial day count into an ISO ``YYYY-MM-DD`` date string.

    The count is taken relative to 1899-12-30 (presumably the spreadsheet
    serial-date origin -- confirm against the export). Any fractional part
    of the number is truncated before the days are added.

    Args:
        s: Text holding the day count; may be ``None``, empty, or padded
            with whitespace.

    Returns:
        The ISO-formatted date, or ``""`` when ``s`` is ``None``/blank.

    Raises:
        ValueError: If the non-blank text is not a valid number.
    """
    text = (s or "").strip()
    if not text:
        return ""
    whole_days = int(float(text))
    origin = date(1899, 12, 30)
    return (origin + timedelta(days=whole_days)).isoformat()

def num(s):
    """Parse a numeric cell into a float, treating blank input as zero.

    Args:
        s: Text to parse; may be ``None``, empty, or padded with whitespace.

    Returns:
        ``float(s)`` for non-blank text, otherwise ``0.0``.

    Raises:
        ValueError: If the non-blank text is not a valid float literal.
    """
    text = (s or "").strip()
    if not text:
        return 0.0
    return float(text)

# Fold the raw CSV rows into statement lines. A row whose first column is
# non-empty opens a new line; rows with an empty first column are treated as
# continuations and only contribute order codes to the line currently open.
lines = []
cur = None
for raw in data:
    # Pad short rows to 18 cells so the fixed column indexes below never fail.
    row = raw + [""] * (18 - len(raw))
    if row[0].strip():
        cur = {
            "diario": row[6].strip(),
            "conc": row[8].strip(),
            "contacto": row[11].strip(),
            "importe": num(row[12]),
            "fecha": D(row[14]),
            "asiento": row[16].strip(),
            "pedidos": set(),
        }
        lines.append(cur)
    if cur is None:
        # Continuation rows that appear before any statement line have no owner.
        continue
    # Every cell of the row (opening or continuation) is scanned for order codes.
    cur["pedidos"].update(code for cell in row for code in PED.findall(cell))

# Headline figures: number of parsed statement lines and the date span covered.
print(f"Lineas de extracto (statement lines): {len(lines)}")
# Only lines that actually carry a date take part in the range; the values are
# ISO-formatted strings, so plain string min/max gives chronological order.
fechas = [entry["fecha"] for entry in lines if entry["fecha"]]
if fechas:
    print(f"Rango fechas: {min(fechas)} .. {max(fechas)}")
else:
    # min()/max() raise ValueError on an empty sequence (e.g. a header-only
    # CSV); report the absence instead of aborting the rest of the report.
    print("Rango fechas: (sin fechas)")
print()
# Per-journal breakdown: line count, positive/negative counts, lines that carry
# at least one order code, and the net amount.
print("== Por DIARIO ==")
byj = defaultdict(lambda: {"n": 0, "neg": 0, "pos": 0, "sum": 0.0, "ped": 0})
for entry in lines:
    amount = entry["importe"]
    stats = byj[entry["diario"]]
    stats["n"] += 1
    stats["sum"] += amount
    # Zero amounts count towards neither the positive nor the negative tally.
    if amount < 0:
        stats["neg"] += 1
    elif amount > 0:
        stats["pos"] += 1
    if entry["pedidos"]:
        stats["ped"] += 1
# Busiest journals first; the sort is stable, so ties keep first-seen order.
ranked = sorted(byj.items(), key=lambda item: item[1]["n"], reverse=True)
for name, stats in ranked:
    print(f"  {name:<26} n={stats['n']:<4} +{stats['pos']:<4} -{stats['neg']:<4} conPedido={stats['ped']:<4} neto=${stats['sum']:>13,.0f}")
print()
# Linkage totals: how many statement lines reference at least one order code,
# and how many different order codes appear across all lines.
tot_ped = len([entry for entry in lines if entry["pedidos"]])
print(f"Lineas con pedido linkeado: {tot_ped}/{len(lines)}")
distinct_orders = set()
for entry in lines:
    distinct_orders |= entry["pedidos"]
print(f"Pedidos distintos: {len(distinct_orders)}")
print()
# Sample of lines whose contact is the anonymous end consumer or is blank,
# showing the 12 largest by absolute amount.
print("== Muestra ANONIMOS / sin contacto, con y sin pedido ==")
anon = [entry for entry in lines if entry["contacto"] in ("Consumidor Final Anónimo", "")]
con_pedido = sum(1 for entry in anon if entry["pedidos"])
print(f"  lineas anon/sin-contacto: {len(anon)} ; de esas con pedido: {con_pedido}")
for entry in sorted(anon, key=lambda x: -abs(x["importe"]))[:12]:
    # "pedidos" is a set of strings, whose iteration order varies between runs
    # (string hash randomisation); sort it so the report is reproducible.
    ped = ",".join(sorted(entry["pedidos"])) or "—"
    print(f"   {entry['fecha']} {entry['diario']:<14} ${entry['importe']:>11,.0f}  ped:{ped}")
