"""Generate fake data for testing."""

import logging
import random
import tempfile
import threading
import time
from argparse import ArgumentParser
from concurrent import futures
from pathlib import Path
from typing import TYPE_CHECKING
from uuid import uuid4

from fpdf import FPDF
from PIL import Image

from octopus.utils import compute_hash, load_config, set_log_verbosity
from squirro_client import DocumentUploader

if TYPE_CHECKING:
    from argparse import Namespace
    from uuid import UUID


# Set the log verbosity to INFO through the project helper. This is a
# module-level call, so it executes as soon as the module is imported or run.
set_log_verbosity(logging.INFO)


def _generate_pdf(uid: "UUID") -> tuple[str, Path]:
    """Build a throw-away PDF filled with random text and solid-colour images.

    The document is written as ``<uid>.pdf`` inside a directory freshly created
    with ``tempfile.mkdtemp``. Neither the file nor the directory is removed
    here; cleaning them up is left to the caller.

    Args:
        uid: UUID used as the file name (without extension) of the PDF.

    Returns:
        Tuple of the value ``compute_hash`` returns for the written file and
        the path to that file.
    """
    out_dir = tempfile.mkdtemp()
    doc = FPDF(format="A4")

    # First page(s): 3 x 100 cells, each holding 100 random characters.
    doc.set_font("Helvetica")
    doc.add_page()
    for _ in range(3 * 100):
        line = "".join(
            random.choices(  # noqa: S311
                "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"
                "nopqrstuvwxyz0123456789 \n",
                k=100,
            )
        )
        doc.cell(0, 10, line, ln=True)

    # Seven further pages, each covered by one large single-colour image,
    # exist only to make the resulting file bigger.
    for _ in range(7):
        doc.add_page()
        colour = tuple(random.randint(0, 255) for _ in range(3))  # noqa: S311
        filler = Image.new("RGB", (10000, 10000), colour)
        doc.image(filler, x=0, y=0, w=210, h=297)

    pdf_path = Path(out_dir) / f"{uid}.pdf"
    doc.output(str(pdf_path))

    # Hash the bytes that actually landed on disk.
    with pdf_path.open("rb") as stream:
        digest = compute_hash(stream)
    return digest, pdf_path


def main(args: "Namespace") -> None:
    """Generate random PDFs and upload them through a ``DocumentUploader``.

    ``args.total_size // args.pdf_size`` PDFs are produced on a pool of five
    worker threads. Each worker generates one PDF, hands it to the uploader and
    then registers the file for cleanup. Once the registered files add up to
    the flush threshold (by nominal ``args.pdf_size``), the uploader is
    flushed, the files are deleted and the flushing worker sleeps before
    releasing the lock. Whatever is still registered when all workers are done
    is flushed and deleted at the end.

    Args:
        args: Parsed command-line arguments. ``total_size`` and ``pdf_size``
            determine the number of PDFs and the flush threshold; the
            remaining options are copied into the labels attached to every
            uploaded document.
    """
    cfg = load_config()

    uploader = DocumentUploader(
        project_id=cfg["squirro"]["project_id"],
        token=cfg["squirro"]["token"],
        cluster=cfg["squirro"]["cluster"],
        source_name="Performance Test Data",
        pipeline_workflow_name="Mock Data",
    )

    num_pdfs = int(args.total_size // args.pdf_size)
    flush_threshold_mb = 20  # flush once this many (nominal) MB are registered
    cooldown_seconds = 60  # pause after every threshold-triggered flush
    batch: list[Path] = []
    upload_lock = threading.Lock()

    def discard_batch() -> None:
        """Delete every registered PDF together with its temp directory."""
        while batch:
            pdf_path = batch.pop()
            pdf_path.unlink(missing_ok=True)
            # _generate_pdf puts each PDF into its own mkdtemp() directory;
            # remove it as well so the run does not leave one empty directory
            # per document behind. rmdir() only succeeds on an empty directory.
            try:
                pdf_path.parent.rmdir()
            except OSError:
                logging.debug("Could not remove %s", pdf_path.parent)

    def process_pdf() -> None:
        """Generate one PDF, upload it and flush if the threshold is reached."""
        doc_uuid = uuid4()
        binary_hash, fp = _generate_pdf(doc_uuid)
        labels = {
            "document_date_pred": ["2024-09-09"],
            "document_date": ["2024-09-09"],
            "wfi_document_date": ["2024-09-09"],
            "document_type_pred": ["ASSIGNMENT"],
            "company_name_pred": [args.company_name],
            "permission_code": [args.permission_code],
            "uid_permission_code": [args.uid_permission_code],
            "wfi_company_segment": ["R"],
            "wfi_company_rm_code": [args.rm_code],
            "wfi_company_team_name": ["Team A"],
            "wfi_document_category": ["Current Documents"],
            "wfi_document_name": ["General Assignment (Misc)"],
            "wfi_document_type": ["Misc"],
            "user_email": [args.user_email],
            "user_name": ["Mock Data Script"],
            "binary_hash": [binary_hash],
            "force_ocr": ["false"],
            "ocr_status": ["skipped"],
            "company_name": [args.company_name],
            "document_type": ["ASSIGNMENT"],
            "rm_name": [args.rm_name],
            "company_uid": [args.company_uid],
            "company_cif": [args.company_cif],
            "wfi_company_name": [args.company_name],
            "wfi_company_cif": [args.company_cif],
            "bbca_documents": ["true"],
            "document_category": ["Legal Documents"],
            "wfi_references": [""],
            # Wrapped in a list like every other label value.
            "source_type": ["End point Upload"],
        }
        try:
            uploader.upload(
                str(fp),
                mime_type="application/pdf",
                title=fp.name,
                doc_id=f"{doc_uuid}{binary_hash}",
                keywords=labels,
            )
        except Exception:
            logging.exception("Failed to upload PDF")
            uploaded = False
        else:
            uploaded = True

        with upload_lock:
            # Register the file only now, after upload() has returned and
            # while holding the lock: a worker that is draining the batch can
            # therefore never delete a PDF another worker is still uploading.
            batch.append(fp)
            if not uploaded:
                # Keep the file registered for cleanup, but do not trigger a
                # flush from a failed upload.
                return  # nosec: B112
            if len(batch) * args.pdf_size >= flush_threshold_mb:
                logging.info("Uploading batch...")
                uploader.flush()
                discard_batch()
                # Sleeping with the lock held keeps other workers from
                # starting another flush during the pause.
                time.sleep(cooldown_seconds)

    def report_failure(task: "futures.Future[None]") -> None:
        """Log an exception that escaped a worker instead of dropping it."""
        if task.cancelled():
            return
        error = task.exception()
        if error is not None:
            logging.error("PDF task failed", exc_info=error)

    with futures.ThreadPoolExecutor(max_workers=5) as executor:
        f_list = []
        for idx in range(num_pdfs):
            logging.info("Generating PDF #%d...", idx)
            task = executor.submit(process_pdf)
            task.add_done_callback(report_failure)
            f_list.append(task)
        futures.wait(f_list)

    # Flush and clean up whatever did not reach the threshold.
    if batch:
        logging.info("Uploading batch...")
        uploader.flush()
        discard_batch()

if __name__ == "__main__":
    # Command-line entry point: collect the sizing options and the fake label
    # values, then hand the parsed namespace to main().
    cli_parser = ArgumentParser()

    # Sizing options (both in MB, both optional).
    cli_parser.add_argument(
        "--pdf-size", default=2, type=float, help="Target size of the PDF in MB."
    )
    cli_parser.add_argument(
        "--total-size", default=300_000, type=float, help="Total size in MB."
    )

    # Fake labels: every one of them is a required string option.
    for flag, description in (
        ("--company-name", "Company name."),
        ("--company-cif", "Company CIF."),
        ("--company-uid", "Company UID."),
        ("--permission-code", "Permission code."),
        ("--uid-permission-code", "UID permission code."),
        ("--rm-code", "RM code."),
        ("--rm-name", "RM name."),
        ("--user-email", "User email."),
    ):
        cli_parser.add_argument(flag, type=str, required=True, help=description)

    args = cli_parser.parse_args()
    main(args)
