"""Retry storing files into WFI or queue for another retry."""

import json
from datetime import UTC, datetime, timedelta
from typing import TYPE_CHECKING

from requests.exceptions import HTTPError

from octopus.activity_tracking import RejectionReason
from octopus.clients import init_redis_client, init_squirro_client, init_wfi_client
from octopus.text import remove_special_characters
from squirro.common.config import get_config
from squirro.lib.storage.handler import StorageHandler
from squirro.sdk import PipeletV1, require
from squirro.service.pdfconversion.controllers import ConvertController

if TYPE_CHECKING:
    from logging import Logger
    from typing import Any

# Name of the Redis hash that holds failed items awaiting a retry.
REDIS_FAILED_ITEMS_HASH = "failed_items_hash"
# `strptime` format used to parse an item's `created_at` value.
DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"
# Fallback value used for labels missing from an item's keywords.
UNKNOWN = "UNKNOWN"
# HTTP status code for which retrying is pointless; such items are rejected.
BAD_REQUEST = 400


# pylint: disable=too-few-public-methods,too-many-return-statements
@require("log")
class RetryStoringIntoWFI(PipeletV1):  # type: ignore[misc]
    """Retry storing files into WFI or queue for another retry.

    The pipelet gathers all information from Redis hash and reattempts
    to store the file in WFI or in case of any exception queues it for
    another retry.
    """

    log: "Logger"

    # Names of the Redis hashes used for activity tracking. The pair that
    # applies is selected for every item in `consume`, depending on whether
    # the item's `source_type` starts with "WFI".
    _DEFAULT_REJECTION_HASH = "item_rejection_hash"
    _DEFAULT_SUCCESS_HASH = "item_success_hash"
    _WFI_REJECTION_HASH = "item_wfi_rejection_hash"
    _WFI_SUCCESS_HASH = "item_wfi_success_hash"

    def __init__(self, _: "dict[str, Any]") -> None:
        """Initialize the pipelet.

        Args:
            _: Pipelet configuration (unused).
        """
        self.sq_client, self.project_id = init_squirro_client()
        self.wfi_client = init_wfi_client()
        self.redis_client = init_redis_client()
        self.storage_handler = StorageHandler(get_config("squirro.lib.storage"))
        self.item_rejection_hash = self._DEFAULT_REJECTION_HASH
        self.item_success_hash = self._DEFAULT_SUCCESS_HASH

    def consume(self, item: "dict[str, Any]") -> "dict[str, Any] | None":  # noqa: PLR0911 refactor in the future
        """Consume an item.

        The incoming item only carries the key (its `title`) of the record
        stored in the failed-items Redis hash; the actual item and file to
        retry are loaded from that record.

        Args:
            item: The item to consume

        Returns:
            The item loaded from Redis (updated on success, unchanged when it
            stays queued for another retry, or marked as `Failure` when it is
            older than 24 hours), or `None` when the record cannot be loaded
            or the item is rejected.
        """
        content_url = item.get("title")  # content_url is mapped to title in dataloader
        if not content_url:
            self.log.error("Item has no title mapped to content_url")
            return None

        try:
            redis_item_bytes: bytes | None = self.redis_client.hget(
                REDIS_FAILED_ITEMS_HASH, content_url
            )
            if not redis_item_bytes:
                self.log.error(
                    "Item %s not found in hash %s",
                    content_url,
                    REDIS_FAILED_ITEMS_HASH,
                )
                return None
            redis_item: dict[str, Any] = json.loads(redis_item_bytes.decode())
        except Exception:
            self.log.exception(
                "Getting item from Redis hash `%s` failed",
                REDIS_FAILED_ITEMS_HASH,
            )
            return None

        item = redis_item["item"]
        file = redis_item["original_file"]
        item_id = item["id"]

        # Select the tracking hashes for *this* item. Both branches assign
        # them so that a WFI item does not change the hashes used by the
        # non-WFI items consumed afterwards by the same pipelet instance.
        source_type = item["keywords"].get("source_type", [""])[0]
        if source_type.startswith("WFI"):
            # Item from WFI treated as initial checkin if unzipped
            is_initial_checkin = item.get("wfi:initial_checkin", False)
            self.item_rejection_hash = self._WFI_REJECTION_HASH
            self.item_success_hash = self._WFI_SUCCESS_HASH
        else:
            is_initial_checkin = True
            self.item_rejection_hash = self._DEFAULT_REJECTION_HASH
            self.item_success_hash = self._DEFAULT_SUCCESS_HASH

        try:
            # NOTE(review): `strptime` returns a naive datetime and
            # `astimezone` interprets naive values as system local time. If
            # `created_at` is stored in UTC, `replace(tzinfo=UTC)` would be
            # the correct conversion - confirm before changing.
            created_at = datetime.strptime(
                item.get("created_at", ""), DATE_FORMAT
            ).astimezone(UTC)

        except Exception:
            self._reject_item(
                item_id, RejectionReason.WFI_RETRY_CREATED_TIME_NOT_FOUND, file
            )
            self._delete_item(item_id)
            self._delete_failed_item_metadata(file)

            self.log.exception(
                "Getting value for `created_at` field from Redis hash "
                "`%s` for item `%s` failed",
                REDIS_FAILED_ITEMS_HASH,
                item_id,
            )
            return None

        if datetime.now(UTC) - created_at > timedelta(days=1):
            self.log.error("Item `%s` was created more than 24 hours ago", item_id)

            # No file is passed here, so the stored file is not deleted.
            self._reject_item(item_id, RejectionReason.CREATED_OVER_24H)
            self._delete_failed_item_metadata(file)

            item["keywords"]["wfi_status"] = ["Failure"]
            return item

        try:
            new_document_id = self._retry_storing_item_into_wfi(
                item, file, initial_checkin=is_initial_checkin
            )
        except HTTPError as error:
            if error.response is not None and error.response.status_code == BAD_REQUEST:
                return None  # Reject item
            return item  # Do nothing and wait for the next retry

        # Retry is successful
        self._post_process_item(item, file, new_document_id)
        self._delete_hash_records(hash_name=self.item_rejection_hash, items=[item])
        self._delete_failed_item_metadata(file)
        return item

    def _retry_storing_item_into_wfi(
        self,
        item: "dict[str, Any]",
        file: "dict[str, str]",
        *,
        initial_checkin: bool = True,
    ) -> str:
        """Check the item's file into WFI.

        Args:
            item: Current item.
            file: File to check in.
            initial_checkin: Whether to perform check in as first time or
                minor version.

        Returns:
            The document ID returned by the WFI client.

        Raises:
            HTTPError: Re-raised when the check-in fails. For a BAD_REQUEST
                response the item is rejected and its failed-item record is
                removed before re-raising.
        """
        item_id: str = item["id"]
        document_id: str = item.get("keywords", {}).get("wfi_document_id", [""])[0]
        files, data = self._prepare_checkin_payload(
            item, file, initial_checkin=initial_checkin
        )

        try:
            checkin_msg = f"Retrying checking in item {item_id}"
            if document_id:
                checkin_msg += f" (minor version) into WFI document id {document_id}"
            self.log.info(
                "%s\nPayload files %s: %s\nPayload data %s: %s",
                checkin_msg,
                item_id,
                files,
                item_id,
                data,
            )

            try:
                new_document_id: str = self.wfi_client.checkin_document(
                    files,
                    data,
                    document_id=document_id,
                )
            except HTTPError as error:
                response = error.response
                if (
                    response is not None and response.status_code == BAD_REQUEST
                ):  # No point retrying for BAD_REQUEST
                    self.log.exception("Retrying receives WFI BAD_REQUEST BAD REQUEST")
                    self._reject_item(
                        item_id, RejectionReason.WFI_INVALID_REQUEST, file
                    )
                    self._delete_failed_item_metadata(file)
                    raise

                self.log.exception(
                    "Retrying item `%s` to WFI failed, item is queued for retry",
                    item_id,
                )
                raise
        finally:
            # The handle opened for the payload is no longer needed once the
            # request has completed or failed.
            self._close_payload_files(files)

        self.log.info(
            "Item `%s` successfully retried storing into WFI `%s`",
            item_id,
            new_document_id,
        )
        return new_document_id

    def _close_payload_files(self, files: "dict[str, Any]") -> None:
        """Close the file handles of a check-in payload (best effort).

        Args:
            files: The `files` part of the payload built by
                `_prepare_checkin_payload`.
        """
        for handle in files.values():
            close = getattr(handle, "close", None)
            if not callable(close):
                continue
            try:
                close()
            except Exception:
                # Cleanup must not mask the outcome of the check-in itself.
                self.log.exception("Closing payload file handle failed")

    def _prepare_checkin_payload(
        self,
        item: "dict[str, Any]",
        file: "dict[str, str]",
        *,
        initial_checkin: bool,
    ) -> "tuple[dict[str, Any], dict[str, str]]":
        """Prepare the request payload to check in document into WFI.

        Besides building the payload this updates `item` in place: the title
        is replaced by its sanitized form and, when `references` are present,
        a joined `wfi_references` keyword is added.

        Args:
            item: Current item.
            file: File to check in.
            initial_checkin: Whether to perform check in as first time or minor version.

        Returns:
            A tuple containing the files and data to be used in request body.
        """
        allowed_chars = "_.-:,&?!'\u2019)( "
        title = remove_special_characters(item["title"], list(allowed_chars))

        item["title"] = title  # To be consistent with WFI

        properties = [
            {
                "propertyName": "DocumentTitle",
                "propertyDataType": "string",
                "value": title,
            }
        ]

        if references := item["keywords"].get("references", []):
            item["keywords"]["wfi_references"] = [";".join(references)]

        if initial_checkin:
            keywords = item.get("keywords", {})
            for label, metadata in self.wfi_client.FIELDS.items():
                value = keywords.get(label, metadata["default"])[0]
                # Build a new dict instead of updating `metadata["wfi"]` so
                # the client's shared FIELDS definition is never modified.
                properties.append({**metadata["wfi"], "value": value})

        doc_props = {
            "className": "BBCADocuments",
            "propertyList": properties,
        }

        return (
            {"File": self.storage_handler.open(file["content_url"])},
            {"DocProps": json.dumps(doc_props)},
        )

    def _post_process_item(
        self,
        item: "dict[str, Any]",
        file_stored: "dict[str, str]",
        document_id: str,
    ) -> None:
        """Post process item after storing into WFI.

        Update item's labels, delete local file, modify content url after
        storing into WFI.

        Args:
            item: Current item.
            file_stored: The file that was stored into WFI.
            document_id: WFI document ID of the checked-in document.
        """
        item["keywords"]["wfi_document_id"] = [document_id]
        item["keywords"]["wfi_status"] = ["Success"]

        self.storage_handler.delete(file_stored["content_url"])
        self._modify_content_url(item, file_stored, document_id)
        self._add_success_entry(item)

    def _modify_content_url(
        self,
        item: "dict[str, Any]",
        file_stored: "dict[str, str]",
        document_id: "str",
    ) -> None:
        """Modify the content url that points to the storage plugin's url.

        This ensures that documents are fetched directly from WFI. If document
        needs to be converted to PDF on the fly, also modify the pdf_conversion
        url.

        Args:
            item: Current item.
            file_stored: The file that was stored into WFI.
            document_id: WFI document ID of the checked-in document.
        """
        files = item.get("files", [])

        content_url = f"storage://wfi/{self.project_id}/{document_id}"

        for file in files:
            if file.get("content_url", "").startswith("storage://pdf_conversion"):
                file["content_url"] = (
                    "storage://pdf_conversion/"
                    + ConvertController._pdf_path(content_url)  # noqa: SLF001
                )

            # The entry of the stored file itself points directly at WFI.
            if file["id"] == file_stored["id"]:
                file["id"] = document_id
                file["content_url"] = content_url

    def _add_success_entry(self, item: "dict[str, Any]") -> None:
        """Add the successfully ingested item to redis for activity tracking.

        Args:
            item: The successful item
        """
        item_id: str = item["id"]
        keywords = item["keywords"]

        success_entry: dict[str, Any] = {
            "id": item_id,
            "title": item["title"],
            "created_at": item["created_at"],
            "source_type": keywords.get("source_type", [UNKNOWN])[0],
            # NOTE(review): unlike its siblings the whole list is stored for
            # `company_name` - confirm this is intended.
            "company_name": keywords.get("company_name", [UNKNOWN]),
            "document_type": keywords.get("document_type", [UNKNOWN])[0],
            # NOTE(review): the `[0]` is applied to the default only, so this
            # is a list when the label exists and the string "UNKNOWN"
            # otherwise. Looks like a misplaced bracket; kept as is because
            # readers of the success hash may depend on the current shape.
            "document_date": keywords.get("document_date", [UNKNOWN][0]),
            "num_pages": keywords.get("num_pages", [0])[0],
            "user_name": keywords.get("user_name", [UNKNOWN])[0],
            "user_email": keywords.get("user_email", [UNKNOWN])[0],
            "ingestion_status": "Success",
        }

        if wfi_id := keywords.get("wfi_id_original", [""])[0]:
            success_entry["wfi_id_original"] = wfi_id

        if zip_reference := keywords.get("zip_reference", [""])[0]:
            success_entry["zip_reference"] = zip_reference

        self.redis_client.hset(
            self.item_success_hash, item_id, json.dumps(success_entry)
        )

    def _reject_item(
        self,
        item_id: str,
        rejection_id: str,
        file: "dict[str, str] | None" = None,
    ) -> None:
        """Reject item.

        Add rejected item to redis hash and, when a file is given, delete
        that file through the storage handler.

        Args:
            item_id: Squirro item id.
            rejection_id: Rejection ID/code.
            file: File to remove from storage, if any.

        Raises:
            Exception: Any error from Redis or the storage handler is logged
                and re-raised.
        """
        msg = f"Rejecting item `{item_id}` with status code `{rejection_id}`"
        self.log.info(msg)

        try:
            self.redis_client.hset(self.item_rejection_hash, item_id, rejection_id)
            if file:
                self.storage_handler.delete(file["content_url"])
        except Exception:
            self.log.exception("%s failed", msg)
            raise

    def _delete_item(
        self,
        item_id: "str",
        project_id: "str | None" = None,
    ) -> None:
        """Delete an item through the Squirro client (best effort).

        Failures are logged and not propagated.

        Args:
            item_id: Squirro item id.
            project_id: Project to delete from; defaults to the pipelet's
                own project.
        """
        if project_id is None:
            project_id = self.project_id

        msg = f"Deleting item `{item_id}`"
        self.log.info(msg)

        try:
            self.sq_client.delete_item(project_id, item_id)
        except Exception:
            self.log.exception("%s failed", msg)

    def _delete_hash_records(
        self, hash_name: "str", items: "list[dict[str, Any]]"
    ) -> None:
        """Delete records from Redis hash based on item IDs.

        Args:
            hash_name: Name of the Redis hash.
            items: Items whose `id` values are the fields to remove.

        Raises:
            Exception: Any Redis error is logged and re-raised.
        """
        if isinstance(items, list) and len(items) > 0:
            try:
                count = self.redis_client.hdel(
                    hash_name, *[item["id"] for item in items]
                )
            except Exception:
                self.log.exception(
                    "Deleting records from Redis hash `%s` failed", hash_name
                )
                raise
            self.log.info("Removed %d record(s) from Redis hash `%s`", count, hash_name)
        else:
            self.log.info("No records removed from Redis hash `%s`", hash_name)

    def _delete_failed_item_metadata(self, original_file: "dict[str, str]") -> None:
        """Remove a file's record from the failed-items Redis hash (best effort).

        Failures are logged and not propagated.

        Args:
            original_file: File whose `content_url` is the hash field to
                remove.
        """
        original_content_url = original_file.get("content_url", "")
        message = (
            f"Deleting path `{original_content_url}` "
            f"from Redis hash `{REDIS_FAILED_ITEMS_HASH}`"
        )
        self.log.info(message)

        try:
            self.redis_client.hdel(REDIS_FAILED_ITEMS_HASH, original_content_url)
        except Exception:
            self.log.exception("%s failed", message)
