# mypy: ignore-errors
import base64
import copy
import hashlib
import os
import zipfile
from typing import TYPE_CHECKING

from octopus.activity_tracking import RejectionReason
from octopus.clients.redis_client import init_redis_client
from squirro.common.config import get_config
from squirro.common.dependency import get_injected
from squirro.lib.storage.handler import StorageHandler
from squirro.sdk import PipeletV1, require

if TYPE_CHECKING:
    from collections.abc import Generator
    from logging import Logger
    from typing import Any

# Name of the pipelet configuration option that holds the size limit (in MB)
# above which files are rejected; also exposed through `getArguments`.
FILTER_EXCEEDING_SIZE_FILES = "filter_exceeding_size_files"

# Name of the Redis hash in which rejected items are recorded
# (field: item ID, value: rejection reason).
REDIS_ITEM_REJECTION_HASH = "item_rejection_hash"
# Default value and upper bound for the configurable size limit; the value is
# multiplied by 1_000_000 before being compared with the on-disk file size.
MAX_FILE_SIZE = 50  # 50 MB
# MIME types for which `ExtractZIP.consume` attempts ZIP extraction; files
# with any other MIME type are passed through unchanged.
SUPPORTED_MIME_TYPES = [
    "application/zip",
    "application/x-zip-compressed",
    "multipart/x-zip",
]
# Maps a lower-case file extension (including the leading dot) to the MIME
# type assigned to files extracted from an archive. `_get_mime_type` looks
# extensions up here and falls back to an empty string for unlisted ones.
EXTENSION_MIME_TYPE_MAP = {
    ".doc": "application/msword",
    ".dot": "application/msword",
    ".wiz": "application/msword",
    ".pdf": "application/pdf",
    ".xls": "application/vnd.ms-excel",
    ".xlb": "application/vnd.ms-excel",
    ".ppt": "application/vnd.ms-powerpoint",
    ".pot": "application/vnd.ms-powerpoint",
    ".ppa": "application/vnd.ms-powerpoint",
    ".pps": "application/vnd.ms-powerpoint",
    ".pwz": "application/vnd.ms-powerpoint",
    ".jpg": "image/jpeg",
    ".jpe": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".tiff": "image/tiff",
    ".tif": "image/tiff",
    ".rtf": "application/rtf",
    ".123": "application/vnd.lotus-1-2-3",
    ".xlm": "application/vnd.ms-excel",
    ".xla": "application/vnd.ms-excel",
    ".xlc": "application/vnd.ms-excel",
    ".xlt": "application/vnd.ms-excel",
    ".xlw": "application/vnd.ms-excel",
    ".xlsm": "application/vnd.ms-excel.sheet.macroenabled.12",
    ".xltm": "application/vnd.ms-excel.template.macroenabled.12",
    ".pptm": "application/vnd.ms-powerpoint.presentation.macroenabled.12",
    ".ppsm": "application/vnd.ms-powerpoint.slideshow.macroenabled.12",
    ".potm": "application/vnd.ms-powerpoint.template.macroenabled.12",
    ".docm": "application/vnd.ms-word.document.macroenabled.12",
    ".dotm": "application/vnd.ms-word.template.macroenabled.12",
    ".wps": "application/vnd.ms-works",
    ".wks": "application/vnd.ms-works",
    ".wcm": "application/vnd.ms-works",
    ".wdb": "application/vnd.ms-works",
    ".odc": "application/vnd.oasis.opendocument.chart",
    ".odf": "application/vnd.oasis.opendocument.formula",
    ".odg": "application/vnd.oasis.opendocument.graphics",
    ".otg": "application/vnd.oasis.opendocument.graphics-template",
    ".odp": "application/vnd.oasis.opendocument.presentation",
    ".otp": "application/vnd.oasis.opendocument.presentation-template",
    ".ods": "application/vnd.oasis.opendocument.spreadsheet",
    ".ots": "application/vnd.oasis.opendocument.spreadsheet-template",
    ".odt": "application/vnd.oasis.opendocument.text",
    ".odm": "application/vnd.oasis.opendocument.text-master",
    ".ott": "application/vnd.oasis.opendocument.text-template",
    ".oth": "application/vnd.oasis.opendocument.text-web",
    ".pptx": (
        "application/vnd.openxmlformats-officedocument.presentationml.presentation"
    ),
    ".ppsx": "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
    ".potx": "application/vnd.openxmlformats-officedocument.presentationml.template",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    ".xltx": "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".dotx": "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
    ".sxc": "application/vnd.sun.xml.calc",
    ".stc": "application/vnd.sun.xml.calc.template",
    ".sxd": "application/vnd.sun.xml.draw",
    ".std": "application/vnd.sun.xml.draw.template",
    ".sxi": "application/vnd.sun.xml.impress",
    ".sti": "application/vnd.sun.xml.impress.template",
    ".sxm": "application/vnd.sun.xml.math",
    ".sxw": "application/vnd.sun.xml.writer",
    ".sxg": "application/vnd.sun.xml.writer.global",
    ".stw": "application/vnd.sun.xml.writer.template",
    ".wpd": "application/vnd.wordperfect",
    ".msg": "application/vnd.ms-outlook",
}


def _generate_id(components: "list") -> str:
    """Derive a short URL-safe identifier from a sequence of strings.

    The components are encoded and hashed back to back, in order, with
    BLAKE2b. The digest is then base64url-encoded and truncated.

    Args:
        components: Strings that together determine the identifier.

    Returns:
        The first 22 characters of the base64url-encoded digest.
    """
    # Hashing the concatenation is equivalent to feeding the hash one
    # component at a time.
    payload = b"".join(part.encode() for part in components)
    encoded_digest = base64.urlsafe_b64encode(hashlib.blake2b(payload).digest())

    return encoded_digest[:22].decode()


def _get_mime_type(file_name: str) -> str:
    """Look up the MIME type for a file name by its extension.

    The lookup is case-insensitive because the file name is lower-cased
    before its extension is split off.

    Args:
        file_name: Name of the file, with or without directory components.

    Returns:
        The MIME type registered in ``EXTENSION_MIME_TYPE_MAP``, or an empty
        string when the extension is not listed there.
    """
    extension = os.path.splitext(file_name.lower())[1]

    if extension in EXTENSION_MIME_TYPE_MAP:
        return EXTENSION_MIME_TYPE_MAP[extension]

    return ""


@require("log")
class ExtractZIP(PipeletV1):
    """Extract files from the ZIP archive attached to the email.

    The pipelet yields one item per file extracted from the ZIP archive,
    skipping directories and hidden files. ZIP archives found inside the
    archive are unpacked recursively. Password protected ZIP archives or ZIP
    archives exceeding the configured size are filtered out and recorded in
    the Redis hash named by ``REDIS_ITEM_REJECTION_HASH``.
    """

    log: "Logger"

    def __init__(self, config: "dict[str, Any]") -> None:
        """Store the pipelet configuration and initialise the Redis client.

        Args:
            config: Pipelet configuration. ``FILTER_EXCEEDING_SIZE_FILES`` is
                the only option this class reads from it.
        """
        self.config = config
        self.redis_client = init_redis_client()

    def _get_file_path(self, file: "dict") -> "str":
        """Return the local filesystem path of a stored file.

        Args:
            file: File entry of an item; its ``content_url`` is resolved.

        Returns:
            The path parsed from ``content_url`` joined onto the ``directory``
            option of the ``storage_localfile`` configuration section.

        Raises:
            Exception: Whatever the storage handler raises while parsing the
                URL. The failure is logged and re-raised unchanged.
        """
        file_id = file.get("id", "")
        config = get_config("squirro.lib.storage")
        storage = StorageHandler(config)

        try:
            # pylint: disable-next=protected-access
            _, _, path = storage._parse_url(file.get("content_url", ""))
        except Exception:
            self.log.exception("Getting file path for file `%s` failed", file_id)
            raise

        return os.path.join(config.get("storage_localfile", "directory"), path)

    def _remove_file(self, file: "dict", path: "str") -> None:
        """Delete ``path`` from disk on a best-effort basis.

        Args:
            file: File entry the path belongs to; only its ``id`` is used,
                for logging.
            path: Filesystem path to delete.
        """
        file_id = file.get("id", "")
        self.log.info("Removing file `%s` stored under path `%s`", file_id, path)

        try:
            os.remove(path)
        except Exception:
            # Deliberately swallowed: a file that cannot be cleaned up must
            # not abort processing of the item.
            self.log.exception(
                "Removing file `%s` stored under path `%s` failed", file_id, path
            )

    def _add_item_rejection(self, item_id: "str", rejection_id: str) -> None:
        """Record a rejected item in the Redis rejection hash.

        Args:
            item_id: ID of the rejected item; used as the hash field.
            rejection_id: Reason for the rejection; stored as the value.

        Raises:
            Exception: Whatever the Redis client raises; the failure is
                logged and re-raised unchanged.
        """
        self.log.warning(
            "Adding item `%s` with status code `%s` to Redis hash `%s`",
            item_id,
            rejection_id,
            REDIS_ITEM_REJECTION_HASH,
        )

        try:
            self.redis_client.hset(REDIS_ITEM_REJECTION_HASH, item_id, rejection_id)
        except Exception:
            self.log.exception("Writing to `%s` failed", REDIS_ITEM_REJECTION_HASH)
            raise

    def _is_valid_zip(self, item: "dict", file: "dict", path: "str") -> "bool":
        """Decide whether the archive at ``path`` may be extracted.

        An archive is rejected, and deleted from disk, when the item has no
        ID, when the archive is larger than the configured limit, or when it
        is password protected. The latter two cases are also recorded through
        ``_add_item_rejection``.

        Args:
            item: Item the archive is attached to.
            file: File entry of the archive.
            path: Filesystem path of the archive.

        Returns:
            ``True`` when the archive passed all checks, ``False`` otherwise.
        """
        item_id = item.get("id", "")
        file_id = file.get("id", "")

        if item_id == "":
            # Without an ID there is no key to record the rejection under.
            self.log.warning("Rejecting item `%s`: item has no ID", item)
            self._remove_file(file, path)

            return False

        # The configured limit can lower the hard limit but never raise it.
        size_limit_mb = min(
            MAX_FILE_SIZE,
            self.config.get(FILTER_EXCEEDING_SIZE_FILES, MAX_FILE_SIZE),
        )

        if os.path.getsize(path) > size_limit_mb * 1_000_000:
            self.log.warning(
                "Rejecting item `%s`: file `%s` exceeds maximum size",
                item_id,
                file_id,
            )

            self._add_item_rejection(item_id, RejectionReason.FILE_TOO_LARGE)
            self._remove_file(file, path)

            return False

        with zipfile.ZipFile(path) as zip_file:
            try:
                # Reading an encrypted member without a password raises
                # RuntimeError.
                zip_file.testzip()
            except RuntimeError:
                password_protected = True
            else:
                password_protected = False

        # The archive handle is closed before the file is deleted.
        if password_protected:
            self.log.warning(
                "Rejecting item `%s`: file `%s` is password protected",
                item_id,
                file_id,
            )

            self._add_item_rejection(item_id, RejectionReason.PASSWORD_PROTECTED)
            self._remove_file(file, path)

            return False

        return True

    def _process_zip(self, item: "dict", file: "dict", path: "str") -> "Generator":
        """Yield one new item for every file extracted from the archive.

        Each yielded item is an independent deep copy, so a consumer may
        safely collect the items before processing them. The ``item`` passed
        in is not modified.

        Args:
            item: Item the archive is attached to. It must provide ``title``,
                ``created_at`` and ``id``.
            file: File entry of the archive.
            path: Filesystem path of the archive.

        Yields:
            Copies of ``item`` with ``id``, ``title`` and ``files`` replaced
            by values describing one extracted file, and with a
            ``zip_reference`` keyword pointing back to the original item.

        Raises:
            KeyError: If ``item`` lacks ``title``, ``created_at`` or ``id``.
        """
        # Everything shared by all extracted files is prepared once on a
        # private template.
        template = copy.deepcopy(item)
        keywords = template.setdefault("keywords", {})
        keywords["zip_reference"] = [
            f"{item['title']}|{item['created_at']}|{item['id']}"
        ]

        # A missing or empty `source_type` is treated as "not a WFI item".
        source_types = keywords.get("source_type") or [""]
        if source_types[0].startswith("WFI"):
            if wfi_zip_id := keywords.pop("wfi_document_id", None):
                keywords["wfi_zip_id"] = wfi_zip_id
            template["wfi:initial_checkin"] = True

        source_id = template.get("source_id", "")
        base_url = os.path.dirname(file.get("content_url", ""))

        for file_path in self._extract_zip(path, file):
            title = os.path.basename(file_path)

            extracted_item = copy.deepcopy(template)
            extracted_item.update(
                {
                    "id": _generate_id(
                        [source_id, title, get_injected("short_uuid")]
                    ),
                    "title": title,
                    "files": [
                        {
                            "id": get_injected("short_uuid"),
                            "name": title,
                            "mime_type": _get_mime_type(title),
                            "content_url": os.path.join(base_url, file_path),
                        }
                    ],
                }
            )

            yield extracted_item

    def _extract_zip(
        self,
        path: "str",
        file: "dict",
        level: "int" = 0,
        dir_prefix: "str" = "",
    ) -> "list[str]":
        """Extract an archive next to itself and delete it afterwards.

        Directories and members whose base name starts with a dot are
        skipped. Members that are ZIP archives themselves are extracted
        recursively and replaced by their contents. A password protected
        archive contributes no files.

        Args:
            path: Filesystem path of the archive to extract.
            file: File entry of the top-level archive; used for logging only.
            level: Nesting depth of ``path`` (0 for the top-level archive).
                It is only passed along during recursion.
            dir_prefix: Directory of ``path`` relative to the directory of
                the top-level archive.

        Returns:
            Paths of the extracted files, relative to the directory of the
            top-level archive.
        """
        # NOTE(review): neither the nesting depth nor the total extracted
        # size is limited here. Worth confirming whether untrusted archives
        # need such limits.
        files_list: list[str] = []
        extract_dir = os.path.dirname(path)

        with zipfile.ZipFile(path) as zip_file:
            # Check for password protection
            try:
                zip_file.testzip()
            except RuntimeError:
                self.log.error("ZIP file %s is password protected", path)
                members = []
            else:
                members = zip_file.infolist()

            for zip_info in members:
                title = os.path.basename(zip_info.filename)

                if zip_info.is_dir() or title.startswith("."):
                    continue

                # `extract` sanitises member names (absolute paths and `..`
                # components are stripped), so the path it returns is used
                # rather than one rebuilt from the raw member name.
                extracted_path = zip_file.extract(zip_info, path=extract_dir)
                relative_path = os.path.relpath(extracted_path, extract_dir)

                # Unzip recursively
                if title.lower().endswith(".zip") and zipfile.is_zipfile(
                    extracted_path
                ):
                    files_list += self._extract_zip(
                        extracted_path,
                        file,
                        level + 1,
                        os.path.join(dir_prefix, os.path.dirname(relative_path)),
                    )
                else:
                    files_list.append(os.path.join(dir_prefix, relative_path))

        # The archive is removed in every case, including a password
        # protected nested archive, so that no unreferenced file stays behind.
        self._remove_file(file, path)
        return files_list

    def consume(self, item: "dict") -> "dict | Generator | None":
        """Consume an item.

        Args:
            item: The item to consume

        Returns:
            ``None`` when the item is rejected (no file, file not found on
            disk, or invalid ZIP archive), a generator of extracted items
            when the first file is a valid ZIP archive, and the unchanged
            item in every other case.
        """
        item_id = item.get("id", "")
        files = item.get("files", [])

        if not isinstance(files, list):
            return item

        if not files:
            self._add_item_rejection(item_id, RejectionReason.ITEM_HAS_NO_FILE)

            return None

        # Currently each Squirro item can only refer to one file.
        # https://docs.squirro.com/en/latest/technical/pipelets/how-to/how-access.html#concept
        file = files[0]
        file_id = file.get("id", "")
        path = self._get_file_path(file)

        if not os.path.isfile(path):
            self.log.warning(
                "Rejecting item `%s`: file `%s` not found under path `%s`",
                item_id,
                file_id,
                path,
            )

            self._add_item_rejection(item_id, RejectionReason.FILE_NOT_FOUND)

            return None

        is_zip = file.get(
            "mime_type", ""
        ) in SUPPORTED_MIME_TYPES and zipfile.is_zipfile(path)

        if not is_zip:
            return item

        if self._is_valid_zip(item, file, path):
            return self._process_zip(item, file, path)

        return None

    @staticmethod
    # pylint: disable-next=invalid-name
    def getArguments() -> "list[dict[str, Any]]":  # noqa: N802
        """Describe the configuration options of the pipelet.

        Returns:
            A single integer option, ``FILTER_EXCEEDING_SIZE_FILES``, that
            defaults to ``MAX_FILE_SIZE``.
        """
        return [
            {
                "name": FILTER_EXCEEDING_SIZE_FILES,
                "display_label": (
                    "Filter items with files exceeding provided size (MB)"
                ),
                "type": "int",
                "default": MAX_FILE_SIZE,
            }
        ]
