# mypy: ignore-errors
"""This script is for Activity Tracking that is run as a CRON job, that is responsible for
1. Tracking what documents got ingested and what failed
2. Sending email notifications to users about the status of their documents
"""

import json
import logging
from collections import defaultdict
from datetime import datetime
from typing import TYPE_CHECKING

import pytz
from jinja2 import Template

from octopus.activity_tracking import RejectionReason
from octopus.clients import init_redis_client, init_squirro_client
from octopus.email import EmailPayload, STREAM_NAME
from octopus.stream import add_to_stream
from octopus.utils import load_config, set_log_verbosity
from squirro_client import ItemUploader

if TYPE_CHECKING:
    from typing import Any, Literal

# Set the log verbosity to INFO; note that this runs at import time.
set_log_verbosity(logging.INFO)


# Not referenced in the visible part of this file.
# NOTE(review): presumably HTTP status codes treated as success - confirm.
VALID_STATUS_CODES = (200, 204)
# Rejection reasons that `TrackJobs.track` does not record as failures;
# items rejected with one of these are skipped there.
RETRY_REJECTION_IDS = (RejectionReason.WFI_CHECKIN_FAILED,)

# Status keys used for the status -> items map built in `TrackJobs.track` and
# for the per-user buckets of the email payload.
SUCCESS = "success"
PENDING = "pending"
FAILURE = "failure"

# Not referenced in the visible part of this file.
# NOTE(review): presumably a retry limit used elsewhere - confirm.
RETRY_COUNT = 3
# Placeholder user name in the email data until an item provides `user_name`.
UNKNOWN = "UNKNOWN"
# strftime format applied to `created_at` in email notifications.
DATETIME_FORMAT = "%d/%m/%Y %H:%M:%S"

# pylint: disable=too-few-public-methods, too-many-instance-attributes
class TrackJobs:
    """Track jobs."""

    def __init__(self) -> None:
        """Load the configuration and create the Squirro and Redis clients.

        Raises:
            Exception: Any error raised while reading a required option from
                the configuration; it is logged and re-raised unchanged.
        """
        config = load_config()

        try:
            read_option = config.get

            # Squirro connection settings
            self.squirro_token = read_option("squirro", "token")
            self.squirro_project_id = read_option("squirro", "project_id")
            self.squirro_cluster = read_option("squirro", "cluster")
            self.squirro_environment = read_option("squirro", "environment")

            # Project that receives the activity-tracking items
            self.activity_project_id = read_option("activity", "project_id")
        except Exception:
            logging.exception("Reading configuration file failed")
            raise

        # Only the first element returned by the factory is kept.
        self.sq_client, _ = init_squirro_client(config)
        self.redis_client = init_redis_client()

    def _delete_hash_records(self, hash_name: "str", keys: "list") -> None:
        """Remove the given fields from a Redis hash and log the outcome.

        Args:
            hash_name: Name of the Redis hash to delete from.
            keys: Field names to remove. When empty, Redis is not called.

        Raises:
            Exception: Any error raised by the Redis client; it is logged and
                re-raised unchanged.
        """
        # Guard clause: nothing to delete, so skip the Redis round trip.
        if not keys:
            logging.info("No records removed from Redis hash `%s`", hash_name)
            return

        try:
            removed = self.redis_client.hdel(hash_name, *keys)
        except Exception:
            logging.exception(
                "Deleting records from Redis hash `%s` failed", hash_name
            )
            raise

        logging.info("Removed %s record(s) from Redis hash `%s`", removed, hash_name)

    def _send_email_notifications(self, data_map: "dict") -> None:
        """Queue an ingestion-report email for every user with failed documents.

        Only the entries stored under the ``FAILURE`` key of ``data_map`` are
        used. They are grouped by ``user_email`` and one payload per user is
        added to the email stream.

        Args:
            data_map: Mapping of status to a list of item metadata dicts. Each
                failed item must provide ``user_email`` and a ``created_at``
                value accepted by ``datetime.fromisoformat``; ``user_name`` is
                optional.

        Raises:
            KeyError: If a failed item has no ``user_email`` or ``created_at``.
            ValueError: If ``created_at`` is not a valid ISO-format timestamp.
        """
        # Loop-invariant, so resolve the timezone once.
        report_tz = pytz.timezone("Asia/Singapore")

        users_data: dict = defaultdict(
            lambda: {
                "name": UNKNOWN,
                SUCCESS: [],
                PENDING: [],
                FAILURE: [],
            }
        )

        for item in data_map.get(FAILURE, []):
            email = item["user_email"]

            # Work on a copy: the caller's metadata keeps its ISO timestamp, so
            # running this twice on the same data does not break fromisoformat.
            report_item = dict(item)
            # NOTE(review): a naive timestamp is interpreted in the system's
            # local timezone by astimezone() - confirm that this is intended.
            report_item["created_at"] = (
                datetime.fromisoformat(item["created_at"])
                .astimezone(report_tz)
                .strftime(DATETIME_FORMAT)
            )

            user_entry = users_data[email]
            user_entry[FAILURE].append(report_item)

            # The first item that carries a user name decides the display name.
            if user_entry["name"] == UNKNOWN:
                user_entry["name"] = item.get("user_name", UNKNOWN)

        for email, data in users_data.items():
            logging.info(
                "Sending emails for user - %s; Email data is - %s", email, data
            )

            if data[FAILURE]:
                add_to_stream(
                    EmailPayload.create_payload(
                        recipients=email,
                        data={"user_info": data},
                        type="ingestion_report",
                    ).to_dict(),
                    STREAM_NAME,
                    redis_client=self.redis_client,
                )
            else:
                # Defensive branch; the address is now actually interpolated.
                logging.info("No documents present to notify to user `%s`", email)

        logging.info("Completed sending of notifications")

    def _get_processed_redis_hash(self, redis_key: str) -> "dict[str, Any]":
        """Read a Redis hash and return it with decoded keys and parsed values.

        Each value is parsed as JSON; a value that is not valid JSON is
        returned as its decoded string instead.

        Args:
            redis_key: Name of the Redis key to read.

        Returns:
            The hash contents, or an empty dict when the key does not exist.

        Raises:
            TypeError: If the key exists but does not hold a hash.
        """
        redis_type = self.redis_client.type(redis_key)

        if redis_type == b"none":
            return {}
        if redis_type != b"hash":
            raise TypeError("Invalid redis key type.")

        def _parse(raw):
            # Prefer structured data; fall back to the plain decoded text.
            try:
                return json.loads(raw)
            except json.decoder.JSONDecodeError:
                return raw.decode()

        processed = {}
        for field, raw in self.redis_client.hgetall(redis_key).items():
            parsed = _parse(raw)
            processed[field.decode()] = parsed
        return processed

    def _create_sq_item(
        self,
        data: "dict[str, str]",
        status: "Literal['Success', 'Pending', 'Failure']",
        body_template: str,
    ) -> "dict[str, Any]":
        """Build the Squirro item that records one tracked document.

        ``id``, ``title`` and ``created_at`` are taken from ``data`` as item
        fields; every other entry becomes a keyword, wrapped in a list when it
        is not one already. ``source_type`` is exposed as ``source_name``.

        Args:
            data: Metadata of the document.
            status: Ingestion status stored in the ``ingestion_status`` keyword.
            body_template: Jinja2 template source rendered with ``keywords``
                and ``created_at`` to produce the item body.

        Returns:
            A dict with ``item_created_at``, ``title``, ``body`` and
            ``keywords``.
        """
        item_id = data.get("id", "")
        title = data.get("title", "")
        created_at = data.get("created_at", "")

        # SQ fields are handled above; everything else is a keyword.
        keywords: dict[str, list[str]] = {
            key: value if isinstance(value, list) else [value]
            for key, value in data.items()
            if key not in ("id", "title", "created_at")
        }

        keywords["action"] = ["document.load"]
        keywords["item_id"] = [item_id]
        keywords["item_title"] = [title]
        keywords["ingestion_status"] = [status]

        # source_name is used in Monitoring project. A record without
        # source_type must not abort the whole run, so the keyword is left out
        # and the gap is reported instead of raising KeyError.
        if "source_type" in keywords:
            keywords["source_name"] = keywords.pop("source_type")
        else:
            logging.warning("Item `%s` has no `source_type`", item_id)

        return {
            "item_created_at": created_at,
            "title": title,
            "body": Template(body_template).render(
                keywords=keywords, created_at=created_at
            ),
            "keywords": keywords,
        }

    # pylint: disable=too-many-locals, too-many-arguments, too-many-positional-arguments
    def track(
        self,
        item_hash,
        item_rejection_hash,
        item_success_hash,
        rejection_hash,
        duplicated_item_hash,
        source_name,
        send_emails=False,
    ) -> None:
        """Upload the ingestion status of all tracked items and clean up Redis.

        Items are classified as successful, failed or pending, uploaded to the
        activity project, optionally reported to users by email, and the
        processed records are then removed from the Redis hashes.

        Args:
            item_hash: Redis hash mapping item ID to item metadata.
            item_rejection_hash: Redis hash mapping item ID to a rejection
                reason (duplicates excluded).
            item_success_hash: Redis hash of successfully ingested items.
            rejection_hash: Redis hash mapping a rejection reason to its
                message.
            duplicated_item_hash: Redis hash of items rejected as duplicates.
            source_name: Source name passed to the item uploader.
            send_emails: Whether to notify users about their failed items.
        """
        # The hashes are read in a fixed order; keep it when editing.
        all_items = self._get_processed_redis_hash(item_hash)
        reason_messages = self._get_processed_redis_hash(rejection_hash)
        succeeded = self._get_processed_redis_hash(item_success_hash)
        rejected = self._get_processed_redis_hash(item_rejection_hash)
        duplicates = self._get_processed_redis_hash(duplicated_item_hash)

        # Duplicates are reported as failures as well.
        rejected.update(duplicates)

        failed: dict[str, str] = {}
        for item_id, raw_reason in rejected.items():
            reason = str(raw_reason)
            # Retryable rejections are not recorded as failures.
            # NOTE(review): this compares a str against the members of
            # RETRY_REJECTION_IDS - confirm that those compare equal to str.
            if reason in RETRY_REJECTION_IDS:
                continue

            # Use the configured message when there is one, else the raw reason.
            reason = reason_messages.get(reason, reason)

            metadata = all_items.get(item_id)
            if not metadata:
                logging.warning("Item %s not in %s", item_id, item_hash)
                continue

            metadata["id"] = item_id
            metadata["ingestion_status"] = "Failure"
            metadata["rejection_reason"] = reason
            failed[item_id] = metadata

        # Whatever is neither successful nor failed is still in progress.
        pending = {}
        for item_id, metadata in all_items.items():
            if item_id in succeeded or item_id in failed:
                continue
            pending[item_id] = metadata

        items_map: dict[str, list[dict[str, str]]] = {
            SUCCESS: list(succeeded.values()),
            PENDING: list(pending.values()),
            FAILURE: list(failed.values()),
        }

        uploader = ItemUploader(
            project_id=self.activity_project_id,
            cluster=self.squirro_cluster,
            token=self.squirro_token,
            source_name=source_name,
            pipeline_workflow_name="Standard",
        )

        # NOTE(review): the path is relative to the current working directory.
        with open("templates/body_template.html", encoding="utf-8") as template_file:
            body_template = template_file.read()

        for status, entries in items_map.items():
            status_label = status.capitalize()
            uploader.upload(
                [
                    self._create_sq_item(entry, status_label, body_template)
                    for entry in entries
                ]
            )

        if send_emails:
            self._send_email_notifications(data_map=items_map)

        # Pending items stay in Redis so that a later run picks them up.
        succeeded_ids = list(succeeded)
        failed_ids = list(failed)
        self._delete_hash_records(item_success_hash, succeeded_ids)
        self._delete_hash_records(item_rejection_hash, failed_ids)
        self._delete_hash_records(duplicated_item_hash, list(duplicates))
        self._delete_hash_records(item_hash, succeeded_ids + failed_ids)


if __name__ == "__main__":
    tracker = TrackJobs()

    # One tracking pass per set of Redis hashes; a pass without `send_emails`
    # falls back to the default of `TrackJobs.track` (no emails).
    tracking_runs = (
        {
            "item_hash": "item_hash",
            "item_rejection_hash": "item_rejection_hash",
            "item_success_hash": "item_success_hash",
            "rejection_hash": "rejection_hash",
            "duplicated_item_hash": "duplicated_item_hash",
            "source_name": "Ingestion",
            "send_emails": True,
        },
        {
            "item_hash": "item_wfi_hash",
            "item_rejection_hash": "item_wfi_rejection_hash",
            "item_success_hash": "item_wfi_success_hash",
            "rejection_hash": "rejection_hash",
            "duplicated_item_hash": "duplicated_item_wfi_hash",
            "source_name": "Ingestion WFI",
        },
    )

    for run_kwargs in tracking_runs:
        tracker.track(**run_kwargs)
