"""Company data index."""

import json
import re
from pathlib import Path
from typing import TYPE_CHECKING

from octopus.text import sanitize_text

from .suffixes import generate_suffix_list

if TYPE_CHECKING:
    from collections.abc import Hashable
    from re import Pattern
    from typing import Any


def _generate_regexes_lookup(names: list[str]) -> tuple[list[str], dict[str, str]]:
    """Generate regexes and lookup from company names.

    Every name is sanitized and compared against the suffix groups returned by
    ``generate_suffix_list``. For the first group that has a suffix sitting at
    the very end of the sanitized name, the emitted regex is the name without
    that suffix followed by an alternation of all suffixes of the group, and
    the lookup receives one entry per suffix variant. A name that ends with no
    suffix of any group is registered as-is (sanitized).

    Args:
        names: Company names.

    Returns:
        Tuple of regexes (longest first, ties in alphabetical order) and a
        lookup that maps each key back to the original company name.
    """
    regexes: set[str] = set()
    lookup: dict[str, str] = {}
    for name in sorted(names):
        # Maintain the original format from the company name list
        search_str = sanitize_text(name, keep_stopwords=True, keep_chars="all")

        regex = ""
        for suffixes in generate_suffix_list():
            unique_suffixes: dict[str, None] = {}
            name_wo_suffix = ""
            for suffix in suffixes:
                matches = re.findall(suffix, search_str)
                if not matches:
                    continue
                end_idx = search_str.rfind(matches[-1])

                # Only a suffix located at the very end of search_str counts;
                # otherwise keep looking at the other suffixes of the group.
                if end_idx + len(matches[-1]) != len(search_str):
                    continue

                # Collapse whitespace runs with re.sub: str.replace would look
                # for the literal text "\s+" and therefore never change anything.
                name_wo_suffix = re.sub(
                    r"\s+",
                    " ",
                    sanitize_text(
                        search_str[:end_idx], keep_stopwords=True, keep_chars=[".", " "]
                    ),
                )
                # dict.fromkeys removes duplicates while keeping the group order.
                unique_suffixes = dict.fromkeys(
                    re.sub(r"\s+", " ", s) for s in suffixes
                )
                regex = f"{name_wo_suffix}(?:{'|'.join(unique_suffixes)})"
                break

            if not regex or not unique_suffixes:
                continue

            regexes.add(regex)

            for suffix in unique_suffixes:
                # Un-escape dots so the key is literal text, not a pattern.
                s = suffix.replace("\\.", ".")
                lookup[f"{name_wo_suffix}{s}"] = name

            break
        else:
            # No suffix group applied: use the sanitized name itself both as
            # the pattern and as the lookup key.
            key = sanitize_text(search_str, keep_stopwords=True, keep_chars=[".", " "])
            regexes.add(key)
            lookup[key] = name

    # pylint: disable-next=W0108
    return sorted(regexes, key=lambda x: (-len(x), x)), lookup


class CompanyDataIndex:
    """Company data index.

    Allows to search for company data by name, uid or cif, and to extract
    known company names from free text.
    """

    # Compiled alternation of all company-name patterns.
    regex: "Pattern[str]"
    # Maps a string matched by ``regex`` to the original company name.
    regex_lookup: "dict[str, str]"
    # Company records keyed by cif, uid and name respectively.
    _index_cif: "dict[str, dict[Hashable, list[str]]]"
    _index_uid: "dict[str, dict[Hashable, list[str]]]"
    _index_name: "dict[str, dict[Hashable, list[str]]]"
    # Company names known to the index.
    _names: "list[str]"

    def __init__(self) -> None:
        """Initialize an empty company data index."""
        # ``(?!)`` can never match, so an index that has not been built or
        # loaded yet extracts nothing instead of raising AttributeError.
        self.regex = re.compile(r"(?!)")
        self.regex_lookup = {}
        self._index_cif = {}
        self._index_uid = {}
        self._index_name = {}
        self._names = []

    @classmethod
    def build_index(
        cls, company_data: "list[dict[Hashable, list[str]]]"
    ) -> "CompanyDataIndex":
        """Builds the index for company data.

        The first entry of ``company_cif``, ``company_uid`` and
        ``company_name`` is used as the key of the respective index. A record
        whose value for one of these keys is missing or empty is left out of
        that index; a later record with the same key replaces an earlier one.

        Args:
            company_data: Company data from company data project.

        Returns:
            CompanyDataIndex object.
        """
        obj = cls()
        names: set[str] = set()
        for company in company_data:
            # ``or [""]`` also covers a key that is present but holds an empty
            # list (or None), which would otherwise raise on ``[0]``.
            if cif := (company.get("company_cif") or [""])[0]:
                obj.index_cif[cif] = company
            if uid := (company.get("company_uid") or [""])[0]:
                obj.index_uid[uid] = company
            if name := (company.get("company_name") or [""])[0]:
                obj.index_name[name] = company
                names.add(name)
        obj.names = sorted(names)

        regexes, lookup = _generate_regexes_lookup(obj.names)
        # Joining zero patterns gives "", which matches at every position;
        # keep the never-matching default from __init__ in that case.
        if regexes:
            obj.regex = re.compile("|".join(regexes))
        obj.regex_lookup.update(lookup)

        return obj

    def extract_companies(self, txt: "str") -> "list[str]":
        """Extracts company name from text.

        The text is sanitized, scanned with ``regex`` and every match is
        translated through ``regex_lookup``. Matches without a lookup entry
        are skipped.

        Args:
            txt: Text to extract company names from.

        Returns:
            Company names, ordered by matched text (longest first, ties in
            alphabetical order). A company matched several times appears
            several times.
        """
        cleaned = sanitize_text(txt, keep_stopwords=True, keep_chars=[".", " "])
        matches = sorted(self.regex.findall(cleaned), key=lambda m: (-len(m), m))
        # Filter on the lookup itself; testing the matched text against a
        # placeholder would let unknown matches through as placeholder values.
        return [self.regex_lookup[m] for m in matches if m in self.regex_lookup]

    def get_all_names(self) -> "list[str]":
        """Returns all company names.

        Returns:
            Sorted list of company names (a new list on every call).
        """
        return sorted(self._names)

    @property
    def index_cif(self) -> "dict[str, dict[Hashable, list[str]]]":
        """Return the index cif."""
        return self._index_cif

    @index_cif.setter
    def index_cif(self, value: "dict[str, dict[Hashable, list[str]]]") -> None:
        """Set the index cif."""
        self._index_cif = value

    @property
    def index_name(self) -> "dict[str, dict[Hashable, list[str]]]":
        """Return the index name."""
        return self._index_name

    @index_name.setter
    def index_name(self, value: "dict[str, dict[Hashable, list[str]]]") -> None:
        """Set the index name."""
        self._index_name = value

    @property
    def index_uid(self) -> "dict[str, dict[Hashable, list[str]]]":
        """Return the index uid."""
        return self._index_uid

    @index_uid.setter
    def index_uid(self, value: "dict[str, dict[Hashable, list[str]]]") -> None:
        """Set the index uid."""
        self._index_uid = value

    @classmethod
    def load_index(
        cls, path: "str" = "/flash/octopus/cache/company_data_index.json"
    ) -> "CompanyDataIndex":
        """Loads the index from a file.

        The file is expected to be JSON with the keys written by
        ``save_index``.

        Args:
            path: Path to load the index from.

        Returns:
            CompanyDataIndex object.
        """
        with Path(path).open(encoding="utf-8") as f:
            data = json.load(f)
        obj = cls()
        obj.regex = re.compile(data["regex"])
        obj.regex_lookup = data["regex_lookup"]
        obj.index_cif = data["index_cif"]
        obj.index_uid = data["index_uid"]
        obj.index_name = data["index_name"]
        obj.names = data["names"]
        return obj

    @property
    def names(self) -> "list[str]":
        """Return the names."""
        return self._names

    @names.setter
    def names(self, value: "list[str]") -> None:
        """Set the names."""
        self._names = value

    def save_index(
        self, path: "str" = "/flash/octopus/cache/company_data_index.json"
    ) -> None:
        """Saves the index to a file.

        The index is written as a single JSON object; the compiled regex is
        stored as its pattern string.

        Args:
            path: Path to save the index.
        """
        payload = {
            "regex": self.regex.pattern,
            "regex_lookup": self.regex_lookup,
            "index_cif": self._index_cif,
            "index_uid": self._index_uid,
            "index_name": self._index_name,
            "names": self._names,
        }
        with Path(path).open("w", encoding="utf-8") as f:
            json.dump(payload, f)

    @staticmethod
    def _lookup_all(
        index: "dict[str, dict[Hashable, list[str]]]", keys: "str | list[str]"
    ) -> "list[dict[Hashable, list[str]]]":
        """Return the records stored in ``index`` for ``keys``, in key order.

        A single string is treated as a one-element list. Keys that are absent
        from the index, or whose record is empty, are skipped.
        """
        if isinstance(keys, str):
            keys = [keys]
        return [record for key in keys if (record := index.get(key))]

    def search_by_cifs(
        self, cif: "str | list[str]"
    ) -> "list[dict[Hashable, list[str]]]":
        """Searches for company data by cif.

        Args:
            cif: Company cif(s).

        Returns:
            Company data for the cifs that are present in the index.
        """
        return self._lookup_all(self._index_cif, cif)

    def search_by_names(
        self, name: "str | list[str]"
    ) -> "list[dict[Hashable, list[str]]]":
        """Searches for company data by name.

        Args:
            name: Company name(s).

        Returns:
            Company data for the names that are present in the index.
        """
        return self._lookup_all(self._index_name, name)

    def search_by_uids(
        self, uid: "str | list[str]"
    ) -> "list[dict[Hashable, list[str]]]":
        """Searches for company data by uid.

        Args:
            uid: Company uid(s).

        Returns:
            Company data for the uids that are present in the index.
        """
        return self._lookup_all(self._index_uid, uid)
