"""File related utilities."""

import hashlib
import logging
import subprocess  # noqa: S404 - Used to call `file`
import tempfile
import zipfile
from io import BytesIO
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from collections.abc import Generator
    from io import BufferedReader


def check_file_exists(filepath: "Path | str") -> "tuple[Path, bool]":
    """Report whether a path points at an existing regular file.

    Args:
        filepath: Location to inspect, as a string or ``Path``

    Returns:
        The location as a ``Path`` together with a flag that is True only
        when the location exists and is a file (not a directory)
    """
    candidate = Path(filepath)
    # Nothing on disk at all: no need to ask what kind of entry it is.
    if not candidate.exists():
        return candidate, False
    return candidate, candidate.is_file()


def check_is_binary_non_empty(binary: bytes) -> bool:
    """Tell whether the given bytes contain at least one byte.

    Args:
        binary: Bytes to check

    Returns:
        True when the length is non-zero, False for empty bytes
    """
    size = len(binary)
    return bool(size)


def check_is_file_valid(filepath: "Path | str") -> bool:
    """Tell whether a path is an existing, non-empty regular file.

    Args:
        filepath: Path to file

    Returns:
        True when the file exists and its size is greater than zero bytes
    """
    resolved, is_present = check_file_exists(filepath)
    if not is_present:
        return False
    # Only stat the file once we know it is there.
    return resolved.stat().st_size > 0


def compute_hash(bytes_io: "BufferedReader | BytesIO") -> str:
    """Hash a binary stream with BLAKE2b.

    The stream is consumed from its current position in 4096-byte reads
    until a read returns nothing.

    Args:
        bytes_io: The binary content

    Returns:
        The hexadecimal BLAKE2b digest of everything that was read
    """
    hasher = hashlib.blake2b()  # nosec
    while True:
        block = bytes_io.read(4096)
        if not block:
            break
        hasher.update(block)
    return hasher.hexdigest()


def ensure_file_exists(filepath: str) -> Path:
    """Return the path of an existing file or fail loudly.

    Args:
        filepath: Path to file

    Returns:
        The given location as a ``Path``

    Raises:
        FileNotFoundError: If nothing exists at the location or it is not a
            regular file. The same message is logged as an error first.
    """
    path = Path(filepath)
    if path.exists() and path.is_file():
        return path

    msg = f"File `{filepath}` does not exist."
    logging.error(msg)
    raise FileNotFoundError(msg)


def extract_zip(fp: Path) -> "tuple[list[str], list[str]]":
    """Extract zip file.

    There could be nested directories and nested zip files. All files should be
    extracted. Members are unpacked into a fresh temporary directory which is
    not removed here: the returned paths point into it. Members with a path
    component starting with "." and directory entries are skipped. Nested
    archives are extracted recursively; empty members are deleted and
    reported as invalid.

    The archive at ``fp`` is always deleted before returning, whether or not
    it could be extracted. This function does not raise for unreadable
    archives: the failure is logged and the archive name is reported in the
    invalid list instead.

    Args:
        fp: Path to zip file

    Returns:
        List of paths to extracted files and list of names of invalid files
    """
    files = []
    invalid_files = []

    try:
        with zipfile.ZipFile(fp) as zf:
            # `testzip` signals a corrupt member through its return value
            # (the member's name), not by raising.
            if zf.testzip() is not None:
                invalid_files.append(fp.name)
                return [], invalid_files

            # Create the target directory only once the archive is known to
            # be readable, so rejected archives leave nothing behind.
            base_path = Path(tempfile.mkdtemp())

            for zip_info in zf.infolist():
                # `extract` returns the sanitised location it actually wrote
                # to. Rebuilding the path from the raw member name would
                # escape `base_path` for absolute member names.
                cfp = Path(zf.extract(zip_info, base_path))
                if (
                    any(x.startswith(".") for x in zip_info.filename.split("/"))
                    or cfp.is_dir()
                ):
                    continue

                if is_zip_file(str(cfp)):
                    nested_extracted_files, nested_invalid = extract_zip(cfp)
                    files.extend(nested_extracted_files)
                    invalid_files.extend(nested_invalid)
                elif not check_is_file_valid(cfp):
                    invalid_files.append(cfp.name)
                    cfp.unlink()
                else:
                    files.append(str(cfp))
    except Exception:
        # Best effort: report the archive as invalid and keep whatever was
        # collected before the failure.
        logging.exception("Failed to extract zip file `%s`.", fp)
        invalid_files.append(fp.name)
    finally:
        # Runs after the ZipFile handle is closed, exactly once on every
        # path; `missing_ok` keeps a vanished archive from raising.
        fp.unlink(missing_ok=True)
    return files, invalid_files


def extract_zip_in_mem_generator(
    content: BytesIO,
) -> "Generator[tuple[bytes | None, str | None], None, None]":
    """Extract zip file.

    There could be nested directories and nested zip files. All files should be
    extracted. This function returns a generator that yields the extracted files.
    Directory entries and members with a path component starting with "." are
    skipped. A member that is itself a zip archive is expanded recursively and
    its own entries are yielded in its place.

    Args:
        content: BytesIO object containing zip file

    Yields:
        Tuple of extracted file and file name. The content is None when the
        member is empty or could not be read. If the integrity check finds a
        corrupt member, a single ``(None, archive filename)`` tuple is
        yielded and iteration stops.
    """
    # The context manager releases the archive handle once the generator is
    # exhausted or closed; `content` itself is left open for the caller.
    with zipfile.ZipFile(content) as zf:
        if zf.testzip() is not None:
            logging.error("GG NOT ZIP")
            yield None, zf.filename
            return  # noqa: DOC201

        for zip_info in zf.infolist():
            if zip_info.is_dir() or any(
                x.startswith(".") for x in zip_info.filename.split("/")
            ):
                continue

            try:
                fc = zf.read(zip_info)
                # Probe the bytes already in memory. Probing the consumed
                # member stream needs backward seeks, which re-decompress
                # the member and fail silently on non-seekable sources.
                if zipfile.is_zipfile(BytesIO(fc)):
                    yield from extract_zip_in_mem_generator(BytesIO(fc))
                elif fc:
                    yield fc, zip_info.filename
                else:
                    yield None, zip_info.filename
            except Exception:
                # Unreadable member or broken nested archive: report the
                # member without content and carry on with the rest.
                yield None, zip_info.filename


def get_mime_type(path: str) -> str:
    """Get mime type of file.

    Runs ``/usr/bin/file --mime-type`` on the path and parses its output.

    Args:
        path: Path to file

    Returns:
        Mime type of file
    """
    output = subprocess.check_output(["/usr/bin/file", "--mime-type", path])  # noqa: S603
    # The tool prints "<path>: <mime type>". Split on the LAST separator so
    # a path that itself contains ": " cannot be mistaken for the result.
    return output.decode("utf-8").rsplit(": ", 1)[1].strip()


def is_zip_file(path: str) -> bool:
    """Decide from the detected mime type whether a file counts as a zip.

    Args:
        path: Path to file

    Returns:
        True when the mime type reported for the file is one of the
        accepted archive mime types listed below
    """
    accepted_mime_types = frozenset(
        (
            "application/x-compressed",
            "application/x-compress",
            "application/x-gzip",
            "application/x-zip",
            "application/x-zip-compressed",
            "application/zip",
            "multipart/x-zip",
        )
    )
    detected = get_mime_type(path)
    return detected in accepted_mime_types
