from unittest.mock import patch

import pytest
from duplicate_handling import DuplicateHandlingPipelet

from squirro.lib.storage.handler import StorageHandler


class TestDuplicateHandlingPipelet:
    """Exercise ``DuplicateHandlingPipelet.consume`` against mocked services.

    The tests set return values and assert calls on ``sq_client``,
    ``redis_client`` and ``StorageHandler.instance``, so those are expected
    to be mocks installed outside this class (the setup is not visible here).
    """

    def setup_method(self):
        """Create a fresh pipelet with an empty configuration for each test."""
        self.pipelet = DuplicateHandlingPipelet({})

    def test_consume(self, item, caplog):
        """With no search hits the item comes back carrying its binary hash."""
        query_mock = self.pipelet.sq_client.query
        query_mock.return_value = {"items": []}

        result = self.pipelet.consume(item)

        # The file referenced by the item must have been opened exactly once.
        expected_url = "https://example.com/document.pdf"
        StorageHandler.instance.open.assert_called_once_with(expected_url)
        query_mock.assert_called_once_with(
            "project_id", query="binary_hash:hash -is_deleted:true"
        )
        assert "No duplicates found." in caplog.text
        assert result["keywords"] == {"binary_hash": ["hash"]}

    def test_consume_skip(self, item_skip, caplog):
        """The search uses the hash already stored in the item's keywords."""
        query_mock = self.pipelet.sq_client.query
        query_mock.return_value = {"items": []}

        result = self.pipelet.consume(item_skip)

        query_mock.assert_called_once_with(
            "project_id", query="binary_hash:123 -is_deleted:true"
        )
        assert "No duplicates found." in caplog.text
        assert result["keywords"] == {"binary_hash": ["123"]}

    @patch("duplicate_handling._load_file")
    def test_load_file_failed(self, mock_load_file, item, caplog):
        """An error raised by ``_load_file`` propagates and is logged."""
        failure = Exception()
        mock_load_file.side_effect = failure

        with pytest.raises(Exception):
            self.pipelet.consume(item)

        assert "Could not read file from storage." in caplog.text

    def test_duplicate_found(self, item, caplog):
        """A search hit rejects the item and records the message in redis."""
        original_id = item["id"]
        expected_msg = (
            f"The document you uploaded is a duplicate of {original_id} and "
            "has been rejected. This is the link to the original document: "
            "https://ui.com/app/#dashboard/project_id"
            f"?modal-item-id={original_id}"
        )
        self.pipelet.sq_client.query.return_value = {"items": [item]}

        result = self.pipelet.consume(item)

        assert expected_msg in caplog.text
        self.pipelet.redis_client.hset.assert_called_once_with(
            "duplicated_item_hash", item["id"], expected_msg
        )
        assert result is None

    def test_duplicate_is_deleted(self, item, item_is_deleted, caplog):
        """An empty search result lets the item pass through.

        ``item_is_deleted`` is requested as a fixture but is not referenced
        in the body; the mocked query simply returns no items.
        """
        self.pipelet.sq_client.query.return_value = {"items": []}

        # Shallow copy: nested values stay shared with ``item``.
        shallow_copy = dict(item)
        result = self.pipelet.consume(shallow_copy)

        assert "No duplicates found." in caplog.text
        assert item == result

    def test_duplicate_found_wfi_source(self, item_wfi, caplog):
        """A duplicate of a WFI item is recorded under the WFI redis key."""
        original_id = item_wfi["id"]
        expected_msg = (
            f"The document you uploaded is a duplicate of {original_id} and "
            "has been rejected. This is the link to the original document: "
            "https://ui.com/app/#dashboard/project_id?"
            f"modal-item-id={original_id}"
        )
        self.pipelet.sq_client.query.return_value = {"items": [item_wfi]}

        result = self.pipelet.consume(item_wfi)

        assert expected_msg in caplog.text
        self.pipelet.redis_client.hset.assert_called_once_with(
            "duplicated_item_wfi_hash", item_wfi["id"], expected_msg
        )
        assert result is None


@pytest.fixture
def item():
    """Provide a minimal item: id ``123``, one file URL and no keywords."""
    attachment = {"content_url": "https://example.com/document.pdf"}
    return {"id": "123", "files": [attachment], "keywords": {}}


@pytest.fixture
def item_skip():
    """Provide an item with a preset binary hash and the skip-compute flag."""
    attachment = {"content_url": "https://example.com/document.pdf"}
    preset_keywords = {
        "binary_hash": ["123"],
        "skip_binary_hash_compute": ["true"],
    }
    return {"id": "123", "files": [attachment], "keywords": preset_keywords}


@pytest.fixture
def item_wfi():
    """Provide an item tagged as ``WFI Historic`` with a WFI document id."""
    attachment = {"content_url": "https://example.com/document.pdf"}
    wfi_keywords = {
        "source_type": ["WFI Historic"],
        "wfi_document_id": ["wfi_id"],
    }
    return {"id": "123", "files": [attachment], "keywords": wfi_keywords}


@pytest.fixture
def item_is_deleted():
    """Provide an item whose keywords mark it with ``is_deleted`` = true."""
    attachment = {"content_url": "https://example.com/document.pdf"}
    deleted_keywords = {"is_deleted": ["true"]}
    return {
        "id": "123deleted",
        "files": [attachment],
        "keywords": deleted_keywords,
    }
