import json

import pytest
from sub_items_enrichment import SubItemsEnrichment


class TestSubItemsEnrichment:
    """Tests for ``SubItemsEnrichment.consume`` driven by this module's fixtures."""

    def setup_method(self):
        """Build a pipelet limited to 3 pages and stub its Redis ``hget`` reply."""
        config = {"max_pages_to_process": 3}
        cached_reply = json.dumps({"document_category": "Approvals"}).encode()

        self.pipelet = SubItemsEnrichment(config)
        # ``redis_client`` is presumably a mock injected elsewhere (e.g. conftest).
        self.pipelet.redis_client.hget.return_value = cached_reply

    def test_init(self):
        """The configured page limit is stored on the pipelet."""
        page_limit = self.pipelet.max_pages_to_process
        assert page_limit == 3

    def test_company_extraction_no_company(self, mock_item_no_company):
        """No prediction keyword is written when no company is recognised."""
        keywords = self.pipelet.consume(mock_item_no_company)["keywords"]

        assert "company_name_pred" not in keywords

    def test_company_extraction_max_page_3(self, mock_item):
        """With the limit of 3 from ``setup_method`` only Company A-C are predicted."""
        expected_companies = ["Company A", "Company B", "Company C"]

        enriched = self.pipelet.consume(mock_item)

        assert enriched["keywords"]["company_name_pred"] == expected_companies

    def test_company_extraction_no_sub_items(self, mock_item_no_sub_items):
        """An item without sub items still yields the company named in its body."""
        enriched = self.pipelet.consume(mock_item_no_sub_items)

        assert enriched["keywords"]["company_name_pred"] == ["Company E"]

    def test_company_extraction_no_body(self, mock_item_no_body, caplog):
        """Without body and sub items nothing is predicted and a message is logged."""
        keywords = self.pipelet.consume(mock_item_no_body)["keywords"]

        assert "company_name_pred" not in keywords
        assert "No pages to extract companies from" in caplog.text

    def test_company_extraction_multipages(self, mock_item):
        """Limits of 4 and 6 both yield all four companies of ``mock_item``."""
        all_companies = ["Company A", "Company B", "Company C", "Company D"]

        # The same fixture instance is consumed twice, once per limit.
        for page_limit in (4, 6):
            self.pipelet.max_pages_to_process = page_limit
            enriched = self.pipelet.consume(mock_item)
            assert enriched["keywords"]["company_name_pred"] == all_companies

    def test_define_display_labels(self, mock_item, caplog):
        """User-provided company uids are kept and reported in the log."""
        expected_uids = ["SG_11111", "SG_22222"]
        expected_message = (
            "Company uid provided by user for item: ['SG_11111', 'SG_22222']"
        )

        enriched = self.pipelet.consume(mock_item)

        assert enriched["keywords"].get("company_uid") == expected_uids
        assert expected_message in caplog.text

    def test_define_display_labels_no_company_uid(self, mock_item_no_body):
        """An item with empty keywords does not gain a ``company_name`` keyword."""
        keywords = self.pipelet.consume(mock_item_no_body)["keywords"]

        assert "company_name" not in keywords

    def test_company_data_augmentation_missing_company(
        self, mock_item_missing_company_name, caplog
    ):
        """A partially resolvable company list leaves the item as is and logs it."""
        expected_message = (
            "Not all companies found in the company index, found 1 companies, expected 2"
        )

        enriched = self.pipelet.consume(mock_item_missing_company_name)

        # NOTE(review): if ``consume`` returns the very dict it was given, this
        # comparison is trivially true - confirm whether a pre-call snapshot
        # of the fixture should be compared instead.
        assert mock_item_missing_company_name == enriched
        assert expected_message in caplog.text

    def test_company_data_augmentation(self, mock_item):
        """Uids, CIFs and permission codes are present after consumption."""
        expected_uid_permission_codes = [
            "SG_11111___A1",
            "SG_11111___B1",
            "SG_22222___B1",
            "SG_22222___C1",
        ]

        keywords = self.pipelet.consume(mock_item)["keywords"]

        assert keywords["company_uid"] == ["SG_11111", "SG_22222"]
        assert keywords["company_cif"] == ["11111", "22222"]
        # Permission codes are compared as a set, i.e. order-insensitively.
        assert set(keywords["permission_code"]) == {"A1", "B1", "C1"}
        assert keywords["uid_permission_code"] == expected_uid_permission_codes

    def test_tag_company_data_not_initial_checkin(self, mock_item_no_initial_checkin):
        """No ``wfi_company_*`` keyword appears when initial check-in is False."""
        absent_keys = (
            "wfi_company_name",
            "wfi_company_cif",
            "wfi_company_rm_code",
            "wfi_company_segment",
            "wfi_company_team_name",
        )

        keywords = self.pipelet.consume(mock_item_no_initial_checkin)["keywords"]

        for key in absent_keys:
            assert key not in keywords

    def test_tag_company_data(self, mock_item):
        """``mock_item`` is tagged with the expected ``wfi_company_*`` keywords."""
        expected_tags = {
            "wfi_company_name": ["Company B"],
            "wfi_company_cif": ["11111"],
            "wfi_company_rm_code": ["R012"],
            "wfi_company_segment": ["R"],
            "wfi_company_team_name": ["Team B"],
        }

        keywords = self.pipelet.consume(mock_item)["keywords"]

        for key, expected_value in expected_tags.items():
            assert keywords[key] == expected_value


@pytest.fixture
def mock_item():
    """Return an item with a body, four sub items and company keywords.

    Each sub item body mentions one of Company A-D; the keywords carry two
    company CIFs, uids and names.
    """
    sub_items = [
        {"body": "This is the body of the sub item 1 with Company A"},
        {"body": "This is the body of the sub item 2 with Company B"},
        {"body": "This is the body of the sub item 3 with Company C"},
        {"body": "This is the body of the sub item 4 with Company D"},
    ]
    keywords = {
        "company_cif": ["11111", "22222"],
        "company_uid": ["SG_11111", "SG_22222"],
        "company_name": ["Company A", "Company B"],
    }

    item = {"id": "item_id", "title": "title"}
    item["body"] = "This is the body of the item"
    item["sub_items"] = sub_items
    item["keywords"] = keywords
    return item


@pytest.fixture
def mock_item_no_initial_checkin():
    """Return a WFI-sourced item whose ``wfi:initial_checkin`` flag is False.

    The item has no body and no sub items.
    """
    keywords = {
        "company_uid": ["SG_11111", "SG_22222"],
        "company_name": ["Company A", "Company B"],
        "source_type": ["WFI"],
    }

    item = {"id": "item_id", "title": "title"}
    item["keywords"] = keywords
    item["wfi:initial_checkin"] = False
    return item


@pytest.fixture
def mock_item_missing_company_name():
    """Return an item whose only keyword is a list of two company names."""
    company_names = ["Company A", "Squirro"]

    item = {"id": "item_id", "title": "title"}
    item["keywords"] = {"company_name": company_names}
    return item


@pytest.fixture
def mock_item_no_company():
    """Return an item with four sub items that mention only the word "Company".

    The keywords dict starts out empty.
    """
    sub_items = [
        {"body": "This is the body of the sub item 1 with Company"},
        {"body": "This is the body of the sub item 2 with Company"},
        {"body": "This is the body of the sub item 3 with Company"},
        {"body": "This is the body of the sub item 4 with Company"},
    ]

    item = {"id": "item_id", "title": "title"}
    item["body"] = "This is the body of the item"
    item["sub_items"] = sub_items
    item["keywords"] = {}
    return item


@pytest.fixture
def mock_item_no_sub_items():
    """Return an item without sub items whose body mentions Company E."""
    item = {"id": "item_id", "title": "title"}
    item["body"] = "This is the body of the item. Company E."
    item["keywords"] = {}
    return item


@pytest.fixture
def mock_item_no_body():
    """Return a minimal item: id, title and empty keywords, no body or sub items."""
    item = {"id": "item_id", "title": "title"}
    item["keywords"] = {}
    return item
