import json
import timeit
from pathlib import Path

from octopus.data import CompanyDataIndex

# Names of the sample text files (inside the "test-data" directory) that
# main() profiles, in this order. The numeric stems presumably reflect the
# approximate length of each text -- confirm against the fixtures.
texts = [f"{size}.txt" for size in (100, 1000, 10000)]


def main():
    """Profile ``index.extract_companies`` on the sample texts.

    Loads the mocked company records from ``test-data/company_data.jsonl``
    (located next to this script), builds a ``CompanyDataIndex`` from them
    and prints how many names the index reports. Then, for every file name
    in ``texts``, reads the file and prints the total time ``timeit``
    measures for 100 calls of ``index.extract_companies`` on its contents.

    Raises:
        OSError: If one of the fixture files cannot be opened.
        UnicodeDecodeError: If a fixture file is not valid UTF-8.
        json.JSONDecodeError: If a non-blank line of the JSONL file is not
            valid JSON.
    """
    data_dir = Path(__file__).parent / "test-data"

    # Load mocked company data from jsonl.
    # The encoding is pinned to UTF-8 so the fixtures decode the same way on
    # every platform instead of depending on the locale default. Blank lines
    # (e.g. a trailing empty line) are skipped because json.loads rejects them.
    with open(data_dir / "company_data.jsonl", encoding="utf-8") as f:
        company_data = [json.loads(line) for line in f if line.strip()]

    # Build index
    index = CompanyDataIndex.build_index(company_data)
    print("# of companies in index:", len(index.get_all_names()))

    # Profile the time taken to extract companies from text of different length
    for text in texts:
        with open(data_dir / text, encoding="utf-8") as f:
            txt = f.read()

        # The reported length is the number of whitespace-separated tokens.
        print(f"Extracting companies from text of length {len(txt.split())}:", end="\t")
        print(
            timeit.timeit(
                # The lambda is executed by timeit before the loop variable
                # is rebound, so the late-binding closure is harmless here.
                lambda: index.extract_companies(txt),  # noqa: B023
                number=100,
            )
        )


# Run the profiling only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
