from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
import re

import yaml


# Directory that is searched (recursively) for page files; also the default
# base path that LineMaker uses when shortening page paths.
WORKS = r'C:\dokuwiki\harnwiki-sync\data\pages\works'
# Default name of the file produced by write_file().
OUTPUT_FILE = 'ref_db.txt'
# Glob handed to Path.rglob() to find candidate page files.
GLOB = '*.txt'
# File names that PageGetter always leaves out of its results.
IGNORES = ['alpha_list.txt',
           'by_creation_date.txt',
           'i_template.txt',
           'c_template.txt',
           'navbar.txt',
           'start.txt',
           'sidebar.txt',
           'rightsidebar.txt']
# Path.match() pattern for pages that PageGetter.get_works() excludes.
GLOB_PUBLISHERS = '*/publishers/*'
# Regex counterpart used by PageGetter.get_works_re(); it is matched against
# the string form of the page's parent directory.
RE_PUBLISHERS = '^.*publishers$'
# YAML file read by PageGetter.load_duplicates(); its entries become
# DupeHandler instances.
DUPLICATES_FILE = 'harnwiki_duplicate_pagenames.yml'

# Icon markup that LineMaker.get_icon() returns for each kind of page.
ICON_CANON = r'{{mdi>checkbox-marked?size=12|Canon}}'
ICON_FANON = r'{{mdi>fan?size=12|Fanon}}'
ICON_OOP = r'{{mdi>grave-stone?size=12|Out of print}}'
# First line of the generated database (see get_ref_db()).
DB_HEADER = '^ Note name ^ Note text ^'
# Patterns searched for in a page's content to pick its icon.
RE_CANON = r'\{\{page>icons:(?:cgi|kpl)&inline\}\}'
RE_OOP = r'\{\{page>icons:out_of_print&inline\}\}'
# Text placed in front of every reference name by LineMaker.get_ref().
REF_PREFIX = ':ref:'

# Small string constants used as defaults throughout the module.
UTF8 = 'utf-8'
NEWLINE = '\n'
COLON = ':'
UNDERSCORE = "_"
PIPE = '|'
OPEN_LINK = '[['
CLOSE_LINK = ']]'


@dataclass
class DupeHandler:
    """Describe how one duplicated page name should be treated.

    A handler either supplies prefixes that disambiguate the reference
    name, or names the single namespace whose copy of the page is kept.
    """

    # File name (including extension) that this handler applies to; it is
    # compared with ``Path.name``.
    page_name: str
    # Candidate prefixes; the first one that is also a component of the
    # page's parent path is used.  ``None`` when not applicable.
    add_prefixes: list = None
    # Path component that a page's parent must contain for the page to be
    # kept.  ``None`` when not applicable.
    favor_namespace: str = None


class LineMaker:
    """Build the table rows of the references database.

    Each row pairs a RefNotes reference name with an icon and a DokuWiki
    link to the page.  The icon is chosen by inspecting the page's content.
    """

    def __init__(self, re_oop=RE_OOP, re_canon=RE_CANON, icon_oop=ICON_OOP,
                 icon_canon=ICON_CANON, icon_fanon=ICON_FANON,
                 wiki_path=WORKS, encoding=UTF8, ref_prefix=REF_PREFIX,
                 add_prefixes=None, prefix_delim=UNDERSCORE):
        """Store configuration and compile the content patterns.

        Args:
            re_oop: Regex that marks a page as out of print.
            re_canon: Regex that marks a page as canon.
            icon_oop: Icon markup used when ``re_oop`` matches.
            icon_canon: Icon markup used when ``re_canon`` matches.
            icon_fanon: Icon markup used when neither pattern matches.
            wiki_path: Base directory; page paths are made relative to
                its parent.
            encoding: Encoding used to read page files.
            ref_prefix: Text placed in front of every reference name.
            add_prefixes: DupeHandler objects carrying ``add_prefixes``;
                ``None`` means there are none.
            prefix_delim: Separator between a duplicate prefix and the
                page stem.
        """
        self.re_oop = re.compile(re_oop)
        self.re_canon = re.compile(re_canon)
        self.icon_oop = icon_oop
        self.icon_canon = icon_canon
        self.icon_fanon = icon_fanon
        self.wiki_path = Path(wiki_path)
        self.encoding = encoding
        self.ref_prefix = ref_prefix
        # Normalise the ``None`` default to an empty list: get_ref_prefix
        # iterates this value, and iterating ``None`` raises TypeError.
        self.add_prefixes = [] if add_prefixes is None else add_prefixes
        self.prefix_delim = prefix_delim

    def format_line(self, page):
        """Return the DokuWiki table row for *page* (a ``Path``)."""
        link = doku_link(path_to_page(self.trim_path(page)))
        return f'| {self.get_ref(page)} | {self.get_icon(page)} {link} |'

    def get_icon(self, page):
        """Return the icon markup for *page*, chosen from its content.

        The whole file is read using ``self.encoding``.
        """
        with open(page, 'r', encoding=self.encoding) as file:
            file_content = file.read()
        # Out-of-print is tested first, so it wins when both markers exist.
        if self.re_oop.search(file_content) is not None:
            return self.icon_oop
        if self.re_canon.search(file_content) is not None:
            return self.icon_canon
        return self.icon_fanon

    def get_ref(self, page):
        """Return the RefNotes reference name for *page*.

        The name is ``ref_prefix`` + optional duplicate prefix + file stem.
        """
        prefix = self.get_ref_prefix(page)
        return self.ref_prefix + prefix + page.stem

    def get_ref_prefix(self, page):
        """Return the disambiguating prefix for *page*, or ``""``.

        A prefix is produced only when a handler exists for the page's file
        name and one of that handler's prefixes is a component of the
        page's parent path.
        """
        dupe = found_dupe(page, self.add_prefixes)
        if dupe is None:
            return ""
        # A handler without prefixes contributes nothing rather than failing.
        prefix = list_item_in_other_list(dupe.add_prefixes or (),
                                         page.parent.parts)
        if prefix is None:
            return ""
        return prefix + self.prefix_delim

    def trim_path(self, page):
        """Return *page* relative to the parent of ``self.wiki_path``.

        The result therefore still starts with the last component of
        ``wiki_path``.  ``Path.relative_to`` raises ``ValueError`` when
        *page* is not located under that parent.
        """
        return page.relative_to(self.wiki_path.parent)


class PageGetter:
    """Collect the page files that belong in the references database."""

    def __init__(self, works_dir=WORKS, works_glob=GLOB, name_ignores=IGNORES,
                 glob_exclude=GLOB_PUBLISHERS, re_exclude=RE_PUBLISHERS,
                 duplicates_file=DUPLICATES_FILE, duplicates_encoding=UTF8):
        """Store the search settings and load the duplicate handlers.

        Args:
            works_dir: Directory searched recursively for pages.
            works_glob: Glob used with ``Path.rglob`` to find pages.
            name_ignores: File names that are always skipped.
            glob_exclude: ``Path.match`` pattern of pages to skip in
                get_works(); ``None`` disables this filter.
            re_exclude: Regex matched against the parent directory in
                get_works_re(); ``None`` disables this filter.
            duplicates_file: YAML file describing duplicated page names.
            duplicates_encoding: Encoding of ``duplicates_file``.
        """
        self.works_dir = works_dir
        self.works_glob = works_glob
        self.name_ignores = name_ignores
        self.glob_exclude = glob_exclude
        # Always define the attribute so that get_works_re() can test it
        # instead of failing with AttributeError when no regex was given.
        self.re_exclude = (None if re_exclude is None
                           else re.compile(re_exclude))
        self.load_duplicates(duplicates_file, duplicates_encoding)

    def get_works(self):
        """Return the qualifying pages, excluding via ``Path.match``."""
        works = Path(self.works_dir).rglob(self.works_glob)
        return [page for page in works if page.name not in self.name_ignores
                and (self.glob_exclude is None
                     or not page.match(self.glob_exclude))
                and self.is_favored_namespace(page)]

    def get_works_re(self):
        """Return the qualifying pages, excluding via the compiled regex.

        This isn't currently being used but is left here in case the glob
        method in the other function is no longer sufficient.
        """
        works = Path(self.works_dir).rglob(self.works_glob)
        return [page for page in works if page.name not in self.name_ignores
                and (self.re_exclude is None
                     or self.re_exclude.match(str(page.parent)) is None)
                and self.is_favored_namespace(page)]

    def is_favored_namespace(self, page):
        """Return whether *page* survives duplicate-name filtering.

        Pages without a handler are always kept.  Pages with one are kept
        only when the favored namespace is a component of the parent path.
        """
        dupe = found_dupe(page, self.favor_namespaces)
        if dupe is None:
            return True
        return dupe.favor_namespace in page.parent.parts

    def load_duplicates(self, duplicates_file, duplicates_encoding):
        """Load the duplicate handlers and split them by kind.

        Sets ``self.add_prefixes`` and ``self.favor_namespaces``.  Each
        entry in the YAML document must be a mapping whose keys are
        DupeHandler field names.
        """
        with open(duplicates_file, encoding=duplicates_encoding) as file:
            # An empty document loads as None; treat it as "no handlers".
            fromfile = yaml.safe_load(file) or []
        dupes = [DupeHandler(**dupe) for dupe in fromfile]
        self.add_prefixes = [dupe for dupe in dupes
                             if dupe.add_prefixes is not None]
        self.favor_namespaces = [dupe for dupe in dupes
                                 if dupe.favor_namespace is not None]


def doku_link(page, text=None, delim=PIPE, open_link=OPEN_LINK,
              close_link=CLOSE_LINK):
    """Wrap *page* in DokuWiki link markup.

    When *text* is given it is appended to the page address after *delim*.
    """
    target = page if text is None else page + (delim + text)
    return open_link + target + close_link


def find_multiples():
    """Return the file names that occur more than once among the works.

    Names are reported in the order in which they were first encountered.
    """
    tally = defaultdict(int)
    for page in PageGetter().get_works():
        tally[page.name] += 1
    return [name for name, count in tally.items() if count > 1]


def found_dupe(page, dupes):
    """Return the first handler in *dupes* whose page_name is page.name.

    Returns ``None`` when no handler matches.
    """
    return next((handler for handler in dupes
                 if handler.page_name == page.name), None)


def get_ref_db(header=DB_HEADER):
    """Return the lines of the RefNotes references database page.

    The first element is *header*; one formatted row per page follows.
    """
    getter = PageGetter()
    maker = LineMaker(add_prefixes=getter.add_prefixes)
    rows = [header]
    rows.extend(maker.format_line(page) for page in getter.get_works())
    return rows


def list_item_in_other_list(look_fors, look_ins):
    """Return the first element of *look_fors* that is in *look_ins*.

    Returns ``None`` when the two collections share no element.
    """
    return next((candidate for candidate in look_fors
                 if candidate in look_ins), None)


def path_to_page(page_path, delim=COLON):
    """Turn a Path into a DokuWiki page address.

    The file suffix is dropped (the stem is used) and the remaining path
    components are joined with *delim*.
    """
    without_suffix = page_path.parent / page_path.stem
    return delim.join(without_suffix.parts)


def write_file(filename=OUTPUT_FILE):
    """Write the reference database to *filename*, one line per row.

    Args:
        filename: Path of the file to create or overwrite.
    """
    # Build the content before opening the file, so that a failure while
    # generating it does not leave an existing output file truncated.
    lines = get_ref_db()
    # Honour the caller's filename instead of always using OUTPUT_FILE.
    with open(filename, 'w', encoding=UTF8) as file:
        file.writelines(line + NEWLINE for line in lines)


# NOTE(review): this call sits at module level, so it runs whenever the file
# is imported as well as when it is executed as a script; importing the
# module therefore (re)writes the output file.  Consider guarding it with
# ``if __name__ == '__main__':`` if import-time writes are not wanted.
write_file()
