import os
import re
import sys
import argparse

class DocLineDB:
    """Registry of parsed docstring declarations, shared through a singleton.

    Entries are grouped by ``(class_name, method_name)``; each group holds the
    doc lines that share that name and have distinct doc strings. Only doc
    lines whose class is listed in ``exported_classes`` are stored.
    """

    __instance = None

    def __init__(self):
        # (class_name, method_name) -> list of doc lines registered under it
        self.entries = {}
        # Class names accepted by add(). Empty until a caller assigns the real
        # collection, so add() ignores everything until then.
        self.exported_classes = ()

    @staticmethod
    def get_instance():
        """Return the shared instance, creating it on first use."""
        if DocLineDB.__instance is None:
            DocLineDB.__instance = DocLineDB()
        return DocLineDB.__instance

    def add(self, doc_line):
        """Register ``doc_line`` unless it is filtered out or a duplicate.

        The doc line is ignored when its ``class_name`` is not in
        ``exported_classes``, or when another doc line with the same key
        already carries the same ``doc_string``.
        """
        if doc_line.class_name not in self.exported_classes:
            return
        key = (doc_line.class_name, doc_line.method_name)
        group = self.entries.setdefault(key, [])
        # only append if doc string not already present
        if all(known.doc_string != doc_line.doc_string for known in group):
            group.append(doc_line)

    def __str__(self):
        """Return one ``- <doc line>`` row per stored doc line."""
        return "".join(
            f"- {doc_line}\n"
            for doc_lines in self.entries.values()
            for doc_line in doc_lines
        )

    def cleanup(self):
        """Sort the entries by key and renumber the ``clean_suffix`` values.

        A group with a single doc line gets an empty ``clean_suffix``; in a
        group with several doc lines they are numbered "1", "2", ... in
        insertion order.
        """
        # Sort on the (class_name, method_name) tuple itself rather than on a
        # string built from reprs, so the order depends only on the names.
        self.entries = dict(sorted(self.entries.items(), key=lambda item: item[0]))

        for doc_lines in self.entries.values():
            if len(doc_lines) == 1:
                # a unique name needs no disambiguating suffix
                doc_lines[0].clean_suffix = ""
            else:
                for position, doc_line in enumerate(doc_lines, start=1):
                    doc_line.clean_suffix = f"{position}"

class DocLine:
    """A single docstring declaration captured by a regex match.

    The match is expected to expose five groups: the whole declaration, the
    class name, an optional method name, an optional ``_<suffix>`` and the
    doc string. Creating an instance also hands it to the ``DocLineDB``
    singleton.
    """

    def __init__(self, match):
        self.full_match = match.group(0)
        self.line = match.group(1)
        self.class_name = match.group(2)
        self.method_name = match.group(3) or ""
        # the suffix is stored without its leading underscore
        raw_suffix = match.group(4) or ""
        self.suffix = raw_suffix[1:] if raw_suffix.startswith("_") else raw_suffix
        # starts out equal to the original suffix; may be reassigned later
        self.clean_suffix = self.suffix
        self.doc_string = match.group(5) or ""
        DocLineDB.get_instance().add(self)

    def __str__(self):
        """Describe the entry together with its original and cleaned names."""
        if self.method_name != "":
            # an empty suffix simply renders as "()"
            label = f"{self.class_name}::{self.method_name}({self.suffix})"
        else:
            # documentation for class itself
            label = f"{self.class_name} (class)"
            if self.suffix != "":
                label += f" #{self.suffix}"
        return f"{label} -> {self.string_name()} -> {self.cleaned_string_name()}"

    def _name_with(self, suffix):
        """Build ``camitk_<class>[_<method>][_<suffix>]``, skipping empty parts."""
        name = f"camitk_{self.class_name}"
        for part in (self.method_name, suffix):
            if part:
                name += f"_{part}"
        return name

    def string_name(self):
        """Return the name built with the original suffix."""
        return self._name_with(self.suffix)

    def cleaned_string_name(self):
        """Return the name built with the cleaned suffix."""
        return self._name_with(self.clean_suffix)

def extract_class_names(directory):
    """Collect the class names bound with ``py::class_<camitk::...>``.

    Every regular file directly inside ``directory`` is scanned line by line
    (subdirectories are not visited). At most one class name is taken per
    line: the first occurrence of the pattern.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A set with the class names found (empty when none match).

    A file that cannot be opened or read is reported on standard output and
    skipped; the remaining files are still scanned.
    """
    # Matches py::class_<camitk::ClassName followed by '>', ',', ' ' or '/'
    pattern = re.compile(r'py::class_<camitk::([A-Za-z]+)(?:[>, /])')
    class_names = set()

    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        if not os.path.isfile(filepath):  # skip subdirectories
            continue
        try:
            # The pattern is pure ASCII, so undecodable bytes are replaced
            # rather than allowed to abort the scan of the whole file.
            with open(filepath, 'r', encoding='utf-8', errors='replace') as file:
                for line in file:
                    match = pattern.search(line)
                    if match:
                        class_names.add(match.group(1))
        except OSError as e:
            print(f"Error reading {filepath}: {e}")

    return class_names

def load_docstrings(input_file, exported_classes):
    """Parse the docstring declarations of ``input_file`` into the DocLineDB.

    Args:
        input_file: Path of the file to read; it is read entirely into memory.
        exported_classes: Collection of class names assigned to the database
            singleton as its ``exported_classes`` before any declaration is
            registered.

    Returns:
        None. The results are stored in ``DocLineDB.get_instance()``, whose
        ``cleanup()`` is called once all matches have been processed.
    """
    with open(input_file, 'r') as f:
        content = f.read()

    # Find all declarations and their R"doc(...)" blocks
    pattern = re.compile(
        r"(static const char \*mkd_doc_camitk_([A-Z][a-zA-Z]+)_*([a-zA-Z]+)?(_[0-9]+)? =\sR\"doc\((.*?)\)doc\";)",
        re.DOTALL
    )

    db = DocLineDB.get_instance()
    db.exported_classes = exported_classes

    for match in pattern.finditer(content):
        # Constructing a DocLine registers it with the database, so the
        # instance does not need to be kept here.
        DocLine(match)

    db.cleanup()

def save_cleaned_docstrings(output_file):
    """Write the cleaned docstrings held by the DocLineDB to ``output_file``.

    The file is written in this order:
      1. a fixed preamble (comment header, ``MKD_*`` / ``DOC`` macros and a
         GCC diagnostic pragma),
      2. the ``mkd_doc_camitk_module`` docstring, which lists each exported
         class that has exactly one class-level docstring, followed by the
         text of that docstring up to its first ".",
      3. one ``static const char *mkd_doc_<cleaned name>`` declaration per
         stored doc line, with a comment banner each time the class changes.

    Args:
        output_file: Path of the file to create or overwrite.

    NOTE(review): the preamble pushes a GCC diagnostic state but nothing
    written here pops it again - confirm whether a matching pop is expected
    at the end of the generated file.
    """
    db = DocLineDB.get_instance()
    with open(output_file, 'w') as f:
        f.write("""/*
  This file contains docstrings for use in the Python bindings.
  Do not edit! They were automatically extracted by pybind11_mkdoc and cleaned by CamiTK's clean-docstring.py script.
*/

#define MKD_EXPAND(x)                                      x
#define MKD_COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define MKD_VA_SIZE(...)                                   MKD_EXPAND(MKD_COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0))
#define MKD_CAT1(a, b)                                     a ## b
#define MKD_CAT2(a, b)                                     MKD_CAT1(a, b)
#define MKD_DOC1(n1)                                       mkd_doc_##n1
#define MKD_DOC2(n1, n2)                                   mkd_doc_##n1##_##n2
#define MKD_DOC3(n1, n2, n3)                               mkd_doc_##n1##_##n2##_##n3
#define MKD_DOC4(n1, n2, n3, n4)                           mkd_doc_##n1##_##n2##_##n3##_##n4
#define MKD_DOC5(n1, n2, n3, n4, n5)                       mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5
#define MKD_DOC7(n1, n2, n3, n4, n5, n6, n7)               mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                           MKD_EXPAND(MKD_EXPAND(MKD_CAT2(MKD_DOC, MKD_VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif


""")

        f.write("/* Cleaned docstrings for exported CamiTK core classes */\n")

        f.write("""static const char *mkd_doc_camitk_module =        
R"doc(
CamiTK Python Bindings
========================

This module provides Python bindings for CamiTK core functionalities.
It allows one to access core classes and methods from Action Python scripts.

**NOTE**

    This module is intended to be used within CamiTK Application context.
    It is meant to be used in Action Python scripts from inside a CamiTK application Python interpreter, and not as a standalone library.

**Available classes and functions**

""")
        for class_name in db.exported_classes:
            # The class-level docstring is stored under an empty method name.
            class_doc = db.entries.get((class_name, ""))
            if class_doc and len(class_doc) == 1:
                # Text up to the first ".", flattened onto one line. Computed
                # outside the f-string: reusing the enclosing quote character
                # or a backslash inside a replacement field is a syntax error
                # before Python 3.12.
                summary = class_doc[0].doc_string.split(".", 1)[0].replace("\n", " ")
                f.write(f"- {class_name}: {summary}.\n")
        f.write("\n\n")
        f.write("""
)doc";""")

        current_class = ""
        for doc_lines in db.entries.values():
            # Emit a banner whenever the entries move on to another class.
            if doc_lines[0].class_name != current_class:
                current_class = doc_lines[0].class_name
                f.write("\n")
                f.write("/* ----------------------------------------\n")
                f.write(f"   Docstrings for {doc_lines[0].class_name}\n")
                f.write("---------------------------------------- */\n")
                f.write("\n")
            for doc_line in doc_lines:
                cleaned_docstring = f'static const char *mkd_doc_{doc_line.cleaned_string_name()} = R"doc({doc_line.doc_string})doc";\n\n'
                f.write(cleaned_docstring)

if __name__ == "__main__":
    # Exactly two positional arguments are expected: the input and output paths.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python clean_cpp.py input_file.cpp output_file.cpp")
        sys.exit(1)
    input_file, output_file = cli_args

    # Keep only the classes bound in the files of the parent directory,
    # in sorted order.
    exported_classes = sorted(extract_class_names(".."))

    # Parse the input file, then write the cleaned result.
    load_docstrings(input_file, exported_classes)
    save_cleaned_docstrings(output_file)
    print(f"Cleaned file saved as {output_file}")