# -*- coding:utf-8 -*-
'''
Function for building whole packed version of highlight.js out of
pre-packed modules.
'''

import os
import shutil
import re
import argparse
import subprocess
import json
import codecs
from functools import partial

REPLACES = {
    'case_insensitive': 'cI',
    'lexemes': 'l',
    'contains': 'c',
    'keywords': 'k',
    'subLanguage': 'sL',
    'className': 'cN',
    'begin': 'b',
    'beginKeywords': 'bK',
    'end': 'e',
    'endsWithParent': 'eW',
    'illegal': 'i',
    'excludeBegin': 'eB',
    'excludeEnd': 'eE',
    'returnBegin': 'rB',
    'returnEnd': 'rE',
    'relevance': 'r',
    'variants': 'v',

    'IDENT_RE': 'IR',
    'UNDERSCORE_IDENT_RE': 'UIR',
    'NUMBER_RE': 'NR',
    'C_NUMBER_RE': 'CNR',
    'BINARY_NUMBER_RE': 'BNR',
    'RE_STARTERS_RE': 'RSR',
    'BACKSLASH_ESCAPE': 'BE',
    'APOS_STRING_MODE': 'ASM',
    'QUOTE_STRING_MODE': 'QSM',
    'PHRASAL_WORDS_MODE': 'PWM',
    'C_LINE_COMMENT_MODE': 'CLCM',
    'C_BLOCK_COMMENT_MODE': 'CBCM',
    'HASH_COMMENT_MODE': 'HCM',
    'NUMBER_MODE': 'NM',
    'C_NUMBER_MODE': 'CNM',
    'BINARY_NUMBER_MODE': 'BNM',
    'CSS_NUMBER_MODE': 'CSSNM',
    'REGEXP_MODE': 'RM',
    'TITLE_MODE': 'TM',
    'UNDERSCORE_TITLE_MODE': 'UTM',

    'beginRe': 'bR',
    'endRe': 'eR',
    'illegalRe': 'iR',
    'lexemesRe': 'lR',
    'terminators': 't',
    'terminator_end': 'tE',
}

CATEGORIES = {
    'common': [
        'apache', 'nginx',
        'java', 'cs', 'cpp', 'objectivec',
        'ini', 'diff', 'bash', 'makefile',
        'sql', 'php', 'ruby', 'python', 'perl',
        'css', 'xml', 'javascript', 'coffeescript', 'http', 'json',
        'markdown',
    ],
}

def lang_name(filename):
    return os.path.splitext(os.path.basename(filename))[0]

# This is used instead of plain `open` everywhere as there are apparently
# "some systems" that don't use utf-8 as the default system encoding.
# We should probably drop it in the better, brighter future.
def utf8_open(filename, mode='r'):
    return codecs.open(filename, mode, 'utf-8')

def mapnonstrings(source, func):
    result = []
    pos = 0
    quotes = re.compile('[\'"/]')
    while pos < len(source):
        match = quotes.search(source, pos)
        end = match.start() if match else len(source)
        result.append(func(source[pos:end]))
        pos = end
        if match:
            terminator = re.compile(r'[%s\\]' % match.group(0))
            start = pos
            pos += 1
            while True:
                match = terminator.search(source, pos)
                if not match:
                    raise ValueError('Unmatched quote')
                if match.group(0) == '\\':
                    pos = match.start() + 2
                else:
                    pos = match.start() + 1
                    result.append(source[start:pos])
                    break
    return ''.join(result)

def compress_content(tools_path, content, filetype='js'):
    if filetype == 'js':
        for s, r in REPLACES.items():
            content = mapnonstrings(content, partial(re.sub, r'\b%s\b' % s, r))
        content = re.sub(r'(block|parentNode)\.cN', r'\1.className', content)

    try:
        args = ['java', '-jar', os.path.join(tools_path, 'yuicompressor.jar'), '--type', filetype]
        p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    except FileNotFoundError as e:
        raise RuntimeError('Couldn\'t find "%s" which is required for compression to work. You can skip compression with the `-n` option.' % args[0]) from e
    p.stdin.write(content.encode('utf-8'))
    p.stdin.close()
    content = p.stdout.read().decode('utf-8')
    p.stdout.close()

    return content

def parse_header(filename):
    '''
    Parses possible language description header from a file. If a header is found returns it
    as dict, otherwise returns None.
    '''
    content = utf8_open(filename).read(1024)
    match = re.search(r'^\s*/\*(.*?)\*/', content, re.S)
    if not match:
        return
    headers = match.group(1).split('\n')
    headers = dict(h.strip().split(': ', 1) for h in headers if ': ' in h)
    return headers if 'Language' in headers else None

def language_filenames(src_path, languages):
    '''
    Resolves dependencies and returns the list of actual language filenames
    '''
    lang_path = os.path.join(src_path, 'languages')
    filenames = [f for f in os.listdir(lang_path) if f.endswith('.js')]
    headers = [parse_header(os.path.join(lang_path, f)) for f in filenames]
    infos = [(h, f) for h, f in zip(headers, filenames) if h]

    # Filtering infos based on list of languages and categories
    if languages:
        categories = {l for l in languages if l.startswith(':')}
        languages = set(languages) - categories
        categories = {c.strip(':') for c in categories}
        cat_languages = {l for c, ls in CATEGORIES.items() if c in categories for l in ls}
        languages |= cat_languages
        infos = [
            (i, f) for i, f in infos
            if lang_name(f) in languages
        ]

    def append(filename):
        if filename not in filenames:
            filenames.append(filename)

    filenames = []
    for info, filename in infos:
        if 'Requires' in info:
            requires = [r.strip() for r in info['Requires'].split(',')]
            for r in requires:
                append(r)
        append(filename)
    return [os.path.join(lang_path, f) for f in filenames]

def strip_read(filename):
    s = utf8_open(filename).read()
    pattern = re.compile(r'^\s*(/\*(.*?)\*/)?\s*', re.DOTALL)
    s = pattern.sub('', s)
    return s.strip()

def wrap_language(filename, content, compressed):
    '''
    Wraps a language file content for the browser build. The "compressed" parameter
    selects which wrapping code to use:

    - If compressed is False the function expects source files to be uncompressed and
      wraps them maintaining readability of the source.
    - If compressed is True the function expects source files to be already compressed
      individually and wraps them with the minimal code, effectively emulating
      what yuicompressor does.
    '''
    name = lang_name(filename)
    if compressed:
        content = content.rstrip(';')
        wrap = 'hljs.registerLanguage("%s",%s);'
    else:
        wrap = '\nhljs.registerLanguage(\'%s\', %s);\n'
    return wrap % (name, content)

def glue_files(hljs_filename, language_filenames, compressed):
    '''
    Glues files together for the browser build.
    '''
    if compressed:
        hljs = 'var hljs=%s;' % strip_read(hljs_filename)
        file_func = lambda f: utf8_open(f).read()
    else:
        hljs = 'var hljs = %s;\n' % strip_read(hljs_filename)
        file_func = strip_read
    return ''.join([hljs] + [wrap_language(f, file_func(f), compressed) for f in language_filenames])

def copy_docs(root, build_path):
    build_docs_path = os.path.join(build_path, 'docs')
    os.makedirs(build_docs_path)

    docs_path = os.path.join(root, 'docs')
    filenames = os.listdir(docs_path)
    for filename in filenames:
        if '.rst' in filename:
            shutil.copyfile(
                os.path.join(docs_path, filename),
                os.path.join(build_docs_path, filename)
            )

def build_browser(root, build_path, filenames, options, is_amd=False, need_copy_docs=True):
    src_path = os.path.join(root, 'src')
    tools_path = os.path.join(root, 'tools')

    print('Building %d files:\n%s' % (len(filenames), '\n'.join(filenames)))
    content = glue_files(os.path.join(src_path, 'highlight.js'), filenames, False)
    if is_amd:
        content = 'define(function() {\n%s\nreturn hljs;\n});' % content # AMD wrap

    print('Uncompressed size:', len(content.encode('utf-8')))
    if options.compress:
        print('Compressing...')
        content = compress_content(tools_path, content)
        print('Compressed size:', len(content.encode('utf-8')))
    utf8_open(os.path.join(build_path, 'highlight.pack.js'), 'w').write(content)

    if need_copy_docs:
        print('Copying docs...')
        copy_docs(root, build_path)

def build_amd(root, build_path, filenames, options):
    build_browser(root, build_path, filenames, options, True, False)

def build_node(root, build_path, filenames, options):
    src_path = os.path.join(root, 'src')
    os.makedirs(os.path.join(build_path, 'lib', 'languages'))

    print('Building %d files:' % len(filenames))
    for filename in filenames:
        print(filename)
        content = 'module.exports = %s;' % strip_read(filename)
        utf8_open(os.path.join(build_path, 'lib', 'languages', os.path.basename(filename)), 'w').write(content)
    filename = os.path.join(src_path, 'highlight.js')
    print(filename)
    core = 'var Highlight = %s;' % strip_read(filename)
    core += '\nmodule.exports = Highlight;'
    utf8_open(os.path.join(build_path, 'lib', 'highlight.js'), 'w').write(core)

    print('Registering languages with the library...')
    hljs = "var hljs = require('./highlight');"
    filenames = map(os.path.basename, filenames)
    for filename in filenames:
        hljs += '\nhljs.registerLanguage(\'%s\', require(\'./languages/%s\'));' % (lang_name(filename), filename)
    hljs += '\nmodule.exports = hljs;'
    utf8_open(os.path.join(build_path, 'lib', 'index.js'), 'w').write(hljs)
    if options.compress:
        print('Notice: not compressing files for "node" target.')

    print('Copying styles...')
    build_style_path = os.path.join(build_path, 'styles')
    src_style_path = os.path.join(src_path, 'styles')
    os.mkdir(build_style_path)
    styles = [os.path.join(src_style_path, f) for f in os.listdir(src_style_path) if f.endswith('.css')]
    for style in styles:
        print(style)
        shutil.copy(style, build_style_path)

    print('Copying over Metafiles...')
    filenames = ['LICENSE', 'README.md']
    for filename in filenames:
        source = os.path.join(root, filename)
        dest   = os.path.join(build_path, filename)
        shutil.copyfile(source, dest)

    print('Adding package.json...')
    package = json.load(utf8_open(os.path.join(root, 'package.json')))
    authors = utf8_open(os.path.join(root, 'AUTHORS.txt'))
    matches = (re.match('^- (?P<name>.*) <(?P<email>.*)>$', a) for a in authors)
    package['contributors'] = [m.groupdict() for m in matches if m]
    content = json.dumps(package, indent=2, ensure_ascii=False)
    utf8_open(os.path.join(build_path, 'package.json'), 'w').write(content)

    print('Copying docs...')
    copy_docs(root, build_path)

def build_cdn(root, build_path, filenames, options):
    src_path = os.path.join(root, 'src')
    tools_path = os.path.join(root, 'tools')
    if not options.compress:
        print('Notice: forcing compression for "cdn" target')
        options.compress = True

    build_browser(root, build_path, filenames, options, False, False)
    os.rename(os.path.join(build_path, 'highlight.pack.js'), os.path.join(build_path, 'highlight.min.js'))

    print('Compressing all languages...')
    lang_path = os.path.join(build_path, 'languages')
    os.mkdir(lang_path)
    all_filenames = language_filenames(src_path, [])
    for filename in all_filenames:
        print(filename)
        content = compress_content(tools_path, strip_read(filename))
        content = wrap_language(filename, content, True)
        utf8_open(os.path.join(lang_path, '%s.min.js' % lang_name(filename)), 'w').write(content)

    print('Compressing styles...')
    build_style_path = os.path.join(build_path, 'styles')
    src_style_path = os.path.join(src_path, 'styles')
    os.mkdir(build_style_path)
    styles = [lang_name(f) for f in os.listdir(src_style_path) if f.endswith('.css')]
    for style in styles:
        filename = os.path.join(src_style_path, '%s.css' % style)
        print(filename)
        content = compress_content(tools_path, utf8_open(filename).read(), 'css')
        utf8_open(os.path.join(build_style_path, '%s.min.css' % style), 'w').write(content)

def build(buildfunc, root, build_path, args):
    #build_path = os.path.join(root, 'build')
    if os.path.exists(build_path):
        shutil.rmtree(build_path)
    os.mkdir(build_path)
    filenames = language_filenames(os.path.join(root, 'src'), args.languages)
    buildfunc(root, build_path, filenames, args)
    print('Done.')

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Build highlight.js for various targets')
    parser.add_argument(
        'languages', nargs='*',
        help = 'language (the name of a language file without the .js extension) or :category (currently the only available category is ":common")',
    )
    parser.add_argument(
        '-n', '--no-compress',
        dest = 'compress', action = 'store_false', default = True,
        help = 'Don\'t compress source files. Compression only works for the "browser" target.',
    )
    parser.add_argument(
        '-t', '--target', dest = 'target',
        choices = ['browser', 'node', 'cdn', 'amd'], default = 'browser',
        help = 'Target format, default is "browser"',
    )
    args = parser.parse_args()
    buildfunc = locals()['build_%s' % args.target]
    root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    bpath = os.path.join(root, 'build_%s' % args.target)
    build(buildfunc, root, bpath, args)
