#! /usr/bin/env python3
import os
import re
import argparse
import sys
from tokenize import tokenize, tok_name, INDENT, DEDENT, NAME

from bython import VERSION_NUMBER
from bython.logger import Logger


def ends_in_py(word):
    """
    Tell whether a filename carries the '.py' extension

    Args:
        word (str):     Filename to inspect

    Returns:
        boolean: True when the last characters of 'word' are '.py',
        otherwise False
    """
    extension = ".py"

    # Compare the tail of the name (as long as the extension) to the extension
    tail = word[-len(extension):]
    return tail == extension


def change_file_name(name, outputname=None):
    """
    Derive the name of the Bython file that belongs to a Python file

    A trailing .py is swapped for .by. Names without a .py ending simply get
    .by appended.

    Args:
        name (str):         Filename to convert
        outputname (str):   Optional. When given, it is returned untouched and
                            'name' is ignored.

    Returns:
        str: 'outputname' if it was specified, otherwise 'name' rewritten to
        end in .by
    """
    # An explicitly requested name always wins
    if outputname is not None:
        return outputname

    # Drop the .py ending (if there is one) before adding .by
    stem = name[:-3] if ends_in_py(name) else name
    return stem + ".by"


def _split_top_level(text, separator):
    """
    Split 'text' on 'separator', skipping separators that sit inside string
    quotes or inside (), [] or {} brackets.

    Args:
        text (str):         Text to split
        separator (str):    Single character to split on

    Returns:
        list: The pieces of 'text' found between the top-level separators
    """
    pieces = []
    current = []
    depth = 0
    quote = None
    escaped = False

    for char in text:
        if quote is not None:
            # Inside a string literal: only look for the closing quote
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == quote:
                quote = None
        elif char in "\"'":
            quote = char
        elif char in "([{":
            depth += 1
        elif char in ")]}":
            # Never go below zero on unbalanced input
            depth = max(depth - 1, 0)
        elif char == separator and depth == 0:
            pieces.append("".join(current))
            current = []
            continue

        current.append(char)

    pieces.append("".join(current))
    return pieces


def translate_dictionary(definition_string):
    """
    Translate one specific dictionary definition from using {} to using dict()

    Only the leading '=' and the outermost braces are removed, so '=' signs,
    braces, commas and colons inside values (strings, lists, calls, nested
    literals) are left alone. Nested values are copied verbatim.

    Args:
        definition_string (str):    A string with a dictionary definition
                                    (including '=' beforehand)

    Returns:
        str: An equivalent definition (including '='), but using the
        dict()-constructor instead of { and }

    Raises:
        ValueError: If an entry is not of the form 'key: value'
    """
    # Remove the = before the definition (and only that one)
    inner = re.sub(r"^\s*=\s*", "", definition_string).strip()

    # Remove the outermost { and }
    if inner.startswith("{"):
        inner = inner[1:]
    if inner.endswith("}"):
        inner = inner[:-1]

    # Join the lines; a space keeps tokens from separate lines apart
    inner = re.sub(r"\s*\n\s*", " ", inner)

    # Convert each pair to a tuple definition
    tuples = []
    for pair in _split_top_level(inner, ","):
        pair = pair.strip()
        if pair == "":
            continue

        # The first top-level colon separates key and value; later ones
        # (e.g. in a lambda) belong to the value
        parts = _split_top_level(pair, ":")
        if len(parts) < 2:
            raise ValueError("not a 'key: value' pair: %s" % pair)

        key = parts[0].strip()
        value = ":".join(parts[1:]).strip()
        tuples.append("(%s, %s)" % (key, value))

    if not tuples:
        return "= dict()"

    return "= dict([%s])" % ", ".join(tuples)



def pre_reverse_parse(infile_string):
    """
    Perform some necessary changes to the file before reverse parsing can ensue.
    This includes changing dict definitions to use the dict()-constructor
    instead of { and }.

    Args:
        infile_string (str):    A string containing the whole python source

    Returns:
        str: The source with changes to dictionary definitions
    """
    dictionaries = re.findall(r"=\s*{\s*(?:.+\s*:\s*.+(?:\s*,\s*)?)*\s*}", infile_string)

    for dictionary in dictionaries:
        # Replace the matched text literally. Feeding it to re.sub would treat
        # characters such as ( [ + . in the dictionary as regex syntax, and
        # backslashes in the translation as escape sequences.
        infile_string = infile_string.replace(dictionary, translate_dictionary(dictionary))

    return infile_string


def reverse_parse(filename, outputname):
    """
    Changes a Python file to a Bython file

    All semantically significant whitespace resulting in a change
    in indentation levels will have a matching opening or closing
    curly-brace.

    The input is decoded as UTF-8 and the result is written as UTF-8. Both
    files are closed before the function returns, also when an error occurs.

    Args:
        filename (str):     Path of file to parse
        outputname (str):   Path of destination file
    """
    # Open the file as bytes: tokenize needs bytes, the lines are decoded here
    with open(filename, "rb") as infile:
        # Store and format the contents for later modification
        inlines = [line.decode("utf-8").rstrip() for line in infile.readlines()]

        # Tokenize the same file from the start
        infile.seek(0)
        tokens = list(tokenize(infile.readline))

    # Stores a list of tuples for INDENT/DEDENT
    # (token, line_number, position_in_line)
    indent_tracker = []

    # Track line by line the indentation position.
    # Populates indent_tracker, using indent_levels as a stack
    # to properly record whitespace for each bython brace.
    indent_levels = []
    position = 0
    line_of_last_name_token = 0
    max_indent = 0

    for token in tokens:
        current_line = token.start[0]

        # Remember the column of the first NAME token on each line; an INDENT
        # seen later still carries the column of the line that opened the block
        if token.exact_type == NAME and line_of_last_name_token != current_line:
            line_of_last_name_token = current_line
            position = token.start[1]

        if token.exact_type == INDENT:
            indent_levels.append(position)
            indent_tracker.append((INDENT, current_line, position))

        if token.exact_type == DEDENT:
            # The closing brace gets the column recorded for its opening brace
            indent_tracker.append((DEDENT, current_line, indent_levels.pop()))

        max_indent = max(max_indent, len(indent_levels))

    # Add curly braces where necessary to create our bython file.
    # Token lines are 1-based (hence the -1) and every brace already inserted
    # shifts the following lines down by one (hence 'extra').
    for extra, (token, index, position) in enumerate(indent_tracker):
        brace = "{" if token == INDENT else "}"
        inlines.insert(index + extra - 1, " " * position + brace)

    # Combine lines to one string
    entire_file = "\n".join(inlines)

    # Quick fix to solve problem with remaining colons, should be reworked to
    # something more elegant. Note that this removes every colon in the text
    # (with the whitespace before it), not only those ending a block header.
    entire_file = re.sub(r"\s*:", "", entire_file)

    # Another quick fix solving the problem with inline comments, (see issue 14
    # on github). This also makes the indentation style Java-style instead of
    # Allman-style as before (see https://en.wikipedia.org/wiki/Indentation_style)
    entire_file = re.sub(r"((?:\s*#\s*.*\n?)*)?\n\s*{", r" {\1", entire_file)

    # Fix issue with where the closing braces are placed. Search for places where
    # the following three things appear: (newlines) ((indented) curly brace) (newline)
    # and invert the order.
    for _ in range(max_indent):
        entire_file = re.sub(r"(\n+)([ \t]*})(\n)", r"\3\2\1", entire_file)

    # Save the file; the with-block makes sure it is flushed and closed
    with open(outputname, "w", encoding="utf-8") as outfile:
        print(entire_file, file=outfile)


def main():
    """
    Translate python to bython

    Command line utility and Python module for translating python code
    to bython code, adding curly braces at semantically significant
    indentations.

    The input is first pre-parsed into a temporary file next to it (named
    <input>.py2bytemp), which is then reverse parsed into the output file.
    The temporary file is removed afterwards, also when the translation fails.
    """
    argparser = argparse.ArgumentParser("py2by",
        description="py2by translates python to bython",
        formatter_class=argparse.RawTextHelpFormatter
    )
    argparser.add_argument("-v", "--version",
        action="version",
        version="py2by is a part of Bython v%s\nMathias Lohne and Tristan Pepin 2017" % VERSION_NUMBER)
    argparser.add_argument("-o", "--output",
        type=str,
        help="specify name of output file",
        nargs=1)
    argparser.add_argument("input", type=str,
        help="python file to translate",
        nargs=1)

    cmd_args = argparser.parse_args()

    logger = Logger()

    # nargs=1 makes argparse deliver one-element lists
    input_name = cmd_args.input[0]
    temp_name = input_name + ".py2bytemp"

    try:
        outputname = cmd_args.output[0] if cmd_args.output is not None else change_file_name(input_name, None)

        # reverse_parse decodes the temporary file as UTF-8, so read and write
        # with that encoding instead of the platform default
        with open(input_name, "r", encoding="utf-8") as infile:
            infile_string = infile.read()

        pre_parsed = pre_reverse_parse(infile_string)

        try:
            with open(temp_name, "w", encoding="utf-8") as tempoutfile:
                tempoutfile.write(pre_parsed)

            reverse_parse(temp_name, outputname)

        finally:
            # Never leave the temporary file behind, even if parsing failed
            if os.path.exists(temp_name):
                os.remove(temp_name)

    except FileNotFoundError:
        # NOTE(review): unlike the branch below, this one does not call
        # sys.exit(1) itself - confirm whether that is intended
        logger.log_error("No file named %s" % cmd_args.input[0])

    except Exception as e:
        logger.log_error("Unexpected error: %s" % str(e))
        sys.exit(1)


# Run the command line tool only when this file is executed as a script
if __name__ == '__main__':
    main()
