"""Essentially a recreation of PyParsing, just for my own benefit."""

import types, sys

DEFAULT = object() # used to detected default values sometimes

# ___________________________________________________________________________
# Public interface:
#

class NonterminalCollection(object):
    def __init__(self):
        self._inv_atoms = {}

    def __setattr__(self, attrname, attrval):
        if isinstance(attrval, Atom):
            self._inv_atoms[attrval] = attrname
        super(NonterminalCollection, self).__setattr__(attrname, attrval)

class Grammar(object):

    """
    This object, along with the public methods declared on Atom (below),
    embodies the public interface for making a grammar.

    It's often convenient, once you have constructed a grammar object,
    to assign a bunch of global variables to bound versions of its
    methods.

    You should use the attribute "nt" to store your nonterminals in.
    This has several advantages, but the biggest is that the debug
    output is much smarter.  We have pre-populated it with
    a number of standard things such as ualnum.

    All internal attributes of the grammar begin with a _.  It is the
    intention that you assign the definitions of your non terminals to
    public attributes on the grammar object.  Of course, you have to
    avoid overwriting any methods.  One advantage of doing this is
    that debug output can be improved by adding the names of the
    various terminals.
    """

    def __init__(self, name=None):
        self._name = name
        self.nt = NonterminalCollection()
        self.nt.udigits = AtomCharMatching(self, unicode.isdigit)
        self.nt.ualnum = AtomCharMatching(self, unicode.isalnum)
        self.nt.ualpha = AtomCharMatching(self, unicode.isalpha)
        self.nt.uupper = AtomCharMatching(self, unicode.isupper)
        self.nt.ulower = AtomCharMatching(self, unicode.islower)
        self.nt.uspace = AtomCharMatching(self, unicode.isspace)
        self.nt.nl = AtomCharMatching(self, lambda c: c == '\n', r"\n")
        self.nt.uspacenotnl = AtomCharMatching(self, _space_but_not_newline)
        self.nt.uanychar = AtomCharMatching(self, lambda c: True, ".")
        self.nt.nothing = AtomNothing(self)
        self._sep_atom = self.nt.uspace.rep()

    def set_default_sep(self, sep_atom):
        if sep_atom is not None:
            self._sep_atom = self.atom(sep_atom)
        else:
            self._sep_atom = None

    def pad(self, atom):
        " Allows default sep to come before and after "
        return self.fb_sep(atom).pb_sep()
    
    def fb_sep(self, atom):
        """ Returns 'atom', followed by the separator """
        if self._sep_atom:
            return self.atom([atom, self._sep_atom])[0]
        return self.atom(atom)

    def pb_sep(self, atom):
        """ Returns 'atom', preceded by the separator """
        if self._sep_atom:
            return self.atom([self._sep_atom, atom])[1]
        return self.atom(atom)

    # ______________________________________________________________________
    # Convenient functions and constants for creating grammars
    def atom(self, *atoms):
        " Magically decide what the user wanted. "

        # Check for atom(x, y, z) and make a sequence:
        if len(atoms) > 1:
            return AtomSeq(self, [self.atom(a) for a in atoms])

        # Is it an atom?
        first_atom = atoms[0]
        if isinstance(first_atom, Atom): return first_atom

        # Things we can convert directly to atoms:
        tatom = type(first_atom)
        if tatom == types.UnicodeType:
            return AtomFixed(self, first_atom)
        elif tatom == types.StringType:
            return AtomFixed(self, unicode(first_atom, 'ascii'))

        # Next check for lists or tuples we can convert recursively:
        if hasattr(first_atom, '__iter__'):
            return self.atom(*list(first_atom))

        # Finally give up
        raise TypeError('Cannot make an atom from type '+repr(tatom))

    def choice(self, *choices):
        " You can also use 'atom1 or atom2' "
        return AtomChoice(self, [self.atom(c) for c in choices])

    def char_in(self, charset):
        " Any character in charset "
        return AtomCharMatching(self,
                                lambda c: c in charset,
                                "[%s]" % (repr(charset),))

    def char_not_in(self, charset):
        " Any character but those in charset "
        return AtomCharMatching(self,
                                lambda c: c not in charset,
                                "[^%s]" % (repr(charset),))


    def stub(self):
        return AtomStub(self)
    
# ___________________________________________________________________________
# Public interface:
#
#   The idea is to use the atom() function or one of the pre-built
#   constants to get an Atom object, and then the overloaded operators
#   and/or functions to construct a larger parse tree.

def pretty_print_error(err, filenm):
    """
    given a ParseFailed exception, returns a string describing the cause.
    Uses filenm as the name of the input file.  For now doesn't print
    the nicest thing ever.
    """
    def linecol(pnt):
        """ returns a tuple of integer (line_num, col_num) for a given pnt
        in the input. """
        line_num = pnt.ustring.count(u'\n', 0, pnt.index) + 1
        if line_num == 1:
            return (line_num, pnt.index)
        return (line_num, pnt.index - pnt.ustring.rindex(u'\n', 0, pnt.index))

    def formaterr(err):
        line, col = linecol(err.pnt)
        return "%s:%d:%d: %s" % (filenm, line, col, err.msg)

    all_errs = []
    def add_err(err):
        if err.cause: add_err(err.cause)
        all_errs.append(err)
    add_err(err)

    return "\n".join([formaterr(e) for e in all_errs])

class Atom(object):

    def __init__(self, grammar):
        assert isinstance(grammar, Grammar)
        self.grammar = grammar

    def _atom(self, *items):
        return self.grammar.atom(*items)

    def parse(self, text):
        global debug_data
        try:
            debug_wind()
            points = [None] * (len(text)+1)
            pnt = PackratPoint(points, text, 0)
            parsed_val, remainder = pnt.parsed_as(self)
            if remainder.eos():
                return parsed_val

            # Try to find the best explanation for why we failed to
            # parse the string: (hokey)
            max_err = None
            max_idx = None
            for atom, res in remainder.as_table.items():
                if isinstance(res, ParseFailed):
                    res_idx = res.cause_pnt().index
                    if max_idx is None or res_idx > max_idx:
                        max_err = res
                        max_idx = res_idx

            if not max_err:
                raise ParseFailed(
                    'Unconsumed text at end of string: %r' % (
                    remainder.ustring[remainder.index:],),
                    remainder,
                    None)
            else:
                raise max_err
        finally:
            debug_unwind(text)

    # Implementing choice:

    def __or__(self, alternative):
        " Note: has if-else semantics "
        return AtomChoice(self.grammar, [self, self._atom(alternative)])

    def __ror__(self, alternative):
        " Note: has if-else semantics "
        return atom(alternative) / self

    # Repetition and optional:

    def pad(self):
        " Allows default sep to come before and after "
        return self.pb_sep().fb_sep()
    
    def pb_sep(self):
        " Returns self preceded by the grammar separator "
        assert isinstance(self.grammar, Grammar)
        return self.grammar.pb_sep(self)
        
    def fb_sep(self):
        " Returns self followed by the grammar separator "
        assert isinstance(self.grammar, Grammar)
        return self.grammar.fb_sep(self)

    def plus(self, sep=DEFAULT):
        return self.rep(min=1, sep=sep)

    def rep(self, min=0, max=sys.maxint, sep=DEFAULT):
        "Repeats, separated by some separator (by default, use grammar sep)"

        # handle some pathological cases:
        if min > max:
            return self.grammar.nt.nothing
        if min == max == 1:
            return self
        if max == 1:
            return self.opt()

        # handle case of no separator:
        if sep is None:
            return AtomRepeat(self.grammar, self, min, max)

        # otherwise, make something like self { sep self }:
        if sep is DEFAULT: sepditem = self.pb_sep()
        else: sepditem = self._atom([sep, self])[1]
        seq = self._atom([
            self,
            AtomRepeat(self.grammar, sepditem, min-1, max-1)
            ]).prepend_item_list()

        # but if we are allowed to match none, have to make it optional
        if min == 0:
            return seq.opt(defval=[])
        return seq

    def opt(self, defval=None):
        "Makes this entry optional; will yield defval if not present"
        return AtomOpt(self.grammar, self, defval)

    def with_trailing(self, atom):
        return self.grammar.atom([self, atom])[0]

    def with_trailing_space(self):
        g = self.grammar
        return g.atom([self, g.nt.uspace.rep()])[0]

    # Separated:
    #
    #    a & b indicates first a, then dflt seperator, then b.
    #
    #    If the dflt sep is undesirable, you can use: atom(a, b)

    def __and__(self, alternative):
        " a & b matches first a then b, skipping whitespace in between "
        g = self.grammar
        return g.atom([self.fb_sep(), g.atom(alternative)])

    def __rand__(self, alternative):
        " a & b matches first a then b "
        return self._atom(alternative) & self

    # Predicates:

    def but_not(self, atom):
        " Matches self, unless atom matches! "
        atom = self._atom(atom)
        return self._atom([atom.pred_not(), self])[1]

    def but_not_char_in(self, chars):
        " Matches self, unless it begins with one of the characters in chars "
        atom = self.grammar.char_in(chars)
        return self.but_not(atom)
    
    def pred(self):
        return AtomCondition(self.grammar, self)

    def pred_not(self):
        return AtomNotCondition(self.grammar, self)

    def if_not_followed_by(self, atomp):
        " Checks that self is not followed by sep / atomp "
        atomp = self._atom(atomp)
        return self._atom([
            self,
            (u"" & atomp).pred_not()])[0]

    # Affecting the parse tree that results:

    def suppress(self):
        "Makes this atom produce None, usually used with AtomSeq's filter()"
        return AtomSuppressed(self.grammar, self)

    def as_(self, ctor):
        "Invokes ctor(val) to produce the final value that results"
        return AtomConstructed(self.grammar, self, ctor)

    def as_string(self):
        "Invokes ''.join() on the value"
        return self.as_(join_string_func)

    def prepend_item_list(self):
        "Use when the val is (item, list).  Returns [item]+list"
        return self.as_(prepend_item_list_func)

    def append_list_item(self):
        "Use when the val is (list, item).  Returns list+[item]"
        return self.as_(append_list_item_func)

    def __getitem__(self, arg):
        """Post-processes the value by doing [arg]; if arg is a tuple,
        extracts all the subitems (i.e., [1,2,3,4][0,2] == [1,3]) """
        if hasattr(arg, '__iter__'):
            indices = list(arg)
            def extract_indices(val):
                res = []
                for i in indices:
                    res.append(val[i])
                return res
            return self.as_(extract_indices)
        return self.as_(lambda val: val[arg])

    def __repr__(self):
        try:
            return self.grammar.nt._inv_atoms[self]
        except KeyError:
            return self._pvt_repr()

    def _pvt_repr(self):
        raise NotImplemented

class AtomPrivate(Atom):
    def wrap(self, val):
        "What we call to post process the value"
        return val

class AtomFixed(AtomPrivate):
    "Parses a fixed string"
    def __init__(self, grammar, string):
        AtomPrivate.__init__(self, grammar)
        self.ustring = string
    def _pvt_repr(self):
        return repr(self.ustring)

class AtomCharMatching(AtomPrivate):
    "Parses a char for which self.cfunc returns True"
    def __init__(self, grammar, cfunc, desc = None):
        AtomPrivate.__init__(self, grammar)
        self.cfunc = cfunc
        self.desc = desc if desc is not None else self.cfunc.__name__
        assert isinstance(self.desc, str)

    def _pvt_repr(self):
        return self.desc

    def rep(self, min=0, max=sys.maxint, sep=DEFAULT):
        "When you repeat this, you get a string result (without def sep!)"
        if sep == DEFAULT: sep = None
        return AtomPrivate.rep(self, min=min, max=max, sep=sep).as_string()

    def __or__(self, other):
        "a | b works on CharMatching.  b can be unicode or a function."
        if isinstance(other, unicode):
            return AtomCharMatching(
                self.grammar,
                lambda c: c in other or self.cfunc(c),
                "(%s|[%r])" % (self.desc, other))
        if isinstance(other, str):
            return self | unicode(other)
        return AtomPrivate.__or__(self, other)

    def inv(self):
        return AtomCharMatching(self.grammar,
                                lambda c: not self.cfunc(c),
                                "[^%s]" % (self.desc,))

class AtomStub(AtomPrivate):
    """
    Use to create a forward reference like so:
    primary = ParseStub()
    additive = primary & '+' & additive
    primary.atom = '(' & primary & ')'
    """

    def __init__(self, grammar):
        AtomPrivate.__init__(self, grammar)
        self.atom = None

    def _pvt_repr(self):
        return repr(self.atom) if self.atom else "AtomStub(None)"

class AtomRepeat(AtomPrivate):
    "Parses self.atom repeatedly, yielding a list"
    def __init__(self, grammar, atom, min, max):
        AtomPrivate.__init__(self, grammar)
        self.atom = atom
        self.min = min if min >= 0 else 0
        self.max = max

    def _pvt_repr(self):
        if self.min == 0 and self.max == sys.maxint:
            return repr(self.atom) + "*"
        elif self.min == 1 and self.max == sys.maxint:
            return repr(self.atom) + "+"
        return repr(self.atom) + "{%d,%d}" % (self.min, self.max)

class AtomSeq(AtomPrivate):
    """Atoms a sequence of atoms, all must parse for it to succeed,
    yields a list"""
    def __init__(self, grammar, atoms):
        AtomPrivate.__init__(self, grammar)
        self.atoms = list(atoms)

    def _pvt_repr(self):
        return repr(self.atoms)

    def __and__(self, alternative):
        " Avoid forming an annoying tree-like structure "
        return AtomSeq(
            self.grammar,
            self.atoms[:-1] +
            [self.atoms[-1].fb_sep()] +
            [self._atom(alternative)])

    def filter(self, filter=None):
        "Filters the result after we finished parsing; default excludes None"
        return AtomFiltered(self.grammar, self, filter)

    def select_index(self, index):
        return self.as_(lambda c: c[index])

class AtomOpt(AtomPrivate):
    "Parses self.atom if it applies, otherwise yields None"
    def __init__(self, grammar, atom, defval):
        AtomPrivate.__init__(self, grammar)
        self.atom = atom
        self.defval = defval

    def _pvt_repr(self):
        return repr(self.atom) + "?"

class AtomChoice(AtomPrivate):
    "Parses the first of self.atoms which apply"
    def __init__(self, grammar, atoms):
        AtomPrivate.__init__(self, grammar)
        assert all([isinstance(x, Atom) for x in atoms])
        self.atoms = atoms
    def otherwise(self, alternative):
        return AtomChoice(self.atoms + [atom(alternative)])
    def _pvt_repr(self):
        res = [repr(choice) for choice in self.atoms]
        return '(%s)' % (' | '.join(res),)

class AtomCondition(AtomPrivate):
    "Parses only if self.atom can be parsed here, but consumes nothing"
    def __init__(self, grammar, atom):
        AtomPrivate.__init__(self, grammar)
        self.atom = atom
    def _pvt_repr(self):
        return 'If(%r)' % (self.atom,)

class AtomNotCondition(AtomPrivate):
    "Parses only if self.atom cannot be parsed here, but consumes nothing"
    def __init__(self, grammar, atom):
        AtomPrivate.__init__(self, grammar)
        self.atom = atom
    def _pvt_repr(self):
        return '!If(%r)' % (self.atom,)

class AtomNothing(AtomPrivate):
    def _pvt_repr(self):
        return 'AtomNothing'

# Atoms that just affect the parsed result:

class AtomWrapper(AtomPrivate):
    def __init__(self, grammar, atom):
        AtomPrivate.__init__(self, grammar)
        self.atom = atom
    def _pvt_repr(self):
        return repr(self.atom)

class AtomSuppressed(AtomWrapper):
    def wrap(self, val):
        return None

class AtomConstructed(AtomWrapper):
    def __init__(self, grammar, atom, ctor):
        AtomWrapper.__init__(self, grammar, atom)
        self.ctor = ctor
    def wrap(self, val):
        return self.ctor(val)

class AtomJoinString(AtomWrapper):
    def wrap(self, val):
        return ''.join(val)

class AtomFiltered(AtomWrapper):
    def __init__(self, grammar, atom, filter):
        AtomWrapper.__init__(self, grammar, atom)
        self.filter = filter

    def wrap(self, val):
        if not self.filter:
            return [v for v in val if v is not None]
        return [v for v in val if self.filter(v)]

# ___________________________________________________________________________
# Small utility functions we use various places

def _space_but_not_newline(c):
    return c != u'\n' and unicode.isspace(c)

def join_string_func(val):
    return ''.join(val)

def prepend_item_list_func(val):
    itm, lst = val
    return [itm] + lst

def append_list_item_func(val):
    lst, itm = val
    return lst + [itm]

# ___________________________________________________________________________
# Debugging
#
# You can configure scfparse to print debugging information to the
# console.  Just call debug_print(), or set the "debug" to a function
# which accepts a single string (like sys.stdout.write).  For better
# printouts, set debug_names to be the globals() dictionary from the
# module that defines your grammar.  We will then try to use the name
# of the variable storing the atom rather than the repr() of the atom
# in the printouts.
#
# The idea is that you can set these in your py.test module which loads the
# parser.

debug = None                            # Function which accepts a debug str
debug_names = None                      # Str->Atom dictionary

debug_data = None                       # Temporary debug data: do not set
inv_debug_names = None                  # Temporary debug data: do not set

def debug_print(names=None):
    " Enables debug output using 'print' "
    global debug, debug_names
    def printf(str):
        if str[-1] == "\n": str = str[:-1]
        print str
    debug = printf
    debug_names = names

def _debug_record(pnt, atom):
    return [pnt, atom, None, []]

def debug_start(pnt, atom):
    global debug_data
    if not debug: return
    parent = debug_data
    child = _debug_record(pnt, atom)
    parent[3].append(child)
    debug_data = child
    return parent

def debug_result(res):
    global debug_data
    if not debug: return
    debug_data[2] = res

def debug_end(parent):
    global debug_data
    if not debug: return
    debug_data = parent

def debug_wind():
    global debug_data
    if not debug: return
    debug_data = _debug_record(None, None)

def debug_repr(atom):
    "Looks up atom in debug_names dictionary; if not found, returns repr()"
    global inv_debug_names
    if debug:
        if not inv_debug_names or inv_debug_names[0] is not debug_names:
            inv_debug_names = (debug_names, {})
            if debug_names:
                for nm, val in debug_names.items():
                    if isinstance(val, Atom):
                        inv_debug_names[1][val] = nm

        # Find the name of this atom from the module dict we were given
        if atom in inv_debug_names[1]:
            return inv_debug_names[1][atom]
    return atom._pvt_repr()

def debug_unwind(txt):
    global debug_data
    if not debug: return
    debug("Parsing:\t%r\n" % (txt,))
    def helper(data, indent):
        debug("%s\tPnt=%r\tAs=%r\tRes=%r\n" % (
            indent * "\t", data[0], data[1], data[2]))

        for child in data[3]:
            helper(child, indent+1)
    helper(debug_data, 0)

# ___________________________________________________________________________
# Actual Packrat Parsing Algorithm

class ParseFailed(Exception):
    def __init__(self, msg, pnt, cause=None):
        Exception.__init__(self, msg)
        self.msg = msg
        self.pnt = pnt
        self.cause = cause
    def cause_pnt(self):
        if not self.cause: return self.pnt
        return self.cause.cause_pnt()
    def __repr__(self):
        due_to = " due to %r" % (self.cause,) if self.cause else ""
        return "ParsedFailed(%s at %r%s)" % (self.msg, self.pnt, due_to)

class PackratPoint(object):
    """
    Stores data about a point in the input string, and different ways
    we have tried to interpret it.
    """
    def __init__(self, points, string, index):
        self.points = points
        self.ustring = string
        self.index = index
        self.points[self.index] = self
        assert self.index <= len(self.ustring)
        # maps Atom instances to (parsed, PackratPoint) pairs, or None
        # if the parsing failed.  The PackratPoint indicates the remaining
        # unconsumed input.
        self.as_table = {}

    def __repr__(self):
        return "(%d,%r)" % (
            self.index, self.ustring[self.index:self.index+3])

    def parsed_as(self, atom):
        debug_tag = debug_start(self, atom)
        try:
            if atom not in self.as_table:
                res = None
                try:
                    val, next_pnt = atom._parse(self)
                    res = atom.wrap(val), next_pnt
                except ParseFailed, e:
                    res = e
                self.as_table[atom] = res
            else:
                res = self.as_table[atom]

            debug_result(res)

            if isinstance(res, ParseFailed):
                raise res

            return res
        finally:
            debug_end(debug_tag)

    def advance(self, amnt):
        new_index = self.index+amnt
        if not self.points[new_index]:
            return PackratPoint(self.points, self.ustring, new_index)
        return self.points[new_index]

    def eos(self):
        return self.index >= len(self.ustring)

def insert_into(cls):
    def dec(func):
        setattr(cls, func.__name__, func)
        return func
    return dec

@insert_into(AtomFixed)
def _parse(self, pnt):
    if pnt.ustring.startswith(self.ustring, pnt.index):
        return (self.ustring, pnt.advance(len(self.ustring)))
    raise ParseFailed("Expected: %r but found %r" % (
        self.ustring, pnt.ustring[pnt.index:pnt.index+len(self.ustring)]), pnt)


@insert_into(AtomCharMatching)
def _parse(self, pnt):
    if not pnt.eos():
        c = pnt.ustring[pnt.index]
        if self.cfunc(c):
            return (c, pnt.advance(1))
        raise ParseFailed("Expected character matching %r" % (self.desc,), pnt)
    raise ParseFailed("Expected non-empty string", pnt)

@insert_into(AtomRepeat)
def _parse(self, pnt):
    parsed_list = []
    while len(parsed_list) < self.max:
        # Try to parse an instance of self.atom:
        try:
            parsed_item, post_pnt = pnt.parsed_as(self.atom)
            parsed_list.append(parsed_item)
            pnt = post_pnt
        except ParseFailed, e:
            if len(parsed_list) >= self.min:
                return (parsed_list, pnt)
            raise ParseFailed("Found %d instances of %s, but needed %d-%d" % (
                len(parsed_list), repr(self), self.min, self.max), pnt, e)
    return (parsed_list, pnt)

@insert_into(AtomSeq)
def _parse(self, pnt):
    parsed_list = []
    for atom in self.atoms:
        parsed_item, pnt = pnt.parsed_as(atom)
        parsed_list.append(parsed_item)
    return (parsed_list, pnt)

@insert_into(AtomOpt)
def _parse(self, pnt):
    try:
        return pnt.parsed_as(self.atom)
    except ParseFailed:
        return (self.defval, pnt)

@insert_into(AtomChoice)
def _parse(self, pnt):
    max_err = None
    for atom in self.atoms:
        try:
            return pnt.parsed_as(atom)
        except ParseFailed, e:
            # track the error which got the farthest into our input as the
            # most likely thing that the user meant to type
            if not max_err or e.cause_pnt().index > max_err.cause_pnt().index:
                max_err = e
    raise ParseFailed("No choice matched", pnt, max_err)

@insert_into(AtomCondition)
def _parse(self, pnt):
    pnt.parsed_as(self.atom)
    return (None, pnt)

@insert_into(AtomNotCondition)
def _parse(self, pnt):
    try:
        pnt.parsed_as(self.atom)
    except ParseFailed:
        return (None, pnt)
    raise ParseFailed("", pnt)

@insert_into(AtomStub)
def _parse(self, pnt):
    return pnt.parsed_as(self.atom)

@insert_into(AtomWrapper)
def _parse(self, pnt):
    return pnt.parsed_as(self.atom)

@insert_into(AtomNothing)
def _parse(self, pnt):
    raise ParseFailed("Nothing", pnt)