# Spanakopita: a tool for merging and maintaining GEDCOM files.
# Copyright (C) Niko Matsakis 2007
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
# 
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA

""" The Karpathos GEDCOM parser.  Reads in GEDCOM files as defined by
http://www.gendex.com/gedcom55/55gctoc.htm (GEDCOM Standard Release
v5.5).  Although not all the little dots and tiddles are supported, we
try to save anything we read in that we don't recognize so that it can
be output again later.

Produces a karpathos.Database."""

import sys, re, traceback
from datastructures import GEDCOMDatabase, GEDCOMError, isRef

class GEDCOMParseError(GEDCOMError):

    """ A GEDCOMError annotated with the number of the input line that
    was being processed when the problem was detected. """

    def __init__(self, msg, line_num):
        """ msg is the human-readable description of the problem;
        line_num is the line number to report alongside it. """
        GEDCOMError.__init__(self, msg)
        # Keep our own copies so that __str__ does not depend on how
        # the base class chooses to store its message.
        self.message = msg
        self.line_num = line_num

    def __str__(self):
        """ Renders the error as 'Parse error at line N: message'. """
        where = self.line_num
        what = self.message
        return "Parse error at line %d: %s" % (where, what)

class GEDCOMParseEvent:

    """ Encapsulates a single line from a GEDCOM file.  Each line is
    prefixed by a numeric level, an optional cross-referencing id
    which identifies the object being defined, and a label such as
    'INDI' or 'NOTE' telling us what type of data to expect.

    After construction the following attributes are set:

      level -- the leading level number, as an int
      id    -- the cross-reference token exactly as it appeared in the
               line (whatever isRef() accepted), or None if absent
      label -- the tag that follows the level (and id, if any)
      arg   -- the remainder of the line after the label, with
               surrounding whitespace stripped ('' if there is none)
    """

    def __init__(self, string):
        """ Splits 'string' into level, id, label and arg.

        Raises IndexError if the line has too few fields and ValueError
        if the level is not an integer. """

        # Split off at most "level, label, rest".  The remainder is taken
        # positionally rather than by searching the line for the label
        # text: a search would match the label inside an id such as
        # '@NOTE1@' (or inside the level) and yield a corrupted argument.
        fields = string.split(None, 2)
        self.level = int(fields[0])
        self.label = fields[1]
        remainder = fields[2:]

        if isRef(self.label):
            # The second field was really the id, so the label is the
            # third field and the argument is whatever follows that.
            self.id = self.label
            fields = string.split(None, 3)
            self.label = fields[2]
            remainder = fields[3:]
        else:
            self.id = None

        if remainder:
            self.arg = remainder[0].strip()
        else:
            self.arg = ''

class GEDCOMParser:

    """ Drives the parse of a GEDCOM file.  The parser keeps a stack of
    handler objects, one per nesting level currently open; each input
    line is offered to the handler on top of the stack via
    consumeEvent(), and whatever that returns is pushed as the handler
    for the lines nested beneath it.

    Attributes:

      refTable -- maps each cross-reference id seen on a line to the
                  handler that was created for that line
      stack    -- the open handlers; stack[0] is the database itself
      database -- the GEDCOMDatabase created by the latest parse() call
    """

    def __init__(self):
        self.refTable = {}
        self.stack = []
        self.database = None

    def parse(self, file):

        """ file should be any thing which we can iterate through line by
        line.  So an array of strings is fine, a file object is fine, whatever
        floats your boat.

        Empty and whitespace-only lines are skipped.  Returns the object
        at the bottom of the stack (the database).  Raises
        GEDCOMParseError, carrying the current line number, when a line
        is malformed, when a level number is out of range, or when a
        GEDCOMError or TypeError escapes from a handler. """

        # put the initial state onto the stack
        self.database = GEDCOMDatabase.alloc().initWithParser_(self)
        self.stack = [ self.database ]

        # pos is the 1-based number of the line being processed; it is
        # advanced for every line read, including skipped ones, so that
        # reported line numbers match the input.
        pos = 1
        try:
            for line in file:
                if line and line.strip():
                    try:
                        event = GEDCOMParseEvent(line)
                    except (ValueError, IndexError):
                        # Too few fields or a non-numeric level: report
                        # it as a parse problem with a line number.
                        raise GEDCOMError('Malformed line: %r'
                                          % line.strip())
                    self._handle_event(event)
                pos = pos + 1

            # When we reach end of file, unwind what's left of the stack
            while len(self.stack) > 1: self.pop_handler()
        except GEDCOMError:
            # sys.exc_info() is used instead of binding the exception in
            # the except clause so the syntax is valid on any Python.
            raise GEDCOMParseError(str(sys.exc_info()[1]), pos)
        except TypeError:
            traceback.print_exc()
            raise GEDCOMParseError(str(sys.exc_info()[1]), pos)

        return self.stack[0]

    def _handle_event(self, event):

        """ Processes one parsed line: validates its level, closes any
        handlers that are nested deeper than it, and hands it to the
        handler that is then on top of the stack.  Raises GEDCOMError
        if the level is out of range; the stack is left untouched in
        that case. """

        # A line may sit at most one level below the innermost open
        # handler.  stack[0] is the database, so the deepest legal
        # level is len(stack)-1.  Anything greater could never be
        # reached by popping and would otherwise drain the stack.
        max_level = len(self.stack) - 1
        if event.level < 0:
            raise GEDCOMError('Level number too low, exp 0-%d'
                              % max_level)
        if event.level > max_level:
            raise GEDCOMError('Level number too high, exp 0-%d'
                              % max_level)

        # Pop any frames required
        desiredlen = event.level + 1
        while len(self.stack) > desiredlen: self.pop_handler()

        # Pass the event to the desired receiver, and receive
        # a new object in response to contain its data
        newhandler = self.stack[-1].consumeEvent(self, event)
        self.stack.append(newhandler)

        # If the event had an ID, then save what they added to
        # the stack in response, and set a member of newhandler
        # to the id
        if event.id:
            self.refTable[event.id] = newhandler
            newhandler.databaseId = event.id[1:-1] # strip @ signs

    def pop_handler(self):

        """ Removes one frame from the parser stack, calling finalize()
        on the object """

        handler = self.stack.pop()
        handler.finalize()

