import sys, re, string
from karpathos import Person, FamilyGraph, BadFormatError, builddebug

"""
papa files are the impromptu format defined by my dad.  Use

FamilyGraph loadPapaFile (file)

to load genealogical data from one of these files.
"""

##

def loadPapaFile (file):
    """
    Creates a PapaFamilyGraph, has it read the papa file object
    `file` (anything with a readline() method), and returns the
    populated graph.
    """
    # The graph does the actual loading itself; this module-level
    # entry point is kept for historical reasons.
    family_graph = PapaFamilyGraph()
    family_graph.loadFromFile (file)
    return family_graph

##

class PapaPerson(Person):
    """
    Represents a person loaded from a papa file.  Basically, this
    class has lots of routines to try and extract data from the
    measly comments that a papa file provides for us.

    We currently extract patterns of the form:

    born + date -> indicates the birthday
    died -> indicates the person is dead
    died + age XX -> indicates the person died at age XX
    died + date -> indicates the day the person died
    XXX-YYY-ZZZZ -> phone number

    After construction the results are available as:

    birthstats / deathstats - dicts with the keys 'occurred' (0 or 1),
                              'age' ('?' when unknown) and 'date'
                              ("" when unknown)
    phonenum - the first phone-number-like string found, or ""
    """

    # Maps full month names and unpadded month numbers onto the
    # three-letter codes in `months`.  'May' needs no entry: it
    # already is its own three-letter code.
    monthtranslations = {
        'January':'Jan',
        'February':'Feb',
        'March':'Mar',
        'April':'Apr',
        'June':'Jun',
        'July':'Jul',
        'August':'Aug',
        'September':'Sep',
        'October':'Oct',
        'November':'Nov',
        'December':'Dec',
        '1':'Jan',
        '2':'Feb',
        '3':'Mar',
        '4':'Apr',
        '5':'May',
        '6':'Jun',
        '7':'Jul',
        '8':'Aug',
        '9':'Sep',
        '10':'Oct',
        '11':'Nov',
        '12':'Dec'
        }
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May',
              'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
              'Nov', 'Dec')

    # Years outside this window are treated as noise rather than dates.
    # maxyear is only a floor for the upper bound: _determineDate also
    # accepts any year up to the current calendar year.
    minyear = 1700
    maxyear = 2022

    # Different kinds of dates my dad uses.  They are tried in this
    # order; every pattern defines the groups day, month and year
    # (day and/or month may be empty).
    dateres = (re.compile(r" "+
                          r"(?P<day>\d\d?)" +
                          r"(?P<month>[a-zA-Z]{3,3})" +
                          r"(?P<year>\d\d\d\d|\d\d)"),
               re.compile(r"^"+
                          r"(?P<day>\d\d?)" +
                          r"(?P<month>[a-zA-Z]{3,3})" +
                          r"(?P<year>\d\d\d\d|\d\d)"),
               re.compile(r" (?P<month>[a-zA-Z]+) "+
                          r"(?P<day>\d\d?), ?(?P<year>\d\d\d\d|\d\d)"),
               re.compile(r"^(?P<month>[a-zA-Z]+) "+
                          r"(?P<day>\d\d?), ?(?P<year>\d\d\d\d|\d\d)"),
               re.compile(r" (?P<month>[a-zA-Z]+) "+
                          r"(?P<day>\d\d?) (?P<year>\d\d\d\d|\d\d)"),
               re.compile(r"^(?P<month>[a-zA-Z]+) "+
                          r"(?P<day>\d\d?) (?P<year>\d\d\d\d|\d\d)"),
               re.compile(r" (?P<month>\d\d?)/(?P<day>\d\d?)/"+
                          r"(?P<year>\d\d\d\d|\d\d)"),
               re.compile(r"^(?P<month>\d\d?)/(?P<day>\d\d?)/"+
                          r"(?P<year>\d\d\d\d|\d\d)"),
               re.compile(r" (?P<month>\d\d?)-(?P<day>\d\d?)-"+
                          r"(?P<year>\d\d\d\d|\d\d)"),
               re.compile(r"^(?P<month>\d\d?)-(?P<day>\d\d?)-"+
                          r"(?P<year>\d\d\d\d|\d\d)"),
               re.compile(r" (?P<month>\d\d?)/(?P<year>\d\d\d\d|\d\d)"+
                          r"(?P<day>)"),
               re.compile(r"^(?P<month>\d\d?)/(?P<year>\d\d\d\d|\d\d)"+
                          r"(?P<day>)"),
               re.compile(r" (?P<year>\d\d\d\d)\D"+
                          r"(?P<day>)(?P<month>)"),
               re.compile(r"^(?P<year>\d\d\d\d)\D"+
                          r"(?P<day>)(?P<month>)"),
               re.compile(r" (?P<year>\d\d\d\d)"+
                          r"(?P<day>)(?P<month>)$"),
               re.compile(r"^(?P<year>\d\d\d\d)"+
                          r"(?P<day>)(?P<month>)$"),
               re.compile(r" (?<!age )(?P<year>\d\d)\D"+
                          r"(?P<day>)(?P<month>)"),
               re.compile(r"^(?<!age )(?P<year>\d\d)\D"+
                          r"(?P<day>)(?P<month>)"),
               re.compile(r" (?<!age )(?P<year>\d\d)"+
                          r"(?P<day>)(?P<month>)$"),
               re.compile(r"^(?<!age )(?P<year>\d\d)"+
                          r"(?P<day>)(?P<month>)$"))

    ## Different tags of interest:
    birthre = re.compile(r"born(?P<rest> .*?)(?:died.*|$)")
    deathre = re.compile(r"died(?P<rest> .*?)(?:born.*|$)")
    phonere = re.compile(r"(?P<num>(?:\d\d\d-?)?\d\d\d-?\d\d\d\d)")
    agere = re.compile(r"age (?P<age>\d+)")

    def __init__(self, hx, althx, id, sex, name, comments, seekrit):
        """
        hx, althx - henry number and alternate henry number (or a
                    false value); only their str() is kept, as
                    debug info
        id, sex, name - stored verbatim on the instance
        comments, seekrit - free text; stripped, and kept as
                    one-element lists when non-empty
        """
        # Perform basic initialization
        Person.__init__(self)

        # Put debug info as henry number
        self.debug = str(hx)
        if althx: self.debug = self.debug + " | " + str(althx)

        # Fill out papa fields
        self.id = id
        self.sex = sex
        self.name = name

        comments = comments.strip()
        if comments: self.comments = [comments]
        else: self.comments = []

        seekrit = seekrit.strip()
        if seekrit: self.seekrits = [ seekrit ]
        else: self.seekrits = []

        # See what data we can extract from the comments
        self._extractData (comments)
        return

    def _extractData (self, comments):
        """
        Attempts to extract information about birth and death, plus
        a phone number, from the comments.  Fills in self.birthstats,
        self.deathstats and self.phonenum.
        """

        # These are the variables we will fill in:
        self.birthstats = { 'occurred':0,
                            'age':'?',
                            'date':"" }
        self.deathstats = { 'occurred':0,
                            'age':'?',
                            'date':"" }
        self.phonenum = ""

        for tagre, stats in ( (self.birthre, self.birthstats),
                              (self.deathre, self.deathstats) ):
            mo = tagre.search (comments)
            if not mo: continue

            # The tag alone is enough to know the event happened
            stats['occurred'] = 1
            rest = mo.group('rest')

            # Now try and extract a date
            date = self._findDate (rest)
            if date: stats['date'] = date

            # Now try to extract an age
            amo = self.agere.search (rest)
            if amo: stats['age'] = amo.group('age')

        # If we didn't find a birth or death, look for a naked date
        # and take it to be the birthday.  Only a date that survives
        # _determineDate counts: a rejected match is no evidence.
        if not self.birthstats['occurred'] and not self.deathstats['occurred']:
            date = self._findDate (comments)
            if date:
                self.birthstats['occurred'] = 1
                self.birthstats['date'] = date

        # Now try to extract phone number
        mo = self.phonere.search (comments)
        if mo: self.phonenum = mo.group('num')
        return

    def _findDate (self, text):
        """
        Returns the normalized date for the first pattern in dateres
        that both matches text and passes _determineDate, or "" if
        there is none.
        """
        for datere in self.dateres:
            dmo = datere.search (text)
            if dmo:
                date = self._determineDate (dmo)
                if date: return date
        return ""

    def _transMonth (self, month):
        """
        Given a three-letter code, a full English month name (any
        capitalization) or a month number (with or without a leading
        zero), returns the standard three letter month code.
        Returns None for anything else.
        """
        m = month.capitalize()
        if m in self.months: return m
        if m.isdigit():
            # The date patterns accept "03" as well as "3", but the
            # translation table is keyed on the unpadded form.
            m = str(int(m))
        return self.monthtranslations.get(m, None)

    def _determineDate (self, dmo):
        """
        Turns a match object from one of the dateres patterns into a
        string of the form "[day ][Mon ]year".  Two digit years are
        taken to be 19xx.  Returns "" when the month is not
        recognized or the year falls outside the sanity window.
        """
        # Imported here so this class does not depend on a
        # module-level import.
        import datetime

        if dmo.group('day'): day = dmo.group('day') + " "
        else: day = ""

        if dmo.group('month'):
            month = self._transMonth (dmo.group('month'))
            if not month: return ""
            month = month + " "
        else: month = ""

        yeartext = dmo.group('year')
        if len(yeartext) == 2:
            year = int("19" + yeartext)
        elif len(yeartext) == 4:
            year = int(yeartext)
        else:
            return ""

        # Apply sanity checks to the year.  The upper bound follows
        # the calendar so the check does not go stale.
        latest = max(self.maxyear, datetime.date.today().year)
        if year < self.minyear or year > latest:
            return ""

        return "%s%s%d" % (day, month, year)
    pass # end class PapaPerson
    
    
class Family:
    """
    Families are used when inputting papa files.  They track the current
    spouses etc so that we know who to add the kids to.

    Has the following fields:

    henry - this family's henry number, or 'primal' for primal family

    husb - a male Person (None until one is set)

    wife - a female Person (None until one is set)

    kids - a list of Persons, in the order they were added

    setSpouse and addChild only record people in the family, so a
    family can be built around an existing Person graph.

    linkSpouse and linkChild additionally call addSpouse / addChild /
    addParent on the people involved, so the person graph is
    constructed as the family is filled in.
    """
    def __init__(self, henry):
        self.henry = henry
        self.kids = []
        self.husb = None
        self.wife = None

    def setSpouse (self, p):
        """
        Records p as husband (sex 'M') or wife (sex 'F'), replacing
        any previous one.  Raises BadFormatError for any other sex.
        """
        if p.sex == 'M':
            self.husb = p
            return
        if p.sex == 'F':
            self.wife = p
            return
        raise BadFormatError('Spouse of unknown sex: %s'%str(p))

    def linkSpouse (self, p):
        """
        Like setSpouse, but first marries p to the spouse of the
        opposite sex already in the family, if there is one.
        """
        partner = None
        if p.sex == 'M': partner = self.wife
        elif p.sex == 'F': partner = self.husb

        if partner:
            partner.addSpouse (p)
            p.addSpouse (partner)
        return self.setSpouse (p)

    def addChild (self, p):
        """Appends p to the list of kids."""
        self.kids.append (p)

    def linkChild (self, p):
        """
        Like addChild, but also registers p as a child of both
        spouses and both spouses as parents of p.  Raises
        BadFormatError unless both husband and wife are set.
        """
        if not (self.husb and self.wife):
            raise BadFormatError ("Linking child with only one parent")
        parents = (self.husb, self.wife)
        for parent in parents:
            parent.addChild (p)
        for parent in parents:
            p.addParent (parent)
        return self.addChild (p)

    def getLastChild (self):
        """Returns the most recently added kid."""
        return self.kids[-1]

    def __str__(self):
        return "Family[%s]" % self.henry
    pass # end class Family


# Matches one person line of a papa file.  Fields, left to right:
#   henry    - ten two-character columns, each a digit preceded by a
#              space or another digit
#   sex      - one of m, M, f, F
#   ....     - four characters that are skipped (see below)
#   spouse   - a single digit
#   althenry - seven characters, each a digit or a space
#   info     - the rest of the line
#
# This papaline is not very specific in the middle. This is necessary
# because some people have too many kids!  Since we don't use the
# numkids / numspouses information anyway, it is matched by "....".
# Should you ever want it again, numkids comes first.  It can be one
# or two digits.  Likewise numspouses.
papaline = re.compile(r"(?P<henry>(?:[ \d]\d){10,10})" + # ten henry numbers
                      r" (?P<sex>[mMfF])"+             # Grab the sex
                      r".... " +                      # Skip numkids/numspouse
                      r"(?P<spouse>\d) "+             # Which spouse is this?
                      r"(?P<althenry>[ \d]{7,7}) "+   # alt henry (if any)
                      r"(?P<info>.*)")                # personal information

class HenryNumber:
    """
    Represents a henry number.  This henry number can alternatively have a
    name at the end, in which case it identifies a single person.

    Internally the number is the string self.id: ten characters, one
    per level (0-9, then a-z, then A-Z for larger values; '0' marks an
    unused level), followed by the optional name.
    """
    althn = re.compile(r'^\d\d\d\d\d\d\d$')
    hn = re.compile(r'^[0-9a-zA-Z]{10,10}$')

    # Number of leading characters of self.id that are the henry
    # number proper; anything after that is the spouse name.
    width = 10

    def __init__(self, henry, spouse=""):
        """
        constructs the henry number from either an alternate
        henry number or a normal one.  If spouse is provided,
        it should be the person's name which narrows down which
        spouse it is.

        henry may be:
          - 7 characters: an alternate (shortened) henry number, which
            must be all digits and is padded with "000"
          - 10 letters/digits: an already encoded number, used as is
          - anything else: read as ten two-character decimal columns,
            as found in a papa file

        Raises BadFormatError for a malformed alternate number.  In
        the two-column form, int() raises ValueError for a column
        that is not a number.
        """
        if len(henry) == 7:
            # Shortened henry number
            if not self.althn.match (henry):
                raise BadFormatError ("Invalid alternate henry number %s"%
                                      henry)
            self.id = henry + "000"
        elif self.hn.match (henry):
            self.id = henry
        else:
            encoded = []
            for i in range(0, 2 * self.width, 2):
                level = int(henry[i:i+2])
                if level < 10:
                    encoded.append ("%s" % level)
                else:
                    # ascii_letters rather than string.letters: the
                    # latter depends on the locale and does not exist
                    # on Python 3.
                    encoded.append (string.ascii_letters[level - 10])
            self.id = "".join(encoded)
        self.id += spouse
        return

    def __str__(self):
        """
        Returns a unique string value that represents this henry
        number.
        """
        return self.id

    def getIndent(self):
        """
        Returns the 'indent' of this henry number.  So, for
        someone like '1 1 2 0 0 0 0 0 0 0 0 0' the indent is
        three.  For '1 0 0 0 ..' the indent is one.  A number that
        uses every level has no '0' filler at all; its indent is the
        full width.
        """
        # Only look at the number itself, never at an appended name.
        core = self.id[:self.width]
        depth = core.find('0')
        if depth == -1:
            return len(core)
        return depth
    pass # end HenryNumber

class HenryDB:
    """
    The henry database tracks henry numbers and the people
    they correspond to.  It maps the string form of a henry number
    to a single Person object; adding a second person under the
    same number replaces the first.
    """
    def __init__(self):
        # str(henry number) -> Person
        self.dict = {}
        return

    def addPerson (self, h, p):
        """
        Adds a person to the henry database based on their
        henry number (any object; str(h) is the key).  If somebody is
        already stored under that number, p replaces them and a
        warning is written to stderr.
        """
        key = str(h)
        # Test for the key itself, so a stored object that happens to
        # be false still counts as a duplicate.
        duplicate = key in self.dict
        self.dict[key] = p
        if duplicate:
            sys.stderr.write ('Warning: henry number %s duplicated.\n' % key)
        return

    def getPerson (self, h):
        """Returns the person stored under str(h), or None."""
        return self.dict.get(str(h), None)

    def getParents (self, h):
        """
        Returns whatever is stored under str(h.getParentNum()), or []
        when nothing is.
        """
        # NOTE(review): HenryNumber in this file defines no
        # getParentNum(), so h must be some other type that does --
        # confirm whether this method is still used.
        return self.dict.get(str(h.getParentNum()), [])

    pass # end class HenryDB

class PapaFamilyGraph(FamilyGraph):
    """
    Represents our family graph.  Basically a collection of People.
    Also has routines for parsing different kinds of inputs and
    populating the graph, as well as for printing it out.
    """
    def __init__(self):
        FamilyGraph.__init__(self)
        return

    ##########################################################
    ### Papa File Loading ####################################
    ##########################################################

    def loadFromFile (self, pf):
        """
        Reads the papa file object pf (anything with readline())
        line by line and adds every person found to self.people.

        A papa file looks like a bunch of people with at least
        one henry number and some supplementary info.  Lines that
        do not match papaline are skipped.

        If the file turns out to be malformed (BadFormatError), a
        report is written to stderr and self.people is emptied.
        """
        henries = HenryDB()
        families = [ Family('primal') ]
        linecnt = 1
        line = ""
        try:
            line = pf.readline()
            while line:
                mo = papaline.match (line)
                if mo:
                    # This line introduces a potentially new person.
                    # We need to add 'em
                    families = self._addPapaPerson (henries, families, mo)
                line = pf.readline()
                linecnt = linecnt + 1
        except BadFormatError as e:
            famtext = " ".join(map(str, families))
            sys.stderr.write ("Exception caught on line %d:\n" % linecnt)
            sys.stderr.write ("%s\n" % line)
            sys.stderr.write ("Families: %s\n" % famtext)
            sys.stderr.write ("%s\n" % str(e))
            self.people = []
        return

    def _addPapaPerson (self, henries, families, mo):
        """
        Given a line from a papa file, tries to determine if this
        person is new and if so adds them.  New people are appended
        to self.people and registered in the henries database under
        their henry number extended with their name.  A person with
        an alternate henry number is first looked up under that
        number; if found, the existing object is reused.

        henries - the HenryDB for this load
        families - the stack of Family objects currently open,
                   outermost ('primal') first
        mo - the papaline match for the line

        Returns the family stack to use for the next line.  Raises
        BadFormatError if the line does not fit the stack.
        """

        # first parse papa's separator format for names and
        # extra info
        info = mo.group('info')
        (name, comments, seekrit) = self._parsePapaInfo (info)

        # Now construct individual id tags, both for
        # this person and their alternate henry number if
        # any.  Also construct the family henry number.
        h = HenryNumber (mo.group('henry'))
        hx = HenryNumber (mo.group('henry'), name)
        if mo.group('althenry').split():
            althx = HenryNumber (mo.group('althenry'), name)
        else: althx = None

        # Determine if this is an inlaw based on their spouse number
        # (0 != inlaw)
        inlaw = int(mo.group('spouse'))

        if builddebug:
            print("--------------------------------------------------")
            print("_addPapaPerson: h=%s althx=%s hx=%s" % (h, althx, hx))
            print("  info=%s name=%s comments=%s seekrit=%s" %
                  (info, name, comments, seekrit))

        pp = None
        if althx:
            # If there is an alternate henry number, first check
            # if it already exists!
            # NOTE: use the "extended-henry-number" with the
            # spouse information to narrow it down to just this
            # one person.
            pp = henries.getPerson (althx)
            if builddebug and pp:
                print("Loading person from alternate database: %s"%str(pp))

        if not pp:
            # not already in database under alt henry, create new object
            # and add to henries database under the main henry number
            rawsex = mo.group('sex')
            if rawsex in ['m', 'M']: sex = 'M'
            # papaline only admits m/M/f/F at present, so the '?'
            # case cannot occur unless that pattern is relaxed.
            elif rawsex == '?': sex = '?'
            else: sex = 'F'

            pp = PapaPerson (hx,
                             althx,
                             len(self.people),
                             sex,
                             name,
                             comments,
                             seekrit)
            self.people.append(pp)
            henries.addPerson (hx, pp)

        # Determine the family indent and push/pop/link from the family
        # stack as necessary.
        indent = h.getIndent()
        if indent < 0:
            # getIndent() is built on str.find, which gives -1 when
            # the number has no '0' filler: every level is in use.
            indent = len(str(h))
        diff = indent - len(families)
        if builddebug:
            print("Families: " + " ".join(map(str, families)))
            print("Indent: %d" % indent)

        if diff == 0 and inlaw == 0:
            # this is a child of the current family
            # i.e., like so:
            # 1 0 0 Mama
            # 1 0 0 Papa
            # 1 1 0 Child
            family = families[-1]
            if indent > 1: family.linkChild (pp)
            else: family.addChild (pp)
            if builddebug: print("Child of current family %s" % str(family))
        elif diff == 0 and inlaw == 1:
            # this is a spouse of the last child of the cur family
            # set them as a spouse, and add a new family for the kids
            # of the new family, if any
            # 1 0 0 Mama
            # 1 0 0 Papa
            # 1 1 0 Child
            # 1 1 0 Child is all grown up and has a husband of her own
            family = families[-1] # grab last family
            if not family.kids:
                # Without this check getLastChild() raises IndexError,
                # which loadFromFile does not report.
                raise BadFormatError \
                      ("Bad Papa File: spouse %s has nobody to marry in %s"%
                       (hx, family))
            spouse = family.getLastChild ()
            newfam = Family (str(h))
            newfam.linkSpouse (spouse)
            newfam.linkSpouse (pp)
            families.append(newfam)
            if builddebug:
                print("Spouse of last child (%s) of cur fam %s" % (
                    str (spouse), str(family)))
        elif diff < 0 and inlaw == 0:
            # this is a new sibling somewhere along the line
            # i.e., like so:
            # 1 1 0 ... Mama
            # 1 1 0 ... Papa 1 (inlaw)
            # 1 1 1 ... Baby from Papa 1
            # 1 2 0 ... Mama's Sister
            # or
            # 1 1 0 Mama
            # 1 1 0 Papa (inlaw)
            # 2 0 0 Mama's Aunt
            if indent < 1:
                # A henry number starting with 0 would empty the
                # stack and crash on families[-1] below.
                raise BadFormatError \
                      ("Bad Papa File: henry number %s has no first level"%
                       hx)
            families = families[:indent]
            family = families[-1]
            if indent > 1: family.linkChild (pp)
            else: family.addChild (pp)
            if builddebug:
                print("New sibling of family %s: %s" % (str(family),str(pp)))
        elif diff < 0 and inlaw > 1:
            # Some family got a new spouse
            # this occurs when we see lines like so (current line is
            # the last one):
            # 1 1 0 ... Mama
            # 1 1 0 ... Papa 1 (inlaw)
            # 1 1 1 ... Baby from Papa 1
            # 1 1 0 ... Papa 2 (inlaw)
            families = families[:indent+1]
            family = families[-1]
            family.linkSpouse (pp)
            if builddebug:
                print("New spouse of family %s: %s" % (str(family),str(pp)))
        else:
            # this is just an error
            raise BadFormatError \
                  ("Bad Papa File: ind=%d fams:%d inlaw:%d diff:%d hx:%s"%
                   (indent, len(families), inlaw, diff, hx))
        return families

    def _parsePapaInfo (self, info):
        """
        Splits the info part of a papa line, which looks like

            name \\ comments ; seekrit

        where both the backslash part and the semicolon part are
        optional.  Returns (name, comments, seekrit); the name has
        its whitespace and capitalization normalized, the other two
        are returned as found ("" when absent).
        """
        slashidx = info.find('\\')
        semiidx = info.find(';')

        # Everything after the first semicolon is secret
        if semiidx == -1:
            seekrit = ""
            end = len(info)
        else:
            seekrit = info[semiidx+1:]
            end = semiidx

        # NOTE(review): if a semicolon comes before the backslash the
        # name keeps the semicolon text and comments come out empty;
        # unclear whether such lines exist -- confirm before changing.
        if slashidx == -1:
            name = info[:end]
            comments = ""
        else:
            name = info[:slashidx]
            comments = info[slashidx+1:end]

        # normalize capitalization
        name = string.capwords(name.strip())
        return (name, comments, seekrit)

    pass # end class PapaFamilyGraph
