"""

During code generation, we transform the normal IR for the function
into low IR.  Low IR is basically a machine specific pseudocode.

Each function consists of a series of operations.  Each operation
generally corresponds to one machine instruction on the target
machine.  See the class for more details.

"""

from cStringIO import StringIO
import util, types
import sys
import chelpers

# Indicates the type of target an Operation has:
tartypes = util.Enumeration (
    'NONE',

    # These types of targets are used in the
    # emitted operations:
    'OPER',      # an Operation object
    'GEN',       # generated code
    'LIB',       # library function

    # These types of targets are used only in the
    # instruction definition patterns:
    'VALUE',     # constant value argument to Instruction; resolves to GEN
    'BLOCKEXIT', # a terminal of the containing block; resolves to OPER
    'LABEL'      # label of upcoming Operation; resolves to OPER w/ basic block
    )

# Operations
# ----------

class Operation:

    """ An Operation is a machine level instruction.  An operation is
    defined by an opcode, branch target, a list of destination
    argument, and a list of source arguments.  

    The opcode is a string.  There are a set of standard opcodes, but
    some machines may define their own.  The set of operations comes
    from the machine definition.

    The branch target defines where the instruction goes.  Most
    operations simply fall through to the next one: in this case, the
    branch target is None.  Other possible types for the branch target
    are other operations, other generated functions (for function
    calls), or library routines.

    The destination arguments are those whose outgoing value is
    modified. The source arguments are those whose incoming value is
    used.  If a single argument occurs in both lists its current value
    is read, then modified.  Note that the registers allocators expect
    that these be lists, not tuples, and that Operations not share
    instances.  The only time it is safe to have a tuple or share the
    same object is when the Operation is guaranteed not to need to
    have any of its arguments modified (for example, if a Temporary
    argument is in the source list, it may need to be spilled, and in
    that case the register allocator will try to replace it in the
    source list in place; if it can't do that, it will get grumpy).

    The label field is used during the initial code generation.  Most
    Operations have a label of None, but others have a string value
    here.  These labels can be of two forms: they can be local labels
    that only apply within a single CodeGenTemplate, or they can be
    labels that apply within an entire procedure.  All the labels
    specified in the instrsdefn file are examples of the former type:
    if there is a branch to the label 'okay', for example, it commonly
    means that no exception occurred within this instruction and
    should be linked to the 'okay' that came from this instruction.
    If another instruction of the same type is generated right
    afterwards, it will also contain a label of 'okay'.  Global
    labels, on the other hand, are generally the labels that are
    generated at the beginning of basic blocks and should only occur
    once in any given procedure.  ** We use the rather lame convention
    that global labels begin with an underscore (_). ** We also use
    the convention that labels in instrsdefn.py are always lowercase,
    and those labels generated by codegen.py are uppercase.

    Other Operation fields:

    Each Operation has a cgfunc, or code generation function, set upon
    it.  This indicates which function will be used to generate the
    actual bytes for this Operation when the time comes and is set
    from the machine definition's Emit Table.

    Each Operation has a creation_instr field which records the mid
    level Instruction that caused this Operation to come into being.
    This is helpful for debugging and is set in Emitter._emit().

    Some Operations have a clobber field, which is a set of Register
    objects that the Operation will clobber, and which must thus be
    saved to the stack if they are live.  This is used when making
    CALLs to preserve caller save registers.  This field is read only
    and typically points at a list that is potentially shared among
    many instructions in a given function. """

    def __init__ (self,
                  opcode,
                  dests=(),                   # some code relies on these ...
                  sources=(),                 # ... being the 2nd/3rd args
                  clobber=(),
                  target=None,
                  cgfunc=None,
                  label=None,
                  comment=""):

        """ See class description """

        self.opcode = opcode
        self.label = label
        if target:
            self.tartype = target[0]
            self.target = target[1]
        else:
            self.tartype = tartypes.NONE
            self.target = None
            pass
        self.dests = dests
        self.sources = sources
        self.clobber = clobber
        self.cgfunc = None
        self.creation_instr = None
        self.comment = comment
        self.targeted = 0
        self.block = None
        pass

    def set_block (self, blk):
        self.block = blk
        pass

    def clone (self, dests, sources):
        return Operation (self.opcode,
                          dests=dests,
                          sources=sources,
                          clobber=self.clobber,
                          target=(self.tartype, self.target),
                          cgfunc=self.cgfunc,
                          label=self.label)

    def target_operation (self, oper):
        self.tartype = tartypes.OPER
        self.target = oper
        oper.targeted += 1
        pass

    def set_code_generation_func (self, cgfunc):
        self.cgfunc = cgfunc
        return

    def set_creation_instr (self, instr):
        self.creation_instr = instr
        return

    def set_source_arguments (self, sources):
        self.sources = sources
        return
    
    def set_dest_arguments (self, dests):
        self.dests = dests
        return

    def dump (self, opnamer, argnamer, jitnamer):

        """ Writes a description of this object to 'outbuf'.  Does not
        include pointer values and uses 'opname' to print the names of
        Operation objects. """

        outbuf = StringIO ()
        outbuf.write (self.opcode)
        if self.dests:
            outbuf.write (' d:')
            for o in self.dests: outbuf.write(' %s' % o.describe (argnamer))
            pass
        if self.sources:
            outbuf.write (' s:')
            for o in self.sources: outbuf.write(' %s' % o.describe (argnamer))
            pass
        if self.label:
            outbuf.write (' l: ')
            outbuf.write (str(self.label))
            pass
        if self.tartype == tartypes.OPER:
            outbuf.write (' t: %s' % opnamer (self.target))
        elif self.tartype == tartypes.GEN:
            outbuf.write (' t: %s' % jitnamer (self.target))
        elif self.tartype != tartypes.NONE:
            outbuf.write (' t: ')
            outbuf.write (str (self.target))
            pass
        return outbuf.getvalue ()

    def __str__ (self):
        return 'O(%x:%s)' % (id(self), self.opcode)

    pass

# Arguments
# ---------

# This enumeration specifies the flags that set on Arguments.  One of
# the primary flags that are set has to do with the type of the
# argument, but also whether it's address is taken and things like
# that.
argflags = util.BitsEnumeration (

    # TYPE FLAGS
    # ----------

    # The basic types: after register allocation, these are all that is left:
    'REG',        # Specific register
    'CONST',      # Small integer constant (what is small is defined by md)
    'LARGECONST', # Large integer constant
    'MEM',        # Indirect ref thru another arg
    'ADDR',       # Takes address of another arg

    # The pseudo types: register allocation removes these
    'TEMP',       # Unassigned storage: may be register or spilled to stack
    'SOFTREG',    # Non-specific register
    'SOFTSTACK',  # Non-specific stack slot
    
    # The Value arguments are only used in
    # instruction templates.  They are
    # replaced by appropriate arguments for
    # the operands to the mid level
    # instruction.  So, an Operation
    # uses a Value argument as a placeholder
    # to refer to the arguments of the
    # instruction the Operation is
    # defining.  Note that one ValueArgument may
    # expand to more than one argument if the
    # instruction has more than one Operand under
    # the same name.
    #
    # LENGTHVALUE expands to an integer constant
    # indicating how many operands a VALUE
    # argument would have expanded to.
    #
    # CONSTANTVALUE indicates that the argument
    # is expected to be an integer constant and
    # expands to be a CONST argument. 
    'VALUE',
    'LENGTHVALUE',
    'CONSTANTVALUE',

    # This argument type just expands to the appropriate module object
    # (a pointer constant)
    'MODULE',

    # OTHER FLAGS
    # -----------

    # Indicates that the address of this Argument is taken by an
    # AddrArgument; for TempArguments, this requires that they be
    # placed on the stack.  It is not really valid for other
    # arguments.
    'ADDRTAKEN',

    # For constants, indicates that this constant value should not be
    # dumped when doing canonical dumps.  For example, the address of
    # library functions may be embedded into the instruction stream,
    # but it varies so that spoils the unit testing.  Set this flag on
    # those types of values and they will not be explicitely output.
    'MASKDURINGDUMP'
    
    )

class Argument:

    def __init__ (self, type):
        self.flags = type
        self.visit_tag = None

        # Some arguments are assignable; see the AssignableArgument
        # subclass for more information.  For those that aren't descended
        # from this class, they are always "assigned" to themselves.
        self.assignment = self
        pass

    def visit (self, tag):
        self.visit_tag = tag
        return

    def visited (self, tag):
        return self.visit_tag is tag

    def is_assigned_to_type (self, type):

        """ Equivalent to is_type() except for AssignableArguments, in
        which case it checks the assignment first. """
        
        return self.is_type (type)

    def is_type (self, type):
        return (self.flags & type) != 0

    def set_in_use (self):
        
        """ For those types of Arguments that do Dirty tracking, indicates
        that an Assignable argument was assigned to this Argument.
        Really only used in registers. """
        
        return
    
    def reset_in_use (self):
        
        """ For those types of Arguments that do Dirty or assignment
        tracking, indicates that an argument's assignment was  was assigned
        to this Argument.  Really only used in registers. """
        
        return

    def instantiate (self, emitter, instr):
        
        """ Instantiate is called when converting from a code gen
        template to the final list of operations for an entire
        function.  It removes argument types that reference the
        containing instruction (ValueArgument, for example) and things
        like that.  Note that it returns a list of Arguments as one
        Argument may be expand to many. """
        
        return abstract

    def is_assignable (self):
        """ See AssignableArgument """
        return False

    def set_addr_taken (self):
        self.flags |= argflags.ADDRTAKEN
        return

    def addr_taken (self):
        return (self.flags&argflags.ADDRTAKEN) != 0

    def set_mask_during_dump (self):
        self.flags |= argflags.MASKDURINGDUMP
        return

    def mask_during_dump (self):
        return (self.flags&argflags.MASKDURINGDUMP) != 0

    def describe (self, argnamer):
        
        """ Must be implement by all derivatives: describes this
        Argument in a succint way for printouts.  For args like TempArguments
        that are fundamental, the argnamer is a lambda that returns a
        canonical string value. """
        
        return abstract

    def _explicit (self):

        """ When describe() is called from __str__, any call to its
        argnamer argument ends up calling this routine.  Returns a
        value useful for debugging. """

        return "A(%x)" % id (self)

    def __str__ (self):

        """ describe the argument, using the pointer value as the
        argnamer """
        
        return self.describe (lambda x: x._explicit())

    pass

class AssignableArgument (Argument):

    """ Many types of Arguments will eventually be assigned to a
    concrete location during register/storage allocation.  This may
    vary over the course of a basic block and between blocks, so the
    assigned_by field contains an id of who gave the assignment.  We
    use this to ensure that the assignment is not inherited from a
    previous block. """

    def __init__ (self, type):
        Argument.__init__ (self, type)
        self.assignment = None
        self.assigned_by = None
        return

    def is_assignable (self):
        """ See AssignableArgument """
        return True

    def is_assigned_to_type (self, type):

        """ Overload the is_assigned_to_type to first check whether
        the Argument we are assigned to has this type, if any """

        if self.assignment: return self.assignment.is_type (type)
        return (self.flags & type) != 0
    
    def was_assigned_by (self, id):
        return self.assigned_by is id

    def set_assignment (self, assigned_by, assignment):
        if self.assigned_by is assigned_by and self.assignment:
            self.assignment.reset_in_use ()
            pass
        
        self.assigned_by = assigned_by
        self.assignment = assignment
        
        if assignment: assignment.set_in_use ()
        return
    
    pass
    
# These are the bits that can be set on a register in the machine defintion.
# They are used by the register allocator and other parts of the
# code as guidance.
regbits = util.BitsEnumeration (

    # Flags used by machine independent code like the register
    # allocator etc:

    'PRIVATE',     # Not available to register allocation
    'SP',          # Used as a stack pointer
    'FP',          # Used as a frame pointer

    # When a FunctionInfo object is created, it copies all of the
    # Registers on the machine to local per-function copies.  Then,
    # when a register is used, a DIRTY flag is set on it.  This allows
    # us to generate proper code in the prologue/epilogue.  The INUSE flag
    # is used by the ra_local code to track which registers are currrently
    # loaded.
    
    'DIRTY',       # Indicates that a register is used at all during a function
    'INUSE',       # Indicates that a register is used right now.

    # Flags used only by specific machinedefn, not machine indep code.
    # Their meanings are therefore somewhat less precise than the ones
    # above and dependent on the machine in question.
    
    'ARG',         # Functions args stored here.
    'CALLEESAVE',  # Useful flags for declaring who saves which register
    'CALLERSAVE',
    'RETVAL')      # Return value comes in here

class RegArgument (Argument):

    """ The class that is used in machine definitions to communicate
    information about the registers of a machine to the code
    generator.  'name' should be a descriptive name of the register,
    such as '$eax' or 'r0'.  'id' is opaque and is for the use of the
    machine definition; typically it would be the numeric constant
    will be embedded in the actual instruction.  'bits' indicates a
    set of flags defined by the BitsEnumeration 'regbits' and are used
    to guide the register allocator and other machine independent
    parts of the code as to how a particular register can be used.

    Each machine definition has a master set of RegArgument objects
    and creates a copy for each function that is generated; this
    allows some bookkeeping data to be stored in the RegArguments,
    such as whether they are in use or are modified during a function
    body. """

    def __init__ (self, name, id, bits):
        Argument.__init__ (self, argflags.REG)
        self.name = name
        self.id = id
        self.bits = bits
        pass

    def clone (self):
        return RegArgument (self.name, self.id, self.bits)

    def is_dirty (self):
        return (self.bits & regbits.DIRTY) != 0
    
    def is_in_use (self):
        return (self.bits & regbits.INUSE) != 0

    def set_in_use (self):
        self.bits |= regbits.INUSE
        self.bits |= regbits.DIRTY
        pass

    def reset_in_use (self):
        self.bits &= ~regbits.INUSE
        pass

    def check_flag (self, flag):
        return (self.bits & flag) != 0

    def describe (self, argnamer):
        return self.name

    def force_to_register (self, emitter):
        """ See Operand.force_to_register.  """
        return self

    def instantiate (self, emitter, instr):
        return (self,)

    pass

class SoftRegArgument (AssignableArgument):

    def __init__ (self):
        AssignableArgument.__init__ (self, argflags.SOFTREG)
        return

    def instantiate (self, emitter, instr):
        return (self,)

    def describe (self, argnamer):
        return "SR%s" % argnamer (self)

    pass

class TempArgument (AssignableArgument):

    def __init__ (self):
        AssignableArgument.__init__ (self, argflags.TEMP)
        self.regalloc = None # used during regalloc
        self.is_local = False
        self.lastblock = None
        self.stack_slot = None
        self.visit_tag = None
        pass

    def visit (self, tag):
        self.visit_tag = tag
        return

    def visited (self, tag):
        return self.visit_tag is tag

    def set_stack_slot (self, slot):

        """ Associates a stack slot with this temporary.  Note that a
        temporary only ever has one home on the stack. """
        
        assert not self.stack_slot
        self.stack_slot = slot
        return

    def used_by_block (self, block):
        
        """ Called by some register allocators for each block each
        time a temporary is used by block 'block'.  If we only ever
        get calls from one block, we know the temporary is local;
        otherwise, we know it spans blocks.  Later users can check the
        self.is_local flag to see what the result was.

        Returns True if this argument is still considered local. """

        if not self.lastblock:
            self.lastblock = block
            self.is_local = True
        elif self.lastblock is not block:
            self.is_local = False
            pass
        return self.is_local

    def instantiate (self, emitter, instr):
        return (self,)

    def describe (self, argnamer):
        return argnamer (self)

    pass

class ConstArgument (Argument):

    """ ConstArguments are used for small integer constants that can
    be directly embedded into instruction words.  The machine definition
    defines what is small by setting the maxconstantsize parameter to the
    Emitter.

    If a constant is too large to be embedded, a LargeConstArgument may be
    required. """

    def __init__ (self, value):
        Argument.__init__ (self, argflags.CONST)
        assert isinstance (value, (types.IntType,types.LongType))
        self.value = value
        pass

    def instantiate (self, emitter, instr):
        if emitter.md.is_embeddable_constant (self.value):
            return (self,) # probably always the case in practice
        return (emitter.md.get_integer_constant (self.value),)

    def _explicit (self):

        """ When describe() is called from __str__, any call to its
        argnamer argument ends up calling this routine.  Returns a
        value useful for debugging. """

        return "%x" % self.value

    def describe (self, argnamer):
        if self.mask_during_dump():
            return "0x%s" % argnamer (self)
        return "0x%x" % self.value

    pass

class LargeConstArgument (ConstArgument):

    """ See ConstArgument: this class is used for constants too large
    to be embedded in instructions.  The machine definition decides
    which constants qualify as LargeConstArguments. """

    def __init__ (self, value):
        Argument.__init__ (self, argflags.LARGECONST)
        assert isinstance (value, (types.IntType,types.LongType))
        self.value = value
        pass

    def instantiate (self, emitter, instr):
        if not emitter.md.is_embeddable_constant (self.value):
            return (self,) # probably always the case in practice
        return (emitter.md.get_integer_constant (self.value),)

    def describe (self, argnamer):
        return "%sL" % ConstArgument.describe (self, argnamer)

    pass

class MemArgument (Argument):

    def __init__ (self, base, offset):
        Argument.__init__ (self, argflags.MEM)
        assert isinstance (base, Argument)
        assert isinstance (offset, types.IntType)
        self.base = base
        self.offset = offset
        pass

    def instantiate (self, emitter, instr):
        base = self.base.instantiate (emitter, instr)
        assert len (base) == 1
        if base[0] is not self.base:
            return (MemArgument (base[0], self.offset),)
        return (self,)

    def describe (self, argnamer):
        return "*(%s+%d)" % (self.base.describe (argnamer), self.offset)

    pass

class SoftStackArgument (AssignableArgument):

    def __init__ (self):
        AssignableArgument.__init__ (self, argflags.SOFTSTACK)
        return

    def instantiate (self, emitter, instr):
        return (self,)

    def describe (self, argnamer):
        return "SS%s" % argnamer(self)

    pass

class AddrArgument (Argument):

    def __init__ (self, base):
        Argument.__init__ (self, argflags.ADDR)
        assert isinstance (base, Argument)
        self.base = base
        self.base.set_addr_taken ()
        pass

    def instantiate (self, emitter, instr):
        base = self.base.instantiate (emitter, instr)

        # If the thing we're taking the address of doesn't exist,
        # return NULL:
        if not base:
            return (emitter.md.get_integer_constant (0),)

        # Otherwise attempt to avoid creating a new AddrArgument if
        # it is not needed:
        assert len (base) == 1
        if base[0] is not self.base:
            return (AddrArgument (base[0]),)
        return (self,)

    def describe (self, argnamer):
        return "&(%s)>" % self.base.describe (argnamer)

    pass

class ValueArgument (Argument):

    def __init__ (self, name):
        Argument.__init__ (self, argflags.VALUE)
        self.name = name
        pass

    def instantiate (self, emitter, instr):
        return [ emitter.translate_value (val.value)
                 for val in instr.get_named (self.name) ]

    def describe (self, argnamer):
        return "VA<%s>" % self.name

    pass

class LengthValueArgument (Argument):

    def __init__ (self, name):
        Argument.__init__ (self, argflags.LENGTHVALUE)
        self.name = name
        pass

    def instantiate (self, emitter, instr):
        val = len (instr.get_named (self.name))
        return (emitter.md.get_integer_constant (val),)

    def describe (self, argnamer):
        return "LVA<%s>" % self.name

    pass

class ConstantValueArgument (Argument):

    def __init__ (self, name):
        Argument.__init__ (self, argflags.CONSTANTVALUE)
        self.name = name
        pass

    def instantiate (self, emitter, instr):
        valop = instr.get_named (self.name)
        assert len (valop) == 1
        valop = valop[0].get_value()
        assert valop.is_constant ()
        return (emitter.md.get_integer_constant (valop.get_value ()),)

    def describe (self, argnamer):
        return "CVA<%s>" % self.name

    pass

class ModuleArgument (Argument):

    def __init__ (self):
        Argument.__init__ (self, argflags.MODULE)
        pass

    def instantiate (self, emitter, instr):
        modptr = chelpers.ptr (emitter.finfo.add_constant (
            emitter.module_object))
        return (emitter.md.get_integer_constant (modptr),)

    def describe (self, argnamer):
        return "MODULEARG"

    pass

# Code Generation Template
# ------------------------

class CodeGenTemplate:

    """ The CodeGenTemplate objects holds a template for how to
    implement a particular Instruction object.  When asked, a machine
    definition will produce one of these objects for any given
    instruction.  The code generator will then copy the Operations
    specified into the full listing for the function.

    Instantiating the template also requires some modification: the
    opers may have arguments like ValueArguments or others that must
    have values substituted from the current instruction.  That
    modification is done by the code generator. """

    def __init__ (self):
        self.condfunc = None
        self.opers = []
        pass

    def applies (self, instr):
        if not self.condfunc: return True
        return self.condfunc (instr)

    def emit (self, oper):
        self.opers.append (oper)
        return

    pass

class InstrDefinition:

    """ A set of CodeGenTemplates grouped by the Instruction object
    they apply to.  There is one of these per backend, as well as one
    that is independent of all backends and forms the basis of them
    all (defined in instrsdefn.py).  See codegen.py for more
    information. """

    def __init__ (self):
        self.defs = {}
        pass

    pass

# Low Level CFG
# -------------

class LowLevelBlock:

    def __init__ (self, opers=None):
        # We keep our list of operations as a link list for simple insertion.
        if not opers:
            self.opers = util.LinkList ('op_prev', 'op_next')
        else:
            self.opers = opers
            pass
        self.fallthrough = None
        self.visit_tag = None
        self.regalloc = ()             # used during register allocation
        self.registers_on_entry = None # used during register allocation
        self.registers_on_exit = None  # used during register allocation
        self.pending_preds = ()        # used during register allocation
        self.transition_point = -1     # used during register allocation
        self.oper_point = -1           # used during register allocation
        pass

    def add_oper (self, oper):
        self.opers.append (oper)
        oper.set_block (self)
        pass

    def set_fallthrough (self, fthru):

        """ Sets the Operation that the last Operation falls through
        to.  """
        
        assert isinstance (fthru, Operation)
        self.fallthrough = fthru
        pass

    def successors (self):

        """ Returns a tuple of successor blocks to this block. """

        # Check to see if the last instruction branches anywhere.
        if self.opers:
            lastoper = self.opers[-1]
            if lastoper.tartype == tartypes.OPER:
                # We may also have a fallthrough:
                if self.fallthrough:
                    return (self.fallthrough.block, lastoper.target.block)
                return (lastoper.target.block,)
            pass

        # If not, check to see if we have a fallthrough:
        if self.fallthrough:
            return (self.fallthrough.block,)

        # If not, no successors:
        return ()

    def visit (self, tag):
        self.visit_tag = tag
        return

    def visited (self, tag):
        return self.visit_tag is tag

    # Register Allocation

    def set_registers_on_exit (self, regs):
        self.registers_on_exit = regs
        return
    
    def set_registers_on_entry (self, regs):
        self.registers_on_entry = regs
        return

    def add_pending_pred (self, predblk, linkobj):
        assert predblk
        assert linkobj
        if not self.pending_preds:
            self.pending_preds = [ (predblk, linkobj) ]
            return
        self.pending_preds.append ( (predblk, linkobj) )
        return

    pass

# Register Allocation
# -------------------

class RegAllocData:

    """ This class is attached to blocks to indicate which temporaries
    should be put into registers and where.  Though the exact register
    allocation method is parameterized depending on the machine
    definition, an object matching this protocol is always attached to
    each low level block as llblock.regalloc.

    This protocol lays out a method of dividing the block into regions
    and nomating within a region a set of TempArguments to be placed
    in registers.

    The code generation will do its best to put those temp arguments
    in registers.

    See code generation document for additional details on adding new
    register allocators and other obligations.  """

    def get_regions (self):

        """ Returns a list of Operation instructions which mark the
        beginning of each region.  The first region is implicitely the
        first Operation, and it is included in the list.  The list
        returned should never be empty unless the block has no
        Operations. """

        return abstract

    def get_temps_in_registers (self, regionindex):

        """ Given a region index returns a list of temporary arguments
        that should be in registers. """

        return abstract

    pass
