"""

Machine Definitions
===================

See codegen.py for more info about MDs.  This file contains support routines
etc for all MDs no matter what architecture they are targeting.

It also contains definitions for most of the code generation data structures,
such as MachineDefinitions, Operands, FunctionInfos, etc.

"""

import types, sys

import util
from util import PyntoException
import values
from cStringIO import StringIO
from chelpers import RJBuffer, ptr
import knodes, instrsdefn
import instrsdefn
from lowir import argflags, Operation, TempArgument, ConstArgument, \
     tartypes, MemArgument, LargeConstArgument, SoftRegArgument,    \
     SoftStackArgument, regbits

class MachineDefinition:

    """

    This is the base class for all machine definitions.  Subclasses have the
    obligation to do two things:

    1. Provide certain fields with information used by the machine
    independent parts of the code generator (located in codegen.py)

    2. Provide the routines that actually generate the instruction
    information from entries in the instrdefn.

    See codegen.py for a thorough discussion of the whole code
    generator.

    The data that is required by codegen.py is the following:

    1. An instruction definition: This is optional; a default ID is
       set by the MachineDefinition base class.  See codegen.py for
       more discussion.

    2. Register table: This consists of a list of RegisterOperand
       objects describing each register on the machine.

    3. Prefilter: an object for modifying low level instructions just
       prior to emitting them.

    4. Emit table: guides the emitter in reconciling instructions with
       this machine architecture.

    5. Max Constant Size: this field is not set but is passed into the
       constructor.  It should be the maximum size of an embeddable
       constant.  Typically, embedded constants are signed, and in
       that case this field should be set to the minimum negative
       value.  For examples, many machines include a signed 16 bit
       constant field; in this case, maxconstantsize would be -32768.
       Anything smaller than that or larger than 32767 would be put
       into a LargeConstantArgument; if some instructions can handle
       larger constants than others, then the Emit Table can describe
       that by mentioning which opcodes accept a
       LargeConstantArgument.

       If this simplistic technique does not allow you enough control
       over which constants are acceptable, or you need more grades
       than small and large, you can just overload
       is_embeddable_constant(); in the latter case, of course, you'd
       have to create more Argument types too, but that should be
       doable. """

    def __init__ (self, comp, maxconstantsize):
        # You should set these:
        self.instrdefn = instrsdefn.defn    # optional
        self.regs = None                    # required
        self.emit_table = None              # required
        self.prefilter = None               # required

        # Base class owns these:
        self.log = comp.log

        # The maxconstantsize is set by parameter.  If this system is too
        # crude for you, then overload is_embeddable_constant().
        if maxconstantsize < 0:
            self._constmin = maxconstantsize
            self._constmax = -(maxconstantsize+1)
        else:
            self._constmin = 0
            self._constmax = maxconstantsize
            pass

        pass

    # External Interface

    def create_function_info (self):

        """ Creates and returns a function info instance appropriate
        to this type of machine.  Function info objects track
        statistics about a given function, like the amount of stack
        space it will require etc. """
        
        assert not "Abstract.  Must be overloaded."
        return

    def create_emitter (self, finfo, modobj):
        
        """ Returns an emitter object that can be used to emit instructions
        and obtain the resulting list. """

        return Emitter (modobj, self, self.prefilter,
                        self.emit_table, finfo, self.log)

    def select_cg_template (self, instr):
        
        """ Given an actual Instruction instance, finds the
        appropriate cg template and returns it.  Raises a
        PyntoException if no appropriate match can be found.  This
        should only be called once for a given instruction. """

        try:
            tmpls = self.instrdefn.defs[instr.__class__]
            for t in tmpls:
                if t.applies (instr):
                    return t
                pass
            pass
        except KeyError: pass # failure in the hashtable lookup above
        raise PyntoException \
              ("No matching CodeGenTemplate could be found for %s" % instr)

    def is_embeddable_constant (self, c):

        """ Returns True if c, which must be an integer, could be embedded
        in instructions on this architecture. """

        return c >= self._constmin and c <= self._constmax

    def get_integer_constant (self, c):

        """ Returns a ConstArgument or LargeConstArgument as appropriate.
        'c' should be an Python integer or long. """

        self.log.low ('get_integer_constant: c=%s', c)

        assert isinstance (c, (types.IntType, types.LongType))
        if self.is_embeddable_constant (c): return ConstArgument (c)
        return LargeConstArgument (c)

    pass

# ----------------------------------------------------------------------
# FunctionInfo
#
# Tracks the state used while generating a single function's code.
# Unlike MachineDefinitions, which are immutable once created.

class FunctionInfo:

    """ The class that tracks the details of a function as code for it
    is being generated.  This includes the amount of stack space it
    requires etc. Basically, because MachineDefinition objects are
    immutable (and shared among all jit routines), any settings the
    machine definition or code generation needs to track that are
    specific to a function or a program are contained in these
    objects.

    These are actually overloaded on a per-machine basis for certain
    routines and things that are stack specific.  Those abstract
    routines are defined at the end of the class. """

    def __init__ (self, md, registers, bigendian):

        # Master list of stack slots we create:
        self.stack_slots = []

        # Master list of all Registers objects:
        self.registers = [ r.clone() for r in registers ]

        # Pointer to our machine definition:
        self.md = md
        self.log = md.log

        # Constant objects referenced by this function
        # these are eventually handed off to the generated code
        self.constants = {}

        # Where the code for the function will be generated:
        self.buffer = RJBuffer (1024, bigendian)

        # Find a few useful registers:
        self.fpreg = [reg for reg in self.registers
                      if reg.check_flag (regbits.FP)][0]
        self.spreg = [reg for reg in self.registers
                      if reg.check_flag (regbits.SP)][0]
        
        pass

    # Function Prologue and Epilogue
    # ------------------------------

    def emit_prologue (self):

        """ Called after all instructions are emitted.  Emits a prologue
        to self.buffer """
        
        return abstract

    def emit_epilogue (self):
        
        """ Called before any instructions are emitted.  Emits an epilogue
        to self.buffer """
        
        return abstract

    # Constants; we build up a list of unique objects and then hand the
    # list over to the Generated Code object which holds references on
    # them
    # -----------------------------------------------------------------
    
    def add_constant (self, constant):
        if constant not in self.constants:
            self.constants[constant] = constant
            return constant
        return self.constants[constant]
    
    def get_constant_list (self):
        return self.constants.keys ()

    # Inter Function Linking
    # ----------------------

    def link_call_to (self, linkpos, jitted):

        """ If 'jitted' is fully generated, completes the link.
        Otherwise adds it to the list of pending links. """
        
        if not jitted.generated_code: jitted.add_pending (linkpos)
        else: jitted.generated_code.complete_link (linkpos)
        return
    
    # Register Management
    # -------------------

    def get_available_registers (self):

        """ Returns a list of registers registers that the register
        allocator is free to use.  The returned list may be modified
        by the register allocator if it wishes."""

        result = [ x for x in self.registers
                   if not x.check_flag (regbits.PRIVATE) ]
        return result

    # Stack Slot Management
    # ---------------------

    def get_stack_slots (self):

        """ Returns the set of stack slots created so far using
        new_stack_slot() """

        return self.stack_slots

    def new_stack_slot (self):

        """ Returns a new DerefOperand object that points to a new
        stack slot that was created.  This grows the size of our stack
        frame.  """
        
        stackslot = MemArgument (self.fpreg, self.next_stack_offset())
        self.stack_slots.append (stackslot)
        return stackslot

    def next_stack_offset (self):
        return abstract

    # Finalizing
    # ----------

    def get_generated_code (self):

        """ Called when the entire function has been generated, epilogue
        and all.  Returns a GeneratedCode object to wrap this buffer. """

        return abstract

    pass

# ---------------------------------------------------------------------------
# GeneratedCode
#

class GeneratedCode:

    """ A generic wrapper for the generated code resulting from a
    compilation. This allows the recording of any environmental
    information that the function will need to be invoked, used, or
    disposed of; the base class simply stores the RJBuffer where
    the main code lives.

    PowerPC, for example, uses this class to store transition
    vectors. """

    def __init__ (self, callable, buffer, disasfunc, constants):

        """
        callable --- an object callable by the interpeter to invoke the
        generated code.  Typically this is obtained by buffer.callable_object()
        
        buffer --- an RJBuffer where the code lives.  
        
        disasfunc --- a function that can be called and which returns a string
        representing the disassembly of this buffer

        constants --- a list of objects to hold references to
        """

        self.callable = callable
        self.buffer = buffer
        self.disasfunc = disasfunc
        self.constants = constants
        return

    def disassemble (self, canonical):

        """ Given a boolean indicating whether canonical output is
        desired or not, returns a string disassembly of our information """

        return self.disasfunc (canonical)

    def complete_link (self, pendinglink):

        """ Given a "pending link" object, completes any patching
        required.  While the exact type of a "pending link" object
        depends on the machine defn, it will always be some object
        that encodes information about a piece of generated code that
        is calling this function.

        This default version just calls patch() with the beginning of
        the generated code, which is usually what is desired.

        Certain platforms might require more than one link object
        etc. """

        pendinglink.patch (self.buffer.ptr ())

        pass

    pass

# ---------------------------------------------------------------------------
# Prefilter
#
# The base class for Prefilters used by Machine Definitions.  You
# define methods named after the opcodes you want to do extra
# translation for.  The emitter will call this method which allows you
# to expand and customize the Operation as needed.  If no method
# exists, then no translation is done.

class Prefilter:

    """ The base class for all prefilters, this applies a few basic
    prefilters """

    def __init__ (self, md, log):
        self.md = md
        self.log = log
        pass

    def MOVE (self, emitter, oper):
        # Moves with no destination are silently dropped, unless
        # they have a label, in which case they are converted to
        # NO-ops
        if not oper.dests:
            if oper.label:
                return emitter.emit (Operation ('NOOP', label=oper.label))
            return

        # Otherwise just pass it on as normal
        return emitter._emit (oper)

    pass

class Emitter:

    """ A Emitter contains the list of Operations for one function
    being generated. You add new instructions to it by calling the
    routines emit(); this causes the instruction to be passed to the
    machine defn where it is lowered and expanded appropriately. """

    def __init__ (self, modobj, md, prefilter, emit_table, finfo, log):

        """
        modobj --- the module object we are emitting a function into
        
        md --- the machine definition
        
        prefilter --- the prefilter provided by the machine def'n

        emit_table --- the table indicating what arguments type each opcode
        accepts on this architecture
        
        finfo --- the function info for this function
        
        log --- where to log debugging info
        
        maxconstantsize --- the maximum size of constants that can be
        embedded in instructions.  If this is negative, then we assume
        that embedded constants are signed, and the maximum size
        ranges from -X to (X-1) where X is -maxconstantsize.  i.e., if
        you have 16 bit signed embededd constants (like many RISC
        architectures) then embedded constants range from -32768 to
        32767, so maxconstantsize should be -32768.  If you had 16 bit
        unsigned, then you would specify 65535 (or 2^16-1) """

        # Various parameters specified by the machine definition:
        self.finfo = finfo
        self.prefilter = prefilter
        self.emit_table = emit_table
        self.log = log
        self.md = md
        self.module_object = modobj

        self.creation_instr = None
        
        self.clear_equivalence_tables ()

        self.clear_translations ()

        # Keep our operations in a singly linked list so we can
        # move back and forth between them.
        self.opers = util.SinglyLinkedList ('em_next')

        self.fallthrutarget = 0

        # Used for linking Operations together in _emit:
        #
        #   when an Operation is added with a permanent label (one that begins
        #   with '_'), it is added to pending_labels.  Then when operations
        #   are added targeting those labels, they are linked to this entry.
        #   The entry remains and should never be overwritten.
        #
        #   when an Operation is added with a target that is not in
        #   pending_labels, it is added to pending_targets.  Then when
        #   an operation is added with that label, it is linked to it and
        #   removed from pending_targets.
        self.pending_labels = {}
        self.pending_targets = {}
        pass

    def translate_value (self, value):

        """ A helper which translates the instruction operand 'value'
        into a low level Argument.  Always returns the same Argument
        for the same Value. """

        assert isinstance (value, values.Value)
        
        try:
            return self.operand_translations [ value ]
        except KeyError:
            if value.is_constant():
                res = self.md.get_integer_constant (
                    ptr (self.finfo.add_constant (value.get_value())))
            else:
                res = TempArgument ()
                pass
            self.operand_translations [ value ] = res
            return res
        pass

    def clear_translations (self):
        self.operand_translations = util.ObjectDict ()
        return

    # Register-Memory Equivalences
    # ----------------------------
    #
    # As we generate instructions, we may force Derefs and Addrs to
    # registers.  This tracks what register we forced them to.
    # When we encounter an instruction with a label, meaning there may be
    # control flow that circumvented some of the equivalents, we call
    # clear_equivalence_tables() to restart.  Although this isn't perfect,
    # it's a simple system that gets us most of the way there.

    def clear_equivalence_tables (self):
        self.deref_equiv = {}
        self.addr_equiv = {}
        if self.log: self.log.low ("Clear equivalence tables")
        pass

    def _get_equiv (self, table, base, offset):
        return table.get ( (base, offset), None )

    def _set_equiv (self, table, base, offset, val):
        table[ (base, offset) ] = val
        return

    def get_deref_equivalent (self, base, offset):
        return self._get_equiv (self.deref_equiv, base, offset)

    def set_deref_equivalent (self, base, offset, val):
        return self._set_equiv (self.deref_equiv, base, offset, val)

    def get_addr_equivalent (self, base, offset):
        return self._get_equiv (self.addr_equiv, base, offset)

    def set_addr_equivalent (self, base, offset, val):
        return self._set_equiv (self.addr_equiv, base, offset, val)

    # Operation Emission
    # ------------------
    #
    # This code handles adding Operations to our internal list.  The main
    # entry point is the emit() routine, which invokes the prefilter if
    # necessary and otherwise falls through to _emit().
    #
    # _emit() does the work of comparing the types of the arguments against
    # those types that this architecture can directly support, as indicated
    # by the emit_table, and shuffling values into different types of
    # arguments as needed.  It also handles any linking between different
    # operations in the same functions (i.e., branches between and within
    # basic blocks).

    def set_creation_instr (self, instr):

        """ Sets the instruction pointer that will be used when new
        Operations are added.  Useful for tracing why an Operation
        came into being. """
        
        self.creation_instr = instr
        pass

    def emit (self, oper):

        """ Attempts to emits the oper.  This may involve breaking
        the Operation down at the discretion of the machine definition. """

        assert isinstance (oper, Operation)

        indent = self.log.indent ("emit: %s" % oper)

        # Now check to see if the prefilter wants to handle this guy.
        if self.prefilter:
            if hasattr (self.prefilter, oper.opcode):
                pref = getattr (self.prefilter, oper.opcode)
                pref (self, oper)
                return
            pass

        # No applicable prefilter, so pass him to _emit that applies
        # the emit_table.
        self._emit (oper)
        pass

    def _emit (self, oper):
        
        """ Reconciles an instruction with the emit table and appends it
        to the internal list of instructions.  Called from emit(), and should
        only be called directly from within the prefilter. 

        Also assumes that the prefilter has been applied. """

        # The Emit Table does most of the work here.  It compares the
        # instruction against the desired kinds of opcodes and
        # generates temporaries, moves, etc.  To add a source move it
        # will call the first function pointer (self._emit()) and to
        # add the actual instruction it calls the second one
        # (eslf._append_oper()).  Finally, it will call self._emit()
        # to deal with any destinations that had to be re-routed.
        cgfunc, dstwraps, srcwraps = self.emit_table.apply_to_oper (oper)

        # Remember the code generation function that was selected,
        # and the instruction responsible for the creation of this oper.
        oper.set_code_generation_func (cgfunc)
        oper.set_creation_instr (self.creation_instr)

        # First we have to emit instructions to move any wrapped source
        # arguments into their new locations:
        if srcwraps:
            sources = list (oper.sources)
            for argidx, wrapclass in srcwraps:
                newarg = wrapclass ()
                oldarg = sources [ argidx ]
                self._emit (Operation ('MOVE',
                                       dests=(newarg,),
                                       sources=(oldarg,),))
                sources [ argidx ] = newarg
                pass
            oper.set_source_arguments (sources)
            pass
        if dstwraps:
            olddests = oper.dests
            dests = list (oper.dests)
            for argidx, wrapclass in dstwraps:
                newarg = wrapclass ()
                dests [ argidx ] = newarg
                pass
            oper.set_dest_arguments (dests)
            pass

        # Now we emit this instruction itself; start by checking to
        # see if this instruction targets a label that already exists.
        # If not, add it to the list of pending targets.
        if oper.tartype == tartypes.LABEL:
            try:
                other = self.pending_labels [oper.target]
                oper.target_operation (other)
            except KeyError:
                try:
                    self.pending_targets [oper.target].append (oper)
                except KeyError:
                    self.pending_targets [oper.target] = [ oper ]
                    pass
                pass
            pass

        # Adjust the number of targets by 1 if the operation before
        # might fall through to this oper.
        oper.targeted += self.fallthrutarget

        # Now that we've adjusted the arguments, etc, we'll append the
        # actual instruction to our list.  
        self.opers.append (oper)

        # Set the code for whether we might fall thru to the next instr
        if oper.opcode == 'BR': self.fallthrutarget = 0
        else: self.fallthrutarget = 1

        # If we have a label, check to see whether anyone is pending
        # to target it.
        if oper.label:
            try:
                others = self.pending_targets [ oper.label ]
                for other in others:
                    other.target_operation (oper)
                    pass
                del self.pending_targets [oper.label]
            except KeyError:
                pass

            # Note that lame convention: permanent labels begin with an _
            if oper.label[0] == '_':
                assert not self.pending_labels.has_key (oper.label)
                self.pending_labels [oper.label] = oper
                pass

            # Remove the label from the oper: it is not needed anymore, and
            # it obscures the canonical unit testing print outs :)
            oper.label = None
            pass

        # Finally, we move any wrapped destinations back into their
        # original locations.
        for argidx, wrapclass in dstwraps:
            newarg = oper.dests[argidx]
            oldarg = olddests[argidx]
            self._emit (Operation ('MOVE',
                                   dests=(oldarg,),
                                   sources=(newarg,),))
            pass
            
        return
        
    pass

# ----------------------------------------------------------------------