"""

Usage Density Based Register Allocator

This code is not complete.   It's based on an interesting paper I read and
I wanted to try it out.

"""

import lowir, util
from lowir import regbits, argflags, MemArgument, AddrArgument
from ra_local import LocalAllocator

INT_MAX = 0x7FFFFFFF

# PRIORITIZATION
# --------------

class Allocation:

    """ Each block maintains an at least one Allocation for every
    temporary argument which must be considered.  In fact, each basic
    block is broken up into regions: whenever a temporary must be
    spilled, the a new region is begun, and new Allocations are
    required for those temporaries that were spilled, as well as those
    promoted to a register.

    """

    def __init__ (self, temp):

        # Which temporary do we correspond to?
        self.temp = temp

        # How many times has this temporary been used since the last def'n?
        self.totaluses = 0

        # What instruction index was this temporary written at?  If
        # this is negative, then it indicates the definition was in a
        # previous block and this is the number of instructions
        # between that definition and the current block (or some
        # average thereof).
        self.writeloc = 0

        # Whenever a use is added, we compute the active window.  This
        # is the index of the last operation for which any cached
        # usage density would be valid: this is basically a heuristic,
        # based on the fact that so long as one use occurs in the
        # active window, the usage density will be at least as high as
        # it was before.  
        self.activewindow = -1

        # The last cached usage density (if any)
        self.usagedensity = None

        # When we're computing averages, we track the totals in
        # (totaluses,writeloc) above, and we track the number of items
        # to divide by in this field.
        self.contributors = 0    # used during average

        # Is this variable currently in a register?  Note that this
        # only applies to the last Region in the block.
        self.inreg = False
        pass

    def add_contributor (self, blklen, alloc):

        """ Sometimes an Allocation's data is computed as the average of
        many contributing sources.  In that case, we call add_contributor()
        for each source.  'blklen' should be the number of operations in
        that block, and 'alloc' should be the contributing allocation from
        that block. """
        
        self.contributors += 1
        self.totaluses += alloc.totaluses
        self.writeloc += alloc.get_total_dist (blklen-1)
        pass

    def compute_average (self):

        """ Called when all contributors have been added.  Does the
        divide to compute the average. """
        
        if self.contributors:
            self.totaluses //= self.contributors  # make sure we use int div
            self.writeloc //= self.contributors
            self.writeloc = -self.writeloc
            pass
        self.contributors = 0
        return

    def set_defn (self, curpos):

        """ When the definition for this temporary occurs in the
        current block, we call this function with the index of the
        definition instruction.  This clears out the current usage
        density information, but doesn't affect the storage.  Note
        that a recently defined temporary is vulnerable to being bumped
        out if it is not used. """
        
        self.writeloc = curpos
        self.totaluses = 0
        self.activewindow = -1
        self.usagedensity = None
        pass

    def get_total_dist (self, curpos):
        return self.curpos - self.writeloc

    def add_use (self, curpos):

        """ Indicates that this temporary was used; adjusts total uses and
        the active window. """
        
        # adjust usage count:
        self.totaluses += 1

        # recompute active window:
        totaldist = self.get_total_dist (curpos)
        averageuse = self.totaluses / totaldist
        self.activewindow = (averageuse << 1) + curpos
        self.usagedensity = None # clear cached usage density if any
        return

    def get_usage_density (self, curpos):
        
        """ Gets the usage density at a particular point.  Tries to
        avoid recalculating if possible. """
        
        if self.usagedensity is not None and curpos < self.activewindow:
            return self.usagedensity
        
        totaldist = self.get_total_dist (curpos)
        averageuse = self.totaluses / totaldist
        self.usagedensity = averageuse / totaldist
        return self.usagedensity

    pass

class BlockData (lowir.RegAllocData):

    """ Each block maintains what the paper refers to as a set of
    Active Nodes.  I find that terminology confusing; basically, each
    basic block is broken into regions.  Within each region the
    allocations of temporaries to storage are constant; when something
    is spilled, a new region is begun.  For each region, a set of
    Allocations are stored, one for each temporary, containing relevant
    statistics.

    During the register selection phase, this structure also stores a
    pointer to the actual storage locations that each temporary ended
    up with.  This means that when selecting registers, we can lookup
    the current location for a temporary, and we can also look and see
    where each temporary was at the beginning of each block. """

    def __init__ (self, llblk):
        self.llblk = llblk
        self.dataversion = 0

        # The preds to the block.  These are uncovered during the initial
        # walk, as it is not stored in the representation.
        self.preds = []

        # A flag indicate whether we need recomputation.  Used to
        # avoid adding us to a list more than once.
        self.recompute = False

        # Maps from TempArgument to Allocation objects for this block
        self.tempmap = util.ObjectDict ()

        # Note: more members will be added when import_allocations() is called
        # see _clear_data() for a description of the members etc
        pass

    def add_pred (self, pred):

        """ Adds a predecessor to this block's list """
        
        self.preds.append (pred)
        return

    def _clear_data (self):

        """ Called by import_allocations().  Clears or initializes the
        region information, flags, etc """

        # Each region is stored as a tuple:
        # [0] -> The Operation where the region begins.  None == the beginning
        # [1] -> List of Allocation objects in registers
        self.regions = [ (None, []) ]

        # For quicker access, the current region is stored here.
        self.curregion = regions[0]

        # A list of the id() of each temporary initially in a
        # register when we called compute().  Used to determine
        # whether to iterate and recompute when preds change their
        # result.  The initialregset is a lazily computed Set used
        # in requires_recomputation() for faster lookup.
        self.initialregtemps = []
        self.initialregset = False
        pass

    # Other internal routines:
    
    def _current_regs (self):
        return self.curregion[1]

    def requires_recomputation (self):

        """ Determines if we need to call compute() again to get good
        results.  This is only called when a predecessor's data has
        changed, or when our original computation did not include a
        predecessor's data because it had not been computed yet.

        The way this works is that we remember the initial set of
        Allocations that ended up in registers; if any of our preds
        now have a new item in a register that was not initially in a
        register when we did our computation, we repeat it.  This is a
        heuristic, but it should work well enough to indicate whether
        the change is significant enough to be worth our time. """

        # If we don't have the initial registers in set form, put it
        # that way.  We don't do this initially because for the
        # majority of blocks we will never call
        # require_recomputation(), and building a set is doubtless
        # cheaper.  I know, it's a ludicrous micro optimization.
        if not self.initialregset:
            self.initialregset = util.Set ()
            for i in self.initialregtemps: self.initialregset.append (i)
            pass
        
        for p in self.preds:
            for palloc in p.regalloc._current_regs():
                if palloc.inreg:
                    if id(palloc.temp) not in self.initialregset:
                        return True
                    pass
                pass
            pass

        return False

    def compute (self, maxregs):

        """ Call to compute the regions and registers for this block;
        first walks to previous blocks and averages the incoming data.
        Then walks operations to compute usage density etc. """
        
        # Get our initial definitions inherited from previous blocks
        self._import_allocations (maxregs)
        
        # Compute usage density for everything we use and define in
        # this block
        curpos = 0               # Operation index
        firstoper = True
        for oper in self.llblk.opers:

            # Normally initialize to False so that we only create one
            # new region per Operation; in the case of the first
            # Operation, though, initialize to True since we are
            # already in a newly created region:
            newregion = firstoper
            firstoper = False
            
            # Go through the sources of this operation.  If the
            # source's address was taken, we must force it to be on
            # the stack and it's not very interesting.  No need to
            # compute an Allocation for it.  Otherwise, find an
            # Allocation: this may go backwards and compare previous
            # blocks, that's okay.  If this allocation is not in a
            # register, we need to find out if it should be.
            for arg in oper.sources:
                if not arg.is_type (argflags.TEMP): continue

                # report which blocks use it so we can tell if its
                # global or not.  Useful to reduce stack slots used
                # without doing much work:
                arg.used_by_block (self.llblk)
                
                if arg.get_addr_taken():
                    continue

                alloc = self._get_allocation (arg, True)
                assert alloc
                alloc.add_use (curpos)
                if not alloc.inreg:

                    # Do we have registers left?
                    availregs = maxregs-len(self._current_regs())
                    
                    if availregs:
                        # Yes!  That's easy, we'll give one to this
                        # guy.  Since no spilling is required, we
                        # don't bother to create a new region.
                        alloc.set_in_reg (True)
                        self._current_regs().append (alloc)
                    else:
                        # No, well, let's check if we can replace
                        # someone with this guy.

                        # Find the lowest item in registers
                        minud, minalloc, minidx = \
                               self.find_minimum_usage_density (curpos)

                        # If the lowest item's usage density is less
                        # than ours, give them the boot.
                        if minud < ud:
                            # If we're spilling someone, we have to
                            # start a new region.  But we can spill
                            # more than one item at a time, so if we
                            # already started a new region don't start
                            # a second one:
                            if not newregion:
                                self._start_new_region (oper)
                                newregion = True
                                pass

                            # Spill the item with minimum usage
                            # density:
                            regslist = self._current_regs()
                            assert minidx < len (regslist)

                            # Clear the old guy's "in register" flag:
                            minalloc.set_in_reg (False)
                            
                            # Set the "in register" flag on the new guy
                            # and put him the old guy's spot in the
                            # list:
                            alloc.set_in_reg (True)
                            regslist[replidx] = alloc

                            # Invalidate the minimum cache computation
                            # since we just removed him!
                            self._invalidate_cached_minimum (minalloc)
                            pass
                        pass
                    pass
                pass

            # Now look at those allocations we write to:
            for arg in oper.dests:

                # Ignore those args that are not TEMPs or must be on
                # the stack.
                if not arg.is_type (argflags.TEMP): continue
                if arg.get_addr_taken():
                    continue

                # Find the Allocation object.  This may create a new
                # one.  Call set_defn() to indicate we found a
                # definition, and the index we found it at.  This clears
                # various statistics, which may well make this argument
                # vulnerable to replacement.
                alloc = self.get_allocation (arg, False)
                alloc.set_defn (idx)

                # Note: we have to invalidate any cached minimums
                # because this one is probably much lower now...
                self._invalidate_cached_minimum (alloc)
                pass
                
            pass
        return
    
    def _import_allocations (self, maxregs):

        """ Goes through our preds and finds all of their
        Allocations.  Groups them by temp and averages the resulting
        values.  Clears out any previously computed regions etc.

        This is an internal method called by self.Compute() """

        self._clear_data ()
        curregs = self._current_regs ()
        
        for p in self.preds:
            plen = len (p.opers)
            for temp, palloc in p.regalloc.current_regs().items():
                try:
                    alloc = self.tempmap[temp]
                except KeyError:
                    alloc = Allocation (temp)
                    self.tempmap[temp] = alloc
                    pass
                
                alloc.add_contributor (plen, palloc)

                # We use a heuristic here: we only attempt to put in
                # registers the items that were in registers from
                # previous blocks.  This is because we assume that
                # only the highest usage densities from other blocks
                # will be highest now.  
                if palloc.inreg and not alloc.inreg:
                    alloc.set_in_reg (True)
                    curregs.append (alloc)
                    self.initialregtemps.append (id (alloc.temp))
                    pass
                pass
            pass

        # Finalize the averages
        for alloc in self.tempmap.values(): alloc.compute_average ()

        # If we have too many candidates for registers, then we need
        # to sort them and pull out the most worthy.
        if len (curregs) > maxregs:
            # We'll need to do a sort.  Use the negative usage density so
            # that those with highest usage density (lowest negative) come
            # first in the list.
            sortable = [ (-x.get_usage_density (0), x) for x in curregs ]
            sortable.sort ()

            # Extract the first 'maxregs' from the sorted list to be in
            # registers.
            del curregs[:]
            for ud, alloc in sortable[:maxregs]: curregs.append (alloc)

            # Clear the in reg flag on the others
            for ud, alloc in sortable[maxregs:]: alloc.set_in_reg (False)
            pass

        return

    def _get_allocation (self, arg, foruse):

        """ Finds an allocation for 'arg'.  If no Allocation is found,
        our behavior depends on the value of 'foruse'.

        If 'foruse' is False, then we want to find the allocation
        because we found a definition.  In that case, we will create a
        new allocation w/ zero'd data and return it.

        If 'foruse' is True, then we want to find the allocation
        because we found a use and need to update the usage density.
        In that case, we will backwards along the registered preds of
        this block in search of a block that has it defined.  All the
        values we find are averaged.  If none are found, then a new
        Allocation is created. 

        This is an internal method called by self.Compute() """

        curallocs = self.current_allocations ()

        # Return the allocation if we can find it.
        try: return curallocs[arg]
        except KeyError: pass

        # If it is for a def'n, fine, return a new Allocation.
        # set_defn() will be called on it later.
        assert not foruse
        res = Allocation (arg)
        curallocs[arg] = res
        return res

    def _invalidate_cached_minimum (self, alloc):

        """ When find_minimum_usage_density is called, we remember the
        item with the lowest usage density.  This function invalidates
        that cache if the answer it is caching is alloc, or always if
        alloc is None. 

        This is an internal method called by self.Compute() """
        
        if alloc and self.cached_minimum[1] is not alloc: return
        self.cached_minimum = None
        return

    def _find_minimum_usage_density (self, curpos):

        """ Finds the register allocation with the lowest usage
        density.  We cache the result if we can to avoid computing it
        when there is no need. 

        This is an internal method called by self.Compute() """

        if not self.cached_minimum:
            curregs = self._current_regs ()
            minud = INT_MAX
            minalloc = None
            minidx = None
            idx = 0
            for alloc in curregs:
                ud = alloc.get_usage_density (curpos)
                if ud < minud:
                    minud = ud
                    minalloc = alloc
                    minidx = idx
                    pass
                idx += 1
                pass
            self.cached_minimum = (minud, minalloc, minidx)
            pass
        return self.cached_minimum
    
    def _start_new_region (self, oper):

        """ Creates a new region in the list, copying the current
        allocations.  Sets the current region to this newly created
        region. 

        This is an internal method called by self.Compute() """
        
        newregion = (oper, list (self._current_regs()))
        self.regions.append (newregion)
        self.curregion = newregion
        pass

    pass

class UsageDensityAllocator (LocalAllocator):

    def subclass_init (self):
        pass

    def primary_selection (self):

        # Find the number of registers available to TEMPoraries.  Note
        # that we may need to use some of these regs for SOFTREGs later!
        # But for now we will work with this number.
        maxregs = len (md.get_registers (regbits.TEMP))

        # Once we do the initial BFS, we reiterate over blocks that are the
        # targets of backedges, and then over any that change as a result.
        # This work list will be initialized with any back edge targets we
        # find during our BFS.
        reiterate = util.WorkList ()

        # ----------------------------------------------------------------------
        # Do the BFS to compute initial values

        # Data for our initial BFS.  Note that we do the initialization on
        # the first block that the first pred of a block normally does:
        bfslist = util.WorkList ()
        bfslist.append (cfg)
        cfg.regalloc = BlockData (cfg)
        cfg.visit (vtag)

        vtag = comp.get_visit_tag(),
        def _process (blk):

            # Compute the data for this block based on what preds we have
            # seen so far.
            assert blk.regalloc
            blk.regalloc.compute ()

            # Now go through our successors on the worklist.  Note that if
            # we have a successor that has already been processed, we toss them
            # on a list for iteration.
            for sblk in blk.successors():
                if sblk.visited (vtag):
                    sblk.regalloc.add_pred (blk)
                    if not sblk.regalloc.recompute:
                        reiterate.append (sblk.regalloc)
                        sblk.regalloc.recompute = True
                        pass
                    pass
                else:
                    sblk.regalloc = BlockData (sblk)
                    sblk.regalloc.add_pred (blk)
                    sblk.visit (vtag)
                    bfslist.append (sblk)
                    pass
                pass

            return

        # Do the initial BFS:
        for blk in bfslist: _process (blk)

        # ----------------------------------------------------------------------
        # Do the iteration to finalize the values

        def _reprocess (blk):

            if blk.regalloc.requires_recomputation ():
                blk.regalloc.compute ()
                for sblk in blk.successors (): reiterate.append (sblk)
                pass

            return

        for blk in reiterate: _reprocess (blk)

        # ----------------------------------------------------------------------
        # After register allocation, we end up with a list of regions per
        # block and within each region a suggestion as to which
        # Temporaries should be in registers.  The job of this code then
        # is to walk over the block graph, assign temporaries and soft
        # registers to actual physical registers, and then generate the
        # bytes.  It does this all in one pass over the graph.
        #
        # It also must generate instructions to manage transitions between
        # blocks: at this point, temporaries may be spilled to the stack,
        # loaded into registers, or moved between registers.
        #
        # It tries its best to obey what the register allocator told it in
        # terms of what to keep in registers, but if forced may spill a
        # temporary to gain its register spot.
        #
        # It also tries when starting a block to use the same register for
        # a temporary that the blocks which have already been generated
        # use; this avoids unnecessary transition code between blocks.

        # A list of stack slots used by this block that can be reused by
        # other basic blocks because they are only used for local values.
        # We use this lame reuse strategy to keep total stack size down
        # while not doing the work to compute conflicts.
        localunusedslots = []
        localusedslots = []
        def _get_local_slot ():
            if localunusedslots: slot = localunusedslots.pop ()
            else: slot = finfo.new_stack_slot ()
            localusedslots.append (slot)
            return slot

        # Returns a MEM location for the temporary 'temp':
        # - handles insertion of the resulting value in the patch lists above
        # - always returns the same value for a given 'temp'
        def create_stack_slot (temp):
            if temp.stackslot: return temp.stackslot
            if temp.is_local: slot = _get_local_slot ()
            else: slot = finfo.new_stack_slot ()
            temp.stackslot = slot
            return slot

        # Creates a new local stack slot for use by a SOFTSTACK arg
        def create_soft_stack_slot ():
            return _get_local_slot ()

        def _assign (blk, vtag):

            # Get the list of registers we can use for temporaries and the
            # list for SoftRegisters.
            tempregs = md.get_registers (regbits.TEMP)
            softregs = md.get_registers (regbits.SOFTREG)

            # When we finish with a soft stack slot we created, we put it
            # in this list and then we can re-use it.
            softstacks = []

            # A list of those TempArguments currently in registers.
            curreglist = []

            # We will do the assignments in a single walk.  We will be
            # visting the regions in reverse order, basically.
            regionidx = -1

            loadregionflag = True

            # Determine the initial set of registers.  This is the set of
            # registers after the last operation, and it is determined by
            # what is in registers for the blocks that follow us.  If a
            # block follows us but has not yet been visited, then we
            # ignore it.  Otherwise, we put a variable in a register
            # initially if all the successors that have it in a register
            # agree on which register it belongs to.  This keeps things
            # simpler since it means that when loading data between blocks
            # a temporary must either be loaded from the stack into a
            # register or stored from a register into the stack, but never
            # moved between registers.
            filter = False
            for succ in blk.successors():
                if not succ.visited (vtag): continue
                for temp, reg in succ.regalloc.initial_registers:
                    if temp.was_assigned_by (blk):
                        if temp.assignment is not reg: 
                            temp.set_assignment (blk, None)
                            filter = True
                            pass
                        pass
                    else:
                        temp.set_assignment (blk, reg)
                        curreglist.append (temp)
                        pass
                    pass
                pass

            # If we cleared any assignment fields to None, purge them from
            # curreglist
            if filter: curreglist = [x for x in curreglist if x.assignment]

            # Now that we know what our initial set of registers is, we
            # can construct the transition code to those successors that
            # have already been generated.  They will need to be patched
            # to point at the appropriate entrance point.
            for succ in blk.successors():
                if not succ.visited (vtag): continue
                firstoper = succ.opers[0]

                # We first want to store anything that is in a register in
                # our block but NOT in a register for them.  Then we want
                # to load anything that is in a register for them but not
                # for us.  This is actually a bit of a pain to
                # compute...  we do it by iterating over the items in
                # register at the beginning of the successor and leaving a
                # mark on each temp.  Then we iterate through the items we
                # have in registers and check for those without a mark.
                # This sucks.
                regvtag = comp.get_visit_tag()
                loads = []
                stores = []
                for temp, reg in succ.regalloc.initial_registers:
                    temp.visit (regvtag)
                    if not temp.was_assigned_by (blk) or not temp.assignment:
                        # Identify the stack slot for this temp:
                        sslot = create_stack_slot (temp)
                        loads.append ( Operation ('MOVE',
                                                  dests=(reg,),
                                                  sources=(sslot,),
                                                  comment="Load: blk transition") )
                        pass
                    pass
                for temp in curreglist:
                    if temp.visited (regvtag): continue
                    if not temp.assignment: continue
                    sslot = create_stack_slot (temp)
                    stores.append ( Operation ('MOVE',
                                               dests=(temp.assignment),
                                               sources=(sslot),
                                               comment="Store: blk transition") )
                    pass
                pass

            # Walk the operations:

            for oper in blk.opers.reverse_iter():

                # Useful subroutines that moves a value from oldloc to
                # newloc by inserting a MOVE Operation after/before the
                # current operation.
                def xfer (fromarg, toarg):
                    moveop = Operation ('MOVE',
                                        dests=(fromarg,),
                                        sources=(toarg,),
                                        comment="Transfer to new home")
                    blk.opers.insert_after (oper, moveop)
                    pass

                # If this is the first Operation in a new region, we need to
                # do some initial region work:
                if loadregionflag:
                    firstoper, newreglist = blk.regalloc.regions[regionidx]
                    regionidx -= 1 # index into our block's region list

                    # Create a set of the temporaries that are in registers
                    newregset = util.ObjectSet ()
                    newregset.extend ([reg.temp for reg in reglist])

                    # Check the current temporary items in registers;
                    # if they should not be, spill them.  Note that some items
                    # may be in here with an assignment of None,
                    for temp in curreglist:
                        if temp not in newregset:
                            newloc = create_stack_slot (temp)
                            oldloc = curlocs[temp]
                            assert newloc is not oldloc and \
                                   oldloc.is_type (argflags.REG)
                            tempregs.append (oldloc)    # return to avail list
                            xfer (newloc, oldloc)
                            curlocs[temp] = newloc
                            pass
                        pass

                    # Create a new list to hold the temporaries currently in
                    # registers.
                    curreglist = []

                    for alloc in reglist:

                        # Check whether this temporary already has a location.
                        # If it does, and it is already in a register, great.
                        # Otherwise, we try to find it a register.
                        temp = alloc.temp

                        # Load the current location.  If it was already in
                        # a register, then go to the next item in reglist:
                        curloc = None
                        if temp.was_assigned_by (blk):
                            curloc = temp.assignment
                            pass

                        # Check we can find a register for this temporary;
                        # if not, put it on the stack.
                        if not curloc.is_type (argflags.REG): 
                            if tempregs: newloc = tempregs.pop ()
                            else: newloc = create_stack_slot (temp)
                        else: newloc = curloc

                        # If we just changed where it was located we have
                        # to move the value from the new home to the old
                        # one.  Note that since we are walking backwards
                        # we must move INTO the old home.
                        if curloc and curloc is not newloc: xfer (newloc, curloc)
                        temp.set_assignment (blk, newloc)
                        if newloc.is_type (argflags.REG): curreglist.append (temp)
                        pass

                    pass

                # When we reach the first Operation in a region, set the flag
                # so we will start the next region afterwards.
                if oper is firstoper:
                    loadregionflag = True
                    pass

                # We need to process the arguments of this Operation.  Because
                # arguments can be recursive we define some subroutines here
                # to do the necessary work:

                # Useful subroutines to do the various kinds of work we
                # have to do:
                def assign_soft_reg (arg):
                    # If we find a SOFTREG that is not currently
                    # assigned, it *must* go in a register.  This
                    # means if there are no registers in the softregs
                    # list, we must steal from tempregs.  If tempregs
                    # is empty, we must spill something.

                    # Already has a home?
                    if arg.was_assigned_by (blk): return

                    if softregs: regloc = softregs.pop ()
                    elif tempregs: regloc = tempregs.pop ()
                    else:
                        # We need to spill.  Pick at random.  Note
                        # that the item is in the register *after*
                        # this point; before this it will be on
                        # the stack.
                        assert curreglist
                        spilltemp = curreglist.pop ()
                        regloc = spilltemp.assignment
                        stackslot = create_stack_slot (spilltemp)
                        xfer (oper, stackslot, regloc)
                        curreglist.remove (spilltemp)
                        spilltemp.set_assignment (blk, stackslot)
                        pass

                    # No matter where we go the register from, it
                    # is now associated with this SOFTREG.
                    arg.set_assignment (blk, regloc)
                    return 

                def assign_temporary (arg):
                    # If we find a TEMP, and it has no home, then it needs
                    # to go to its home on the stack.
                    if arg.was_assigned_by (blk): return
                    stackslot = create_stack_slot (arg)
                    arg.set_assignment (blk, stackslot)
                    return 

                def assign_soft_stack (arg):
                    if arg.was_assigned_by (blk): return
                    if not softstacks:
                        stackslot = create_soft_stack_slot ()
                    else:
                        stackslot = softstacks.pop ()
                        pass
                    arg.set_assignment (blk, stackslot)
                    return 

                def assign_arg (arg):

                    """ Purges the argument in a deep fashion so that it
                    contains no references to a SOFTREG, TEMP, or
                    SOFTSTACK.  Return value is irrelevant. """

                    if arg.is_type (argtypes.SOFTREG):
                        return assign_soft_reg (arg)
                    elif arg.is_type (argtypes.TEMP):
                        return assign_temporary (arg)
                    elif arg.is_type (argtypes.SOFTSTACK):
                        return assign_stack_slot (arg)
                    elif arg.is_type (argtypes.MEM) or arg.is_type (argtypes.ADDR):
                        return assign_arg (arg.base)
                    return

                dstmoves = []
                srcmoves = []
                while 1: # (the break is in the middle of the loop)

                    # Ensure that all Operands that need one have an appropriate
                    # assignment.  This may cause temporaries to be spilled.
                    for src in oper.sources: assign_arg (arg)
                    for dst in oper.dests: assign_arg (arg)

                    # Check to ensure that the Operation's arguments are
                    # all valid.  If so, we're done.
                    cgfunc, dstwraps, srcwraps = md.emit_table.apply (oper)
                    if not dstwraps and not srcwraps: break

                    # Otherwise, new SoftRegisters or SoftStacks were
                    # created so we have to process those.  This may cause
                    # temporaries to be spilled etc, in which case we will
                    # need to REprocess the arguments to the operation,
                    # and keep doing this until everyone is happy and has
                    # an assignment.

                    for argidx, wrapclass in dstwraps:
                        oldarg = oper.dests[argidx]
                        newarg = wrapclass ()
                        oper.dests[argidx] = newarg
                        dstmoves.append (Operation ('MOVE',
                                                    dests=(oldarg,),
                                                    sources=(newarg,)))
                        assign_arg (newarg)
                        pass

                    for argidx, wrapclass in srcwraps:
                        oldarg = oper.sources[argidx]
                        newarg = wrapclass ()
                        oper.sources[argidx] = newarg
                        srcmoves.append (Operation ('MOVE',
                                                    dests=(newarg,),
                                                    sources=(oldarg,)))
                        assign_arg (newarg)
                        pass

                    pass

                # At this point, we have found a location that satisfies
                # everyone, possibly spilling temporaries along the way.
                # Now, we may have accumulated some moves from temporary
                # destinations to the original destination which we must
                # emit.  Likewise for the source moves.  Note that because
                # we put the Operation 'm' in front of this operation, it
                # will be processed by the next iteration of the loop.
                # That's okay, it will handle releasing the soft arguments
                # that were allocated on it.
                for m in dstmoves: blk.opers.insert_after (oper, m)
                for m in srcmoves: blk.opers.insert_before (oper, m)

                # Now go through the destinations: the only thing we
                # really care about is if we see a SOFT{REG|STACKSLOT}
                # that is written, we can free its associated storage.
                # Note that there should be no SOFT* Arguments w/o uses,
                # so they should have storage assigned.  Also SOFT* must
                # be SSA.
                for arg in oper.dests:
                    if arg.is_type (argtypes.SOFTREG):
                        srcloc = arg.assignment
                        arg.set_assignment (None, None)
                        if srcloc.check_flag (regbits.SOFTREG):
                            softregs.append (srcloc)
                        else:
                            tempregs.append (srcloc)
                            # improvement: put something we spilled back
                            # in register?  could put annotation on arg to
                            # remember who we displaced
                            pass
                        pass
                    elif arg.is_type (argtypes.SOFTSTACK):
                        softstacks.append (curlocs[arg])
                        del curlocs[arg]
                        pass
                    pass

                pass  # end of loop over all operations in block

            # Now that we've set up all the storage for this block, we
            # need to introduce any transitions.  By transitions I mean
            # loads and stores that must be put in the beginning of a
            # block to handle moving data from its location in a previous
            # block to its new location.
            # 
            # Unfortunately, since we do all these assignments in a single
            # pass over the blocks we need to handle transitions both in the
            # beginning and in the end of the block.

            # Okay, all done.  Free up any local used slots and make them
            # unused for the next guy.  Also return the highest region id
            # we used so they can start from there and keep the region ids
            # unique across the entire function.
            localunusedslots += localusedslots
            localusedslots = []
            return  # end of _assign()

        return

    pass
