Module spatial_inequality.optimization.lazy_heap

Implements a "lazy heap" data structure, which provides a max-heap behavior but uses lazy evaluation for optimized node update and removal.

Expand source code
"""
Implements a "lazy heap" data structure, which provides a max-heap behavior but
uses lazy evaluation for optimized node update and removal.
"""
import copy

from heapq import heappush, heappop, heapify

class _LazyHeapNode:
    """
    Wrapper class for any item to be added onto the LazyHeap.

    Attributes:
        __lt (function): 'Less than' function to compare _LazyHeapNode
            instances.
        __data (Object): Stored item.
        __is_deleted (bool): Boolean flag to mark node for deletion.
    """
    __lt = None
    __data = None
    __is_deleted = None
    
    def __init__(self, data, lt):
        self.__lt = lt
        self.__data = data
        self.__is_deleted = False
    
    def get_data(self):
        """
        Getter method for the stored (unmodified) item.

        Returns:
            Object: Unmodified stored item.
        """
        return self.__data
    
    def delete(self, freeze_mode="shallow"):
        """
        Marks node for (lazy) deletion and freezes its current state by making a
        copy of its data.

        Args:
            freeze_mode (str): Should be "shallow", "deep" or None.

        Raises:
            AttributeError: Whenever an invalid freeze mode is specified.
        """
        # Freeze object reference
        if freeze_mode == "shallow":
            self.__data = copy.copy(self.__data)
        elif freeze_mode == "deep":
            self.__data = copy.deepcopy(self.__data)
        elif freeze_mode is None:
            pass
        else:
            raise AttributeError(f"Invalid freeze mode '{freeze_mode}'...")
        # Mark node as deleted
        self.__is_deleted = True
    
    def is_deleted(self):
        """
        Checks if a node is marked for (lazy) deletion.

        Returns:
            bool: 'true' if node is marked for deletion, 'false' otherwise
        """
        return self.__is_deleted
    
    def __lt__(self, other):
        """
        Checks whether node is 'less than' another node.

        Args:
            other (_LazyHeapNode): Node to compare against.

        Returns:
            bool: 'true' if node is lesser than its peer, 'false' otherwise.
        """
        return self.__lt(self.get_data(), other.get_data())
    
    def __str__(self):
        return " {}\n| Data: {}\n| Is deleted: {}\n {}".format(
            "-"*16,
            self.__data,
            self.__is_deleted,
            "-"*16
        )
    
    def __repr__(self):
        return str(self)

class LazyHeap:
    """
    This class implements a lazy max heap. In this implementation, lazy refers
    to how nodes are deleted. Instead of immediately removing a node from the
    heap and then fixing the underlying binary tree, each node is
    flagged as 'deleted' and discarded when popped.

    To prevent the heap from getting too large, if too many lazy deletions
    happen without nodes being popped, there is a maximum number of nodes
    allowed before the whole tree is pruned of deleted nodes and rebuilt.

    Attributes:
        __data (list): Heapified list of all items.
        __item_id (function): Function to extract a unique ID from an item.
        __gt (function): 'Greater than' function to compare items in the heap.
        __lazy_eval_map (dict): Mapping from a unique ID to all (non-deleted)
            _LazyHeapNode instances.
        __max_elems (int): Maximum number of elements allowed in the heap.

    Example:
        >>> class SoccerPlayer:
        ...     def __init__(self, name, goals):
        ...         self.name = name
        ...         self.goals = goals
        ...
        >>> # Create three distinct players
        >>> players = [
        ...     SoccerPlayer("A", 10),
        ...     SoccerPlayer("B", 7),
        ...     SoccerPlayer("C", 5)]
        >>> # Initialize max heap with all players
        >>> max_heap = LazyHeap(
        ...     item_id=lambda x: x.name,
        ...     gt=lambda x,y: x.goals > y.goals)
        >>> for player in players:
        ...     heap.push(player)
        >>> print(heap.pop().name)
        'A'
        >>> # Update heap entries
        >>> players[2].goals = 9
        >>> heap.update(players[2])
        >>> print(heap.pop().name)
        'C'
    """
    __data = None
    __item_id = None
    __gt = None
    __lazy_eval_map = None
    
    __max_elems = None
    
    def __init__(self, item_id=lambda x:x, gt=lambda x,y:x>y, max_elems=None):
        self.__data = []
        heapify(self.__data)
        self.__item_id = item_id
        self.__gt = gt
        self.__lazy_eval_map = {}
        self.__max_elems = max_elems
        self.__is_full = False
            
    def push(self, item):
        """
        Wraps a new item with _LazyHeapNode and adds it to the max heap.

        Args:
            item (Object): Item to be added.

        Raises:
            IndexError: Whenever the maximum number of nodes allowed in the heap
                has been reached.
        """
        new_node = _LazyHeapNode(item, self.__gt)
        # Check if heap size has not exceeded its maximum
        is_full = lambda: (self.__max_elems is not None) and (len(self.__data) >= self.__max_elems)
        if is_full():
            # Prune deleted nodes and re-check
            self.__prune_heap()
            if is_full(): raise IndexError("No more values can be added to the heap...")
        # Push item onto heap and update lazy evaluation map
        heappush(self.__data, new_node)
        self.__lazy_eval_map[self.__item_id(item)] = new_node

    def pop(self):
        """
        Retrieves the first non-deleted _LazyHeapNode from the max heap.

        Returns:
            _LazyHeapNode: First non-deleted item in the max heap, or None if
                heap is empty.
        """
        # Lazy evaluation of max heap
        while True:
            node = heappop(self.__data)
            if node.is_deleted():
                continue
            item = node.get_data()
            self.__lazy_eval_map.pop(self.__item_id(item), None)
            return item
    
    def update(self, item):
        """
        Updates an item that already exists in the heap, by marking its previous
        instance as deleted and then pushing a new one onto the heap.

        NOTE: For this method to work as intended, __item_id has to equally
        identify both the updated instance of the item and its previously
        existing one.

        Args:
            item (Object): Item to update.
        """
        # Lazy delete of pre-existing item
        self.__lazy_eval_map[self.__item_id(item)].delete()
        # Re-push item into heap
        self.push(item)
    
    def __prune_heap(self):
        """
        Removes all lazily deleted nodes from the heap and heapifies the
        remaining ones.
        """
        self.__data = list(filter(lambda x: not x.is_deleted(), self.__data))
        heapify(self.__data)

Classes

class LazyHeap (item_id=<function LazyHeap.<lambda>>, gt=<function LazyHeap.<lambda>>, max_elems=None)

This class implements a lazy max heap. In this implementation, lazy refers to how nodes are deleted. Instead of immediately removing a node from the heap and then fixing the underlying binary tree, each node is flagged as 'deleted' and discarded when popped.

To prevent the heap from getting too large, if too many lazy deletions happen without nodes being popped, there is a maximum number of nodes allowed before the whole tree is pruned of deleted nodes and rebuilt.

Attributes

__data : list
Heapified list of all items.
__item_id : function
Function to extract a unique ID from an item.
__gt : function
'Greater than' function to compare items in the heap.
__lazy_eval_map : dict
Mapping from a unique ID to all (non-deleted) _LazyHeapNode instances.
__max_elems : int
Maximum number of elements allowed in the heap.

Example

>>> class SoccerPlayer:
...     def __init__(self, name, goals):
...         self.name = name
...         self.goals = goals
...
>>> # Create three distinct players
>>> players = [
...     SoccerPlayer("A", 10),
...     SoccerPlayer("B", 7),
...     SoccerPlayer("C", 5)]
>>> # Initialize max heap with all players
>>> max_heap = LazyHeap(
...     item_id=lambda x: x.name,
...     gt=lambda x,y: x.goals > y.goals)
>>> for player in players:
...     heap.push(player)
>>> print(heap.pop().name)
'A'
>>> # Update heap entries
>>> players[2].goals = 9
>>> heap.update(players[2])
>>> print(heap.pop().name)
'C'
Expand source code
class LazyHeap:
    """
    This class implements a lazy max heap. In this implementation, lazy refers
    to how nodes are deleted. Instead of immediately removing a node from the
    heap and then fixing the underlying binary tree, each node is
    flagged as 'deleted' and discarded when popped.

    To prevent the heap from getting too large, if too many lazy deletions
    happen without nodes being popped, there is a maximum number of nodes
    allowed before the whole tree is pruned of deleted nodes and rebuilt.

    Attributes:
        __data (list): Heapified list of all items.
        __item_id (function): Function to extract a unique ID from an item.
        __gt (function): 'Greater than' function to compare items in the heap.
        __lazy_eval_map (dict): Mapping from a unique ID to all (non-deleted)
            _LazyHeapNode instances.
        __max_elems (int): Maximum number of elements allowed in the heap.

    Example:
        >>> class SoccerPlayer:
        ...     def __init__(self, name, goals):
        ...         self.name = name
        ...         self.goals = goals
        ...
        >>> # Create three distinct players
        >>> players = [
        ...     SoccerPlayer("A", 10),
        ...     SoccerPlayer("B", 7),
        ...     SoccerPlayer("C", 5)]
        >>> # Initialize max heap with all players
        >>> max_heap = LazyHeap(
        ...     item_id=lambda x: x.name,
        ...     gt=lambda x,y: x.goals > y.goals)
        >>> for player in players:
        ...     heap.push(player)
        >>> print(heap.pop().name)
        'A'
        >>> # Update heap entries
        >>> players[2].goals = 9
        >>> heap.update(players[2])
        >>> print(heap.pop().name)
        'C'
    """
    __data = None
    __item_id = None
    __gt = None
    __lazy_eval_map = None
    
    __max_elems = None
    
    def __init__(self, item_id=lambda x:x, gt=lambda x,y:x>y, max_elems=None):
        self.__data = []
        heapify(self.__data)
        self.__item_id = item_id
        self.__gt = gt
        self.__lazy_eval_map = {}
        self.__max_elems = max_elems
        self.__is_full = False
            
    def push(self, item):
        """
        Wraps a new item with _LazyHeapNode and adds it to the max heap.

        Args:
            item (Object): Item to be added.

        Raises:
            IndexError: Whenever the maximum number of nodes allowed in the heap
                has been reached.
        """
        new_node = _LazyHeapNode(item, self.__gt)
        # Check if heap size has not exceeded its maximum
        is_full = lambda: (self.__max_elems is not None) and (len(self.__data) >= self.__max_elems)
        if is_full():
            # Prune deleted nodes and re-check
            self.__prune_heap()
            if is_full(): raise IndexError("No more values can be added to the heap...")
        # Push item onto heap and update lazy evaluation map
        heappush(self.__data, new_node)
        self.__lazy_eval_map[self.__item_id(item)] = new_node

    def pop(self):
        """
        Retrieves the first non-deleted _LazyHeapNode from the max heap.

        Returns:
            _LazyHeapNode: First non-deleted item in the max heap, or None if
                heap is empty.
        """
        # Lazy evaluation of max heap
        while True:
            node = heappop(self.__data)
            if node.is_deleted():
                continue
            item = node.get_data()
            self.__lazy_eval_map.pop(self.__item_id(item), None)
            return item
    
    def update(self, item):
        """
        Updates an item that already exists in the heap, by marking its previous
        instance as deleted and then pushing a new one onto the heap.

        NOTE: For this method to work as intended, __item_id has to equally
        identify both the updated instance of the item and its previously
        existing one.

        Args:
            item (Object): Item to update.
        """
        # Lazy delete of pre-existing item
        self.__lazy_eval_map[self.__item_id(item)].delete()
        # Re-push item into heap
        self.push(item)
    
    def __prune_heap(self):
        """
        Removes all lazily deleted nodes from the heap and heapifies the
        remaining ones.
        """
        self.__data = list(filter(lambda x: not x.is_deleted(), self.__data))
        heapify(self.__data)

Methods

def pop(self)

Retrieves the first non-deleted _LazyHeapNode from the max heap.

Returns

_LazyHeapNode
First non-deleted item in the max heap, or None if heap is empty.
Expand source code
def pop(self):
    """
    Retrieves the first non-deleted _LazyHeapNode from the max heap.

    Returns:
        _LazyHeapNode: First non-deleted item in the max heap, or None if
            heap is empty.
    """
    # Lazy evaluation of max heap
    while True:
        node = heappop(self.__data)
        if node.is_deleted():
            continue
        item = node.get_data()
        self.__lazy_eval_map.pop(self.__item_id(item), None)
        return item
def push(self, item)

Wraps a new item with _LazyHeapNode and adds it to the max heap.

Args

item : Object
Item to be added.

Raises

IndexError
Whenever the maximum number of nodes allowed in the heap has been reached.
Expand source code
def push(self, item):
    """
    Wraps a new item with _LazyHeapNode and adds it to the max heap.

    Args:
        item (Object): Item to be added.

    Raises:
        IndexError: Whenever the maximum number of nodes allowed in the heap
            has been reached.
    """
    new_node = _LazyHeapNode(item, self.__gt)
    # Check if heap size has not exceeded its maximum
    is_full = lambda: (self.__max_elems is not None) and (len(self.__data) >= self.__max_elems)
    if is_full():
        # Prune deleted nodes and re-check
        self.__prune_heap()
        if is_full(): raise IndexError("No more values can be added to the heap...")
    # Push item onto heap and update lazy evaluation map
    heappush(self.__data, new_node)
    self.__lazy_eval_map[self.__item_id(item)] = new_node
def update(self, item)

Updates an item that already exists in the heap, by marking its previous instance as deleted and then pushing a new one onto the heap.

NOTE: For this method to work as intended, __item_id has to equally identify both the updated instance of the item and its previously existing one.

Args

item : Object
Item to update.
Expand source code
def update(self, item):
    """
    Updates an item that already exists in the heap, by marking its previous
    instance as deleted and then pushing a new one onto the heap.

    NOTE: For this method to work as intended, __item_id has to equally
    identify both the updated instance of the item and its previously
    existing one.

    Args:
        item (Object): Item to update.
    """
    # Lazy delete of pre-existing item
    self.__lazy_eval_map[self.__item_id(item)].delete()
    # Re-push item into heap
    self.push(item)