added memory_allocator.py

2025-04-22 13:47:37 +00:00 · 2025-01-04 12:27:58 +01:00 · 2025-01-04 12:27:58 +01:00 · 0f1991a8e9
commit 0f1991a8e9
parent 12b1023a9d
1 changed files with 645 additions and 0 deletions
--- a/other/memory_allocator.py
+++ b/other/memory_allocator.py
@ -0,0 +1,645 @@
+"""Memory allocator that provides an interface similar to the C standard library:
+https://en.wikipedia.org/wiki/C_dynamic_memory_allocation . It uses a bytearray to
+simulate the heap, which is controlled similarly to the sbrk() syscall.
+
+It makes a heavy use of inheritance to progressively increase complexity without
+duplicating code across implementations. That lowers barrier entry and allows the
+comparison of test outputs, as all variations share the same interface.
+
+Based on https://github.com/danluu/malloc-tutorial but added features like a more
+advanced realloc() or alignment. Additionally, metadata is stored outside heap. This
+design choice simplifies implementation in Python, since serialization and parsing
+requires more steps compared to C, where such operations can be handled with simple
+struct casting.
+"""
+
+import operator
+from collections.abc import Iterable, Iterator
+from dataclasses import dataclass, field
+from typing import Self
+
+
+class Heap:
+    """The heap is a chunk of the virtual address space, typically used by libc for
+    dynamic memory allocation. Its beginning is fixed, but its end can move, allowing it
+    to grow or shrink. The point where heap ends is known as the 'program break'.
+
+    https://en.wikipedia.org/wiki/Data_segment#Heap
+
+    This is a simulation implemented as a constrained wrapper around a bytearray. It can
+    only add or remove elements at the end.
+    """
+
+    def __init__(self):
+        self.data = bytearray()
+
+    def print(self, show_offset=False, bytes_per_row=4):
+        """Pretty hexadecimal representation.
+
+        >>> heap = Heap()
+        >>> heap.data = bytearray(range(7))
+        >>> heap.print()
+        00 01 02 03
+        04 05 06
+
+        >>> heap.print(show_offset=True)
+        0x00: 00 01 02 03
+        0x04: 04 05 06
+        """
+        for offset in range(0, len(self.data), bytes_per_row):
+            if show_offset:
+                print(f"0x{offset:02X}: ", end="")
+            values = " ".join(
+                f"{c:02X}" for c in self.data[offset : offset + bytes_per_row]
+            )
+            print(values)
+
+    def sbrk(self, increment: int) -> int:
+        """Change the location of the program break and returns the previous location.
+        Analogous to the C function with the same name.
+
+        If the allocator calls this method with a positive/negative number, we
+        will say that it has requested/returned memory to the operative system.
+
+        >>> heap = Heap()
+        >>> heap.data
+        bytearray(b'')
+        >>> heap.sbrk(2)
+        0
+        >>> heap.print()
+        2E 2E
+        >>> heap.sbrk(4)
+        2
+        >>> heap.print()
+        2E 2E 2E 2E
+        2E 2E
+
+        To shrink pass a negative value:
+        >>> heap.sbrk(-5)
+        6
+        >>> heap.print()
+        2E
+        """
+        previous_program_break = len(self.data)
+
+        if increment > 0:
+            # Filling with something recognizable to facilitate debugging
+            self.data.extend(b"." * increment)
+        else:
+            del self.data[:-increment]
+
+        return previous_program_break
+
+    def __len__(self):
+        # This value should be minimized by a good memory allocator, as the OS uses
+        # unallocated memory for other processes.
+        return len(self.data)
+
+    def __getitem__(self, index):
+        # The IndexError exception raised here is analogous to the 'segmentation
+        # fault' signal emitted by the OS
+        return self.data[index]
+
+    def __setitem__(self, index, value: int):
+        if isinstance(index, slice):
+            # The ability of slice assignment to add or remove elements in the middle,
+            # altering the size of the bytearray, is not desired.
+            raise ValueError("Can not use slice assignment")
+        self.data[index] = value
+
+    def strcpy(self, dst: int, src: Iterable[int]):
+        """Copy from the given external bytes, to the specified location in the heap.
+        The name is derived from the C function.
+
+        >>> heap = Heap()
+        >>> heap.data = bytearray(range(8))
+        >>> heap.print()
+        00 01 02 03
+        04 05 06 07
+        >>> heap.strcpy(3, [255] * 3)
+        >>> heap.print()
+        00 01 02 FF
+        FF FF 06 07
+
+        >>> heap.strcpy(4, range(5))
+        Traceback (most recent call last):
+          ...
+        IndexError: bytearray index out of range
+        """
+        for index, value in enumerate(src):
+            self.data[dst + index] = value
+
+
+@dataclass
+class MemoryAllocatorInterface:
+    """These 3 methods are the most fundamental operations in
+    https://en.wikipedia.org/wiki/C_dynamic_memory_allocation#Overview_of_functions
+
+    All other methods added by subsequent implementations are either considered private
+    or helpers for testing.
+    """
+
+    heap: Heap = field(default_factory=Heap)
+
+    def malloc(self, size: int) -> int:
+        """Allocate memory and returns the position in the heap where allocated memory
+        starts.
+
+        It is the client's responsibility not to access memory out of bounds.
+        """
+        raise NotImplementedError
+
+    def free(self, pos: int):
+        """Returns the block to the allocator so it can be reused. `pos` should
+        be a value returned by malloc()
+
+        It is client's responsibility not to access the region after this call.
+        """
+        raise NotImplementedError
+
+    def realloc(self, pos: int, size: int) -> int:
+        """Changes the size of an allocated region, either increasing or decreasing it
+
+        Since there may not be enough space in the surrounding area to grow, it may move
+        and return the position of the new location.
+        """
+        raise NotImplementedError
+
+
+class MemoryAllocatorSimplest(MemoryAllocatorInterface):
+    """
+    >>> heap = Heap()
+    >>> allocator = MemoryAllocatorSimplest(heap)
+
+    >>> allocator.malloc(4)
+    0
+    >>> heap.strcpy(0, b'0000')
+    >>> heap.print()
+    30 30 30 30
+
+    >>> allocator.malloc(2)
+    4
+    >>> heap.strcpy(4, b'11')
+    >>> heap.print()
+    30 30 30 30
+    31 31
+
+    So simple that it does not support free
+    >>> allocator.free(4)
+    Traceback (most recent call last):
+      ...
+    NotImplementedError
+    """
+
+    def malloc(self, size):
+        return self.heap.sbrk(size)
+
+
+@dataclass
+class MemoryAllocatorFree(MemoryAllocatorInterface):
+    """Support free() by storing the size of each allocated and freed block. This
+    metadata will be stored in a linked list outside of the heap.
+
+    >>> allocator = MemoryAllocatorFree()
+    >>> _, p1, _ = [allocator.malloc_and_copy(d) for d in (b'00', b'1111', b'2222')]
+    >>> allocator.heap.print()
+    30 30 31 31
+    31 31 32 32
+    32 32
+    >>> p1
+    2
+    >>> allocator.free(p1)
+    >>> allocator.print()
+    pos  size  free  data
+      0     2        b'00'
+      2     4     Y  b'1111'
+      6     4        b'2222'
+    >>> allocator.malloc_and_copy(b'333')
+    2
+    >>> allocator.print()
+    pos  size  free  data
+      0     2        b'00'
+      2     4        b'3331'
+      6     4        b'2222'
+
+    The second block was reused, instead of growing the heap.
+    """
+
+    @dataclass
+    class Block:
+        size: int
+        pos: int
+        free: bool = False
+        next: Self | None = None
+
+        @property
+        def is_last(self):
+            return self.next is None
+
+    first: Block | None = None
+
+    def malloc(self, size):
+        # Try to reuse an existing free block
+        block = self.find_fit_block(size) or self.request_space(size)
+        block.free = False
+        return block.pos
+
+    def free_block(self, block):
+        assert not block.free
+        block.free = True
+
+    def free(self, pos):
+        self.free_block(self.get(pos))
+
+    def get(self, pos):
+        # It would be quicker if a Dict[pos,Block] were maintained.
+        return next(block for block in self.blocks if block.pos == pos)
+
+    def find_fit_block(self, size) -> Block | None:
+        try:
+            return next(self.find_fit_blocks(size))
+        except StopIteration:
+            return None
+
+    def find_fit_blocks(self, size) -> Iterator[Block]:
+        return (block for block in self.blocks if block.free and block.size >= size)
+
+    @property
+    def blocks(self) -> Iterator[Block]:
+        block = self.first
+        while block:
+            yield block
+            block = block.next
+
+    @property
+    def is_empty(self):
+        return self.first is None
+
+    def get_last(self):
+        *_, last = self.blocks
+        return last
+
+    def request_space(self, size) -> Block:
+        """Grow the heap by requesting space to the OS."""
+        new_block = self.Block(size=size, pos=self.heap.sbrk(size))
+        self.linked_list_append(new_block)
+        return new_block
+
+    def linked_list_append(self, block):
+        if self.is_empty:
+            self.first = block
+        else:
+            last = self.get_last()
+            last.next = block
+        block.next = None
+
+    def print(self):
+        """To facilitate debugging"""
+        fmt = "{pos:>3}  {size:>4}  {free:>4}  {data}"
+        print(fmt.format(free="free", pos="pos", size="size", data="data"))
+        for block in self.blocks:
+            print(
+                fmt.format(
+                    free="Y" if block.free else "",
+                    pos=block.pos,
+                    size=block.size,
+                    data=bytes(self.heap[block.pos : block.pos + block.size]),
+                )
+            )
+
+    def malloc_and_copy(self, data: bytes) -> int:
+        """Helper for tests"""
+        pos = self.malloc(len(data))
+        self.heap.strcpy(pos, data)
+        return pos
+
+
+class MemoryAllocatorSplit(MemoryAllocatorFree):
+    """When reusing a block, split if it's larger than needed.
+
+    >>> allocator = MemoryAllocatorSplit()
+    >>> allocator.free(allocator.malloc(4))
+    >>> allocator.print()
+    pos  size  free  data
+      0     4     Y  b'....'
+    >>> allocator.malloc(2)
+    0
+    >>> allocator.print()
+    pos  size  free  data
+      0     2        b'..'
+      2     2     Y  b'..'
+    >>> allocator.malloc(2)
+    2
+    """
+
+    def malloc(self, size):
+        block = self.find_fit_block(size)
+        if block:
+            self.split(block, size)
+        else:
+            block = self.request_space(size)
+        block.free = False
+        return block.pos
+
+    def split(self, block, size):
+        if block.size <= size:
+            return None
+        new_block = self.Block(
+            size=block.size - size, free=True, pos=block.pos + size, next=block.next
+        )
+        block.size = size  # shrink
+        block.next = new_block  # insert in list
+        return new_block
+
+
+class MemoryAllocatorMerge(MemoryAllocatorSplit):
+    """When freeing a block, attempt to merge it with the next one if it's free.
+
+    >>> allocator = MemoryAllocatorMerge()
+    >>> # allocator = MemoryAllocatorSplit() # uncomment to compare
+    >>> p0 = allocator.malloc(2)
+    >>> p1 = allocator.malloc(2)
+    >>> allocator.free(p1)
+    >>> allocator.free(p0)
+    >>> allocator.print()
+    pos  size  free  data
+      0     4     Y  b'....'
+    >>> allocator.malloc(4)
+    0
+    """
+
+    def free_block(self, block):
+        super().free_block(block)
+        self.optimize_after_free(block)
+
+    def optimize_after_free(self, block):
+        self.merge_with_next(block)
+
+    def merge_with_next(self, block):
+        if block.next is None or not block.next.free:
+            return
+        block.size += block.next.size
+        block.next = block.next.next
+
+    def split(self, block, size):
+        if new_block := super().split(block, size):
+            self.optimize_after_free(new_block)
+            return new_block
+
+
+class MemoryAllocatorMergePrevious(MemoryAllocatorMerge):
+    """Similar to the previous implementation, but also checks the previous block for
+    merging.
+
+    >>> allocator = MemoryAllocatorMergePrevious()
+    >>> # allocator = MemoryAllocatorMerge() # uncomment to compare
+    >>> p0 = allocator.malloc(2)
+    >>> p1 = allocator.malloc(2)
+
+    This time we revert the order to use the new feature:
+    >>> allocator.free(p0)
+    >>> allocator.free(p1)
+
+    >>> allocator.malloc(4)
+    0
+    """
+
+    # Now, the list must be doubly linked.
+    @dataclass
+    class Block(MemoryAllocatorMerge.Block):
+        prev: Self | None = None
+
+        @property
+        def is_first(self):
+            return self.prev is None
+
+    def optimize_after_free(self, block):
+        super().optimize_after_free(block)
+        if block.prev and block.prev.free:
+            self.merge_with_next(block.prev)
+
+    def split(self, block, size):
+        if new_block := super().split(block, size):
+            new_block.prev = block
+            return new_block
+
+    def linked_list_append(self, block):
+        old_last = self.get_last() if not self.is_empty else None
+        super().linked_list_append(block)
+        block.prev = old_last
+
+
+class MemoryAllocatorAlign(MemoryAllocatorMergePrevious):
+    """The address returned by malloc() is a multiple of 4.
+
+    Data alignment improves performance at the CPU instruction level. That benefit is
+    not demonstrated here. More info:
+    https://en.wikipedia.org/wiki/Data_structure_alignment
+
+    >>> allocator_this = MemoryAllocatorAlign()
+    >>> allocator_prev = MemoryAllocatorMergePrevious()
+    >>> for data in [b'333', b'4444', b'55555', b'1']:
+    ...   _ = allocator_this.malloc_and_copy(data)
+    ...   _ = allocator_prev.malloc_and_copy(data)
+
+    >>> allocator_prev.heap.print(show_offset=True)
+    0x00: 33 33 33 34
+    0x04: 34 34 34 35
+    0x08: 35 35 35 35
+    0x0C: 31
+
+    >>> allocator_this.heap.print(show_offset=True)
+    0x00: 33 33 33 2E
+    0x04: 34 34 34 34
+    0x08: 35 35 35 35
+    0x0C: 35 2E 2E 2E
+    0x10: 31 2E 2E 2E
+
+    The downside is that it takes more space.
+    """
+
+    @staticmethod
+    def align(value: int, boundary=4) -> int:
+        """
+        >>> align = MemoryAllocatorAlign.align
+        >>> align(3)
+        4
+        >>> align(4)
+        4
+        >>> align(5)
+        8
+        """
+        res = (value // boundary) * boundary
+        if res < value:
+            res += boundary
+        return res
+
+    def malloc(self, size):
+        return super().malloc(self.align(size))
+
+
+class MemoryAllocatorRealloc(MemoryAllocatorMergePrevious):
+    """
+    >>> allocator = MemoryAllocatorRealloc()
+    >>> [allocator.malloc_and_copy(d) for d in (b'AA', b'BBBB')]
+    [0, 2]
+
+    >>> allocator.print()
+    pos  size  free  data
+      0     2        b'AA'
+      2     4        b'BBBB'
+    >>> allocator.realloc(0, size=3)
+    6
+    >>> allocator.print()
+    pos  size  free  data
+      0     2     Y  b'AA'
+      2     4        b'BBBB'
+      6     3        b'AA.'
+    """
+
+    def realloc(self, pos, size):
+        return self.realloc_block(self.get(pos), size)
+
+    def realloc_block(self, block, size):
+        # Simplest implementation: always copy
+        new_pos = self.malloc(size)
+        self.realloc_copy(dst=new_pos, src=block.pos, size=min(size, block.size))
+        self.free_block(block)
+        return new_pos
+
+    def realloc_copy(self, dst: int, src: int, size):
+        for i in range(size):
+            self.heap[dst + i] = self.heap[src + i]
+
+
+class MemoryAllocatorReallocShrink(MemoryAllocatorRealloc):
+    """When a decrease is requested, avoid the potentially expensive call to
+    realloc_copy()
+
+    >>> allocator = MemoryAllocatorReallocShrink()
+    >>> # allocator = MemoryAllocatorRealloc() # uncomment to compare
+    >>> allocator.malloc(4)
+    0
+    >>> allocator.realloc(0, size=2)
+    0
+    >>> allocator.print()
+    pos  size  free  data
+      0     2        b'..'
+      2     2     Y  b'..'
+    """
+
+    def realloc_block(self, block, new_size):
+        if new_size <= block.size:
+            self.split(block, new_size)
+            return block.pos
+        else:
+            return super().realloc_block(block, new_size)
+
+
+class MemoryAllocatorReallocExtend(MemoryAllocatorRealloc):
+    """When more space is needed and the next block is free, try extending the current
+    block instead of copying to a new location.
+
+    >>> allocator = MemoryAllocatorReallocExtend()
+    >>> allocator.malloc(2)
+    0
+    >>> allocator.free(allocator.malloc(2))
+    >>> allocator.print()
+    pos  size  free  data
+      0     2        b'..'
+      2     2     Y  b'..'
+    >>> allocator.realloc(0, size=4)
+    0
+    >>> allocator.print()
+    pos  size  free  data
+      0     4        b'....'
+    """
+
+    def realloc_block(self, block, new_size):
+        increase = new_size - block.size
+        if (
+            increase > 0
+            and block.next
+            and block.next.free
+            and block.next.size >= increase
+        ):
+            self.merge_with_next(block)
+            self.split(block, new_size)
+            return block.pos
+        return super().realloc_block(block, new_size)
+
+
+class MemoryAllocatorReturnsMemoryToOS(MemoryAllocatorMergePrevious):
+    """
+    >>> allocator = MemoryAllocatorReturnsMemoryToOS()
+    >>> [allocator.malloc(1) for i in range(3)]
+    [0, 1, 2]
+    >>> allocator.free(1)
+    >>> allocator.print()
+    pos  size  free  data
+      0     1        b'.'
+      1     1     Y  b'.'
+      2     1        b'.'
+    >>> allocator.free(2)
+    >>> allocator.print()
+    pos  size  free  data
+      0     1        b'.'
+    """
+
+    def optimize_after_free(self, block):
+        super().optimize_after_free(block)
+        if block.is_last:
+            self.shrink_heap()
+
+    def shrink_heap(self):
+        last = self.get_last() if not self.is_empty else None
+        if last is not None and last.free:
+            self.linked_list_remove_last()
+            self.heap.sbrk(-last.size)
+
+    def linked_list_remove_last(self):
+        last = self.get_last()
+        if last.is_first:
+            # There was only this block,
+            # the list is now empty.
+            self.first = None
+        else:
+            new_last = last.prev
+            new_last.next = None
+
+
+class MemoryAllocatorBestFit(MemoryAllocatorFree):
+    """When multiple blocks fit the requested size, choose the smallest one instead of
+    the first one found.
+
+    >>> allocator = MemoryAllocatorBestFit()
+    >>> # allocator = MemoryAllocatorFree() # uncomment to compare
+    >>> p0, _, p2, _ = map(allocator.malloc_and_copy, [b"A" * 8, b"B", b"CC", b"D"])
+    >>> allocator.free(p0)
+    >>> allocator.free(p2)
+    >>> allocator.print()
+    pos  size  free  data
+      0     8     Y  b'AAAAAAAA'
+      8     1        b'B'
+      9     2     Y  b'CC'
+     11     1        b'D'
+    >>> allocator.malloc(2)
+    9
+    >>> allocator.malloc(8)
+    0
+
+    If you repeat the experiment with the previous implementation you will see that it
+    has to grow the heap during the last malloc(), while this one doesn't. The reason is
+    fragmentation: both have the same total free space, but divided in smaller holes,
+    that can not be merged because they aren't contiguous.
+
+    https://en.wikipedia.org/wiki/Fragmentation_(computing)#External_fragmentation
+    """
+
+    def find_fit_block(self, size):
+        """Override first-fit implementation by best-fit."""
+        blocks = list(self.find_fit_blocks(size))
+        if not blocks:
+            return None
+        blocks.sort(key=operator.attrgetter("size"))
+        return blocks[0]