Per's Public Data Repository

Notes - Data structures

Table of contents for notes on data structures.

Disclaimer: Some of the code in this file was generated by GitHub Copilot. As I learn more about data structures, I will update the implementations, along with unique notes for each.

Basic Data Structures

Stack

Queue

Set

(The following code was generated by Githb Copilot)

class Set:
    def __init__(self):
        self.items = []

    def add(self, item):
        if item not in self.items:
            self.items.append(item)

    def remove(self, item):
        if item in self.items:
            self.items.remove(item)

    def union(self, other_set):
        union_set = Set()
        union_set.items = self.items + other_set.items
        return union_set

    def intersection(self, other_set):
        intersection_set = Set()
        for item in self.items:
            if item in other_set.items:
                intersection_set.add(item)
        return intersection_set

    def difference(self, other_set):
        difference_set = Set()
        for item in self.items:
            if item not in other_set.items:
                difference_set.add(item)
        return difference_set

    def is_subset(self, other_set):
        for item in self.items:
            if item not in other_set.items:
                return False
        return True

Map

(The following code was generated by Githb Copilot)

class Map:
    def __init__(self):
        self.items = []

    def get(self, key):
        for item in self.items:
            if item[0] == key:
                return item[1]
        return None
    
    def put(self, key, value):
        for item in self.items:
            if item[0] == key:
                item[1] = value
                return
        self.items.append([key, value])
    
    def pop(self, key):
        for item in self.items:
            if item[0] == key:
                self.items.remove(item)
                return
    
    def keys(self):
        return [item[0] for item in self.items]

    def values(self):
        return [item[1] for item in self.items]

    def items(self):
        return self.items
    
    def __str__(self):
        return str(self.items)

Array

(The following code was generated by Githb Copilot)

class Array:
    def __init__(self):
        self.items = []

    def get(self, index):
        return self.items[index]

    def put(self, index, value):
        self.items[index] = value

    def size(self):
        return len(self.items)

    def __str__(self):
        return str(self.items)

Graph

(The following code was generated by Githb Copilot)

class Graph:
    def __init__(self):
        self.nodes = set()
        self.edges = defaultdict(list)
        self.distances = {}

    def add_node(self, value):
        self.nodes.add(value)

    def add_edge(self, from_node, to_node, distance):
        self.edges[from_node].append(to_node)
        self.edges[to_node].append(from_node)
        self.distances[(from_node, to_node)] = distance

    def dijkstra(self, initial):
        visited = {initial: 0}
        path = {}

        nodes = set(self.nodes)

        while nodes:
            min_node = None
            for node in nodes:
                if node in visited:
                    if min_node is None:
                        min_node = node
                    elif visited[node] < visited[min_node]:
                        min_node = node

            if min_node is None:
                break

            nodes.remove(min_node)
            current_weight = visited[min_node]

            for edge in self.edges[min_node]:
                weight = current_weight + self.distance[(min_node, edge)]
                if edge not in visited or weight < visited[edge]:
                    visited[edge] = weight
                    path[edge] = min_node

        return visited, path

Tree

(The following code was generated by Githb Copilot)

class Node:
    def __init__(self, data):
        self.data = data
        self.children = []

    def add_child(self, node):
        self.children.append(node)

    def remove_child(self, node):
        self.children = [child for child in self.children if child is not node]

    def traverse(self):
        nodes = [self]
        while nodes:
            node = nodes.pop()
            print(node.data)
            nodes += node.children[::-1]

Advanced Data Structures

Priority Queue

# "cheating" by using heapq, but will implement heap later
import heapq


class PriorityQueue:
    def __init__(self):
        self.elements = []
    
    def is_empty(self):
        return not self.elements
    
    def put(self, item, priority):
        # note: tuples are sorted by first element
        heapq.heappush(self.elements, (priority, item))
    
    def get(self):
        # heapq finds the item with smallest first index
        return heapq.heappop(self.elements)[1]
    
    def __str__(self):
        return str(self.elements)
    

Linked List

(The following code was generated by Githb Copilot. I will add a simplified implementation later)

    class Node:
        def __init__(self, data):
            self.data = data
            self.next = None

    class LinkedList:
        def __init__(self):
            self.head = None

        def append(self, data):
            new_node = Node(data)
            if self.head is None:
                self.head = new_node
                return
            last_node = self.head
            while last_node.next:
                last_node = last_node.next
            last_node.next = new_node

        def prepend(self, data):
            new_node = Node(data)
            new_node.next = self.head
            self.head = new_node

        def insert_after_node(self, prev_node, data):
            if not prev_node:
                print("Previous node is not in the list")
                return
            new_node = Node(data)
            new_node.next = prev_node.next
            prev_node.next = new_node

        def delete_node(self, key):
            cur_node = self.head
            if cur_node and cur_node.data == key:
                self.head = cur_node.next
                cur_node = None
                return
            prev = None
            while cur_node and cur_node.data != key:
                prev = cur_node
                cur_node = cur_node.next
            if cur_node is None:
                return
            prev.next = cur_node.next
            cur_node = None

        def delete_node_at_pos(self, pos):
            cur_node = self.head
            if pos == 0:
                self.head = cur_node.next
                cur_node = None
                return
            prev = None
            count = 1
            while cur_node and count != pos:
                prev = cur_node
                cur_node = cur_node.next
                count += 1
            if cur_node is None:
                return
            prev.next = cur_node.next
            cur_node = None

        def len_iterative(self):
            count = 0
            cur_node = self.head
            while cur_node:
                count += 1
                cur_node = cur_node.next
            return count

        def len_recursive(self, node):
            if node is None:
                return 0
            return 1 + self.len_recursive(node.next)

        def print_list(self):
            cur_node = self.head
            while cur_node:
                print(cur_node.data)
                cur_node = cur_node.next

Heap

Min Heap

(The following code was generated by Githb Copilot)

class MinHeap:
    def __init__(self):
        self.heap = []

    def parent(self, i):
        return (i - 1) // 2

    def left(self, i):
        return 2 * i + 1

    def right(self, i):
        return 2 * i + 2

    def get_min(self):
        return self.heap[0]

    def extract_min(self):
        min_item = self.heap[0]
        self.heap[0] = self.heap[-1]
        self.heap.pop()
        self.min_heapify(0)
        return min_item

    def decrease_key(self, i, key):
        self.heap[i] = key
        while i > 0 and self.heap[self.parent(i)] > self.heap[i]:
            self.heap[i], self.heap[self.parent(i)] = self.heap[self.parent(i)], self.heap[i]
            i = self.parent(i)

    def insert(self, key):
        self.heap.append(key)
        self.decrease_key(len(self.heap) - 1, key)

    def min_heapify(self, i):
        l = self.left(i)
        r = self.right(i)
        if l < len(self.heap) and self.heap[l] < self.heap[i]:
            smallest = l
        else:
            smallest = i
        if r < len(self.heap) and self.heap[r] < self.heap[smallest]:
            smallest = r
        if smallest != i:
            self.heap[i], self.heap[smallest] = self.heap[smallest], self.heap[i]
            self.min_heapify(smallest)

Max Heap

(The following code was generated by Githb Copilot)

class MaxHeap:
    def __init__(self):
        self.heap = []

    def parent(self, i):
        return (i - 1) // 2

    def left(self, i):
        return 2 * i + 1

    def right(self, i):
        return 2 * i + 2

    def get_max(self):
        return self.heap[0]

    def extract_max(self):
        max_item = self.heap[0]
        self.heap[0] = self.heap[-1]
        self.heap.pop()
        self.max_heapify(0)
        return max_item

    def increase_key(self, i, key):
        self.heap[i] = key
        while i > 0 and self.heap[self.parent(i)] < self.heap[i]:
            self.heap[i], self.heap[self.parent(i)] = self.heap[self.parent(i)], self.heap[i]
            i = self.parent(i)

    def insert(self, key):
        self.heap.append(key)
        self.increase_key(len(self.heap) - 1, key)

    def max_heapify(self, i):
        l = self.left(i)
        r = self.right(i)
        if l < len(self.heap) and self.heap[l] > self.heap[i]:
            largest = l
        else:
            largest = i
        if r < len(self.heap) and self.heap[r] > self.heap[largest]:
            largest = r
        if largest != i:
            self.heap[i], self.heap[largest] = self.heap[largest], self.heap[i]
            self.max_heapify(largest)

Adjacency Matrix

(The following code was generated by Githb Copilot)

class AdjacencyMatrix:
    def __init__(self, num_vertices):
        self.matrix = [[0] * num_vertices for _ in range(num_vertices)]
        self.num_vertices = num_vertices

    def add_edge(self, v1, v2, weight=1):
        self.matrix[v1][v2] = weight
        self.matrix[v2][v1] = weight

    def remove_edge(self, v1, v2):
        self.matrix[v1][v2] = 0
        self.matrix[v2][v1] = 0

    def print_matrix(self):
        for row in self.matrix:
            print(row)

Adjacency List

(The following code was generated by Githb Copilot)

class AdjacencyList:
    def __init__(self):
        self.graph = defaultdict(list)

    def add_edge(self, v1, v2):
        self.graph[v1].append(v2)

    def remove_edge(self, v1, v2):
        self.graph[v1].remove(v2)

    def print_graph(self):
        for node in self.graph:
            print(node, "->", self.graph[node])

Hash Table

(The following code was generated by Githb Copilot)

class HashTable:
    def __init__(self):
        self.size = 11
        self.slots = [None] * self.size
        self.data = [None] * self.size

    def put(self, key, data):
        hash_value = self.hash_function(key, len(self.slots))

        if self.slots[hash_value] is None:
            self.slots[hash_value] = key
            self.data[hash_value] = data
        else:
            if self.slots[hash_value] == key:
                self.data[hash_value] = data  # replace
            else:
                next_slot = self.rehash(hash_value, len(self.slots))
                while self.slots[next_slot] is not None and self.slots[next_slot] != key:
                    next_slot = self.rehash(next_slot, len(self.slots))

                if self.slots[next_slot] is None:
                    self.slots[next_slot] = key
                    self.data[next_slot] = data
                else:
                    self.data[next_slot] = data  # replace
                
    def hash_function(self, key, size):
        return key % size

    def rehash(self, old_hash, size):
        return (old_hash + 1) % size

    def get(self, key):
        start_slot = self.hash_function(key, len(self.slots))

        data = None
        stop = False
        found = False
        position = start_slot
        while self.slots[position] is not None and not found and not stop:
            if self.slots[position] == key:
                found = True
                data = self.data[position]
            else:
                position = self.rehash(position, len(self.slots))
                if position == start_slot:
                    stop = True
        return data

    def __getitem__(self, key):
        return self.get(key)

    def __setitem__(self, key, data):
        self.put(key, data)

    def __str__(self):
        return str(self.slots) + "\n" + str(self.data)