diff --git a/imgs/Heap-as-array.svg b/imgs/Heap-as-array.svg new file mode 100644 index 0000000..a632998 --- /dev/null +++ b/imgs/Heap-as-array.svg @@ -0,0 +1,677 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 100 + 19 + 36 + 17 + 12 + 25 + 5 + 13 + 8 + 1 + 4 + 9 + 15 + 6 + 11 + + + + + + + + + + + + + + + + 100 + 19 + 36 + 17 + 12 + 25 + 5 + 13 + 8 + 1 + 4 + 9 + 15 + 6 + 11 + + + diff --git a/imgs/tree_actual.jpg b/imgs/tree_actual.jpg new file mode 100644 index 0000000..05378f0 Binary files /dev/null and b/imgs/tree_actual.jpg differ diff --git a/imgs/tree_representation.jpg b/imgs/tree_representation.jpg new file mode 100644 index 0000000..5b8931e Binary files /dev/null and b/imgs/tree_representation.jpg differ diff --git a/main.html b/main.html new file mode 100644 index 0000000..b568c1e --- /dev/null +++ b/main.html @@ -0,0 +1,2065 @@ + + + + + + + +Data Structures + + + + + + + + +
+

Data Structures

+
+

Table of Contents

+
+ +
+
+ +
+

1. Stack

+
+

+A stack is a data structure which only allows insertion and deletion from one end of the array. The insertion is always on the extreme end of the array. The deletion can only be done on the element which was most recently added. +
+It is similar to stacking plates. The plate can only be added at the top of the stack and also only the last added plate to the stack can be removed (which will be on top). +
+Due to this property, Last In elements are removed First from a stack. Therefore, it is called a Last In First Out (LIFO) data structure or a First In Last Out (FILO) data structure. +
+To create a stack, we will keep track of the index which is the top of the array. This top index will increment when we insert element and decrement when we remove element. +

+
+ +
+

1.1. Operation on stack

+
+

+A stack has two operations +

+
+
+
+ +
+

2. Direct Address Table

+
+

+Direct Address Tables are useful when we know that key is within a small range. Then, we can allocate an array such that each possible key gets an index and just add the values according to the keys. +
+This also assumes that keys are integers +

+ +
    +
  • Table creation
  • +
+ +
+
/*
 * Direct address table: one int slot for every key in [min_key, max_key].
 * The slot that belongs to `key` is values[key - min_key].
 */
struct table {
  int *values;     /* heap array of (max_key - min_key + 1) slots, or NULL */
  size_t min_key;  /* smallest valid key (inclusive) */
  size_t max_key;  /* largest valid key (inclusive) */
};

/*
 * Create a table that covers every key in [min_key, max_key].
 *
 * Every slot starts at 0, the same value table_delete writes, so reading a
 * key that was never inserted is well defined.
 *
 * The returned `values` pointer is owned by the caller and must be released
 * with free(). It is NULL when the range is inverted (max_key < min_key) or
 * when the allocation fails, so check it before using the table.
 */
struct table create_table(size_t min_key, size_t max_key) {
  struct table r = { NULL, min_key, max_key };

  if (max_key >= min_key) {
    /* calloc zero-fills the slots and rejects a count * size overflow */
    r.values = calloc(max_key - min_key + 1, sizeof *r.values);
  }
  return r;
}
+
+
+ +
    +
  • Table insert
  • +
+ +
+
/*
 * Store `value` in the slot that belongs to `key`.
 *
 * `key` must lie in [t.min_key, t.max_key]; anything else trips the
 * assertion. The table is passed by value, but `values` is a pointer, so the
 * write is visible through every copy of the table.
 */
void table_insert(struct table t, size_t key, int value) {
  assert(key >= t.min_key && key <= t.max_key && "Key value out of boundry");

  t.values[key - t.min_key] = value;
}
+
+
+ +
    +
  • Table delete
  • +
+ +
+
/*
 * Clear the slot that belongs to `key` by writing 0 into it.
 *
 * `key` must lie in [t.min_key, t.max_key]; anything else trips the
 * assertion. A deleted key is therefore indistinguishable from a key whose
 * stored value is 0.
 */
void table_delete(struct table t, size_t key) {
  assert(key >= t.min_key && key <= t.max_key && "Key value out of boundry");

  t.values[key - t.min_key] = 0;
}
+
+
+ +
    +
  • Table Search / Table Get
  • +
+ +
+
/*
 * Return the value stored in the slot that belongs to `key`.
 *
 * `key` must lie in [t.min_key, t.max_key]; anything else trips the
 * assertion.
 */
int table_get(struct table t, size_t key) {
  assert(key >= t.min_key && key <= t.max_key && "Key value out of boundry");

  return t.values[key - t.min_key];
}
+
+
+ +

+Using direct address tables is very useful when keys are enum values. +

+
+
+ +
+

3. Hash Table

+
+

+When the set of possible keys is large, it is impractical to allocate a table big enough for all keys. In order to fit all possible keys into a small table, rather than directly using keys as the index for our array, we will first calculate a hash for it using a hash function. Since we are relying on hashes for this addressing in the table, we call it a hash table. +
+
+For a given key \(k_i\) in direct address table, we store value in \(table[k_i]\). +
+
+For a given key \(k_i\) in hash table, we store value in \(table[h(k_i)]\), where \(h()\) is the hash function. +
+
+So the main purpose of the hash function is to reduce the range of array indices. +

+
+ +
+

3.1. Collision

+
+

+Because we are reducing the range of indices, the hash function may hash two keys to the same slot. This is called a collision. +
+
+We should try to find a hash function which will minimise the number of collisions. +
+
+The number of keys is going to be greater than number of slots in table. Therefore avoiding all collisions is not possible. +
+There are two ways we will look at to resolve collision. +

+
    +
  1. Chaining
  2. +
  3. Open addressing
  4. +
+
+ +
+

3.1.1. Chaining

+
+

+In chaining, rather than storing values directly in the table slots, we keep a linked list at each slot which stores (key, value) pairs. +
+
+When the hash gives us a slot, we will add the value to linked list at that slot. +

+ +
    +
  • Linked List structure
  • +
+ +
+
/* One node of a chain: a (key, value) pair plus the link to the next node. */
struct linked_list {
  size_t key;                /* key this entry was stored under */
  int value;                 /* value associated with the key */
  struct linked_list *next;  /* next node in the chain, NULL at the end */
};
+
+
+ +
    +
  • Table structure
  • +
+ +
+
/*
 * Chained hash table: an array of list heads, one per slot.
 *
 * `table` is a pointer to the slot array rather than a flexible array member,
 * because a flexible array member must be the last member of a struct and
 * would make passing the struct by value drop the slots.
 */
struct table {
  struct linked_list **table;  /* table[i] is the head of the chain in slot i */
  size_t table_size;           /* number of slots in `table` */
};
+
+
+ +
    +
  • Insertion
  • +
+ +

+Insertion can be done in \(\theta (1)\) time if we assume that key being inserted is not already in the linked list. But we can add a check to see if the key was already inserted and modify that value. +

+ +
+
// linked_list_add(struct linked_list * ll, size_t key, int value)
+// adds the given key,value to the start of the list
+void chained_hash_insert(struct table t, size_t key, int value){
+  linked_list_add(t.table[ h(key) ], key ,value);
+}
+
+
+ +
    +
  • Get / Search
  • +
+ +
+
// linked_list_search(struct linked_list * ll, size_t key)
+// gets the value stored with the given key
+void chained_hash_get(struct table t, size_t key){
+  return linked_list_search(t.table[ h(key) ], key);
+}
+
+
+ +
    +
  • Delete
  • +
+ +
+
// linked_list_delete(struct linked_list * ll, size_t key)
+// delete the node with the given key
+void chained_hash_delete(struct table t, size_t key){
+  linked_list_delete(t.table[ h(key) ], key);
+}
+
+
+
+
+ +
+

3.1.2. Performance of chaining hash table

+
+

+The load factor is defined as number of elements per slot and is calculated as +\[ \alpha \text{(Load factor)} = \frac{\text{number of elements in hash table}}{\text{number of slots in hash table}} \] +The worst case for chaining is when all keys are assigned to a single slot. In this case searching for an element takes \(\theta (n)\) time. +
+
+If we assume that any given element is equally likely to be hashed into any of the slots, this assumption is called simple uniform hashing. +
+
+If we also assume that the hash function takes constant time, then in the average case, the time complexity for searching a key in the chaining hash table is +\[ \text{Average Case Searching} : \theta (1 + \alpha) \] +

+
+
+
+

3.1.3. Open Addressing

+
+

+In open addressing, all the key and value pair of entries are stored in the table itself. Because of this, the load factor \(\left( \alpha \right)\) can never exceed 1. +
+
+When we get a key whose slot is already taken, we will look for another empty slot. This is done by what is called probing. To get which slot to check next, we have various methods. +
+
+The sequence in which empty slots are looked for is fixed for given key, this sequence is called probe sequence.
+It is necessary to keep probe sequence fixed for any given key, so that we can search for it later. +

+
+ +
    +
  1. Linear probing
    +
    +

    +For a given ordinary hash function \(h(k)\), the linear probing uses the hash function +\[ linear\_h(k, i) = (h(k) + i)\ mod\ m \] +where \(i = 0, 1, \ldots, m - 1\) is the probe number. We refer to \(h(k)\) as the auxiliary hash function. +
    +
    +In linear probing, we first check the slot [h(k)], if it is not empty, we check [h(k) + 1] then [h(k) + 2] … up to slot [m - 1], after which we wrap around to [0], [1] … up to [h(k) - 1], till we have checked all the slots. +
    +
    +Linear probing is easy to implement, but it suffers from primary clustering. In long runs of linear probing, keys tend to cluster together. This causes the performance of operations on hash table to degrade. The time to query a random element from table degrades to \(\theta (n)\). +

    +
    +
  2. + +
  3. Quadratic probing
    +
    +

    +For given auxiliary hash function \(h(k)\), the quadratic probing uses +\[ quadratic\_h(k, i) = \left( h(k) + c_1i + c_2i^2 \right) \ mod\ m \] +Where, \(c_1\) and \(c_2\) are positive auxiliary constants. +

    +
      +
    • If m is not considered, we just assume \(c_1 = 0, c_2 = 1\), this is the simplest form of quadratic probing.
    • +
    • For \(m = 2^n\), a good choice for auxiliary constants is \(c_1=c_2=1/2\).
    • +
    • For \(m = n^p\) where m, n and p are positive integers greater or equal to 2, constants \(c_1 = 1, c_2 = n\) are a good choice.
    • +
    + +

    +Quadratic probing works much better than linear probing. +
    +
    +If \(quadratic\_h(k_1, 0) = quadratic\_h(k_2,0)\), then that implies that all \(quadratic\_h(k_1, i) = quadratic\_h(k_2,i)\), i.e, they will have the same probe sequence. This leads to a probe sequence getting clustered. This is called secondary clustering. This also affects performance but not as drastically as primary clustering. +

    +
    +
  4. +
  5. Double Hashing
    +
    +

    +Double hashing is one of the best available methods for open addressing.
    +Double hashing uses two auxiliary hashing functions. +\[ double\_h(k, i) = \left( h_1(k) + i \times h_2(k) \right) \ mod\ m \] +The value of \(h_2(k)\) must be relatively prime (i.e, coprime) to number of slots (m).
    +

    +
      +
    • A convenient way to ensure this is let m be a power of 2 and \(h_2(k)\) be a hash function that always produces an odd number.
    • +
    • Another way is to let m be a prime and make \(h_2(k)\) such that it always produces a positive integer less than m.
    • +
    +

    +If we use one of the above two methods (either m is a power of 2 or a prime), then double hashing improves over linear and quadratic probing since keys will have distinct probe sequences. +
    +
    +When using the above values of m, performance of double hashing is very close to the performance of "ideal" scheme of uniform hashing. +Performance of open addressing: +In open addressing load factor \(\left( \alpha \right) \le 1\). We will assume uniform hashing i.e, any element is equally likely to be hashed in any slot. We will also assume that for any key, each possible probe sequence is equally likely. +
    +
    +Under these assumptions, for a load factor \(\alpha < 1\), the expected number of probes in an unsuccessful search is at most \(1/(1 - \alpha )\) +
    +This means that for a constant load factor, an unsuccessful search will run in \(\theta (1)\) time. +
    +
    +The number of probes on average for inserting an element under these assumptions is at most \(1/(1- \alpha )\) +
    +The number of probes on average in a successful search is at most \(\frac{1}{\alpha} \ln\left( \frac{1}{1-\alpha} \right)\) +

    +
    +
  6. +
+
+
+
+

3.2. Hash Functions

+
+

+A good hash function will approximately satisfy the simple uniform hashing, which means that any element is equally likely to be hashed to any slot. +

+ +

+\[ m : \text{Number of slots in hash table} \] +\[ n : \text{Number of elements in hash table} \] +

+ +

+Suppose we knew that our keys are from a set of real numbers and the keys are picked uniformly. In this case, we could simply use the hash function \(h(k) = floor(mk)\). +
+
+Similarly, in many cases we can make a reasonably good hash function if we know the distribution of keys. +
+
+We will look at a few ways to make a hash function. +

+
+ +
+

3.2.1. The division method

+
+

+In division method, we map a key \(k\) into one of the \(m\) slots by taking the remainder of k divided by m. +\[ h(k) = k\ mod\ m = k\ \%\ m \] +In most cases, +\[ m : \text{Number of slots in hash table} \] +But there are some cases where \(m\) is chosen to be something else. +

+
    +
  • If \(m\) is a power of 2, then \(k\ mod\ m\) will give us the least significant \(log_2m\) bits of \(k\). When making a hash function, we want a function that depends on all bits of the key. So, we should not use this method if m is a power of 2.
  • +
  • A prime number not close to a power of 2 is a good choice for \(m\) in many cases. So when deciding the number of slots for the hash table, we can try to make \(m\) a prime which will accommodate our elements with a low load factor.
  • +
+
+
+ +
+

3.2.2. The multiplication method

+
+

+In multiplication method, we first multiply the key \(k\) with a constant \(A\) which is in range \(0 < A < 1\). Then we get the fractional part of \(kA\). Then we multiply the fractional part by \(m\) and floor it to get the hash. +\[ h(k) = floor(m \times decimal\_part(kA) ) \] +The advantage of multiplication method is that we can choose any value of \(m\). We can even choose \(m\) to be a power of 2. +
+We can choose any value of \(A\). The value depends on characteristics of data, +\[ A \approx \frac{\sqrt{5} - 1}{2} \] +will work reasonably well. +
+
+Example, Suppose +

+ +

+\[ key\ (k) = 1234 \] +\[ m = 128 \] +And our value of \(A\) is, +\[ A = 0.618 \] +Then to get our \(h(k)\), +\[ kA = 762.612 \] +\[ decimal\ part(kA) = 0.612 \] +\[ floor(m \times decimal\_part(kA) ) = h(k) = 78 \] +

+ +

+In C language, +

+
+
/*
 * Multiplication-method hash: floor(m * fractional_part(key * A)), A = 0.618.
 *
 * key: the key to hash.
 * m:   number of slots in the table.
 * Returns the slot chosen for `key`; for the table sizes used in these notes
 * the result lies in [0, m).
 */
size_t hash(size_t key, size_t m) {
  const double A = 0.618;
  double kA = (double)key * A;
  // get decimal part only: kA is non-negative and smaller than key, so
  // truncating it through size_t is a floor that cannot overflow
  double fraction = kA - (double)(size_t)kA;
  // floor the product of decimal part and m (truncation == floor for
  // non-negative values, so <math.h> is not needed)
  return (size_t)((double)m * fraction);
}
+
+
+
+
+ +
+

3.2.3. Mid square method

+
+

+In this method, we square the keys and then we choose some digits from the middle. +Example, +\[ h(10) = middle\ digit \left( 10 \times 10 \right) = middle\ digit (100) = 0 \] +\[ h(11) = middle\ digit \left( 11 \times 11 \right) = middle\ digit (121) = 2 \] +\[ h(12) = middle\ digit \left( 12 \times 12 \right) = middle\ digit (144) = 4 \] +With huge numbers, we need to take care of overflow conditions in this method. +

+
+
+ +
+

3.2.4. Folding method

+
+

+While this method can be used on integers, this method is usually used where the key is segmented. For example in arrays or when key is a string. +
+
+In this method, we add all of the segments and then we mod it with the number of slots. +\[ h(k) = \left( \text{Sum of all the segments} \right) mod\ m \] +Example, for string "hello" +
+sum = 'h' + 'e' + 'l' + 'l' + 'o' +
+sum = 104 + 101 + 108 + 108 + 111 = 532 +
+
+If m = 100, then +
+h(k) = 532 mod 100 +
+h(k) = 32 +

+
+
+
+ +
+

3.3. Universal Hashing

+
+

+TODO: Basics of universal hashing. +

+
+
+
+

3.4. Perfect Hashing

+
+

+NOTE: This doesn't seem to be in B.Tech syllabus, but it seems cool. +
+

+
+
+
+ +
+

4. Representing rooted trees using nodes

+
+

+We can represent trees using nodes. A node only stores a single element of the tree. What is a node will depend on the language being used. +
+In C, we make a struct which will store the element and pointers to other node structs. +

+ +
+
/* A tree node: one element plus pointers to the nodes below it. */
struct tree_node {
  int element;                    /* the value stored in this node */
  struct tree_node *left_child;   /* left child, NULL when absent */
  struct tree_node *right_child;  /* right child, NULL when absent */
};
+
+
+

+
+In languages with OOP, we create a node class which will store references to other node objects. +

+
+
/** A binary-tree node that holds one int and references to its two children. */
class Node {
    int value;
    Node left;   // left child, null when absent
    Node right;  // right child, null when absent

    /** Creates a leaf: stores the value and leaves both children null. */
    Node(int value) {
        this.value = value;
        this.left = null;
        this.right = null;
    }
}
+
+
+
+ +
+

4.1. Fixed number of children

+
+

+When we know how many children any given node can have, i.e, the number of children is bounded, we can just use references or pointers to the nodes directly. +
+For example, if we know we are making a binary tree, then we can just store references to the left child and the right child. +

+ +
+
/* Node of a tree with at most two children per node. */
struct tree_node {
  int element;                    /* the value stored in this node */
  struct tree_node *left_child;   /* left child, NULL when absent */
  struct tree_node *right_child;  /* right child, NULL when absent */
};
+
+
+
+
+ +
+

4.2. Unbounded number of children

+
+

+When we don't know how many children any given node will have, any node can have any number of children, so we can't just use a fixed set of references. We could create an array of references to nodes, but some nodes will only have one or two children and some may have no children. This will lead to a lot of wasted memory. +
+There is a way to represent such trees without wasting any memory. This is done by using sibling references or pointers. +
+

+
+
/*
 * Node of a tree with an unbounded number of children
 * (left-child / right-sibling representation).
 */
struct tree_node {
  int element;                      /* the value stored in this node */
  struct tree_node *left_child;     /* first (leftmost) child, NULL when absent */
  struct tree_node *right_sibling;  /* next sibling to the right, NULL when absent */
};
+
+
+ +

+The right sibling pointer will point to the right sibling of the node. This allows us to chain siblings and have unbounded number of siblings to the given node, therefore having unbounded number of children to any given parent. To make this approach easier to use, we can also add a pointer back to the parent node, though it is not compulsory. +

+ +
+
/*
 * Left-child / right-sibling tree node that also links back to its parent.
 */
struct tree_node {
  struct tree_node *parent;         /* node above this one, NULL for the root */

  int element;                      /* the value stored in this node */

  struct tree_node *left_child;     /* first (leftmost) child, NULL when absent */
  struct tree_node *right_sibling;  /* next sibling to the right, NULL when absent */
};
+
+
+ +

+So a tree which is like : +
+tree_actual.jpg +
+
+can be represented using references and pointers as : +
+tree_representation.jpg +
+

+
+
+
+ +
+

5. Binary Search Trees

+
+

+A tree where any node can have only two child nodes is called a binary tree. +
+A binary search tree is a tree where for any given node the nodes stored in left sub-tree are less than the parent node and the nodes stored in right sub-tree are greater than the parent node (or vice versa). So the left sub-tree always has smaller elements and the right sub-tree always has greater elements. +
+
+This property allows us to easily search for elements in the data structure. We start our search at the root node. If the element we want is less than the current node, we will go to the left node, else we will go to the right node. The concept is similar to the binary search on arrays. +

+ +

+In C, we can make a binary tree as +

+
+
/* Node of a binary (search) tree. */
struct binary_tree {
  int value;                        /* key stored in this node */
  struct binary_tree *left_child;   /* left sub-tree, NULL when empty */
  struct binary_tree *right_child;  /* right sub-tree, NULL when empty */
};
+
+
+
+ +
+

5.1. Querying a BST

+
+

+Some common ways in which we usually query a BST are searching for a node, minimum & maximum node and successor & predecessor nodes. We will also look at how we can get the parent node for a given node, if we already store a parent pointer then that algorithm will be unnecessary. +

+
+
+

5.1.1. Searching for node

+
+

+We can search for a node very effectively with the help of binary search tree property. The search will return the node if it is found, else it will return NULL. +

+
+
struct binary_tree *
+search_recursively(struct binary_tree * root, int value){
+  // If we reach a null, then value is not in tree
+  if(root == NULL)
+    return NULL;
+  // if we found the value, return the current node
+  if(root->value == value)
+    return root;
+  // compare value we are looking for
+  // and go to either left or right sub-tree
+  if(value < root->value)
+    return search_recursively(root->left, value);
+  else
+    return search_recursively(root->right, value);
+}
+
+
+

+We can also search iteratively rather than recursively. +

+
+
struct binary_tree *
+search_iterative(struct binary_tree * root, int value){
+  while(root != NULL){
+    // if we found the value, return the current node
+    if(root->value == value) return root;
+    // compare value and go to left or right sub-tree
+    root = (value < root->value) ? root->left : root->right;
+  }
+  // if not found then return NULL
+  return NULL;
+}
+
+
+
+
+
+

5.1.2. Minimum and maximum

+
+

+Finding the minimum and maximum is simple in a Binary Search Tree. The minimum element will be the leftmost node and maximum will be the rightmost node. We can get the minimum and maximum nodes by using these algorithms. +

+
    +
  • For minimum node
  • +
+
+
struct binary_tree * minimum(struct binary_tree * root){
+  if(root == NULL) return NULL;
+  while(root->left != NULL)
+    root = root->left;
+  return root;
+}
+
+
+
    +
  • For maximum node
  • +
+
+
struct binary_tree * maximum(struct binary_tree * root){
+  if(root == NULL) return NULL;
+  while(root->right != NULL)
+    root = root->right;
+  return root;
+}
+
+
+
+
+ +
+

5.1.3. Find Parent Node

+
+

+This algorithm will return the parent node. It uses a trailing node to get the parent. If the root node is given, then it will return NULL. This algorithm makes the assumption that the node is in the tree. +

+
+
struct binary_tree * 
+find_parent(struct binary_tree * tree, struct binary_tree * node){
+  if(tree == node) return NULL;
+
+  struct binary_tree * current_node = tree;
+  struct binary_tree * trailing_node = tree;
+
+  while(current_node != node){
+    trailing_node = current_node;
+    current_node = (node->value < current_node->value) ?
+      current_node->left :
+      current_node->right;
+  }
+
+  return trailing_node;
+}
+
+
+
+
+
+

5.1.4. Is ancestor

+
+

+This algorithm will take two nodes, ancestor and descendant. Then it will check if ancestor node is really the ancestor of descendant node. +

+
+
bool
+is_ancestor(struct binary_tree *ancestor,
+            struct binary_tree *descendant){
+  // both ancestor and descendant
+  // should not be NULL
+  if(ancestor == NULL || descendant == NULL)
+    return false;
+
+  while(ancestor != NULL){
+    if(ancestor == descendant) return true;
+    ancestor = (descendant->value < ancestor->value) ?
+               ancestor->left :
+               ancestor->right;
+  }
+  return false;
+}
+
+
+
+
+
+

5.1.5. Successor and predecessor

+
+

+We often need to find the successor or predecessor of an element in a Binary Search Tree. The search for predecessor and successor is divided into two cases. +

+
+ +
    +
  1. For Successor
    +
    +
    +
    // get successor of x
    +struct binary_tree *
    +successor(struct binary_tree * tree, struct binary_tree * x){
    +  // case 1 : right subtree is non-empty
    +  if(x->right != NULL){
    +    return minimum(x->right);
    +  }
    +  // case 2 : right subtree is empty
    +  struct binary_tree * y = find_parent(tree, x);
    +  while(y != NULL){
    +    if(is_ancestor(y, x) && is_ancestor(y->left, x)) return y;
    +    y = find_parent(tree, y);
    +  }
    +  return NULL;
    +}
    +
    +
    +

    +Case 1 : If the node x has a right subtree, then the minimum of right subtree of x is the successor. +
    +Case 2 : If the node x has no right subtree, then successor may or may not exist. If it exists, the successor node will be the ancestor of x whose own left node is also the ancestor of x. +

    +
    +
  2. +
  3. For Predecessor
    +
    +
    +
    struct binary_tree *
    +predecessor(struct binary_tree * tree, struct binary_tree * x){
    +  // case 1 : left subtree is non-empty
    +  if(x->left != NULL){
    +    return maximum(x->left);
    +  }
    +  // case 2 : left subtree is empty
    +  struct binary_tree * y = find_parent(tree, x);
    +  while(y != NULL){
    +    if(is_ancestor(y, x) && is_ancestor(y->right, x)) return y;
    +    y = find_parent(tree, y);
    +  }
    +  return NULL;
    +}
    +
    +
    +

    +Case 1 : If the node x has a left subtree, then the maximum of left subtree of x is the predecessor. +
    +Case 2 : If the node x has no left subtree, then predecessor may or may not exist. If it exists, the predecessor node will be the ancestor of x whose own right node is also the ancestor of x. +

    +
    +
  4. +
+
+
+
+

5.2. Inserting and Deleting nodes

+
+

+When inserting and deleting nodes in BST, we need to make sure that the Binary Search Tree property continues to hold. Inserting node is easier in a binary search tree than deleting a node. +

+
+
+

5.2.1. Insertion

+
+

+Insertion is simple in a binary search tree. We search for the node we want to insert in the tree and insert it where we find first NULL spot. +

+
+
void
+insert_node(struct binary_tree ** tree, struct binary_tree * node){
+  // if found a null spot, insert the node
+  if(*tree == NULL){
+    *tree = node;
+    return;
+  }  
+  if(node->value < (*tree)->value){
+    // the node is to be inserted into left subtree
+    struct binary_tree ** left_tree = &((*tree)->left);
+    insert_node(left_tree, node);
+  }else{
+    // the node is to be inserted into right subtree
+    struct binary_tree ** right_tree = &((*tree)->right);
+    insert_node(right_tree, node);
+  }
+}
+
+
+

+The recursive algorithm for inserting into a Binary search tree is simpler than the iterative algorithm. +
+
+The algorithm for iterative insertion is +

+
+
void
+insert_node(struct binary_tree **tree, struct binary_tree * node){
+  // if no nodes in tree, then just node and return
+  if((*tree) == NULL){
+    *tree = node;
+    return;
+  }
+
+  struct binary_tree ** current_node = tree;
+  struct binary_tree ** trailing_node = tree;
+
+  // look for an empty place using current_node
+  while(*current_node != NULL){
+    trailing_node = current_node;
+    current_node = (node->value < (*current_node)->value) ?
+      &((*current_node)->left) : &((*current_node)->right);
+  }
+
+  // we need to insert node on the trailing node
+  if(node->value < (*trailing_node)->value)
+    (*trailing_node)->left = node;
+  else
+    (*trailing_node)->right = node;
+}
+
+
+
+
+
+

5.2.2. Deletion

+
+

+Deletion in Binary Search Trees is tricky because we need to delete nodes in a way that the property of the Binary Search Tree holds after the deletion of the node. So we first have to remove the node from the tree before we can free it. +
+
+TODO : Write four cases of node deletion here +

+
+
    +
  1. Implementation in code
    +
    +

    +We also use a helper function called Replace Child for deletion of node. This function will simply take parent node, old child node and new child node and replace old child with new child. +

    + +
    +
    void
    +replace_child(struct binary_tree *parent,
    +              struct binary_tree *old_child,
    +              struct binary_tree *new_child){
    +  if(parent->left == old_child) parent->left = new_child;
    +  else parent->right = new_child;
    +}
    +
    +
    + +

    +We will create a function that will remove the root node from a given subtree and then return the root node of the resulting subtree. +This will allow us to apply the remove root node function on any node and then reattach the new subtree. +
    +
    +Making remove root node a different function will also allow us to not worry about attaching the subtree immediately in the same function. +

    + +
    +
    struct binary_tree *
    +remove_root_node(struct binary_tree *root){
    +  // case 1 : no child
    +  // this case can be skipped in real implementation
    +  // as it is covered by the case 2
    +  if(root->left == NULL && root->right == NULL){
    +    return NULL;
    +  }
    +
    +  // case 2 : one child
    +  if(root->left == NULL){
    +    return root->right;
    +  }else if(root->right == NULL){
    +    return root->left;
    +  }
    +
    +  struct binary_tree *successor = minimum(root->right);
    +  // case 3 : two child and successor is right node of root node
    +  if(successor == root->right){
    +    successor->left = root->left;
    +    return successor;
    +  }
    +
    +  // case 4 : two child and successor is not the right node of root node
    +  struct binary_tree *successor_parent = find_parent(root, successor);
    +  replace_child(successor_parent, successor, successor->right);
    +  successor->left = root->left;
    +  successor->right = root->right;
    +  return successor;
    +}
    +
    +
    + +

    +Now we can make a delete node function which will remove the node, reattach the subtree and also free or delete the node. +

    + +
    +
    /*
     * Remove `node` from the tree whose root pointer is `*tree`, reattach
     * what remains of its sub-tree, and free the node.
     *
     * `node` must be a node of that tree. `*tree` is updated when the root
     * itself is deleted.
     */
    void
    delete_node(struct binary_tree **tree, struct binary_tree *node){
      // look the parent up first, while the tree is still untouched
      struct binary_tree *parent = find_parent(*tree, node);
      struct binary_tree *new_root = remove_root_node(node);

      if(node == (*tree)){
        // if deleting root node of tree
        (*tree) = new_root;
      }else{
        // when not deleting root node of tree
        replace_child(parent, node, new_root);
      }
      free(node);
    }
    +
    +
    +
    +
  2. +
+
+
+ +
+

5.3. Performance of BST

+
+

+The performance of the search operation depends on the height of the tree. If the tree has \(n\) elements, the height of a binary tree can be between \(n\) and \(floor\left( 1+ log_2(n) \right)\). +
+
+To perform an operation on BST, we need to find the node where we have to perform the operation. Since even in worst case we only need to traverse the height of the search tree to search for any node, the time taken to perform any operation on a Binary Search Tree is \(\theta (h)\) where, \(h\) is the height of the tree. +
+
+A binary tree with height of \(floor(1 + log_2(n))\) is called a balanced binary tree, otherwise it is an unbalanced tree. A balanced binary tree is the shortest height a binary tree with that number of nodes can have. +
+
+The worst case is when tree has a single branch, making the height of tree n. In this case, the worst case for any operation takes \(\theta (n)\) time. +
+A balanced binary search tree in worst case for any operation will take \(\theta (log_2n)\) time. +

+
+
+ +
+

5.4. Traversing a Binary Tree

+
+

+There are three ways to traverse a binary tree, inorder tree walk, preorder tree walk and postorder tree walk. All three algorithm will take \(\theta (n)\) time to traverse the \(n\) nodes. +

+
+ +
+

5.4.1. Inorder tree walk

+
+

+This algorithm is named so because it first traverses the left sub-tree recursively, then the node value and then traverses right sub-tree recursively. +

+ +
+
/*
 * Print the values of the sub-tree rooted at `node` in inorder:
 * left sub-tree, then the node itself, then right sub-tree.
 * Each value is followed by a tab. A NULL node prints nothing.
 */
void inorder_print(struct binary_tree * node){
  if(node == NULL)
    return;
  // recursively print left sub-tree
  inorder_print(node->left_child);
  // print the node value
  printf("%d\t", node->value);
  // recursively print right sub-tree
  inorder_print(node->right_child);
}
+
+
+ +
    +
  • Inorder algorithm will traverse the binary search tree in a sorted order. Thus, it can be used to get nodes in a sorted order.
  • +
  • This algorithm is not suitable to delete or free the nodes of the tree. It should not be used to delete a binary tree.
  • +
  • This algorithm cannot be used to make a copy of a binary search tree.
  • +
+
+
+
+

5.4.2. Preorder tree walk

+
+

+This algorithm is called preorder algorithm because it will first traverse the current node, then recursively traverses the left sub-tree and then recursively traverse the right sub-tree. +

+
+
/*
 * Print the values of the sub-tree rooted at `node` in preorder:
 * the node itself, then left sub-tree, then right sub-tree.
 * Each value is followed by a tab. A NULL node prints nothing.
 */
void preorder_print(struct binary_tree * node){
  if(node == NULL)
    return;
  // print the node
  printf("%d\t", node->value);
  // recursively print left sub-tree
  preorder_print(node->left_child);
  // recursively print right sub-tree
  preorder_print(node->right_child);
}
+
+
+
    +
  • This algorithm is used to create a copy of the Binary Search Tree. If we store nodes in an array using this algorithm and then later insert the nodes linearly in a simple binary search tree, we will have an exact copy of the tree.
  • +
  • This algorithm traverses the tree in a topologically sorted order.
  • +
  • This algorithm cannot be used to delete or free the nodes of the tree.
  • +
+
+
+
+

5.4.3. Postorder tree walk

+
+

+In this algorithm, we first traverse the left sub-tree recursively, then the right-sub tree recursively and finally the node. +

+
+
void postorder_print(struct binary_tree * node){
+  if(node == NULL)
+    return;
+  // recursively print left sub-tree
+  postorder_print(node->left_child);
+  // recursively print right sub-tree
+  postorder_print(node->right_child);
+  // print the node
+  printf("%d\t", node->value);
+}
+
+
+
    +
  • This algorithm can be used to delete or free all the nodes of a binary tree.
  • +
  • This algorithm cannot be used to create a copy of the tree
  • +
+
+
+
+
+ +
+

6. Binary Heap

+
+

+Heap is a data structure represented as a complete tree which follows the heap property. All levels in a heap tree are completely filled except possibly the last one, which is filled from left to right. +
+
+The most common implementation of the heap is a binary heap. The binary heap is represented as a binary tree. We can use an array to implement binary heaps. +
+
+The heap data structure is used to implement priority queues. In many cases we even refer to heaps as priority queues and vice versa. +

+
+ +
+

6.1. Heap Property

+
+

+Heaps are of two types +

+
    +
  • min-heap : the smallest element is at the root of the tree.
  • +
  • max-heap : the largest element is at the root of the tree.
  • +
+

+The heap property is different for min-heaps and max-heaps. +

+
    +
  • for min-heap : the key stored in parent node is always less than or equal \((\le)\) to the key of child node.
  • +
  • for max-heap : the key stored in parent node is always greater than or equal \((\ge)\) to the key of child node.
  • +
+
+
+ +
+

6.2. Shape of Heap

+
+

+Also referred to as the shape property of a heap. +
+A heap is represented as a complete tree. A complete tree is one where all the levels are completely filled except possibly the last. The last level, if not completely filled, is filled from left to right. +

+
+
+
+

6.3. Array implementation

+
+

+We can implement binary heap using arrays. The root of tree is the first element of the array. The next two elements are elements of second level of tree and children of the root node. Similarly, the next four elements are elements of third level of tree and so on. +
+
+For a given level, the position in array from left to right is the position of elements in tree from left to right. +
+
+For example, a max-heap implemented using array can be represented as tree as shown +
+
+Heap-as-array.svg +
+
+In C, we can create a heap struct for easier implementation of algorithms +

+
+
struct heap_type{
+  int array[];
+  size_t capacity;
+  size_t len;
+};
+
+
+
+
+
+

6.4. Operations on heaps

+
+

+Both insertion and deletion in heap must be done in a way which conforms to the heap property as well as shape property of heap. Before we can look at insertion and deletion, we need a way to find parent and child for a given index. We will also first see up-heapify and down-heapify functions. +

+
+
+

6.4.1. Parent and child indices

+
+

+In a binary heap, we can find parent and children for any given index using simple formulas. +

+
    +
  • If array is zero indexed, for element at index i +
      +
    • children at indices \((2i + 1)\) and \((2i + 2)\)
    • +
    • parent at index \(floor\left( (i - 1)/2 \right)\)
    • +
  • +
  • If array is one indexed, for element at index i +
      +
    • children at indices \((2i)\) and \((2i + 1)\)
    • +
    • parent at index \(floor\left( i/2 \right)\)
    • +
  • +
+
+
+ +
+

6.4.2. Down-heapify

+
+

+The down-heapify is a function which can re-heapify an array if no element of heap violates the heap property other than index and its two children. +
+This function runs in \(\theta (log_2n)\) time. The algorithm for this works as follows +

+
    +
  1. Compare the index element with its children and stop if in correct order in relation to both children.
  2. +
  3. If not in correct order, swap the index element with the child that is out of order (when both children are out of order, pick the larger child in a max-heap or the smaller child in a min-heap). Repeat till in correct order or at the lowest level.
  4. +
+ +
+
void down_heapify(struct heap_type heap, size_t index){
+  size_t left = 2 * index  + 1;
+  size_t right = 2 * index + 2;
+  size_t largest = index;
+
+  if(left < heap.len && heap.array[left] > heap.array[largest])
+    largest = left;
+
+  if(right < heap.len && heap.array[right] > heap.array[largest])
+    largest = right;
+
+  if(largest != index){
+    swap(heap.array[index], heap.array[largest]);
+    down_heapify(heap, largest);
+  }
+}
+
+
+ +

+Since we shift element downwards, this operation is often called down-heap operation. It is also known as trickle-down, swim-down, heapify-down, or cascade-down +

+
+
+ +
+

6.4.3. Up-heapify

+
+

+The up-heapify is a function which can re-heapify an array if no element of heap violates the heap property other than index and its parent. +
+This function runs in \(\theta (log_2n)\) time. The algorithm for this works as follows +

+
    +
  1. Compare the index element to its parent and stop algorithm if it is in correct order.
  2. +
  3. If not in correct order, swap element with its parent. Repeat till element in correct position or at root position.
  4. +
+ +
+
void up_heapify(struct heap_type heap, size_t index){
+  size_t parent = (index - 1) / 2;
+  size_t smallest = index;
+
+  if(parent >= 0 && heap.array[smallest] > heap.array[parent])
+    smallest = parent;
+
+  if(smallest != index){
+    swap(heap.array[index], heap.array[smallest]);
+    up_heapify(heap, smallest);
+  }
+}
+
+
+ +

+Since we shift element upwards, this operation is often called up-heap operation. It is also known as trickle-up, swim-up, heapify-up, or cascade-up +
+
+TODO : Maybe up-heapify function should be made cleaner rather than trying to mirror down-heapify function. +

+
+
+ +
+

6.4.4. Insertion

+
+

+Insertion takes \(\theta (log_2n)\) time in a binary heap. To insert an element in heap, we will add it to the end of the heap and then apply up-heapify operation on the element. +
+The code shows example of insertion in a max-heap. +

+ +
+
void insert_element(struct heap_type heap, int element){
+  // add element
+  size_t element_index = heap.len;
+  if(element_index == heap.capacity){
+    printf("Heap reached full capacity");
+    return;
+  }
+
+  heap.array[heap.len++] = element;
+  up_heapify(heap, heap.len - 1);
+}
+
+
+
+
+ +
+

6.4.5. Deletion or Extraction

+
+

+Like insertion, extraction also takes \(\theta (log_2n)\) time. Extraction from heap will extract the root element of the heap. We can use the down-heapify function in order to re-heapify after extracting the root node. +
+
+The code shows example of extraction in max-heap. +

+ +
+
int extract_element(struct heap_type heap){
+  if(heap.len < 1){
+    printf("No elements in the heap");
+    return -1;
+  }
+
+  int r = heap.array[0];
+  heap.array[0] = heap.array[heap.len - 1];
+  heap.len -= 1;
+
+  down_heapify(heap, 0);
+
+  return r;
+}
+
+
+
+
+ +
+

6.4.6. Insert then extract

+
+

+Inserting an element and then extracting from the heap can be done more efficiently than simply calling these functions separately as defined previously. If we call both functions we defined above, we have to do an up-heap operation followed by a down-heap. Instead, there is a way to do just a single down-heap. +
+
+The algorithm for this will work as follows in a max-heap. +

+
    +
  1. Compare whether the item we are trying to push is greater than root of heap.
  2. +
  3. If item we are pushing is greater, return it.
  4. +
  5. Else, +
      +
    1. Replace root element with new item
    2. +
    3. Apply down-heapify on the root of heap
    4. +
    5. Return the original root element which we replaced.
    6. +
  6. +
+ +

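+In Python's heapq module (which implements a min-heap), this push-then-pop operation is provided as heappushpop; the similarly named heapreplace pops the root first and then pushes the new item. +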

+
+
int heap_replace(struct heap_type heap, int element){
+  if(element > heap.array[0])
+    return element;
+
+  int r = heap.array[0];
+  swap(heap.array[0], element);
+  down_heapify(heap, 0);
+  return r;
+}
+
+
+
+
+ +
+

6.4.7. Searching

+
+

+Searching for an arbitrary element takes linear time in a heap. We use linear search to search for the element in the array. +

+
+
+
+

6.4.8. Deleting arbitrary element

+
+

+For a max-heap, deleting an arbitrary element is done as follows +

+
    +
  1. Find the element to delete and get its index \(i\).
  2. +
  3. swap last element and the element at index \(i\), and decrease the size of heap.
  4. +
  5. apply down-heapify on index \(i\) if any of its children violate the heap property else apply up-heapify if the parent element violates the heap property.
  6. +
+
+
+
+

6.4.9. Decrease and increase keys

+
+

+TODO : I don't know if it is necessary to do this operation. It looks simple to implement. +

+
+
+
+
+

6.5. Building a heap from array

+
+

+We can convert a normal array into a heap using the down-heapify operation in linear time \(\left( \theta (n) \right)\) +

+ +
+
// array[0 .. len-1] contains elements which are not arranged as a heap yet
+// this function will turn it into a correct heap
+// NOTE(review): down_heapify is declared with a struct heap_type first
+// parameter, but a plain int array is passed here -- confirm the intended type.
+void build_heap(int array[], size_t len){
+  // start at the last node that has a child, index (len/2) - 1, and walk
+  // back to the root; counting in size_t and testing before the decrement
+  // avoids initialising an int from a wrapped-around size_t when len < 2
+  for(size_t i = len / 2; i-- > 0; )
+    down_heapify(array, i);
+}
+
+
+

+As we see, for zero indexed language, the range of for loop is [(len(array)/2) - 1, 0] +
+If we are using a one indexed language, then range of for loop is [len(array)/2, 1] +

+
+
+
+ +
+

7. Graphs

+
+

+A graph is a data structure which consists of nodes/vertices, and edges. We sometimes write it as \(G=(V,E)\), where \(V\) is the set of vertices and \(E\) is the set of edges. When we are working on runtime of algorithms related to graphs, we represent runtime in two input sizes. \(|V|\) which we simply write as \(V\) is the number of vertices and similarly \(E\) is the number of edges. +

+
+
+

7.1. Representing graphs

+
+

+We need a way to represent graphs in computers and to search a graph. Searching a graph means to systematically follow edges of graphs in order to reach vertices. +
+
+The two common ways of representing graphs are adjacency lists and the adjacency matrix. Either can represent both directed and undirected graphs. +

+
+ +
+

7.1.1. Adjacency List

+
+

+Every node in the graph is represented by a linked list. The list contains the nodes to which the list node is connected by an edge. +
+Example, if list-0 contains node-3, then node-0 is connected to node-3 by an edge. +

+
    +
  • For undirected graphs this will simply work by storing all nodes in list who have a shared edge with list node.
  • +
  • For directed graphs we will only add node to list, if edge goes from list node to the stored node.
  • +
+

+So in our previous example, if list-0 contains node-3, then the edge goes from 0 to 3 in the directed graph. +
+
+The space taken by adjacency list representation is \(\theta (V + E)\). +
+Since each node represents an edge, it is easy to convert an adjacency representation graph to a weighted graph. A weighted graph is a graph where each edge has an associated weight. So the weight of (u, v) edge can be stored in the node-v of u's list. +
+The adjacency list representation is very robust and can represent various types of graph variants. +

+
+
+ +
+

7.1.2. Adjacency Matrix

+
+

+We use a single matrix to represent the graph. The size of the matrix is \(\left( |V| \times |V| \right)\). When we make the matrix, all its elements are zero, i.e. the matrix is zero initialized. +
+
+If there is an edge between vertices (x , y), we show it by setting +
+matrix[x][y] = true or matrix[x][y] = 1 +
+If there is not an edge between vertices (x , y), we set +
+matrix[x][y] = false or matrix [x][y] = 0 +

+
    +
  • For undirected graphs, to show edge (u , v) we have to set both matrix[u][v] and matrix[v][u] to 1.
  • +
  • For directed graphs, to show edge (u , v) which goes from u to v, we only set matrix[u][v] to 1.
  • +
+ +

+The space taken by adjacency matrix is \(\theta (V^2)\). +
+For undirected graphs, the matrix will be symmetrical along the diagonal, because matrix will be equal to its own transpose. So we can save space by only storing half the matrix in memory. +
+
+When comparing asymptotic results, the adjacency list seems more efficient, but matrix has advantage of only storing 1 bit for each cell. So in denser graphs, the matrix may use less space. +
+
+We can store weighted graphs in adjacency matrix by storing the weights along with the edge information in matrix cells. +

+
+
+
+ +
+

7.2. Vertex and edge attributes

+
+

+Many times we have to store attributes with either vertices or edges or sometimes both. How this is done differs by language. In notation, we will write it using a dot (.) +
+
+For example, the attribute x of v will be denoted as v.x +
+Similarly, the attribute x of edge (u , v) will be denoted as (u , v).x +

+
+
+
+

7.3. Density of graph

+
+

+Knowing the density of a graph can help us choose the way in which we represent our graph. +
+The formula for density of graph is +\[ \text{density} = \frac{\text{number of edges}}{\text{maximum possible edges}} \] +Maximum possible number of edges for a simple undirected graph is +\[ \frac{|V| \left( |V| - 1 \right)}{2} \] +Maximum possible number of edges for a simple directed graph is +\[ |V| \left( |V| - 1 \right) \] +Therefore, the density of a simple undirected graph will be +\[ \text{density (simple undirected)} = \frac{2|E|}{|V| \left( |V| - 1 \right)} \] +And density of a simple directed graph will be +\[ \text{density (simple directed)} = \frac{|E|}{|V| \left( |V| - 1 \right)} \] +

+ +

+Therefore, maximum density for a graph is 1. The minimum density for a graph is 0. +
+Knowing this, we can say graph with low density is a sparse graph and graph with high density is a dense graph. +

+
+
+

7.3.1. Which representation to use

+
+

+For a quick approximation, when undirected graph and \(2|E|\) is close to \(|V|^2\), we say that graph is dense, else we say it is sparse. +
+Similarly, for directed graph when \(|E|\) is close to \(|V|^2\), we can say graph is dense, else it is sparse. +
+
+The list representation provides a more compact way to represent graph when the graph is sparse. Whereas matrix representation is better for dense graphs. +
+Another criterion is how the algorithm will use the graph. If we want to traverse to neighbouring nodes, then list representation works well. If we want to quickly tell if there is an edge between two nodes, then matrix representation is better. +

+
+
+
+ +
+

7.4. Searching Graphs

+
+

+Graph search (or graph traversal) algorithms are used to explore a graph to find nodes and edges. Vertices not connected by edges are not explored by such algorithms. These algorithms start at a source vertex and traverse as much of the connected graph as possible. +
+
+Searching graphs algorithm can also be used on trees, because trees are also graphs. +

+
+
+

7.4.1. Breadth first search

+
+

+BFS is one of the simplest algorithms for searching a graph and is used as an archetype for many other graph algorithms. This algorithm works well with the adjacency list representation. +
+
+In BFS, the nodes are explored based on their distance from the starting node. What we mean by distance between nodes is how many edges are in between the two nodes. +
+
+So in BFS, all nodes at distance 1 are explored first, then nodes at distance 2 are explored, then nodes at distance 3 and so on. That is, all nodes at distance \(k\) are explored before exploring nodes at distance \((k+1)\). +

+
+
BFS(graph_type graph, node_type start){
+  queue_type queue;
+  start.explored = true;
+  queue.add(start);
+
+  while(queue.len != 0){
+    node_type v = queue.dequeue();
+    node_list adjacency_list = grap.adj_list(v);
+
+    while(adjacency_list != NULL){
+      node_type u = adjacency_list.node;
+      if(u.explored == false){
+        u.explored = true;
+        queue.add(u);
+      }
+      adjacency_list = adjacency_list.next;
+    }
+  }
+}
+
+
+ +
    +
  • Analysis
  • +
+

+For an input graph \(G=(V,E)\), every node is enqueued only once and hence, dequeued only once. The time taken to enqueue and dequeue a single node is \(\theta (1)\), then the time for \(|V|\) nodes is, \(\theta (V)\). Each node in adjacency list represents an edge, therefore the time taken to explore each node in adjacency lists is \(\theta (E)\). Therefore, the total time complexity is +\[ \text{Time complexity of BFS : } \theta(V + E) \] +

+
+
+
+

7.4.2. Breadth-first trees for shortest path

+
+

+For a simple graph, we may want to get the shortest path between two nodes. This can be done by making a Breadth-first tree. +
+
+When we are traversing nodes using BFS, we can create a breadth-first tree. To make this tree, we simply need to set parent of u in the inner while loop in the BFS algorithm to v. So our algorithm from earlier will become. +

+
+
BFS_shortest_path(graph_type graph, node_type start, node_type end){
+  queue_type queue;
+  start.explored = true;
+  start.parent = NULL; // the start node is root node of tree
+  queue.add(start);
+
+  while(queue.len != 0){
+    node_type v = queue.dequeue();
+    node_list adjacency_list = grap.adj_list(v);
+    while(adjacency_list != NULL){
+      node_type u = adjacency_list.node;
+      if(u.explored == false){
+        u.explored = true;
+        u.parent = v; // the parent of u is v
+        queue.add(u);
+
+        if(u == end) return; // if we found the end node,
+                             // we have the path to it.
+      }
+
+      adjacency_list = adjacency_list.next;
+    }
+  }
+
+  printf("end node not in graph");
+}
+
+
+

+In this tree, the path upwards from any given node to start node will be the shortest path to the start node. +
+Therefore, we can get the shortest path now as follows +

+
+
print_shortest_path(graph_type graph, node_type start, node_type end){
+  BFS_shortest_path(graph, start, end);
+  while(end != NULL){
+    print_node(end);
+    end = end.parent;
+  }
+}
+
+
+

+This will print shortest path from end node to start node. +

+
+
+
+

7.4.3. Depth first search

+
+

+Unlike BFS, depth first search is more biased towards the farthest nodes of a graph. It follows a single path till it reaches the end of a path. After that, it back tracks to the last open path and follows that one. This process is repeated till all nodes are covered. +
+
+Implementation of DFS is very similar to BFS with two differences. Rather than using a queue, we use a stack. In BFS, the explored nodes are added to the queue, but in DFS we will add unexplored nodes to the stack. +

+ +
+
DFS(graph_type graph, node_type start){
+  stack_type stack;
+  stack.push(start);
+  while(stack.len != 0){
+    node_type v = stack.pop();
+    if(v.explored == false){
+      v.explored = true;
+
+      node_list adjacency_list = graph.adj_list(start);
+      while(adjacency_list != NULL){
+        stack.push(adjacency_list.node);
+        adjacency_list = adjacency_list.next;
+      }
+    }
+  }
+}
+
+
+ +

+Another way to implement DFS is recursively. +

+ +
+
DFS(graph_type graph, node_type node){
+  node.discovered = true;
+  node_list adjacency_list = graph.adj_list(node);
+  while(adjacency_list != NULL){
+    node_type u = adjacency_list.node;
+    if(u.discovered == false)
+      DFS(graph, u);
+    adjacency_list = adjacency_list.next;
+  }
+}
+
+
+ +

+The difference between recursive and iterative version of DFS is that, recursive will choose the path of first neighbour in the adjacency list, whereas the iterative will choose the path of last neighbour in the adjacency list. +

+ +
    +
  • Analysis
  • +
+

+For an input graph \(G=(V,E)\), the time complexity for Depth first search is \(\theta (V + E)\), i.e, it is the same as breadth first search. The reasoning for this is the same as before, all nodes are pushed and popped from stack only once, giving us time complexity of \(\theta (V)\). We go through all the adjacency lists only once giving time complexity \(\theta (E)\). Thus adding the two will give us +\[ \text{Time complexity of DFS : } \theta (V + E) \] +

+
+
+ +
+

7.4.4. Properties of DFS

+
+

+DFS is very useful to understand the structure of a graph. To understand the +

+
+
+
+

7.4.5. Topological sort using DFS

+
+
+
+
+
+

Author: Anmol Nawani

+

Created: 2023-07-30 Sun 18:20

+

Validate

+
+ + diff --git a/main.org b/main.org new file mode 100644 index 0000000..0403aa7 --- /dev/null +++ b/main.org @@ -0,0 +1,1161 @@ +#+TITLE: Data Structures +#+html_head: + +* Stack +A stack is a data structure which only allows insertion and deletion from one end of the array. The insertion is always on the extreme end of the array. The deletion can only be done on the element which was most recently added. +\\ +*It is similar to stacking plates.* The plate can only be added at the *top* of the stack and also only the last added plate to the stack can be removed (which will be on top). +\\ +Due to this property, Last In elements are removed First from a stack. Therefore, it is called a *Last In First Out (LIFO)* data structure or a *First In Last Out (FILO)* data structure. +\\ +To create a stack, we will keep track of the index which is the *top* of the array. This top index will *increment when we insert element* and *decrement when we remove element.* + +** Operation on stack +A stack has two operations + +* Direct Address Table +Direct Address Tables are useful when we know that key is within a small range. Then, we can allocate an array such that each possible key gets an index and just add the values according to the keys. 
+\\ +This also assumes that keys are integers + ++ Table creation + +#+BEGIN_SRC c +struct table{ + int * values; + size_t min_key; + size_t max_key; +}; + +struct table create_table(size_t min_key, size_t max_key){ + struct table r; + r.values = (int * ) malloc(sizeof(int) * (max_key - min_key + 1) ); + r.min_key = min_key; + r.max_key = max_key; + return r; +} +#+END_SRC + ++ Table insert + +#+BEGIN_SRC c +void table_insert(struct table t, size_t key, int value){ + if(key > t.max_key || key < t.min_key) + assert(false && "Key value out of boundry"); + + t.values[key - t.min_key] = value; +} +#+END_SRC + ++ Table delete + +#+BEGIN_SRC c +void table_delete(struct table t, size_t key){ + if(key > t.max_key || key < t.min_key) + assert(false && "Key value out of boundry"); + + t.values[key - t.min_key] = 0x00; +} +#+END_SRC + ++ Table Search / Table Get + +#+BEGIN_SRC c +int table_get(struct table t, size_t key){ + if(key > t.max_key || key < t.min_key) + assert(false && "Key value out of boundry"); + + return t.values[key - t.min_key]; +} +#+END_SRC + +*Using direct address tables is very useful when keys are enum values.* + +* Hash Table +When the set of possible keys is large, it is impractical to allocate a table big enough for all keys. In order to fit all possible keys into a small table, rather than directly using keys as the index for our array, we wil first calculate a /*hash*/ for it using a /*hash function*/. Since we are relying on hashes for this addressing in the table, we call it a hash table. +\\ +\\ +For a given key $k_i$ in */direct address table/*, we store value in $table[k_i]$. +\\ +\\ +For a given key $k_i$ in */hash table/*, we store value in $table[h(k_i)]$, where $h()$ is the hash function. +\\ +\\ +So the main purpose of the hash function is to reduce the range of array indices. + +** Collision + +Because we are reducing the range of indices, the hash function may /*hash two keys to the same slot*/. This is called a collision. 
+\\ +\\ +We should try to find a hash funtion which will minimise the number of collisions. +\\ +\\ +The number of keys is going to be greater than number of slots in table. Therefore avoiding all collisions is not possible. +\\ +There are two ways we will look at to resolve collision. +1. Chaining +2. Open addressing + +*** Chaining +In chaining, rather than storing values in table slots. We will have /*linked lists at each slot*/ which will store (key, value) pairs. +\\ +\\ +When the hash gives us a slot, we will add the value to linked list at that slot. + ++ Linked List structure + +#+BEGIN_SRC c + struct linked_list{ + size_t key; + int value; + struct linked_list * next; + }; +#+END_SRC + ++ Table structure + +#+BEGIN_SRC c + struct table{ + struct linked_list * table[]; + size_t table_size; + }; +#+END_SRC + ++ Insertion + +Insertion can be done in $\theta (1)$ time if we assume that key being inserted is not already in the linked list. But we can add a check to see if the key was already inserted and modify that value. 
+ +#+BEGIN_SRC c + // linked_list_add(struct linked_list * ll, size_t key, int value) + // adds the given key,value to the start of the list + void chained_hash_insert(struct table t, size_t key, int value){ + linked_list_add(t.table[ h(key) ], key ,value); + } +#+END_SRC + ++ Get / Search + +#+BEGIN_SRC c + // linked_list_search(struct linked_list * ll, size_t key) + // gets the value stored with the given key + void chained_hash_get(struct table t, size_t key){ + return linked_list_search(t.table[ h(key) ], key); + } +#+END_SRC + ++ Delete + +#+BEGIN_SRC c + // linked_list_delete(struct linked_list * ll, size_t key) + // delete the node with the given key + void chained_hash_delete(struct table t, size_t key){ + linked_list_delete(t.table[ h(key) ], key); + } +#+END_SRC + +*** Performance of chaining hash table +The *load factor* is defined as number of elements per slot and is calculated as +\[ \alpha \text{(Load factor)} = \frac{\text{number of elements in hash table}}{\text{number of slots in hash table}} \] +The worst case for chaining is when all keys are assigned to a single slot. In this case searching for an element takes $\theta (n)$ time. +\\ +\\ +If we assume that any given element is equally likely to be hashed into any of the slots, this assumption is called */simple uniform hashing/*. +\\ +\\ +If we also assume that hash funtion takes constant time, then in the average case, the time complexity for searching key in the chaining hash table is +\[ \text{Average Case Searching} : \theta (1 + \alpha) \] +*** Open Addressing +In open addressing, all the key and value pair of entries are stored in the table itself. Because of this, the load factor $\left( \alpha \right)$ can never exceed 1. +\\ +\\ +When we get a key whose slot is already taken, we will look for another empty slot. This is done by what is called */probing/*. To get which slot to check next, we have various methods. 
+\\ +\\ +The sequence in which empty slots are looked for is fixed for given key, this sequence is called *probe sequence*. \\ +It is necessary to keep probe sequence fixed for any given key, so that we can search for it later. + +**** *Linear probing* +For a given *ordinary hash function* $h(k)$, the linear probing uses the hash function +\[ linear\_h(k, i) = (h(k) + i)\ mod\ m \] +We refer to $h(k)$ as the */auxiliary hash function/*. +\\ +\\ +In linear probing, we first check the slot [h(k)], if it is not empty, we check [h(k) + 1] then [h(k) + 2] .... upto slot [m - 1] after which we wrap around to [0], [1] ... till we have checked all the slots. +\\ +\\ +Linear probing is easy to implement, but it suffers from */primary clustering/*. In long runs of linear probing, keys tend to cluster together. This causes the performance of operations on hash table to degrade. The time to query a random element from table degrades to $\theta (n)$. + +**** *Quadratic probing* +For given auxiliary hash function $h(k)$, the quadratic probing uses +\[ quadratic\_h(k, i) = \left( h(k) + c_1i + c_2i^2 \right) \ mod\ m \] +Where, $c_1$ and $c_2$ are positive auxiliary constants. ++ If m is not considered, we just assume $c_1 = 0, c_2 = 1$, this is the simplest form of quadratic probing. ++ For $m = 2^n$, a good choice for auxiliary constants is $c_1=c_2=1/2$. ++ For $m = n^p$ where m, n and p are positive integers greater or equal to 2, constants $c_1 = 1, c_2 = n$ are a good choice. + +Quadratic probing works much better than linear probing. +\\ +\\ +If $quadratic\_h(k_1, 0) = quadratic\_h(k_2,0)$, then that implies that all $quadratic\_h(k_1, i) = quadratic\_h(k_2,i)$, i.e, they will have the same *probe sequence*. This leads to a probe sequence getting clustered. This is called /*secondary clustering*/. This also affects performance but not as drastically as primary clustering. +**** *Double Hashing* +Double hashing is one of the best available methods for open addressing. 
\\ +*Double hashing uses /two auxiliary hashing functions/.* +\[ double\_h(k, i) = \left( h_1(k) + i \times h_2(k) \right) \ mod\ m \] +The value of $h_2(k)$ must be *relatively prime (i.e, coprime) to number of slots (m)*. \\ ++ A convenient way to ensure this is let *m be a power of 2* and $h_2(k)$ be a *hash function that always produces an odd number*. ++ Another way is to let *m be a prime* and make $h_2(k)$ such that is *always produces a positive integer less than m.* +If we use one of the above two methods (either m is a power of 2 or a prime), then double hashing improves over linear and quadratic probing since keys will have distinct probe sequences. +\\ +\\ +When using the above values of m, performance of double hashing is very close to the performance of "ideal" scheme of uniform hashing. +**n* Performace of open addressing +In open addressing *load factor* $\left( \alpha \right) \le 1$. We will assume *uniform hashing* i.e, any element is equally likely to be hashed in any slot. We will also assume that for any key, each possible probe sequence is equally likely. +\\ +\\ +Under these assumptions, for load factor $\alpha$. The number of probes in an unsuccessful search is at most $1/(1 - \alpha )$ +\\ +This means that for a constant load factor, an unsuccessful search will run in $\theta (1)$ time. +\\ +\\ +The number of probes on average for inserting an element under these assumptions is $1/(1- \alpha )$ +\\ +The number of probes on averge in a successful search is at most $\frac{1}{\alpha} ln\left( \frac{1}{1-\alpha} \right)$ +** Hash Functions +A good hash funtion will approximately satisfy the *simple uniform hashing*, which means that any element is equally likely to be hashed to any slot. + +\[ m : \text{Number of slots in hash table} \] +\[ n : \text{Number of elements in hash table} \] + +Suppose we knew that our keys are from a set of real numbers and the keys are picked uniformly. 
In this case, we could simply use the hash function $h(k) = floor(mk)$. +\\ +\\ +Similarly, in many cases we can make a reasonably good hash funtion if we know the distribution of keys. +\\ +\\ +We will look at a few ways to make a hash function. + +*** The division method +In division method, we map a key $k$ into one of the $m$ slots by taking the remainder of k divided by m. +\[ h(k) = k\ mod\ m = k\ \%\ m \] +In most cases, +\[ m : \text{Number of slots in hash table} \] +But there are some cases where $m$ is chosen to be something else. ++ If $m$ is a *power of 2*, then $k\ mod\ m$ will give us the least significant $log_2m$ bits of $k$. When making a hash function, we want a function that depends on all bits of the key. So, */we should not use this method if m is a power of 2/*. ++ A *prime number* not close to a power of 2 is a good choice for $m$ in many cases. So when deciding the number of slots for the hash table, we can /*try to make $m$ a prime*/ which will accomodate our elements with less load factor. + +*** The multiplication method +In multiplication method, we first multiply the key $k$ with a constant $A$ which is in range $0 < A < 1$. Then we get the *fractional part* of $kA$. Then we multiply the fractional part by $m$ and floor it to get the hash. +\[ h(k) = floor(m \times decimal\_part(kA) ) \] +The advantage of multiplication method is that we can choose any value of $m$. We can even choose $m$ to be a power of 2. +\\ +We can choose any value of $A$. The value depends on characteristics of data, +\[ A \approx \frac{\sqrt{5} - 1}{2} \] +will work reasonably well. 
+\\ +\\ +For example, suppose + +\[ key\ (k) = 1234 \] +\[ m = 128 \] +And our value of $A$ is, +\[ A = 0.618 \] +Then to get our $h(k)$, +\[ kA = 762.612 \] +\[ decimal\_part(kA) = 0.612 \] +\[ floor(m \times decimal\_part(kA) ) = h(k) = 78 \] + +In C language, +#+BEGIN_SRC c + size_t hash(size_t key, size_t m){ + double kA = key * 0.618; + // get decimal part only + double kA = kA - ((int) kA); + // floor the product of decimal part and m + size_t h = floor(m * kA); + return h; + } +#+END_SRC + +*** Mid square method +In this method, we square the keys and then we choose some digits from the middle. +Example, +\[ h(10) = middle\ digit \left( 10 \times 10 \right) = middle\ digit (100) = 0 \] +\[ h(11) = middle\ digit \left( 11 \times 11 \right) = middle\ digit (121) = 2 \] +\[ h(12) = middle\ digit \left( 12 \times 12 \right) = middle\ digit (144) = 4 \] +With huge numbers, we need to take care of overflow conditions in this method. + +*** Folding method + +While this method can be used on integers, this method is usually used where the key is segmented. For example in arrays or when key is a string. +\\ +\\ +In this method, we add all of the segments and then we mod it with the number of slots. +\[ h(k) = \left( \text{Sum of all the segments} \right) mod\ m \] +Example, for string "hello" +\\ +sum = 'h' + 'e' + 'l' + 'l' + 'o' +\\ +sum = 104 + 101 + 108 + 108 + 111 = 532 +\\ +\\ +If m = 100, then +\\ +h(k) = 532 mod 100 +\\ +h(k) = 32 + +** Universal Hashing +TODO: Basics of universal hashing. +** Perfect Hashing +*NOTE*: This doesn't seem to be in B.Tech syllabus, but it seems cool. +\\ + +* Representing rooted trees using nodes +We can represent trees using nodes. A node only stores a single element of the tree. What a node is will depend on the language being used. +\\ +In C, we make a struct which will store the element and pointers to other node structs. 
+ +#+BEGIN_SRC c + struct tree_node{ + int element; + struct tree_node * left_child; + struct tree_node * right_child; + }; +#+END_SRC +\\ +In languages with OOP, we create a node class which will store references to other node objects. +#+BEGIN_SRC java + class Node { + int value; + Node left; + Node right; + + Node(int value) { + this.value = value; + right = null; + left = null; + } + } +#+END_SRC + +** Fixed number of children +When we know how many children any given node can have, i.e., the number of children is bounded, we can just use references or pointers to the nodes directly. +\\ +For example, if we know we are making a binary tree, then we can just store references to the left child and the right child. + +#+BEGIN_SRC c + struct tree_node{ + int element; + struct tree_node * left_child; + struct tree_node * right_child; + }; +#+END_SRC + +** Unbounded number of children +When we don't know how many children any given node will have, i.e., any node can have any number of children, we can't just use references. We could create an array of references to nodes, but some nodes will only have one or two children and some may have no children. This will lead to a lot of wasted memory. +\\ +There is a way to represent such trees without wasting any memory. This is done by using *sibling references or pointers*. +\\ +#+BEGIN_SRC c + struct tree_node{ + int element; + struct tree_node * left_child; + struct tree_node * right_sibling; + }; +#+END_SRC + +The right sibling pointer will point to the right sibling of the node. This allows us to chain siblings and have an unbounded number of siblings to the given node, therefore having an unbounded number of children to any given parent. To make this approach easier to use, we can also add a pointer back to the parent node, though it is not compulsory. 
+ +#+BEGIN_SRC c + struct tree_node{ + struct tree_node * parent; + + int element; + + struct tree_node * left_child; + struct tree_node * right_sibling; + }; +#+END_SRC + +So a tree which is like : +\\ +[[./imgs/tree_actual.jpg]] +\\ +\\ +can be represented using references and pointers as : +\\ +[[./imgs/tree_representation.jpg]] +\\ + +* Binary Search Trees +A tree where any node can have at most two child nodes is called a */binary tree/*. +\\ +A binary search tree is a tree where for any given node *the nodes stored in left sub-tree are less than the parent node* and the *nodes stored in right sub-tree are greater than the parent node* (or vice versa). So the left sub-tree always has smaller elements and the right sub-tree always has greater elements. +\\ +\\ +This property allows us to easily search for elements in the data structure. We start our search at the root node. If the element we want is less than the current node, we will go to the left node, else we will go to the right node. The concept is similar to the binary search on arrays. + +In C, we can make a binary tree as +#+BEGIN_SRC c + struct binary_tree{ + int value; + struct binary_tree * left_child; + struct binary_tree * right_child; + }; +#+END_SRC + +** Querying a BST +Some common ways in which we usually query a BST are searching for a node, minimum & maximum node and successor & predecessor nodes. We will also look at how we can get the parent node for a given node; if we already store a parent pointer, then that algorithm will be unnecessary. +*** Searching for node +We can search for a node very effectively with the help of the binary search tree property. The search will return the node if it is found, else it will return NULL. 
+#+BEGIN_SRC c + struct binary_tree * + search_recursively(struct binary_tree * root, int value){ + // If we reach a null, then value is not in tree + if(root == NULL) + return NULL; + // if we found the value, return the current node + if(root->value == value) + return root; + // compare value we are looking for + // and go to either left or right sub-tree + if(value < root->value) + return search_recursively(root->left, value); + else + return search_recursively(root->right, value); + } +#+END_SRC +We can also search iteratively rather than recursively. +#+BEGIN_SRC c + struct binary_tree * + search_iterative(struct binary_tree * root, int value){ + while(root != NULL){ + // if we found the value, return the current node + if(root->value == value) return root; + // compare value and go to left or right sub-tree + root = (value < root->value) ? root->left : root->right; + } + // if not found then return NULL + return NULL; + } +#+END_SRC +*** Minimum and maximum +Finding the minimum and maximum is simple in a Binary Search Tree. The minimum element will be the leftmost node and maximum will be the rightmost node. We can get the minimum and maximum nodes by using these algorithms. ++ For minimum node +#+BEGIN_SRC c + struct binary_tree * minimum(struct binary_tree * root){ + if(root == NULL) return NULL; + while(root->left != NULL) + root = root->left; + return root; + } +#+END_SRC ++ For maximum node +#+BEGIN_SRC c + struct binary_tree * maximum(struct binary_tree * root){ + if(root == NULL) return NULL; + while(root->right != NULL) + root = root->right; + return root; + } +#+END_SRC + +*** Find Parent Node +This algorithm will return the parent node. It uses a trailing node to get the parent. If the root node is given, then it will return NULL. *This algorithm makes the assumption that the node is in the tree*. 
+#+BEGIN_SRC c + struct binary_tree * + find_parent(struct binary_tree * tree, struct binary_tree * node){ + if(tree == node) return NULL; + + struct binary_tree * current_node = tree; + struct binary_tree * trailing_node = tree; + + while(current_node != node){ + trailing_node = current_node; + current_node = (node->value < current_node->value) ? + current_node->left : + current_node->right; + } + + return trailing_node; + } +#+END_SRC +*** Is ancestor +This algorithm will take two nodes, ancestor and descendant. Then it will check if ancestor node is really the ancestor of descendant node. +#+BEGIN_SRC c + bool + is_ancestor(struct binary_tree *ancestor, + struct binary_tree *descendant){ + // both ancestor and descendant + // should not be NULL + if(ancestor == NULL || descendant == NULL) + return false; + + while(ancestor != NULL){ + if(ancestor == descendant) return true; + ancestor = (descendant->value < ancestor->value) ? + ancestor->left : + ancestor->right; + } + return false; + } +#+END_SRC +*** Successor and predecessor +We often need to find the successor or predecessor of an element in a Binary Search Tree. The search for predecessor and succesor is divided in to two cases. + +**** *For Successor* +#+BEGIN_SRC c + // get successor of x + struct binary_tree * + successor(struct binary_tree * tree, struct binary_tree * x){ + // case 1 : right subtree is non-empty + if(x->right != NULL){ + return minimum(x->right); + } + // case 2 : right subtree is empty + struct binary_tree * y = find_parent(tree, x); + while(y != NULL){ + if(is_ancestor(y, x) && is_ancestor(y->left, x)) return y; + y = find_parent(tree, y); + } + return NULL; + } +#+END_SRC +*Case 1* : If the node x has a right subtree, then the minimum of right subtree of x is the succesor. +\\ +*Case 2* : If the node x has no right subtree, then successor may or may not exist. If it exists, the successor node will be the ancestor of x whose own left node is also the ancestor of x. 
+**** *For Predecessor* +#+BEGIN_SRC c + struct binary_tree * + predecessor(struct binary_tree * tree, struct binary_tree * x){ + // case 1 : left subtree is non-empty + if(x->left != NULL){ + return maximum(x->left); + } + // case 2 : left subtree is empty + struct binary_tree * y = find_parent(tree, x); + while(y != NULL){ + if(is_ancestor(y, x) && is_ancestor(y->right, x)) return y; + y = find_parent(tree, y); + } + return NULL; + } +#+END_SRC +*Case 1* : If the node x has a left subtree, then the maximum of left subtree of x is the predecessor. +\\ +*Case 2* : If the node x has no left subtree, then predecessor may or may not exist. If it exists, the predecessor node will be the ancestor of x whose own right node is also the ancestor of x. +** Inserting and Deleting nodes +When inserting and deleting nodes in BST, we need to make sure that the Binary Search Tree property continues to hold. Inserting node is easier in a binary search tree than deleting a node. +*** Insertion +Insertion is simple in a binary search tree. We search for the node we want to insert in the tree and insert it where we find first NULL spot. +#+BEGIN_SRC c + void + insert_node(struct binary_tree ** tree, struct binary_tree * node){ + // if found a null spot, insert the node + if(*tree == NULL){ + *tree = node; + return; + } + if(node->value < (*tree)->value){ + // the node is to be inserted into left subtree + struct binary_tree ** left_tree = &((*tree)->left); + insert_node(left_tree, node); + }else{ + // the node is to be inserted into right subtree + struct binary_tree ** right_tree = &((*tree)->right); + insert_node(right_tree, node); + } + } +#+END_SRC +The recursive algorithm for inserting into a Binary search tree is simpler than the iterative algorithm. 
+\\ +\\ +The algorithm for iterative insertion is +#+BEGIN_SRC c + void + insert_node(struct binary_tree **tree, struct binary_tree * node){ + // if no nodes in tree, then just node and return + if((*tree) == NULL){ + ,*tree = node; + return; + } + + struct binary_tree ** current_node = tree; + struct binary_tree ** trailing_node = tree; + + // look for an empty place using current_node + while(*current_node != NULL){ + trailing_node = current_node; + current_node = (node->value < (*current_node)->value) ? + &((*current_node)->left) : &((*current_node)->right); + } + + // we need to insert node on the trailing node + if(node->value < (*trailing_node)->value) + (*trailing_node)->left = node; + else + (*trailing_node)->right = node; + } +#+END_SRC +*** Deletion +Deletion in Binary Search Trees is tricky because we need to delete nodes in a way that the property of the Binary Search Tree holds after the deletion of the node. So we first have to remove the node from the tree before we can free it. +\\ +\\ +TODO : Write four cases of node deletion here +**** *Implementation in code* +We also use a helper function called Replace Child for deletion of node. This function will simply take parent node, old child node and new child node and replace old child with new child. + +#+BEGIN_SRC c + void + replace_child(struct binary_tree *parent, + struct binary_tree *old_child, + struct binary_tree *new_child){ + if(parent->left == old_child) parent->left = new_child; + else parent->right = new_child; + } +#+END_SRC + +We will create a funtion that will remove the root node from a given subtree and then return the root node of the result subtree. +This will allow us to apply remove root node funtion on any node and then reattach the new subtree. +\\ +\\ +Making remove root node a different funtion will also allow us to not worry about attaching the the subtree immediately in the same funtion. 
+ +#+BEGIN_SRC c + struct binary_tree * + remove_root_node(struct binary_tree *root){ + // case 1 : no child + // this case can be skipped in real implementation + // as it is covered by the case 2 + if(root->left == NULL && root->right == NULL){ + return NULL; + } + + // case 2 : one child + if(root->left == NULL){ + return root->right; + }else if(root->right == NULL){ + return root->left; + } + + struct binary_tree *successor = minimum(root->right); + // case 3 : two child and successor is right node of root node + if(successor == root->right){ + successor->left = root->left; + return successor; + } + + // case 4 : two child and successor is not the right node of root node + struct binary_tree *successor_parent = find_parent(root, successor); + replace_child(successor_parent, successor, successor->right); + successor->left = root->left; + successor->right = root->right; + return successor; + } +#+END_SRC + +Now we can make a delete node function which will remove the node, reattach the subtree and also free or delete the node. + +#+BEGIN_SRC c + void + delete_node(struct binary_tre **tree, struct binary_tree *node){ + struct binary_tree *new_root = remove_root_node(node); + + // if deleting root node of tree + if(node == (*tree)){ + (*tree) = new_root; + free(node); + return; + } + + // when not deleting root node of tree + replace_child(find_parent(*tree, node) + ,node ,new_root); + free(node); + } +#+END_SRC + +** Performance of BST +The performance of the search operation depends on the height of the tree. If the tree has $n$ elements, the height of a binary tree can be between $n$ and $floor\left( 1+ log_2(n) \right)$. +\\ +\\ +To perform an operation on BST, we need to find the node where we have perform the operation. Since even in worst case *we only need to traverse the height of the search tree to search for any node*, the time taken to perform any operation on a Binary Search Tree is $\theta (h)$ where, $h$ is the height of the tree. 
+\\ +\\ +A binary tree with height of $floor(1 + log_2(n))$ is called a *balanced binary tree*, otherwise it is an unbalanced tree. A balanced binary tree has the shortest height that a binary tree with that number of nodes can have. +\\ +\\ +The worst case is when the tree has a single branch, making the height of the tree n. In this case, the worst case for any operation takes $\theta (n)$ time. +\\ +A balanced binary search tree in worst case for any operation will take $\theta (log_2n)$ time. + +** Traversing a Binary Tree +There are three ways to traverse a binary tree: inorder tree walk, preorder tree walk and postorder tree walk. All three algorithms will take $\theta (n)$ time to traverse the $n$ nodes. + +*** Inorder tree walk +This algorithm is named so because it first traverses the left sub-tree recursively, then the node value and then traverses the right sub-tree recursively. + +#+BEGIN_SRC c + void inorder_print(struct binary_tree * node){ + if(node == NULL) + return; + // recursively print left sub-tree + inorder_print(node->left_child); + // print the node value + printf("%d\t", node->value); + // recursively print right sub-tree + inorder_print(node->right_child); + } +#+END_SRC + ++ *Inorder algorithm will traverse the binary search tree in a sorted order.* Thus, it can be used to get nodes in a sorted order. ++ This algorithm is not suitable to delete or free the nodes of the tree. It should not be used to delete a binary tree. ++ This algorithm cannot be used to make a copy of a binary search tree. +*** Preorder tree walk +This algorithm is called preorder algorithm because it will first traverse the current node, then recursively traverse the left sub-tree and then recursively traverse the right sub-tree. 
+#+BEGIN_SRC c + void preorder_print(struct binary_tree * node){ + if(node == NULL) + return; + // print the node + printf("%d\t", node->value); + // recursively print left sub-tree + preorder_print(node->left_child); + // recursively print right sub-tree + preorder_print(node->right_child); + } +#+END_SRC ++ *This algorithm is used to create a copy of the Binary Search Tree*. If we store nodes in an array using this algorithm and then later insert the nodes linearly in a simple binary search tree, we will have an exact copy of the tree. ++ This algorithm traverses the tree in a *topologically sorted* order. ++ This algorithm cannot be used to delete or free the nodes of the tree. +*** Postorder tree walk +In this algorithm, we first traverse the left sub-tree recursively, then the right-sub tree recursively and finally the node. +#+BEGIN_SRC c + void postorder_print(struct binary_tree * node){ + if(node == NULL) + return; + // recursively print left sub-tree + postorder_print(node->left_child); + // recursively print right sub-tree + postorder_print(node->right_child); + // print the node + printf("%d\t", node->value); + } +#+END_SRC ++ *This algorithm can be used to delete or free all the nodes of a binary tree*. ++ This algorithm cannot be used to create a copy of the tree + +* Binary Heap +Heap is a data structure represented as a complete tree which follows the heap property. All levels in a heap tree are completely filled except possible the last one, which is filled from left to right. +\\ +\\ +The most common implementation of the heap is a *binary heap*. The binary heap is represented as a binary tree. We can use an array to implement binary heaps. +\\ +\\ +The heap data structure is used to implement *priority queues*. In many cases we even refer to heaps as priority queues and vice versa. + +** Heap Property +Heaps are of two types ++ *min-heap* : the smallest element is at the root of the tree. 
++ *max-heap* : the largest element is at the root of the tree. +The heap property is different for min-heaps and max-heaps. ++ *for min-heap* : the key stored in parent node is always less than or equal $(\le)$ to the key of child node. ++ *for max-heap* : the key stored in parent node is always greater than or equal $(\ge)$ to the key of child node. + +** Shape of Heap +Also referred to as *shape property* of heap. +\\ +A heap is represented as a complete tree. A complete tree is one where all the levels are completely filled except possibly the last. The last level, if not completely filled, is filled from left to right. +** Array implementation +We can implement binary heap using arrays. The root of tree is the first element of the array. The next two elements are elements of second level of tree and children of the root node. Similarly, the next four elements are elements of third level of tree and so on. +\\ +\\ +/*For a given level, the position in array from left to right is the position of elements in tree from left to right.*/ +\\ +\\ +For example, a max-heap implemented using array can be represented as tree as shown +\\ +\\ +[[./imgs/Heap-as-array.svg]] +\\ +\\ +In C, we can create a heap struct for easier implementation of algorithms +#+BEGIN_SRC c + struct heap_type{ + int array[]; + size_t capacity; + size_t len; + }; +#+END_SRC +** Operations on heaps +Both insertion and deletion in heap must be done in a way which conforms to the heap property as well as shape property of heap. Before we can look at insertion and deletion, we need a way to find parent and child for a given index. We will also first see up-heapify and down-heapify functions. +*** Parent and child indices +In a binary heap, we can find parent and children for any given index using simple formulas. 
++ If array is zero indexed, for element at index i + + children at indices $(2i + 1)$ and $(2i + 2)$ + + parent at index $floor\left( (i - 1)/2 \right)$ ++ If array is one indexed, for element at index i + + children at indices $(2i)$ and $(2i + 1)$ + + parent at index $floor\left( i/2 \right)$ + +*** Down-heapify +The down-heapify is a function which can re-heapify an array if no element of heap violates the heap property other than index and it's two children. +\\ +This function runs in $\theta (log_2n)$ time. The algorithm for this works as follows +1. Compare the index element with its children and stop if in correct order in relation to both children. +2. If not in correct order, swap the index element with the children which is not in correct order. Repeat till in correct order or at the lowest level. + +#+BEGIN_SRC c + void down_heapify(struct heap_type heap, size_t index){ + size_t left = 2 * index + 1; + size_t right = 2 * index + 2; + size_t largest = index; + + if(left < heap.len && heap.array[left] > heap.array[largest]) + largest = left; + + if(right < heap.len && heap.array[right] > heap.array[largest]) + largest = right; + + if(largest != index){ + swap(heap.array[index], heap.array[largest]); + down_heapify(heap, largest); + } + } +#+END_SRC + +Since we shift element downwards, this operation is often called /down-heap/ operation. It is also known as /trickle-down, swim-down, heapify-down, or cascade-down/ + +*** Up-heapify +The up-heapify is a function which can re-heapify an array if no element of heap violates the heap property other than index and it's parent. +\\ +This function runs in $\theta (log_2n)$ time. The algorithm for this works as follows +1. Compare the index element to its parent and stop algorithm if it is in correct order. +2. If not in correct order, swap element with its parent. Repeat till element in correct position or at root position. 
+ +#+BEGIN_SRC c + void up_heapify(struct heap_type heap, size_t index){ + size_t parent = (index - 1) / 2; + size_t smallest = index; + + if(parent >= 0 && heap.array[smallest] > heap.array[parent]) + smallest = parent; + + if(smallest != index){ + swap(heap.array[index], heap.array[smallest]); + up_heapify(heap, smallest); + } + } +#+END_SRC + +Since we shift element upwards, this operation is often called /up-heap/ operation. It is also known as /trickle-up, swim-up, heapify-up, or cascade-up/ +\\ +\\ +*TODO* : Maybe up-heapfiy funtion should be made cleaner rather than trying to mirror down-heapify funtion. + +*** Insertion +Insertion takes $\theta (log_2n)$ time in a binary heap. To insert and element in heap, we will add it to the end of the heap and then apply up-heapify operation of the elment +\\ +The code shows example of insertion in a max-heap. + +#+BEGIN_SRC c + void insert_element(struct heap_type heap, int element){ + // add element + size_t element_index = heap.len; + if(element_index == heap.capacity){ + printf("Heap reached full capacity"); + return; + } + + heap.array[heap.len++] = element; + up_heapify(heap, heap.len - 1); + } +#+END_SRC + +*** Deletion or Extraction +Like insertion, extraction also takes $\theta (log_2n)$ time. Extraction from heap will extract the root element of the heap. We can use the down-heapify function in order to re-heapify after extracting the root node. +\\ +\\ +The code shows example of extraction in max-heap. + +#+BEGIN_SRC c + int extract_element(struct heap_type heap){ + if(heap.len < 1){ + printf("No elements in the heap"); + return -1; + } + + int r = heap.array[0]; + heap.array[0] = heap.array[heap.len - 1]; + heap.len -= 1; + + down_heapify(heap, 0); + + return r; + } +#+END_SRC + +*** Insert then extract +Inserting an element and then extracting from the heap can be done more efficiently than simply calling these functions seperately as defined previously. 
If we call both functions we defined above, we have to do an up-heap operation followed by a down-heap. Instead, there is a way to do just a single down-heap. +\\ +\\ +The algorithm for this will work as follows in a max-heap. +1. Compare whether the item we are trying to push is greater than root of heap. +2. If item we are pushing is greater, return it. +3. Else, + 1. Replace root element with new item + 2. Apply down-heapify on the root of heap + 3. Return the original root of the heap which we replaced. + +In python, the heapq module implements this operation by the name of */heappushpop/* (its */heap replace/* does the reverse: it extracts first and then inserts). +#+BEGIN_SRC c + int heap_replace(struct heap_type heap, int element){ + if(element > heap.array[0]) + return element; + + int r = heap.array[0]; + swap(heap.array[0], element); + down_heapify(heap, 0); + return r; + } +#+END_SRC + +*** Searching +Searching for an arbitrary element takes linear time in a heap. We use linear search to search for the element in the array. +*** Deleting arbitrary element +For a max-heap, deleting an arbitrary element is done as follows +1. Find the element to delete and get its index $i$. +2. swap last element and the element at index $i$, and decrease the size of heap. +3. apply down-heapify on index $i$ if any of its children violate the heap property else apply up-heapify if the parent element violates the heap property. +*** Decrease and increase keys +TODO : I don't know if it is necessary to do this operation. It looks simple to implement. +** Building a heap from array +We can convert a normal array into a heap using the down-heapify operation in linear time $\left( \theta (n) \right)$ + +#+BEGIN_SRC c + // array.array[..] 
contains an array which is not a heap yet + // this funtion will turn it into a correct heap + void build_heap(int array[], size_t len){ + for(int i = (len/2) - 1; i >= 0; i--) + down_heapify(array, i); + } +#+END_SRC +As we see, for */zero indexed language/*, the range of for loop is [(len(array)/2) - 1, 0] +\\ +If we are using a */one indexed language/*, then range of for loop is [len(array)/2, 1] + +* Graphs +A graph is a data structure which consists of nodes/vertices, and edges. We sometimes write it as $G=(V,E)$, where $V$ is the set of vertices and $E$ is the set of edges. When we are working on runtime of algorithms related to graphs, we represent runtime in two input sizes. $|V|$ which we simply write as $V$ is the number of vertices and similarly $E$ is the number of edges. +** Representing graphs +We need a way to represent graphs in computers and to search a graph. Searching a graph means to systematically follow edges of graphs in order to reach vertices. +\\ +\\ +The two common ways of representing graphs are either using adjacency lists and adjacency matrix. Either can represent both directed and undirected graphs. + +*** Adjacency List +Every node in the graph is represented by a linked list. The list contains the nodes to which the list node is connected by an edge. +\\ +Example, if list-0 contains node-3, then node-0 is connected to node-3 by an edge. ++ For *undirected graphs* this will simply work by storing all nodes in list who have a shared edge with list node. ++ For *directed graphs* we will only add node to list, if edge goes from list node to the stored node. +So in our previous example, if list-0 contains node-3, then the edge goes from 0 to 3 in the directed graph. +\\ +\\ +The space taken by adjacency list representation is $\theta (V + E)$. +\\ +Since each node represents an edge, it is easy to convert an adjacency representation graph to a *weighted graph*. A weighted graph is a graph where each edge has an associated weight. 
So the weight of (u, v) edge can be stored in the node-v of u's list. +\\ +The adjacency list representation is very robust and can represent various types of graph variants. + +*** Adjacency Matrix +We use a single matrix to represent the graph. The size of the matrix is $\left( |V| \times |V| \right)$. When we make the matrix, all its elements are zero, i.e., the matrix is zero initialized. +\\ +\\ +If there is an edge between vertices (x , y), we show it by setting +\\ +matrix[x][y] = true */or/* matrix[x][y] = 1 +\\ +If there is not an edge between vertices (x , y), we set +\\ +matrix[x][y] = false */or/* matrix[x][y] = 0 ++ For undirected graphs, to show edge (u , v) we have to set both matrix[u][v] and matrix[v][u] to 1. ++ For directed graphs, to show edge (u , v) which goes from u to v, we only set matrix[u][v] to 1. + +The space taken by adjacency matrix is $\theta (V^2)$. +\\ +For undirected graphs, the matrix will be symmetrical along the diagonal, because the matrix will be equal to its own *transpose*. So we can save space by only storing half the matrix in memory. +\\ +\\ +When comparing asymptotic results, the adjacency list seems more efficient, but the matrix has the advantage of only storing 1 bit for each cell. So in denser graphs, the matrix may use less space. +\\ +\\ +We can store weighted graphs in adjacency matrix by storing the weights along with the edge information in matrix cells. + +** Vertex and edge attributes +Many times we have to store attributes with either vertices or edges or sometimes both. How this is done differs by language. In notation, we will write it using a dot (.) +\\ +\\ +For example, the attribute x of v will be denoted as v.x +\\ +Similarly, the attribute x of edge (u , v) will be denoted as (u , v).x +** Density of graph +Knowing the density of a graph can help us choose the way in which we represent our graph. 
+\\ +The formula for density of graph is +\[ \text{density} = \frac{\text{number of edges}}{\text{maximum possible edges}} \] +Maximum possible number of edges for a simple undirected graph is +\[ \frac{|V| \left( |V| - 1 \right)}{2} \] +Maximum possible number of edges for a simple directed graph is +\[ |V| \left( |V| - 1 \right) \] +Therefore, the density of a simple undirected graph will be +\[ \text{density (simple undirected)} = \frac{2|E|}{|V| \left( |V| - 1 \right)} \] +And the density of a simple directed graph will be +\[ \text{density (simple directed)} = \frac{|E|}{|V| \left( |V| - 1 \right)} \] + +Therefore, maximum density for a graph is 1. The minimum density for a graph is 0. +\\ +Knowing this, we can say a graph with low density is a sparse graph and a graph with high density is a dense graph. +*** Which representation to use +For a quick approximation, for an undirected graph, when $2|E|$ is close to $|V|^2$, we say that the graph is dense, else we say it is sparse. +\\ +Similarly, for a directed graph, when $|E|$ is close to $|V|^2$, we can say the graph is dense, else it is sparse. +\\ +\\ +The list representation provides a more compact way to represent a graph when the graph is *sparse*, whereas the matrix representation is better for *dense* graphs. +\\ +Another criterion is how the algorithm will use the graph. If we want to traverse to neighbouring nodes, then list representation works well. If we want to quickly tell if there is an edge between two nodes, then matrix representation is better. + +** Searching Graphs +Graph search (or graph traversal) algorithms are used to explore a graph to find nodes and edges. Vertices not connected by edges are not explored by such algorithms. These algorithms start at a source vertex and traverse as much of the connected graph as possible. +\\ +\\ +Graph searching algorithms can also be used on trees, because trees are also graphs. 
+*** Breadth first search +BFS is one of the simplest algorithms for searching a graph and is used as an archetype for many other graph algorithms. This algorithm works well with the adjacency list representation. +\\ +\\ +In BFS, the nodes are explored based on their distance from the starting node. What we mean by distance between nodes is how many edges are in between the two nodes. +\\ +\\ +So in BFS, all nodes at distance 1 are explored first, then nodes at distance 2 are explored, then nodes at distance 3 and so on. That is, all nodes at distance $k$ are explored before exploring nodes at distance $(k+1)$. +#+BEGIN_SRC c + BFS(graph_type graph, node_type start){ + queue_type queue; + start.explored = true; + queue.add(start); + + while(queue.len != 0){ + node_type v = queue.dequeue(); + node_list adjacency_list = grap.adj_list(v); + + while(adjacency_list != NULL){ + node_type u = adjacency_list.node; + if(u.explored == false){ + u.explored = true; + queue.add(u); + } + adjacency_list = adjacency_list.next; + } + } + } +#+END_SRC + ++ *Analysis* +For an input graph $G=(V,E)$, every node is enqued only once and hence, dequeued only once. The time taken to enqueue and dequeue a single node is $\theta (1)$, then the time for $|V|$ nodes is, $\theta (V)$. Each node in adjacency list represents an edge, therefore the time taken to explore each node in adjacency lists is $\theta (E)$. Therefore, the total time complexity is +\[ \text{Time complexity of BFS : } \theta(V + E) \] +*** Breadth-first trees for shortest path +For a simple graph, we may want to get the shortest path between two nodes. This can be done by making a Breadth-first tree. +\\ +\\ +When we are traversing nodes using BFS, we can create a breadth-first tree. To make this tree, we simply need to set parent of u in the inner while loop in the BFS algorithm to v. So our algorithm from earlier will become. 
+#+BEGIN_SRC c + BFS_shortest_path(graph_type graph, node_type start, node_type end){ + queue_type queue; + start.explored = true; + start.parent = NULL; // the start node is root node of tree + queue.add(start); + + if(start == end) return; // start is the end node, so the path is just the root + + while(queue.len != 0){ + node_type v = queue.dequeue(); + node_list adjacency_list = graph.adj_list(v); + while(adjacency_list != NULL){ + node_type u = adjacency_list.node; + if(u.explored == false){ + u.explored = true; + u.parent = v; // the parent of u is v + queue.add(u); + + if(u == end) return; // if we found the end node, + // we have the path to it. + } + + adjacency_list = adjacency_list.next; + } + } + + printf("end node not in graph"); + } +#+END_SRC +In this tree, the path upwards from any given node to start node will be the shortest path to the start node. +\\ +Therefore, we can get the shortest path now as follows +#+BEGIN_SRC c + print_shortest_path(graph_type graph, node_type start, node_type end){ + BFS_shortest_path(graph, start, end); + while(end != NULL){ + print_node(end); + end = end.parent; + } + } +#+END_SRC +This will print shortest path from end node to start node. +*** Depth first search +Unlike BFS, depth first search is more biased towards the farthest nodes of a graph. It follows a single path till it reaches the end of a path. After that, it back tracks to the last open path and follows that one. This process is repeated till all nodes are covered. +\\ +\\ +Implementation of DFS is very similar to BFS with two differences. Rather than using a queue, we use a *stack*. In BFS, the explored nodes are added to the queue, but in DFS we will add unexplored nodes to the stack. 
+ +#+BEGIN_SRC c + DFS(graph_type graph, node_type start){ + stack_type stack; + stack.push(start); + while(stack.len != 0){ + node_type v = stack.pop(); + if(v.explored == false){ + v.explored = true; + + node_list adjacency_list = graph.adj_list(v); // neighbours of the popped node v, not of start + while(adjacency_list != NULL){ + stack.push(adjacency_list.node); + adjacency_list = adjacency_list.next; + } + } + } + } +#+END_SRC + +Another way to implement DFS is recursively. + +#+BEGIN_SRC c + DFS(graph_type graph, node_type node){ + node.discovered = true; + node_list adjacency_list = graph.adj_list(node); + while(adjacency_list != NULL){ + node_type u = adjacency_list.node; + if(u.discovered == false) + DFS(graph, u); + adjacency_list = adjacency_list.next; + } + } +#+END_SRC + +The difference between recursive and iterative version of DFS is that, recursive will choose the path of first neighbour in the adjacency list, whereas the iterative will choose the path of last neighbour in the adjacency list. + ++ *Analysis* +For an input graph $G=(V,E)$, the time complexity for Depth first search is $\theta (V + E)$, i.e, it is the same as breadth first search. The reasoning for this is the same as before, all nodes are pushed and popped from stack only once, giving us time complexity of $\theta (V)$. We go through all the adjacency lists only once giving time complexity $\theta (E)$. Thus adding the two will give us +\[ \text{Time complexity of DFS : } \theta (V + E) \] + +*** Properties of DFS +DFS is very useful to understand the structure of a graph. 
To understand the +*** Topological sort using DFS diff --git a/main.tsk b/main.tsk new file mode 100644 index 0000000..d48896f --- /dev/null +++ b/main.tsk @@ -0,0 +1,7 @@ +*Export to HTML +#do +emacs --script src/export.el + +*Remove intermediate +#do +rm main.html~ diff --git a/src/export.el b/src/export.el new file mode 100644 index 0000000..65d5dfd --- /dev/null +++ b/src/export.el @@ -0,0 +1,9 @@ +;; In elisp, default-directory is the current directory +(add-to-list 'load-path "src") +;; If htmlize is outdated, just replace htmlize.el with the newer version lmao. +(require 'htmlize) + +(load-theme 'tsdh-light) + +(find-file "main.org") +(org-html-export-to-html) diff --git a/src/htmlize.el b/src/htmlize.el new file mode 100644 index 0000000..b158a65 --- /dev/null +++ b/src/htmlize.el @@ -0,0 +1,1864 @@ +;;; htmlize.el --- Convert buffer text and decorations to HTML. -*- lexical-binding: t -*- + +;; Copyright (C) 1997-2003,2005,2006,2009,2011,2012,2014,2017,2018,2020 Hrvoje Niksic + +;; Author: Hrvoje Niksic +;; Homepage: https://github.com/hniksic/emacs-htmlize +;; Keywords: hypermedia, extensions +;; Version: 1.57 + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; This package converts the buffer text and the associated +;; decorations to HTML. 
Mail to the author to discuss +;; features and additions. All suggestions are more than welcome. + +;; To use it, just switch to the buffer you want HTML-ized and type +;; `M-x htmlize-buffer'. You will be switched to a new buffer that +;; contains the resulting HTML code. You can edit and inspect this +;; buffer, or you can just save it with C-x C-w. `M-x htmlize-file' +;; will find a file, fontify it, and save the HTML version in +;; FILE.html, without any additional intervention. `M-x +;; htmlize-many-files' allows you to htmlize any number of files in +;; the same manner. `M-x htmlize-many-files-dired' does the same for +;; files marked in a dired buffer. + +;; htmlize supports three types of HTML output, selected by setting +;; `htmlize-output-type': `css', `inline-css', and `font'. In `css' +;; mode, htmlize uses cascading style sheets to specify colors; it +;; generates classes that correspond to Emacs faces and uses <span +;; class=FACE>...</span> to color parts of text. In this mode, the +;; produced HTML is valid under the 4.01 strict DTD, as confirmed by +;; the W3C validator. `inline-css' is like `css', except the CSS is +;; put directly in the STYLE attribute of the SPAN element, making it +;; possible to paste the generated HTML into existing HTML documents. +;; In `font' mode, htmlize uses <font color="...">...</font> to +;; colorize HTML, which is not standard-compliant, but works better in +;; older browsers. `css' mode is the default. + +;; You can also use htmlize from your Emacs Lisp code. When called +;; non-interactively, `htmlize-buffer' and `htmlize-region' will +;; return the resulting HTML buffer, but will not change current +;; buffer or move the point. htmlize will do its best to work on +;; non-windowing Emacs sessions but the result will be limited to +;; colors supported by the terminal. + +;; htmlize aims for compatibility with older Emacs versions. Please +;; let me know if it doesn't work on the version of GNU Emacs that you +;; are using. 
The package relies on the presence of CL extensions; +;; please don't try to remove that dependency. I see no practical +;; problems with using the full power of the CL extensions, except +;; that one might learn to like them too much. + +;; The latest version is available at: +;; +;; <https://github.com/hniksic/emacs-htmlize> +;; +;; + +;; Thanks go to the many people who have sent reports and contributed +;; comments, suggestions, and fixes. They include Ron Gut, Bob +;; Weiner, Toni Drabik, Peter Breton, Ville Skytta, Thomas Vogels, +;; Juri Linkov, Maciek Pasternacki, and many others. + +;; User quotes: "You sir, are a sick, sick, _sick_ person. :)" +;; -- Bill Perry, author of Emacs/W3 + + +;;; Code: + +(require 'cl-lib) +(eval-when-compile + (defvar font-lock-auto-fontify) + (defvar font-lock-support-mode) + (defvar global-font-lock-mode)) + +(defconst htmlize-version "1.57") + +(defgroup htmlize nil + "Convert buffer text and faces to HTML." + :group 'hypermedia) + +(defcustom htmlize-head-tags "" + "Additional tags to insert within HEAD of the generated document." + :type 'string + :group 'htmlize) + +(defcustom htmlize-output-type 'css + "Output type of generated HTML, one of `css', `inline-css', or `font'. +When set to `css' (the default), htmlize will generate a style sheet +with description of faces, and use it in the HTML document, specifying +the faces in the actual text with <span class=\"FACE\">. + +When set to `inline-css', the style will be generated as above, but +placed directly in the STYLE attribute of the span ELEMENT: <span +style=\"STYLE\">. This makes it easier to paste the resulting HTML to +other documents. + +When set to `font', the properties will be set using layout tags +<font>, <b>, <i>, <u>, and <strike>. + +`css' output is normally preferred, but `font' is still useful for +supporting old, pre-CSS browsers, and both `inline-css' and `font' for +easier embedding of colorized text in foreign HTML documents (no style +sheet to carry around)." 
+ :type '(choice (const css) (const inline-css) (const font)) + :group 'htmlize) + +(defcustom htmlize-use-images t + "Whether htmlize generates `img' for images attached to buffer contents." + :type 'boolean + :group 'htmlize) + +(defcustom htmlize-force-inline-images nil + "Non-nil means generate all images inline using data URLs. +Normally htmlize converts image descriptors with :file properties to +relative URIs, and those with :data properties to data URIs. With this +flag set, the images specified as a file name are loaded into memory and +embedded in the HTML as data URIs." + :type 'boolean + :group 'htmlize) + +(defcustom htmlize-max-alt-text 100 + "Maximum size of text to use as ALT text in images. + +Normally when htmlize encounters text covered by the `display' property +that specifies an image, it generates an `alt' attribute containing the +original text. If the text is larger than `htmlize-max-alt-text' characters, +this will not be done." + :type 'integer + :group 'htmlize) + +(defcustom htmlize-transform-image 'htmlize-default-transform-image + "Function called to modify the image descriptor. + +The function is called with the image descriptor found in the buffer and +the text the image is supposed to replace. It should return a (possibly +different) image descriptor property list or a replacement string to use +instead of the original buffer text. + +Returning nil is the same as returning the original text." + :type 'function + :group 'htmlize) + +(defcustom htmlize-generate-hyperlinks t + "Non-nil means auto-generate the links from URLs and mail addresses in buffer. + +This is on by default; set it to nil if you don't want htmlize to +autogenerate such links. Note that this option only turns off automatic +search for contents that looks like URLs and converting them to links. +It has no effect on whether htmlize respects the `htmlize-link' property." 
+ :type 'boolean + :group 'htmlize) + +(defcustom htmlize-hyperlink-style " + a { + color: inherit; + background-color: inherit; + font: inherit; + text-decoration: inherit; + } + a:hover { + text-decoration: underline; + } +" + "The CSS style used for hyperlinks when in CSS mode." + :type 'string + :group 'htmlize) + +(defcustom htmlize-replace-form-feeds t + "Non-nil means replace form feeds in source code with HTML separators. +Form feeds are the ^L characters at line beginnings that are sometimes +used to separate sections of source code. If this variable is set to +`t', form feed characters are replaced with the <hr> separator. If this +is a string, it specifies the replacement to use. Note that <pre> is
+temporarily closed before the separator is inserted, so the default
+replacement is effectively \"</pre><hr /><pre>\".  If you specify
+another replacement, don't forget to close and reopen the <pre> if you
+want the output to remain valid HTML.
+
+If you need more elaborate processing, set this to nil and use
+htmlize-after-hook."
+  :type '(choice boolean string)
+  :group 'htmlize)
+
+(defcustom htmlize-html-charset nil
+  "The charset declared by the resulting HTML documents.
+When non-nil, causes htmlize to insert the following in the HEAD section
+of the generated HTML:
+
+  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=CHARSET\">
+
+where CHARSET is the value you've set for htmlize-html-charset.  Valid
+charsets are defined by MIME and include strings like \"iso-8859-1\",
+\"iso-8859-15\", \"utf-8\", etc.
+
+If you are using non-Latin-1 charsets, you might need to set this for
+your documents to render correctly.  Also, the W3C validator requires
+submitted HTML documents to declare a charset.  So if you care about
+validation, you can use this to prevent the validator from bitching.
+
+Needless to say, if you set this, you should actually make sure that
+the buffer is in the encoding you're claiming it is in.  (This is
+normally achieved by using the correct file coding system for the
+buffer.)  If you don't understand what that means, you should probably
+leave this option in its default setting."
+  :type '(choice (const :tag "Unset" nil)
+		 string)
+  :group 'htmlize)
+
+(defcustom htmlize-convert-nonascii-to-entities t
+  "Whether non-ASCII characters should be converted to HTML entities.
+
+When this is non-nil, characters with codes in the 128-255 range will be
+considered Latin 1 and rewritten as \"&#CODE;\".  Characters with codes
+above 255 will be converted to \"&#UCS;\", where UCS denotes the Unicode
+code point of the character.  If the code point cannot be determined,
+the character will be copied unchanged, as would be the case if the
+option were nil.
+
+When the option is nil, the non-ASCII characters are copied to HTML
+without modification.  In that case, the web server and/or the browser
+must be set to understand the encoding that was used when saving the
+buffer.  (You might also want to specify it by setting
+`htmlize-html-charset'.)
+
+Note that in an HTML entity \"&#CODE;\", CODE is always a UCS code point,
+which has nothing to do with the charset the page is in.  For example,
+\"&#169;\" *always* refers to the copyright symbol, regardless of charset
+specified by the META tag or the charset sent by the HTTP server.  In
+other words, \"&#169;\" is exactly equivalent to \"&copy;\".
+
+For most people htmlize will work fine with this option left at the
+default setting; don't change it unless you know what you're doing."
+  :type 'boolean
+  :group 'htmlize)
+
+(defcustom htmlize-ignore-face-size 'absolute
+  "Whether face size should be ignored when generating HTML.
+If this is nil, face sizes are used.  If set to t, sizes are ignored.
+If set to `absolute', only absolute size specifications are ignored.
+Please note that font sizes only work with CSS-based output types."
+  :type '(choice (const :tag "Don't ignore" nil)
+		 (const :tag "Ignore all" t)
+		 (const :tag "Ignore absolute" absolute))
+  :group 'htmlize)
+
+;; String option; the default is the empty string, i.e. no prefix.
+(defcustom htmlize-css-name-prefix ""
+  "The prefix used for CSS names.
+The CSS names that htmlize generates from face names are often too
+generic for CSS files; for example, `font-lock-type-face' is transformed
+to `type'.  Use this variable to add a prefix to the generated names.
+The string \"htmlize-\" is an example of a reasonable prefix."
+  :type 'string
+  :group 'htmlize)
+
+;; Boolean option, enabled by default.
+(defcustom htmlize-use-rgb-txt t
+  "Whether `rgb.txt' should be used to convert color names to RGB.
+
+This conversion means determining, for instance, that the color
+\"IndianRed\" corresponds to the (205, 92, 92) RGB triple.  `rgb.txt'
+is the X color database that maps hundreds of color names to such RGB
+triples.  When this variable is non-nil, `htmlize' uses `rgb.txt' to
+look up color names.
+
+If this variable is nil, htmlize queries Emacs for RGB components of
+colors using `color-instance-rgb-components' and `color-values'.
+This can yield incorrect results on non-true-color displays.
+
+If the `rgb.txt' file is not found (which will be the case if you're
+running Emacs on non-X11 systems), this option is ignored."
+  :type 'boolean
+  :group 'htmlize)
+
+(defvar htmlize-face-overrides nil
+  "Overrides for face definitions.
+
+Normally face definitions are taken from Emacs settings for fonts
+in the current frame.  For faces present in this plist, the
+definitions will be used instead.  Keys in the plist are symbols
+naming the face and values are the overriding definitions.  For
+example:
+
+  (setq htmlize-face-overrides
+        '(font-lock-warning-face \"black\"
+          font-lock-function-name-face \"red\"
+          font-lock-comment-face \"blue\"
+          default (:foreground \"dark-green\" :background \"yellow\")))
+
+This variable can also be `let' bound when running `htmlize-buffer'.")
+
+;; Boolean option, enabled by default.
+(defcustom htmlize-untabify t
+  "Non-nil means untabify buffer contents during htmlization."
+  :type 'boolean
+  :group 'htmlize)
+
+;; The customize widget offers three choices: no mode (nil, the
+;; default), `html-mode', or any user-supplied major-mode function.
+(defcustom htmlize-html-major-mode nil
+  "The mode the newly created HTML buffer will be put in.
+Set this to nil if you prefer the default (fundamental) mode."
+  :type '(radio (const :tag "No mode (fundamental)" nil)
+		 (function-item html-mode)
+		 (function :tag "User-defined major mode"))
+  :group 'htmlize)
+
+(defcustom htmlize-pre-style nil
+  "When non-nil, `<pre>' tags will be decorated with style
+information in `font' and `inline-css' modes. This allows a
+consistent background for captures of regions."
+  :type 'boolean
+  :group 'htmlize)
+
+;; Initially nil: no hook functions are installed by default.
+(defvar htmlize-before-hook nil
+  "Hook run before htmlizing a buffer.
+The hook functions are run in the source buffer (not the resulting HTML
+buffer).")
+
+;; Initially nil: no hook functions are installed by default.
+(defvar htmlize-after-hook nil
+  "Hook run after htmlizing a buffer.
+Unlike `htmlize-before-hook', these functions are run in the generated
+HTML buffer.  You may use them to modify the outlook of the final HTML
+output.")
+
+;; Initially nil: no hook functions are installed by default.
+(defvar htmlize-file-hook nil
+  "Hook run by `htmlize-file' after htmlizing a file, but before saving it.")
+
+;; Forward declaration only: no initial value is given here and nothing
+;; in this part of the file assigns one.
+;; NOTE(review): presumably bound dynamically while a buffer is being
+;; htmlized -- confirm against the code that uses it.
+(defvar htmlize-buffer-places)
+
+;;; Some cross-Emacs compatibility.
+
+;; Locate the next position after POS where a character property
+;; changes, taking both text properties and overlays into account.
+;; With a non-nil PROP only that property is tracked; with a nil PROP
+;; a change in any property counts.  LIMIT is handed to the
+;; underlying search function unchanged.
+(defun htmlize-next-change (pos prop &optional limit)
+  (cond ((null prop)
+         (next-char-property-change pos limit))
+        (t
+         (next-single-char-property-change pos prop nil limit))))
+
+(defun htmlize-overlay-faces-at (pos)
+  ;; Collect the non-nil `face' properties of the overlays at POS, in
+  ;; the order in which `overlays-at' reports the overlays.
+  (let (faces)
+    (dolist (ov (overlays-at pos))
+      (let ((face (overlay-get ov 'face)))
+        (when face
+          (push face faces))))
+    (nreverse faces)))
+
+(defun htmlize-next-face-change (pos &optional limit)
+  ;; Return the next position after POS at which the effective face
+  ;; may change, looking at both the `face' text property and the
+  ;; faces of overlays.  LIMIT defaults to `point-max'.
+  ;;
+  ;; (htmlize-next-change pos 'face limit) would skip over entire
+  ;; overlays that specify the `face' property, even when they
+  ;; contain smaller text properties that also specify `face'.
+  ;; Emacs display engine merges those faces, and so must we.
+  (or limit
+      (setq limit (point-max)))
+  (let ((next-prop (next-single-property-change pos 'face nil limit))
+        (overlay-faces (htmlize-overlay-faces-at pos)))
+    ;; Step from one overlay boundary to the next until either the
+    ;; next text-property change is reached or the list of overlay
+    ;; faces differs from the one at the starting position.
+    (while (progn
+             (setq pos (next-overlay-change pos))
+             (and (< pos next-prop)
+                  (equal overlay-faces (htmlize-overlay-faces-at pos)))))
+    ;; The loop may have stepped past the text-property change; never
+    ;; return a position beyond it.
+    (setq pos (min pos next-prop))
+    ;; Additionally, we include the entire region that specifies the
+    ;; `display' property.
+    (when (get-char-property pos 'display)
+      (setq pos (next-single-char-property-change pos 'display nil limit)))
+    pos))
+
+;; Expand to a lexically scoped `let'.  The choice is made at macro
+;; expansion time: a plain `let' when `lexical-binding' is bound and
+;; non-nil, `lexical-let' otherwise.
+(defmacro htmlize-lexlet (&rest letforms)
+  (declare (indent 1) (debug let))
+  (if (and (boundp 'lexical-binding)
+           lexical-binding)
+      `(let ,@letforms)
+    ;; cl extensions have a macro implementing lexical let
+    `(lexical-let ,@letforms)))
+
+
+;;; Transformation of buffer text: HTML escapes, untabification, etc.
+
+(defvar htmlize-basic-character-table
+  ;; Map characters in the 0-127 range to either one-character strings
+  ;; or to numeric entities.  The table is indexed by character code
+  ;; and every slot holds a string.
+  (let ((table (make-vector 128 ?\0)))
+    ;; Map characters in the 32-126 range to themselves, others to
+    ;; &#CODE; entities.
+    (dotimes (i 128)
+      (setf (aref table i) (if (and (>= i 32) (<= i 126))
+			       (char-to-string i)
+			     (format "&#%d;" i))))
+    ;; Set exceptions manually.
+    (setf
+     ;; Don't escape newline, carriage return, and TAB.
+     (aref table ?\n) "\n"
+     (aref table ?\r) "\r"
+     (aref table ?\t) "\t"
+     ;; Escape &, <, and >.  These must map to entities; mapping them
+     ;; to themselves would let buffer text be parsed as markup.
+     (aref table ?&) "&amp;"
+     (aref table ?<) "&lt;"
+     (aref table ?>) "&gt;"
+     ;; Not escaping '"' buys us a measurable speedup.  It's only
+     ;; necessary to quote it for strings used in attribute values,
+     ;; which htmlize doesn't typically do.
+     ;(aref table ?\") "&quot;"
+     )
+    table))
+
+;; A cache of HTML representation of non-ASCII characters.  Depending
+;; on the setting of `htmlize-convert-nonascii-to-entities', this maps
+;; non-ASCII characters to either "&#<code>;" or "<char>" (mapconcat's
+;; mapper must always return strings).  It's only filled as characters
+;; are encountered, so that in a buffer with e.g. French text, it will
+;; only ever contain French accented characters as keys.  It's cleared
+;; on each entry to htmlize-buffer-1 to allow modifications of
+;; `htmlize-convert-nonascii-to-entities' to take effect.
+(defvar htmlize-extended-character-cache (make-hash-table :test 'eq))
+
+(defun htmlize-protect-string (string)
+  "HTML-protect string, escaping HTML metacharacters and I18N chars."
+  ;; Only protecting strings that actually contain unsafe or non-ASCII
+  ;; chars removes a lot of unnecessary funcalls and consing.
+  ;; The character class below lists everything that can be copied
+  ;; verbatim: CR, LF, TAB and printable ASCII except `&', `<' and
+  ;; `>'.  If nothing else occurs, STRING itself is returned.
+  (if (not (string-match "[^\r\n\t -%'-;=?-~]" string))
+      string
+    (mapconcat (lambda (char)
+		 (cond
+		  ((< char 128)
+		   ;; ASCII: use htmlize-basic-character-table.
+		   (aref htmlize-basic-character-table char))
+		  ((gethash char htmlize-extended-character-cache)
+		   ;; We've already seen this char; return the cached
+		   ;; string.  (A `cond' clause without a body yields
+		   ;; the value of its test.)
+		   )
+		  ((not htmlize-convert-nonascii-to-entities)
+		   ;; If conversion to entities is not desired, always
+		   ;; copy the char literally.
+		   (setf (gethash char htmlize-extended-character-cache)
+			 (char-to-string char)))
+		  ((< char 256)
+		   ;; Latin 1: no need to call encode-char.
+		   (setf (gethash char htmlize-extended-character-cache)
+			 (format "&#%d;" char)))
+		  ((encode-char char 'ucs)
+                   ;; Must check if encode-char works for CHAR;
+                   ;; it fails for Arabic and possibly elsewhere.
+		   (setf (gethash char htmlize-extended-character-cache)
+			 (format "&#%d;" (encode-char char 'ucs))))
+		  (t
+		   ;; encode-char doesn't work for this char.  Copy it
+		   ;; unchanged and hope for the best.
+		   (setf (gethash char htmlize-extended-character-cache)
+			 (char-to-string char)))))
+	       string "")))
+
+(defun htmlize-attr-escape (string)
+  ;; Like htmlize-protect-string, but also escapes double quotes so
+  ;; that the result is usable inside a double-quoted attribute value.
+  ;; Returns the protected string unchanged when it contains no
+  ;; double quote.
+  (setq string (htmlize-protect-string string))
+  (if (not (string-match "\"" string))
+      string
+    (mapconcat (lambda (char)
+                 (if (eql char ?\")
+                     ;; A literal quote would terminate the attribute.
+                     "&quot;"
+                   (char-to-string char)))
+               string "")))
+
+(defsubst htmlize-concat (list)
+  ;; Concatenate the strings in LIST.  A one-element list is the
+  ;; common case, and then the element itself is returned instead of
+  ;; a freshly allocated copy.
+  (if (or (atom list) (cdr list))
+      (apply #'concat list)
+    (car list)))
+
+(defun htmlize-format-link (linkprops text)
+  ;; Return TEXT, HTML-escaped, wrapped in an <a href="..."> element.
+  ;; LINKPROPS is either the URI itself (a string) or a property list
+  ;; whose :uri entry holds it.  When no URI is available, only the
+  ;; escaped text is returned.
+  (let ((uri (if (stringp linkprops)
+                 linkprops
+               (plist-get linkprops :uri)))
+        (escaped-text (htmlize-protect-string text)))
+    (if uri
+        ;; The URI lands in an attribute value, so it needs the
+        ;; attribute-level escaping as well.
+        (format "<a href=\"%s\">%s</a>" (htmlize-attr-escape uri) escaped-text)
+      escaped-text)))
+
+(defun htmlize-escape-or-link (string)
+  ;; Escape STRING and/or add hyperlinks.  STRING comes from a
+  ;; `display' property.
+  ;; STRING is processed in chunks delimited by changes of the
+  ;; `htmlize-link' property; the converted chunks are concatenated.
+  (let ((pos 0) (end (length string)) outlist)
+    (while (< pos end)
+      (let* ((link (get-char-property pos 'htmlize-link string))
+             (next-link-change (next-single-property-change
+                                pos 'htmlize-link string end))
+             (chunk (substring string pos next-link-change)))
+        (push
+         (cond (link
+                ;; Chunk carries a link: emit it as a hyperlink.
+                (htmlize-format-link link chunk))
+               ((get-char-property 0 'htmlize-literal chunk)
+                ;; Chunk is marked `htmlize-literal': copy it without
+                ;; escaping.
+                chunk)
+               (t
+                (htmlize-protect-string chunk)))
+         outlist)
+        (setq pos next-link-change)))
+    (htmlize-concat (nreverse outlist))))
+
+(defun htmlize-display-prop-to-html (display text)
+  ;; Convert TEXT, which is covered by the `display' property value
+  ;; DISPLAY, to HTML.
+  (let (desc)
+    (cond ((stringp display)
+           ;; Emacs ignores recursive display properties.
+           (htmlize-escape-or-link display))
+          ((not (eq (car-safe display) 'image))
+           ;; Not an image spec: output the underlying text, escaped.
+           (htmlize-protect-string text))
+          ;; Image spec: let `htmlize-transform-image' decide.  A nil
+          ;; result means "use the original text".
+          ((null (setq desc (funcall htmlize-transform-image
+                                     (cdr display) text)))
+           (htmlize-escape-or-link text))
+          ((stringp desc)
+           ;; The transform returned replacement text.
+           (htmlize-escape-or-link desc))
+          (t
+           ;; Otherwise DESC is treated as an image descriptor.
+           (htmlize-generate-image desc text)))))
+
+(defun htmlize-string-to-html (string)
+  ;; Convert the string to HTML, including images attached as
+  ;; `display' property and links as `htmlize-link' property.  In a
+  ;; string without images or links, this is equivalent to
+  ;; `htmlize-protect-string'.
+  ;; STRING is processed in chunks delimited by changes of the
+  ;; `display' property; the converted chunks are concatenated.
+  (let ((pos 0) (end (length string)) outlist)
+    (while (< pos end)
+      (let* ((display (get-char-property pos 'display string))
+             (next-display-change (next-single-property-change
+                                   pos 'display string end))
+             (chunk (substring string pos next-display-change)))
+        (push
+         (if display
+             (htmlize-display-prop-to-html display chunk)
+           (htmlize-escape-or-link chunk))
+         outlist)
+        (setq pos next-display-change)))
+    (htmlize-concat (nreverse outlist))))
+
+(defun htmlize-default-transform-image (imgprops _text)
+  "Default transformation of image descriptor to something usable in HTML.
+
+If `htmlize-use-images' is nil, the function always returns nil, meaning
+use original text.  Otherwise, it tries to find the image for images that
+specify a file name.  If `htmlize-force-inline-images' is non-nil, it also
+converts the :file attribute to :data and returns the modified property
+list.
+
+IMGPROPS itself is never modified; changes are made on a copy."
+  (when htmlize-use-images
+    (when (plist-get imgprops :file)
+      (let ((location (plist-get (cdr (find-image (list imgprops))) :file)))
+        (when location
+          (setq imgprops (plist-put (cl-copy-list imgprops) :file location)))))
+    (if htmlize-force-inline-images
+        ;; NOTE(review): a descriptor without :file (e.g. one that only
+        ;; has :data) yields nil here, i.e. falls back to the original
+        ;; text -- confirm this is intended.
+        (let ((location (plist-get imgprops :file))
+              data)
+          (when location
+            (with-temp-buffer
+              (condition-case nil
+                  (progn
+                    (insert-file-contents-literally location)
+                    (setq data (buffer-string)))
+                (error nil))))
+          ;; if successful, return the new plist, otherwise return
+          ;; nil, which will use the original text
+          (and data
+               ;; Copy first: when `find-image' found nothing above,
+               ;; IMGPROPS is still the caller's own list, and
+               ;; `plist-put' modifies its argument in place.
+               (plist-put (plist-put (cl-copy-list imgprops) :file nil)
+                          :data data)))
+      imgprops)))
+
+(defun htmlize-alt-text (_imgprops origtext)
+  ;; Return ORIGTEXT when it is usable as ALT text -- non-empty, at
+  ;; most `htmlize-max-alt-text' characters long and free of control
+  ;; characters (codes 0-31) -- and nil otherwise.
+  (let ((len (length origtext)))
+    (cond ((zerop len) nil)
+          ((> len htmlize-max-alt-text) nil)
+          ((string-match "[\0-\x1f]" origtext) nil)
+          (t origtext))))
+
+(defun htmlize-generate-image (imgprops origtext)
+  ;; Return an <img> element for the image descriptor IMGPROPS.
+  ;; ORIGTEXT, the buffer text the image replaces, becomes the `alt'
+  ;; attribute when `htmlize-alt-text' accepts it.  A descriptor with
+  ;; neither :file nor :data yields nil.
+  (let* ((alt-text (htmlize-alt-text imgprops origtext))
+         (alt-attr (if alt-text
+                       (format " alt=\"%s\"" (htmlize-attr-escape alt-text))
+                     "")))
+    (cond ((plist-get imgprops :file)
+           ;; Try to find the image in image-load-path
+           (let* ((found-props (cdr (find-image (list imgprops))))
+                  (file (or (plist-get found-props :file)
+                            (plist-get imgprops :file))))
+             ;; Reference the file through a relative URI.
+             (format "<img src=\"%s\"%s />"
+                     (htmlize-attr-escape (file-relative-name file))
+                     alt-attr)))
+          ((plist-get imgprops :data)
+           ;; Embed the image bytes as a base64 data URI.
+           (format "<img src=\"data:image/%s;base64,%s\"%s />"
+                   (or (plist-get imgprops :type) "")
+                   (base64-encode-string (plist-get imgprops :data))
+                   alt-attr)))))
+
+;; The string inserted in place of invisible text that should show as
+;; an ellipsis.  The whole string carries the `htmlize-ellipsis' text
+;; property so that it can be told apart from an ordinary "..." in
+;; the buffer text (see `htmlize-trim-ellipsis').
+(defconst htmlize-ellipsis "...")
+(put-text-property 0 (length htmlize-ellipsis) 'htmlize-ellipsis t htmlize-ellipsis)
+
+(defun htmlize-match-inv-spec (inv)
+  ;; Look INV up in `buffer-invisibility-spec'.  An element of the
+  ;; spec matches when it is INV itself (symbol element) or when its
+  ;; car is INV (cons element).  Returns the tail of the spec that
+  ;; starts at the first matching element, or nil.
+  (cl-member inv buffer-invisibility-spec
+             :key (lambda (i)
+                    (if (symbolp i) i (car i)))))
+
+(defun htmlize-decode-invisibility-spec (invisible)
+  ;; Return t, nil, or `ellipsis', depending on how invisible text should be inserted.
+  ;; INVISIBLE is the value of the `invisible' property.  t means the
+  ;; text is shown, nil means it is left out, and `ellipsis' means it
+  ;; is replaced with an ellipsis.
+
+  (if (not (listp buffer-invisibility-spec))
+      ;; If buffer-invisibility-spec is not a list, then all
+      ;; characters with non-nil `invisible' property are invisible,
+      ;; so the text is shown only when INVISIBLE is nil.
+      (not invisible)
+
+    ;; Otherwise, the value of a non-nil `invisible' property can be:
+    ;; 1. a symbol -- make the text invisible if it matches
+    ;;    buffer-invisibility-spec.
+    ;; 2. a list of symbols -- make the text invisible if
+    ;;    any symbol in the list matches
+    ;;    buffer-invisibility-spec.
+    ;; If the match of buffer-invisibility-spec has a non-nil
+    ;; CDR, replace the invisible text with an ellipsis.
+    (let ((match (if (symbolp invisible)
+                     (htmlize-match-inv-spec invisible)
+                   (cl-some #'htmlize-match-inv-spec invisible))))
+      (cond ((null match) t)
+            ((cdr-safe (car match)) 'ellipsis)
+            (t nil)))))
+
+(defun htmlize-add-before-after-strings (beg end text)
+  ;; Find overlays specifying before-string and after-string in [beg,
+  ;; end).  If any are found, splice them into TEXT and return the new
+  ;; text.
+  (let (additions)
+    ;; Collect (OFFSET . STRING) pairs, with OFFSET relative to BEG.
+    ;; NOTE(review): the offsets are not clamped to TEXT; an overlay
+    ;; that extends beyond [beg, end] would produce an offset outside
+    ;; TEXT -- confirm whether callers can trigger that.
+    (dolist (overlay (overlays-in beg end))
+      (let ((before (overlay-get overlay 'before-string))
+            (after (overlay-get overlay 'after-string)))
+        (when after
+          (push (cons (- (overlay-end overlay) beg)
+                      after)
+                additions))
+        (when before
+          (push (cons (- (overlay-start overlay) beg)
+                      before)
+                additions))))
+    (if additions
+        (let ((textlist nil)
+              (strpos 0))
+          ;; Walk the additions in ascending offset order, emitting
+          ;; the stretch of TEXT before each one and then the string.
+          (dolist (add (cl-stable-sort additions #'< :key #'car))
+            (let ((addpos (car add))
+                  (addtext (cdr add)))
+              (push (substring text strpos addpos) textlist)
+              (push addtext textlist)
+              (setq strpos addpos)))
+          ;; Whatever is left of TEXT after the last addition.
+          (push (substring text strpos) textlist)
+          (apply #'concat (nreverse textlist)))
+      text)))
+
+(defun htmlize-copy-prop (prop beg end string)
+  ;; Copy the specified property from the specified region of the
+  ;; buffer to the target string.  We cannot rely on Emacs to copy the
+  ;; property because we want to handle properties coming from both
+  ;; text properties and overlays.
+  ;; STRING is modified in place; buffer position P corresponds to
+  ;; string index P - BEG.
+  (let ((pos beg))
+    (while (< pos end)
+      (let ((value (get-char-property pos prop))
+            (next-change (htmlize-next-change pos prop end)))
+        ;; Only stretches where the property is non-nil are copied.
+        (when value
+          (put-text-property (- pos beg) (- next-change beg)
+                             prop value string))
+        (setq pos next-change)))))
+
+(defun htmlize-get-text-with-display (beg end)
+  ;; Like buffer-substring-no-properties, except it copies the
+  ;; `display' and `htmlize-link' properties from the buffer, if
+  ;; found, and splices in the before-string and after-string of
+  ;; overlays in the region.
+  (let ((text (buffer-substring-no-properties beg end)))
+    (htmlize-copy-prop 'display beg end text)
+    (htmlize-copy-prop 'htmlize-link beg end text)
+    (setq text (htmlize-add-before-after-strings beg end text))
+    text))
+
+(defun htmlize-buffer-substring-no-invisible (beg end)
+  ;; Like buffer-substring-no-properties, but don't copy invisible
+  ;; parts of the region.  Where `buffer-invisibility-spec'
+  ;; mandates an ellipsis to be shown, htmlize-ellipsis is inserted.
+  (let ((pos beg)
+	visible-list invisible show last-show next-change)
+    ;; Iterate over the changes in the `invisible' property and filter
+    ;; out the portions where it's non-nil, i.e. where the text is
+    ;; invisible.
+    (while (< pos end)
+      (setq invisible (get-char-property pos 'invisible)
+	    next-change (htmlize-next-change pos 'invisible end)
+            show (htmlize-decode-invisibility-spec invisible))
+      (cond ((eq show t)
+	     (push (htmlize-get-text-with-display pos next-change)
+                   visible-list))
+            ;; The `push' below is the last form of the `and', so it
+            ;; only runs when both tests before it succeed.
+            ((and (eq show 'ellipsis)
+                  (not (eq last-show 'ellipsis))
+                  ;; Conflate successive ellipses.
+                  (push htmlize-ellipsis visible-list))))
+      (setq pos next-change last-show show))
+    (htmlize-concat (nreverse visible-list))))
+
+(defun htmlize-trim-ellipsis (text)
+  ;; Drop a leading ellipsis from TEXT.  The ellipsis is recognized by
+  ;; the `htmlize-ellipsis' text property on the first character, not
+  ;; by its characters, so ordinary text that merely begins with "..."
+  ;; is returned untouched.
+  (if (not (get-text-property 0 'htmlize-ellipsis text))
+      text
+    (substring text (length htmlize-ellipsis))))
+
+(defconst htmlize-tab-spaces
+  ;; Precomputed runs of spaces: element N is a string of N spaces,
+  ;; for N from 0 to 31.  Looking one up avoids allocating a fresh
+  ;; string the way (make-string N ?\ ) would.
+  (let ((table (make-vector 32 nil))
+        (width 0))
+    (while (< width (length table))
+      (aset table width (make-string width ?\s))
+      (setq width (1+ width)))
+    table))
+
+(defun htmlize-untabify-string (text start-column)
+  "Untabify TEXT, assuming it starts at START-COLUMN.
+Each TAB is replaced by as many spaces as are needed to reach the next
+multiple of `tab-width'; a newline resets the column to 0.  A `display'
+property found on a TAB is carried over to the spaces that replace it.
+If TEXT contains no TAB, TEXT itself is returned."
+  (let ((column start-column)
+        (last-match 0)
+        (chunk-start 0)
+        chunks match-pos tab-size)
+    (while (string-match "[\t\n]" text last-match)
+      (setq match-pos (match-beginning 0))
+      (cond ((eq (aref text match-pos) ?\t)
+             ;; Encountered a tab: create a chunk of text followed by
+             ;; the expanded tab.
+             (push (substring text chunk-start match-pos) chunks)
+             ;; Increase COLUMN by the length of the text we've
+             ;; skipped since last tab or newline.  (Encountering
+             ;; newline resets it.)
+             (cl-incf column (- match-pos last-match))
+             ;; Calculate tab size based on tab-width and COLUMN.
+             (setq tab-size (- tab-width (% column tab-width)))
+             ;; Expand the tab, carefully recreating the `display'
+             ;; property if one was on the TAB.
+             (let* ((display (get-text-property match-pos 'display text))
+                    ;; The cache only covers widths below its length;
+                    ;; a larger `tab-width' must not index past it.
+                    (cached (and (< tab-size (length htmlize-tab-spaces))
+                                 (aref htmlize-tab-spaces tab-size)))
+                    (expanded-tab
+                     (cond ((null cached)
+                            (make-string tab-size ?\s))
+                           ;; `put-text-property' modifies the string
+                           ;; in place, so work on a private copy and
+                           ;; keep the shared cache entry clean for
+                           ;; later tabs of the same width.
+                           (display (copy-sequence cached))
+                           (t cached))))
+               (when display
+                 (put-text-property 0 tab-size 'display display expanded-tab))
+               (push expanded-tab chunks))
+             (cl-incf column tab-size)
+             (setq chunk-start (1+ match-pos)))
+            (t
+             ;; Reset COLUMN at beginning of line.
+             (setq column 0)))
+      (setq last-match (1+ match-pos)))
+    ;; If no chunks have been allocated, it means there have been no
+    ;; tabs to expand.  Return TEXT unmodified.
+    (if (null chunks)
+        text
+      (when (< chunk-start (length text))
+        ;; Push the remaining chunk.
+        (push (substring text chunk-start) chunks))
+      ;; Generate the output from the available chunks.
+      (htmlize-concat (nreverse chunks)))))
+
+(defun htmlize-extract-text (beg end trailing-ellipsis)
+  ;; Fetch the visible buffer text between BEG and END, untabify it
+  ;; when `htmlize-untabify' is set, and convert it with
+  ;; `htmlize-string-to-html'.  Returns two values: the converted text
+  ;; and the updated TRAILING-ELLIPSIS flag.
+  (let* ((raw (htmlize-buffer-substring-no-invisible beg end))
+         ;; A non-nil TRAILING-ELLIPSIS on entry means a leading
+         ;; ellipsis in this run gets trimmed.
+         (text (if trailing-ellipsis (htmlize-trim-ellipsis raw) raw))
+         (len (length text)))
+    ;; An empty run leaves TRAILING-ELLIPSIS as it was; otherwise it
+    ;; reflects whether the last character belongs to an ellipsis.
+    (unless (zerop len)
+      (setq trailing-ellipsis
+            (get-text-property (1- len) 'htmlize-ellipsis text)))
+    (cl-values (htmlize-string-to-html
+                (if htmlize-untabify
+                    (htmlize-untabify-string text (current-column))
+                  text))
+               trailing-ellipsis)))
+
+(defun htmlize-despam-address (string)
+  "Return STRING with every \"@\" replaced by \"%40\".
+This is used to protect mailto links without modifying their meaning."
+  ;; Suggested by Ville Skytta.
+  (replace-regexp-in-string "@" "%40" string t t))
+
+(defun htmlize-make-tmp-overlay (beg end props)
+  ;; Create an overlay spanning BEG..END, mark it with the
+  ;; `htmlize-tmp-overlay' property, apply the property list PROPS
+  ;; (PROP1 VALUE1 PROP2 VALUE2 ...) to it, and return it.
+  (let ((ov (make-overlay beg end)))
+    (overlay-put ov 'htmlize-tmp-overlay t)
+    (cl-loop for (key value) on props by #'cddr
+             do (overlay-put ov key value))
+    ov))
+
+(defun htmlize-delete-tmp-overlays ()
+  ;; Delete every overlay between `point-min' and `point-max' that
+  ;; carries a non-nil `htmlize-tmp-overlay' property.  Other overlays
+  ;; are left alone.
+  (mapc (lambda (ov)
+          (if (overlay-get ov 'htmlize-tmp-overlay)
+              (delete-overlay ov)))
+        (overlays-in (point-min) (point-max)))
+  nil)
+
+(defun htmlize-make-link-overlay (beg end uri)
+  ;; Cover BEG..END with a temporary overlay whose `htmlize-link'
+  ;; property is the plist (:uri URI); return the overlay.
+  (let ((link-props (list 'htmlize-link (list :uri uri))))
+    (htmlize-make-tmp-overlay beg end link-props)))
+
+(defun htmlize-create-auto-links ()
+  "Add `htmlize-link' overlays for angle-bracketed links in the buffer.
+Two passes are made from the start of the buffer: one for e-mail
+addresses, optionally prefixed with \"mailto:\", and one for
+SCHEME://... locations, optionally prefixed with \"URL:\".  Point is
+preserved."
+  (let ((mail-re
+         "<\\(\\(mailto:\\)?\\([-=+_.a-zA-Z0-9]+@[-_.a-zA-Z0-9]+\\)\\)>")
+        (url-re "<\\(\\(URL:\\)?\\([a-zA-Z]+://[^;]+\\)\\)>"))
+    (save-excursion
+      ;; Pass 1: e-mail addresses.
+      (goto-char (point-min))
+      (while (re-search-forward mail-re nil t)
+        ;; Capture everything taken from the match data first:
+        ;; `htmlize-despam-address' performs its own matching.
+        (let ((link-beg (match-beginning 0))
+              (link-end (match-end 0))
+              (address (match-string 3)))
+          (htmlize-make-link-overlay
+           link-beg link-end
+           (concat "mailto:" (htmlize-despam-address address)))))
+      ;; Pass 2: generic URLs.
+      (goto-char (point-min))
+      (while (re-search-forward url-re nil t)
+        (let ((link-beg (match-beginning 0))
+              (link-end (match-end 0))
+              (target (match-string 3)))
+          (htmlize-make-link-overlay link-beg link-end target))))))
+
+;; Tests for htmlize-create-auto-links:
+
+;; 
+;; 
+;; 
+;; 
+;; 
+;; 
+
+(defun htmlize-shadow-form-feeds ()
+  (let ((s "\n
")) + (put-text-property 0 (length s) 'htmlize-literal t s) + (let ((disp `(display ,s))) + (while (re-search-forward "\n\^L" nil t) + (let* ((beg (match-beginning 0)) + (end (match-end 0)) + (form-feed-pos (1+ beg)) + ;; don't process ^L if invisible or covered by `display' + (show (and (htmlize-decode-invisibility-spec + (get-char-property form-feed-pos 'invisible)) + (not (get-char-property form-feed-pos 'display))))) + (when show + (htmlize-make-tmp-overlay beg end disp))))))) + +(defun htmlize-defang-local-variables () + ;; Juri Linkov reports that an HTML-ized "Local variables" can lead + ;; visiting the HTML to fail with "Local variables list is not + ;; properly terminated". He suggested changing the phrase to + ;; syntactically equivalent HTML that Emacs doesn't recognize. + (goto-char (point-min)) + (while (search-forward "Local Variables:" nil t) + (replace-match "Local Variables:" nil t))) + + +;;; Color handling. + +(defvar htmlize-x-library-search-path + `(,data-directory + "/etc/X11/rgb.txt" + "/usr/share/X11/rgb.txt" + ;; the remainder of this list really belongs in a museum + "/usr/X11R6/lib/X11/" + "/usr/X11R5/lib/X11/" + "/usr/lib/X11R6/X11/" + "/usr/lib/X11R5/X11/" + "/usr/local/X11R6/lib/X11/" + "/usr/local/X11R5/lib/X11/" + "/usr/local/lib/X11R6/X11/" + "/usr/local/lib/X11R5/X11/" + "/usr/X11/lib/X11/" + "/usr/lib/X11/" + "/usr/local/lib/X11/" + "/usr/X386/lib/X11/" + "/usr/x386/lib/X11/" + "/usr/XFree86/lib/X11/" + "/usr/unsupported/lib/X11/" + "/usr/athena/lib/X11/" + "/usr/local/x11r5/lib/X11/" + "/usr/lpp/Xamples/lib/X11/" + "/usr/openwin/lib/X11/" + "/usr/openwin/share/lib/X11/")) + +(defun htmlize-get-color-rgb-hash (&optional rgb-file) + "Return a hash table mapping X color names to RGB values. +The keys in the hash table are X11 color names, and the values are the +#rrggbb RGB specifications, extracted from `rgb.txt'. + +If RGB-FILE is nil, the function will try hard to find a suitable file +in the system directories. 
+ +If no rgb.txt file is found, return nil." + (let ((rgb-file (or rgb-file (locate-file + "rgb.txt" + htmlize-x-library-search-path))) + (hash nil)) + (when rgb-file + (with-temp-buffer + (insert-file-contents rgb-file) + (setq hash (make-hash-table :test 'equal)) + (while (not (eobp)) + (cond ((looking-at "^\\s-*\\([!#]\\|$\\)") + ;; Skip comments and empty lines. + ) + ((looking-at + "[ \t]*\\([0-9]+\\)[ \t]+\\([0-9]+\\)[ \t]+\\([0-9]+\\)[ \t]+\\(.*\\)") + (setf (gethash (downcase (match-string 4)) hash) + (format "#%02x%02x%02x" + (string-to-number (match-string 1)) + (string-to-number (match-string 2)) + (string-to-number (match-string 3))))) + (t + (error + "Unrecognized line in %s: %s" + rgb-file + (buffer-substring (point) (progn (end-of-line) (point)))))) + (forward-line 1)))) + hash)) + +;; Compile the RGB map when loaded. On systems where rgb.txt is +;; missing, the value of the variable will be nil, and rgb.txt will +;; not be used. +(defvar htmlize-color-rgb-hash (htmlize-get-color-rgb-hash)) + +;;; Face handling. + +(defun htmlize-face-color-internal (face fg) + ;; Used only under GNU Emacs. Return the color of FACE, but don't + ;; return "unspecified-fg" or "unspecified-bg". If the face is + ;; `default' and the color is unspecified, look up the color in + ;; frame parameters. + (let* ((function (if fg #'face-foreground #'face-background)) + (color (funcall function face nil t))) + (when (and (eq face 'default) (null color)) + (setq color (cdr (assq (if fg 'foreground-color 'background-color) + (frame-parameters))))) + (when (or (eq color 'unspecified) + (equal color "unspecified-fg") + (equal color "unspecified-bg")) + (setq color nil)) + (when (and (eq face 'default) + (null color)) + ;; Assuming black on white doesn't seem right, but I can't think + ;; of anything better to do. + (setq color (if fg "black" "white"))) + color)) + +(defun htmlize-face-foreground (face) + ;; Return the name of the foreground color of FACE. 
If FACE does + ;; not specify a foreground color, return nil. + (htmlize-face-color-internal face t)) + +(defun htmlize-face-background (face) + ;; Return the name of the background color of FACE. If FACE does + ;; not specify a background color, return nil. + ;; GNU Emacs. + (htmlize-face-color-internal face nil)) + +;; Convert COLOR to the #RRGGBB string. If COLOR is already in that +;; format, it's left unchanged. + +(defun htmlize-color-to-rgb (color) + (let ((rgb-string nil)) + (cond ((null color) + ;; Ignore nil COLOR because it means that the face is not + ;; specifying any color. Hence (htmlize-color-to-rgb nil) + ;; returns nil. + ) + ((string-match "\\`#" color) + ;; The color is already in #rrggbb format. + (setq rgb-string color)) + ((and htmlize-use-rgb-txt + htmlize-color-rgb-hash) + ;; Use of rgb.txt is requested, and it's available on the + ;; system. Use it. + (setq rgb-string (gethash (downcase color) htmlize-color-rgb-hash))) + (t + ;; We're getting the RGB components from Emacs. + (let ((rgb (mapcar (lambda (arg) + (/ arg 256)) + (color-values color)))) + (when rgb + (setq rgb-string (apply #'format "#%02x%02x%02x" rgb)))))) + ;; If RGB-STRING is still nil, it means the color cannot be found, + ;; for whatever reason. In that case just punt and return COLOR. + ;; Most browsers support a decent set of color names anyway. + (or rgb-string color))) + +;; We store the face properties we care about into an +;; `htmlize-fstruct' type. That way we only have to analyze face +;; properties, which can be time consuming, once per each face. The +;; mapping between Emacs faces and htmlize-fstructs is established by +;; htmlize-make-face-map. The name "fstruct" refers to variables of +;; type `htmlize-fstruct', while the term "face" is reserved for Emacs +;; faces. 
+ +(cl-defstruct htmlize-fstruct + foreground ; foreground color, #rrggbb + background ; background color, #rrggbb + size ; size + boldp ; whether face is bold + italicp ; whether face is italic + underlinep ; whether face is underlined + overlinep ; whether face is overlined + strikep ; whether face is struck through + css-name ; CSS name of face + ) + +(defun htmlize-face-set-from-keyword-attr (fstruct attr value) + ;; For ATTR and VALUE, set the equivalent value in FSTRUCT. + (cl-case attr + (:foreground + (setf (htmlize-fstruct-foreground fstruct) (htmlize-color-to-rgb value))) + (:background + (setf (htmlize-fstruct-background fstruct) (htmlize-color-to-rgb value))) + (:height + (setf (htmlize-fstruct-size fstruct) value)) + (:weight + (when (string-match (symbol-name value) "bold") + (setf (htmlize-fstruct-boldp fstruct) t))) + (:slant + (setf (htmlize-fstruct-italicp fstruct) (or (eq value 'italic) + (eq value 'oblique)))) + (:bold + (setf (htmlize-fstruct-boldp fstruct) value)) + (:italic + (setf (htmlize-fstruct-italicp fstruct) value)) + (:underline + (setf (htmlize-fstruct-underlinep fstruct) value)) + (:overline + (setf (htmlize-fstruct-overlinep fstruct) value)) + (:strike-through + (setf (htmlize-fstruct-strikep fstruct) value)))) + +(defun htmlize-face-size (face) + ;; The size (height) of FACE, taking inheritance into account. + ;; Only works in Emacs 21 and later. 
+ (let* ((face-list (list face)) + (head face-list) + (tail face-list)) + (while head + (let ((inherit (face-attribute (car head) :inherit))) + (cond ((listp inherit) + (setcdr tail (cl-copy-list inherit)) + (setq tail (last tail))) + ((eq inherit 'unspecified)) + (t + (setcdr tail (list inherit)) + (setq tail (cdr tail))))) + (pop head)) + (let ((size-list + (cl-loop + for f in face-list + for h = (and (facep f) (face-attribute f :height)) + collect (if (eq h 'unspecified) nil h)))) + (cl-reduce 'htmlize-merge-size (cons nil size-list))))) + +(defun htmlize-face-css-name (face) + ;; Generate the css-name property for the given face. Emacs places + ;; no restrictions on the names of symbols that represent faces -- + ;; any characters may be in the name, even control chars. We try + ;; hard to beat the face name into shape, both esthetically and + ;; according to CSS1 specs. + (let ((name (downcase (symbol-name face)))) + (when (string-match "\\`font-lock-" name) + ;; font-lock-FOO-face -> FOO. + (setq name (replace-match "" t t name))) + (when (string-match "-face\\'" name) + ;; Drop the redundant "-face" suffix. + (setq name (replace-match "" t t name))) + (while (string-match "[^-a-zA-Z0-9]" name) + ;; Drop the non-alphanumerics. + (setq name (replace-match "X" t t name))) + (when (string-match "\\`[-0-9]" name) + ;; CSS identifiers may not start with a digit. + (setq name (concat "X" name))) + ;; After these transformations, the face could come out empty. + (when (equal name "") + (setq name "face")) + ;; Apply the prefix. + (concat htmlize-css-name-prefix name))) + +(defun htmlize-face-to-fstruct-1 (face) + "Convert Emacs face FACE to fstruct, internal." 
+ (let ((fstruct (make-htmlize-fstruct + :foreground (htmlize-color-to-rgb + (htmlize-face-foreground face)) + :background (htmlize-color-to-rgb + (htmlize-face-background face))))) + ;; GNU Emacs + (dolist (attr '(:weight :slant :underline :overline :strike-through)) + (let ((value (face-attribute face attr nil t))) + (when (and value (not (eq value 'unspecified))) + (htmlize-face-set-from-keyword-attr fstruct attr value)))) + (let ((size (htmlize-face-size face))) + (unless (eql size 1.0) ; ignore non-spec + (setf (htmlize-fstruct-size fstruct) size))) + (setf (htmlize-fstruct-css-name fstruct) (htmlize-face-css-name face)) + fstruct)) + +(defun htmlize-face-to-fstruct (face) + (let* ((face-list (or (and (symbolp face) + (cdr (assq face face-remapping-alist))) + (list face))) + (fstruct (htmlize-merge-faces + (mapcar (lambda (face) + (if (symbolp face) + (or (htmlize-get-override-fstruct face) + (htmlize-face-to-fstruct-1 face)) + (htmlize-attrlist-to-fstruct face))) + (nreverse face-list))))) + (when (symbolp face) + (setf (htmlize-fstruct-css-name fstruct) (htmlize-face-css-name face))) + fstruct)) + +(defmacro htmlize-copy-attr-if-set (attr-list dest source) + ;; Generate code with the following pattern: + ;; (progn + ;; (when (htmlize-fstruct-ATTR source) + ;; (setf (htmlize-fstruct-ATTR dest) (htmlize-fstruct-ATTR source))) + ;; ...) + ;; for the given list of boolean attributes. + (cons 'progn + (cl-loop for attr in attr-list + for attr-sym = (intern (format "htmlize-fstruct-%s" attr)) + collect `(when (,attr-sym ,source) + (setf (,attr-sym ,dest) (,attr-sym ,source)))))) + +(defun htmlize-merge-size (merged next) + ;; Calculate the size of the merge of MERGED and NEXT. 
+ (cond ((null merged) next) + ((integerp next) next) + ((null next) merged) + ((floatp merged) (* merged next)) + ((integerp merged) (round (* merged next))))) + +(defun htmlize-merge-two-faces (merged next) + (htmlize-copy-attr-if-set + (foreground background boldp italicp underlinep overlinep strikep) + merged next) + (setf (htmlize-fstruct-size merged) + (htmlize-merge-size (htmlize-fstruct-size merged) + (htmlize-fstruct-size next))) + merged) + +(defun htmlize-merge-faces (fstruct-list) + (cond ((null fstruct-list) + ;; Nothing to do, return a dummy face. + (make-htmlize-fstruct)) + ((null (cdr fstruct-list)) + ;; Optimize for the common case of a single face, simply + ;; return it. + (car fstruct-list)) + (t + (cl-reduce #'htmlize-merge-two-faces + (cons (make-htmlize-fstruct) fstruct-list))))) + +;; GNU Emacs 20+ supports attribute lists in `face' properties. For +;; example, you can use `(:foreground "red" :weight bold)' as an +;; overlay's "face", or you can even use a list of such lists, etc. +;; We call those "attrlists". +;; +;; htmlize supports attrlist by converting them to fstructs, the same +;; as with regular faces. + +(defun htmlize-attrlist-to-fstruct (attrlist &optional name) + ;; Like htmlize-face-to-fstruct, but accepts an ATTRLIST as input. + (let ((fstruct (make-htmlize-fstruct))) + (cond ((eq (car attrlist) 'foreground-color) + ;; ATTRLIST is (foreground-color . COLOR) + (setf (htmlize-fstruct-foreground fstruct) + (htmlize-color-to-rgb (cdr attrlist)))) + ((eq (car attrlist) 'background-color) + ;; ATTRLIST is (background-color . COLOR) + (setf (htmlize-fstruct-background fstruct) + (htmlize-color-to-rgb (cdr attrlist)))) + (t + ;; ATTRLIST is a plist. 
+ (while attrlist + (let ((attr (pop attrlist)) + (value (pop attrlist))) + (when (and value (not (eq value 'unspecified))) + (htmlize-face-set-from-keyword-attr fstruct attr value)))))) + (setf (htmlize-fstruct-css-name fstruct) (or name "custom")) + fstruct)) + +(defun htmlize-decode-face-prop (prop) + "Turn face property PROP into a list of face-like objects." + ;; PROP can be a symbol naming a face, a string naming such a + ;; symbol, a cons (foreground-color . COLOR) or (background-color + ;; COLOR), a property list (:attr1 val1 :attr2 val2 ...), or a list + ;; of any of those. + ;; + ;; (htmlize-decode-face-prop 'face) -> (face) + ;; (htmlize-decode-face-prop '(face1 face2)) -> (face1 face2) + ;; (htmlize-decode-face-prop '(:attr "val")) -> ((:attr "val")) + ;; (htmlize-decode-face-prop '((:attr "val") face (foreground-color "red"))) + ;; -> ((:attr "val") face (foreground-color "red")) + ;; + ;; Unrecognized atoms or non-face symbols/strings are silently + ;; stripped away. + (cond ((null prop) + nil) + ((symbolp prop) + (and (facep prop) + (list prop))) + ((stringp prop) + (and (facep (intern-soft prop)) + (list prop))) + ((atom prop) + nil) + ((and (symbolp (car prop)) + (eq ?: (aref (symbol-name (car prop)) 0))) + (list prop)) + ((or (eq (car prop) 'foreground-color) + (eq (car prop) 'background-color)) + (list prop)) + (t + (apply #'nconc (mapcar #'htmlize-decode-face-prop prop))))) + +(defun htmlize-get-override-fstruct (face) + (let* ((raw-def (plist-get htmlize-face-overrides face)) + (def (cond ((stringp raw-def) (list :foreground raw-def)) + ((listp raw-def) raw-def) + (t + (error (format (concat "face override must be an " + "attribute list or string, got %s") + raw-def)))))) + (and def + (htmlize-attrlist-to-fstruct def (symbol-name face))))) + +(defun htmlize-make-face-map (faces) + ;; Return a hash table mapping Emacs faces to htmlize's fstructs. + ;; The keys are either face symbols or attrlists, so the test + ;; function must be `equal'. 
+ (let ((face-map (make-hash-table :test 'equal)) + css-names) + (dolist (face faces) + (unless (gethash face face-map) + ;; Haven't seen FACE yet; convert it to an fstruct and cache + ;; it. + (let ((fstruct (htmlize-face-to-fstruct face))) + (setf (gethash face face-map) fstruct) + (let* ((css-name (htmlize-fstruct-css-name fstruct)) + (new-name css-name) + (i 0)) + ;; Uniquify the face's css-name by using NAME-1, NAME-2, + ;; etc. + (while (member new-name css-names) + (setq new-name (format "%s-%s" css-name (cl-incf i)))) + (unless (equal new-name css-name) + (setf (htmlize-fstruct-css-name fstruct) new-name)) + (push new-name css-names))))) + face-map)) + +(defun htmlize-unstringify-face (face) + "If FACE is a string, return it interned, otherwise return it unchanged." + (if (stringp face) + (intern face) + face)) + +(defun htmlize-faces-in-buffer () + "Return a list of faces used in the current buffer. +This is the set of faces specified by the `face' text property and by buffer +overlays that specify `face'." + (let (faces) + ;; Faces used by text properties. + (let ((pos (point-min)) face-prop next) + (while (< pos (point-max)) + (setq face-prop (get-text-property pos 'face) + next (or (next-single-property-change pos 'face) (point-max))) + (setq faces (cl-nunion (htmlize-decode-face-prop face-prop) + faces :test 'equal)) + (setq pos next))) + ;; Faces used by overlays. + (dolist (overlay (overlays-in (point-min) (point-max))) + (let ((face-prop (overlay-get overlay 'face))) + (setq faces (cl-nunion (htmlize-decode-face-prop face-prop) + faces :test 'equal)))) + faces)) + +;; htmlize-faces-at-point returns the faces in use at point. The +;; faces are sorted by increasing priority, i.e. the last face takes +;; precedence. +;; +;; This returns all the faces in the `face' property and all the faces +;; in the overlays at point. + +(defun htmlize-faces-at-point () + (let (all-faces) + ;; Faces from text properties. 
+ (let ((face-prop (get-text-property (point) 'face))) + ;; we need to reverse the `face' prop because we want + ;; more specific faces to come later + (setq all-faces (nreverse (htmlize-decode-face-prop face-prop)))) + ;; Faces from overlays. + (let ((overlays + ;; Collect overlays at point that specify `face'. + (cl-delete-if-not (lambda (o) + (overlay-get o 'face)) + (nreverse (overlays-at (point) t)))) + list face-prop) + (dolist (overlay overlays) + (setq face-prop (overlay-get overlay 'face) + list (nconc (htmlize-decode-face-prop face-prop) list))) + ;; Under "Merging Faces" the manual explicitly states + ;; that faces specified by overlays take precedence over + ;; faces specified by text properties. + (setq all-faces (nconc all-faces list))) + all-faces)) + +;; htmlize supports generating HTML in several flavors, some of which +;; use CSS, and others the element. We take an OO approach and +;; define "methods" that indirect to the functions that depend on +;; `htmlize-output-type'. The currently used methods are `doctype', +;; `insert-head', `body-tag', `pre-tag', and `text-markup'. Not all +;; output types define all methods. +;; +;; Methods are called either with (htmlize-method METHOD ARGS...) +;; special form, or by accessing the function with +;; (htmlize-method-function 'METHOD) and calling (funcall FUNCTION). +;; The latter form is useful in tight loops because `htmlize-method' +;; conses. + +(defmacro htmlize-method (method &rest args) + ;; Expand to (htmlize-TYPE-METHOD ...ARGS...). TYPE is the value of + ;; `htmlize-output-type' at run time. + `(funcall (htmlize-method-function ',method) ,@args)) + +(defun htmlize-method-function (method) + ;; Return METHOD's function definition for the current output type. + ;; The returned object can be safely funcalled. 
+ (let ((sym (intern (format "htmlize-%s-%s" htmlize-output-type method)))) + (indirect-function (if (fboundp sym) + sym + (let ((default (intern (concat "htmlize-default-" + (symbol-name method))))) + (if (fboundp default) + default + 'ignore)))))) + +(defvar htmlize-memoization-table (make-hash-table :test 'equal)) + +(defmacro htmlize-memoize (key generator) + "Return the value of GENERATOR, memoized as KEY. +That means that GENERATOR will be evaluated and returned the first time +it's called with the same value of KEY. All other times, the cached +\(memoized) value will be returned." + (let ((value (cl-gensym))) + `(let ((,value (gethash ,key htmlize-memoization-table))) + (unless ,value + (setq ,value ,generator) + (setf (gethash ,key htmlize-memoization-table) ,value)) + ,value))) + +;;; Default methods. + +(defun htmlize-default-doctype () + nil ; no doc-string + ;; Note that the `font' output is technically invalid under this DTD + ;; because the DTD doesn't allow embedding in
.
+  ""
+  )
+
+(defun htmlize-default-body-tag (face-map)
+  ;; Fallback `body-tag' method: FACE-MAP is accepted but not used,
+  ;; and a constant string is returned.
+  ;; NOTE(review): the returned literal is empty in this copy; it
+  ;; presumably held the opening body markup before the tags in this
+  ;; text were stripped -- confirm against the original file.
+  nil					; no doc-string
+  face-map ; shut up the byte-compiler
+  "")
+
+(defun htmlize-default-pre-tag (face-map)
+  ;; Fallback `pre-tag' method: FACE-MAP is accepted but not used, and
+  ;; a constant string is returned.
+  ;; NOTE(review): the literal below now holds only a line break; it
+  ;; presumably held the opening preformatted-text markup before the
+  ;; tags in this text were stripped -- confirm against the original.
+  nil					; no doc-string
+  face-map ; shut up the byte-compiler
+  "
")
+
+
+;;; CSS based output support.
+
+;; Internal function; not a method.
+(defun htmlize-css-specs (fstruct)
+  ;; Return a list of CSS declaration strings ("PROPERTY: VALUE;")
+  ;; describing FSTRUCT, in the order: color, background-color,
+  ;; font-size, font-weight, font-style, text-decoration.  Attributes
+  ;; that are unset in FSTRUCT contribute nothing, so the result may
+  ;; be nil.
+  (let (result)
+    (when (htmlize-fstruct-foreground fstruct)
+      (push (format "color: %s;" (htmlize-fstruct-foreground fstruct))
+            result))
+    (when (htmlize-fstruct-background fstruct)
+      (push (format "background-color: %s;"
+                    (htmlize-fstruct-background fstruct))
+            result))
+    (let ((size (htmlize-fstruct-size fstruct)))
+      ;; `htmlize-ignore-face-size' of t suppresses all sizes; a value
+      ;; of `absolute' suppresses only the non-float (point) sizes.
+      (when (and size (not (eq htmlize-ignore-face-size t)))
+        (cond ((floatp size)
+               ;; Float sizes are relative scale factors.
+               (push (format "font-size: %d%%;" (* 100 size)) result))
+              ((not (eq htmlize-ignore-face-size 'absolute))
+               ;; Other sizes are in tenths of a point.
+               (push (format "font-size: %spt;" (/ size 10.0)) result)))))
+    (when (htmlize-fstruct-boldp fstruct)
+      (push "font-weight: bold;" result))
+    (when (htmlize-fstruct-italicp fstruct)
+      (push "font-style: italic;" result))
+    ;; Within one CSS rule a later `text-decoration' declaration
+    ;; replaces an earlier one, so all active line decorations must go
+    ;; into a single declaration or only the last would take effect.
+    ;; With exactly one decoration the output is unchanged.
+    (let ((lines (delq nil
+                       (list
+                        (and (htmlize-fstruct-underlinep fstruct) "underline")
+                        (and (htmlize-fstruct-overlinep fstruct) "overline")
+                        (and (htmlize-fstruct-strikep fstruct) "line-through")))))
+      (when lines
+        (push (format "text-decoration: %s;"
+                      (mapconcat #'identity lines " "))
+              result)))
+    (nreverse result)))
+
+(defun htmlize-css-insert-head (buffer-faces face-map)
+  ;; `insert-head' method for the `css' output type: inserts text at
+  ;; point in the current buffer.
+  ;; NOTE(review): as it appears here the body inserts only four
+  ;; spaces and a newline and never uses BUFFER-FACES or FACE-MAP; the
+  ;; stylesheet-generating code was presumably lost when the tags in
+  ;; this text were stripped -- confirm against the original file.
+  (insert "    \n"))
+
+(defun htmlize-css-text-markup (fstruct-list buffer)
+  ;; Open the markup needed to insert text colored with FACES into
+  ;; BUFFER.  Return the function that closes the markup.
+
+  ;; In CSS mode, this is easy: one opening string is printed to
+  ;; BUFFER for each fstruct in FSTRUCT-LIST, and the returned closure
+  ;; prints one closing string per fstruct.
+  ;; NOTE(review): both printed literals are empty in this copy, and
+  ;; the tag name is missing from the original comment; the markup was
+  ;; presumably stripped from this text -- confirm against the
+  ;; original file.
+  (dolist (fstruct fstruct-list)
+    (princ "" buffer))
+  ;; NOTE(review): `htmlize-lexlet' is defined elsewhere; presumably it
+  ;; makes these bindings available to the returned closure -- verify.
+  (htmlize-lexlet ((fstruct-list fstruct-list) (buffer buffer))
+    (lambda ()
+      (dolist (fstruct fstruct-list)
+        (ignore fstruct)                ; shut up the byte-compiler
+        (princ "" buffer)))))
+
+;; `inline-css' output support.
+
+(defun htmlize-inline-css-body-tag (face-map)
+  ;; `body-tag' method for the `inline-css' output type.  The CSS
+  ;; specs of the `default' entry in FACE-MAP are joined with spaces
+  ;; and passed to `format'.
+  ;; NOTE(review): the format control string is empty in this copy, so
+  ;; the joined specs are not used in the result; the tag template was
+  ;; presumably stripped from this text -- confirm against the
+  ;; original file.
+  (format ""
+	  (mapconcat #'identity (htmlize-css-specs (gethash 'default face-map))
+		     " ")))
+
+(defun htmlize-inline-css-pre-tag (face-map)
+  ;; `pre-tag' method for the `inline-css' output type.  When
+  ;; `htmlize-pre-style' is non-nil, the CSS specs of the `default'
+  ;; entry in FACE-MAP are joined with spaces and passed to `format';
+  ;; otherwise a constant string is produced.
+  ;; NOTE(review): both format control strings now contain only a line
+  ;; break; the tag templates were presumably stripped from this text
+  ;; -- confirm against the original file.
+  (if htmlize-pre-style
+      (format "
"
+              (mapconcat #'identity (htmlize-css-specs (gethash 'default face-map))
+                         " "))
+    (format "
")))
+
+(defun htmlize-inline-css-text-markup (fstruct-list buffer)
+  ;; `text-markup' method for the `inline-css' output type.  The
+  ;; fstructs in FSTRUCT-LIST are merged into one, its CSS specs are
+  ;; joined with spaces into STYLE (nil when there are no specs), and
+  ;; an opening string is printed to BUFFER when STYLE is non-nil.
+  ;; Returns a closure that prints the closing string under the same
+  ;; condition.
+  ;; NOTE(review): both printed literals are empty in this copy and
+  ;; STYLE is not printed; the markup was presumably stripped from
+  ;; this text -- confirm against the original file.
+  (let* ((merged (htmlize-merge-faces fstruct-list))
+	 ;; The computed style is cached in the memoization table under
+	 ;; the merged fstruct as key.
+	 (style (htmlize-memoize
+		 merged
+		 ;; SPECS only serves as an emptiness test; the spec list
+		 ;; is computed a second time for the join.
+		 (let ((specs (htmlize-css-specs merged)))
+		   (and specs
+			(mapconcat #'identity (htmlize-css-specs merged) " "))))))
+    (when style
+      (princ "" buffer))
+    (htmlize-lexlet ((style style) (buffer buffer))
+      (lambda ()
+        (when style
+          (princ "" buffer))))))
+
+;;; `font' tag based output support.
+
+(defun htmlize-font-body-tag (face-map)
+  ;; `body-tag' method for the `font' output type.  The foreground and
+  ;; background of the `default' entry in FACE-MAP are passed to
+  ;; `format', in that order.
+  ;; NOTE(review): the format control string is empty in this copy, so
+  ;; neither color is used in the result; the tag template was
+  ;; presumably stripped from this text -- confirm against the
+  ;; original file.
+  (let ((fstruct (gethash 'default face-map)))
+    (format ""
+	    (htmlize-fstruct-foreground fstruct)
+	    (htmlize-fstruct-background fstruct))))
+
+(defun htmlize-font-pre-tag (face-map)
+  ;; `pre-tag' method for the `font' output type.  When
+  ;; `htmlize-pre-style' is non-nil, the foreground and background of
+  ;; the `default' entry in FACE-MAP are passed to `format', in that
+  ;; order; otherwise a constant string is produced.
+  ;; NOTE(review): both format control strings now contain only a line
+  ;; break; the tag templates were presumably stripped from this text
+  ;; -- confirm against the original file.
+  (if htmlize-pre-style
+      (let ((fstruct (gethash 'default face-map)))
+        (format "
"
+                (htmlize-fstruct-foreground fstruct)
+                (htmlize-fstruct-background fstruct)))
+    (format "
")))
+       
+(defun htmlize-font-text-markup (fstruct-list buffer)
+  ;; In `font' mode, we use the traditional HTML means of altering
+  ;; presentation: separate tags for colors, bold, italic, underline
+  ;; and strike-through.
+  ;;
+  ;; The fstructs in FSTRUCT-LIST are merged, and a pair
+  ;; (OPENING . CLOSING) of markup strings is built from the merged
+  ;; foreground, boldp, italicp, underlinep and strikep attributes;
+  ;; the closing pieces are concatenated in the reverse order of the
+  ;; opening ones.  The pair is cached in the memoization table under
+  ;; the merged fstruct as key.  OPENING is printed to BUFFER, and the
+  ;; returned closure prints CLOSING.
+  ;; NOTE(review): every tag literal below is empty in this copy, and
+  ;; the tag names are missing from the original comment; the markup
+  ;; was presumably stripped from this text -- confirm against the
+  ;; original file.
+  (let* ((merged (htmlize-merge-faces fstruct-list))
+	 (markup (htmlize-memoize
+		  merged
+		  (cons (concat
+			 (and (htmlize-fstruct-foreground merged)
+			      (format "" (htmlize-fstruct-foreground merged)))
+			 (and (htmlize-fstruct-boldp merged)      "")
+			 (and (htmlize-fstruct-italicp merged)    "")
+			 (and (htmlize-fstruct-underlinep merged) "")
+			 (and (htmlize-fstruct-strikep merged)    ""))
+			(concat
+			 (and (htmlize-fstruct-strikep merged)    "")
+			 (and (htmlize-fstruct-underlinep merged) "")
+			 (and (htmlize-fstruct-italicp merged)    "")
+			 (and (htmlize-fstruct-boldp merged)      "")
+			 (and (htmlize-fstruct-foreground merged) ""))))))
+    (princ (car markup) buffer)
+    (htmlize-lexlet ((markup markup) (buffer buffer))
+      (lambda ()
+        (princ (cdr markup) buffer)))))
+
+(defun htmlize-buffer-1 ()
+  ;; Internal function; don't call it from outside this file.  Htmlize
+  ;; current buffer, writing the resulting HTML to a new buffer, and
+  ;; return it.  Unlike htmlize-buffer, this doesn't change current
+  ;; buffer or use switch-to-buffer.
+  (save-excursion
+    ;; Protect against the hook changing the current buffer.
+    (save-excursion
+      (run-hooks 'htmlize-before-hook))
+    ;; Convince font-lock support modes to fontify the entire buffer
+    ;; in advance.
+    (htmlize-ensure-fontified)
+    (clrhash htmlize-extended-character-cache)
+    (clrhash htmlize-memoization-table)
+    ;; It's important that the new buffer inherits default-directory
+    ;; from the current buffer.
+    (let ((htmlbuf (generate-new-buffer (if (buffer-file-name)
+                                            (htmlize-make-file-name
+                                             (file-name-nondirectory
+                                              (buffer-file-name)))
+                                          "*html*")))
+          (completed nil))
+      (unwind-protect
+          (let* ((buffer-faces (htmlize-faces-in-buffer))
+                 (face-map (htmlize-make-face-map (cl-adjoin 'default buffer-faces)))
+                 (places (cl-gensym))
+                 (title (if (buffer-file-name)
+                            (file-name-nondirectory (buffer-file-name))
+                          (buffer-name))))
+            (when htmlize-generate-hyperlinks
+              (htmlize-create-auto-links))
+            (when htmlize-replace-form-feeds
+              (htmlize-shadow-form-feeds))
+
+            ;; Initialize HTMLBUF and insert the HTML prolog.
+            (with-current-buffer htmlbuf
+              (buffer-disable-undo)
+              (insert (htmlize-method doctype) ?\n
+                      (format "\n"
+                              htmlize-version htmlize-output-type)
+                      "\n  ")
+              (put places 'head-start (point-marker))
+              (insert "\n"
+                      "    " (htmlize-protect-string title) "\n"
+                      (if htmlize-html-charset
+                          (format (concat "    \n")
+                                  htmlize-html-charset)
+                        "")
+                      htmlize-head-tags)
+              (htmlize-method insert-head buffer-faces face-map)
+              (insert "  ")
+              (put places 'head-end (point-marker))
+              (insert "\n  ")
+              (put places 'body-start (point-marker))
+              (insert (htmlize-method body-tag face-map)
+                      "\n    ")
+              (put places 'content-start (point-marker))
+              (insert (htmlize-method pre-tag face-map) "\n"))
+            (let ((text-markup
+                   ;; Get the inserter method, so we can funcall it inside
+                   ;; the loop.  Not calling `htmlize-method' in the loop
+                   ;; body yields a measurable speed increase.
+                   (htmlize-method-function 'text-markup))
+                  ;; Declare variables used in loop body outside the loop
+                  ;; because it's faster to establish `let' bindings only
+                  ;; once.
+                  next-change text face-list trailing-ellipsis
+                  fstruct-list last-fstruct-list
+                  (close-markup (lambda ())))
+              ;; This loop traverses and reads the source buffer, appending
+              ;; the resulting HTML to HTMLBUF.  This method is fast
+              ;; because: 1) it doesn't require examining the text
+              ;; properties char by char (htmlize-next-face-change is used
+              ;; to move between runs with the same face), and 2) it doesn't
+              ;; require frequent buffer switches, which are slow because
+              ;; they rebind all buffer-local vars.
+              (goto-char (point-min))
+              (while (not (eobp))
+                (setq next-change (htmlize-next-face-change (point)))
+                ;; Get faces in use between (point) and NEXT-CHANGE, and
+                ;; convert them to fstructs.
+                (setq face-list (htmlize-faces-at-point)
+                      fstruct-list (delq nil (mapcar (lambda (f)
+                                                       (gethash f face-map))
+                                                     face-list)))
+                (cl-multiple-value-setq (text trailing-ellipsis)
+                  (htmlize-extract-text (point) next-change trailing-ellipsis))
+                ;; Don't bother writing anything if there's no text (this
+                ;; happens in invisible regions).
+                (when (> (length text) 0)
+                  ;; Open the new markup if necessary and insert the text.
+                  (when (not (cl-equalp fstruct-list last-fstruct-list))
+                    (funcall close-markup)
+                    (setq last-fstruct-list fstruct-list
+                          close-markup (funcall text-markup fstruct-list htmlbuf)))
+                  (princ text htmlbuf))
+                (goto-char next-change))
+
+              ;; We've gone through the buffer; close the markup from
+              ;; the last run, if any.
+              (funcall close-markup))
+
+            ;; Insert the epilog and post-process the buffer.
+            (with-current-buffer htmlbuf
+              (insert "
") + (put places 'content-end (point-marker)) + (insert "\n ") + (put places 'body-end (point-marker)) + (insert "\n\n") + (htmlize-defang-local-variables) + (goto-char (point-min)) + (when htmlize-html-major-mode + ;; What sucks about this is that the minor modes, most notably + ;; font-lock-mode, won't be initialized. Oh well. + (funcall htmlize-html-major-mode)) + (set (make-local-variable 'htmlize-buffer-places) + (symbol-plist places)) + (run-hooks 'htmlize-after-hook) + (buffer-enable-undo)) + (setq completed t) + htmlbuf) + + (when (not completed) + (kill-buffer htmlbuf)) + (htmlize-delete-tmp-overlays))))) + +;; Utility functions. + +(defmacro htmlize-with-fontify-message (&rest body) + ;; When forcing fontification of large buffers in + ;; htmlize-ensure-fontified, inform the user that he is waiting for + ;; font-lock, not for htmlize to finish. + `(progn + (if (> (buffer-size) 65536) + (message "Forcing fontification of %s..." + (buffer-name (current-buffer)))) + ,@body + (if (> (buffer-size) 65536) + (message "Forcing fontification of %s...done" + (buffer-name (current-buffer)))))) + +(defun htmlize-ensure-fontified () + ;; If font-lock is being used, ensure that the "support" modes + ;; actually fontify the buffer. If font-lock is not in use, we + ;; don't care because, except in htmlize-file, we don't force + ;; font-lock on the user. + (when font-lock-mode + ;; In part taken from ps-print-ensure-fontified in GNU Emacs 21. + (when (and (boundp 'jit-lock-mode) + (symbol-value 'jit-lock-mode)) + (htmlize-with-fontify-message + (jit-lock-fontify-now (point-min) (point-max)))) + + (if (fboundp 'font-lock-ensure) + (font-lock-ensure) + ;; Emacs prior to 25.1 + (with-no-warnings + (font-lock-mode 1) + (font-lock-fontify-buffer))))) + + +;;;###autoload +(defun htmlize-buffer (&optional buffer) + "Convert BUFFER to HTML, preserving colors and decorations. + +The generated HTML is available in a new buffer, which is returned. 
+When invoked interactively, the new buffer is selected in the current +window. The title of the generated document will be set to the buffer's +file name or, if that's not available, to the buffer's name. + +Note that htmlize doesn't fontify your buffers, it only uses the +decorations that are already present. If you don't set up font-lock or +something else to fontify your buffers, the resulting HTML will be +plain. Likewise, if you don't like the choice of colors, fix the mode +that created them, or simply alter the faces it uses." + (interactive) + (let ((htmlbuf (with-current-buffer (or buffer (current-buffer)) + (htmlize-buffer-1)))) + (when (interactive-p) + (switch-to-buffer htmlbuf)) + htmlbuf)) + +;;;###autoload +(defun htmlize-region (beg end) + "Convert the region to HTML, preserving colors and decorations. +See `htmlize-buffer' for details." + (interactive "r") + ;; Don't let zmacs region highlighting end up in HTML. + (when (fboundp 'zmacs-deactivate-region) + (zmacs-deactivate-region)) + (let ((htmlbuf (save-restriction + (narrow-to-region beg end) + (htmlize-buffer-1)))) + (when (interactive-p) + (switch-to-buffer htmlbuf)) + htmlbuf)) + +(defun htmlize-region-for-paste (beg end) + "Htmlize the region and return just the HTML as a string. +This forces the `inline-css' style and only returns the HTML body, +but without the BODY tag. This should make it useful for inserting +the text to another HTML buffer." + (let* ((htmlize-output-type 'inline-css) + (htmlbuf (htmlize-region beg end))) + (unwind-protect + (with-current-buffer htmlbuf + (buffer-substring (plist-get htmlize-buffer-places 'content-start) + (plist-get htmlize-buffer-places 'content-end))) + (kill-buffer htmlbuf)))) + +(defun htmlize-region-save-screenshot (beg end) + "Save the htmlized (see `htmlize-region-for-paste') region in +the kill ring. Uses `inline-css', with style information in +`
' tags, so that the rendering of the marked up text
+approximates the buffer as closely as possible."
+  (interactive "r")
+  (let ((htmlize-pre-style t))
+    (kill-new (htmlize-region-for-paste beg end)))
+  (deactivate-mark))
+
+(defun htmlize-make-file-name (file)
+  "Return the HTML file name that corresponds to FILE.
+
+The default implementation keeps FILE intact and merely appends the
+`.html' suffix, so \"file.c\" becomes \"file.c.html\".  htmlize calls
+this function when naming the output buffer, and `htmlize-file' calls
+it when deriving the target file name.
+
+Redefine this function if you prefer a different mapping, for
+example replacing FILE's extension (\"file.c\" -> \"file.html\");
+htmlize will use whatever name it returns."
+  (let ((html-suffix ".html"))
+    (concat file html-suffix)))
+
+;; Older implementation of htmlize-make-file-name that changes FILE's
+;; extension to ".html".
+;(defun htmlize-make-file-name (file)
+;  (let ((extension (file-name-extension file))
+;	(sans-extension (file-name-sans-extension file)))
+;    (if (or (equal extension "html")
+;	    (equal extension "htm")
+;	    (equal sans-extension ""))
+;	(concat file ".html")
+;      (concat sans-extension ".html"))))
+
+;;;###autoload
+(defun htmlize-file (file &optional target)
+  "Convert the contents of FILE to HTML and write the result to disk.
+
+FILE is read into a temporary buffer and `normal-mode' picks the major
+mode from the file name.  The buffer is then converted to HTML, the
+hook `htmlize-file-hook' is run in the HTML buffer, and that buffer is
+written out and killed.  Because only a temporary buffer is used,
+buffers already visiting FILE are left alone, and neither the current
+buffer nor point is changed.
+
+Unlike `htmlize-buffer', which only uses decorations that are already
+present, this function is meant to have FILE fontified by font-lock as
+part of the conversion.  For any other kind of highlighting, call
+`htmlize-buffer' directly on a buffer highlighted that way.
+
+The output file name is chosen as follows:
+- TARGET names an existing directory: the name returned by
+  `htmlize-make-file-name' for FILE, placed in that directory;
+- TARGET is anything else non-nil: TARGET itself;
+- TARGET is nil: the name returned by `htmlize-make-file-name',
+  placed in FILE's own directory.
+
+The return value is always nil."
+  (interactive
+   (let ((visited (buffer-file-name)))
+     (list (read-file-name "HTML-ize file: " nil nil nil
+                           (and visited
+                                (file-name-nondirectory visited))))))
+  (let ((output-file
+         (cond
+          ;; A TARGET that is not a directory is the output file itself.
+          ((and target (not (file-directory-p target)))
+           target)
+          ;; Otherwise derive the name from FILE and put it in TARGET
+          ;; (a directory here) or, lacking TARGET, next to FILE.
+          (t
+           (expand-file-name
+            (htmlize-make-file-name (file-name-nondirectory file))
+            (or target (file-name-directory file))))))
+        ;; Bind the font-lock switches to nil, trying to keep
+        ;; `find-file-noselect' from triggering font-lock: fontification
+        ;; is meant to happen explicitly during the conversion.
+        (font-lock-mode nil)
+        (font-lock-auto-fontify nil)
+        (global-font-lock-mode nil)
+        ;; No size limit on fontification while htmlizing.
+        (font-lock-maximum-size nil))
+    (with-temp-buffer
+      (insert-file-contents file)
+      ;; Bind `buffer-file-name' only temporarily: `normal-mode' and
+      ;; `htmlize-buffer-1' need to see it, but it has to be restored
+      ;; before `with-temp-buffer' kills the buffer, so that killing a
+      ;; modified buffer does not draw a complaint.
+      (let ((buffer-file-name file))
+        ;; Choose the major mode, for the benefit of font-lock.
+        (normal-mode)
+        ;; Make the freshly generated HTML buffer current, save it, and
+        ;; dispose of it even if the hook or the write signals an error.
+        (with-current-buffer (htmlize-buffer-1)
+          (unwind-protect
+              (progn
+                (run-hooks 'htmlize-file-hook)
+                (write-region (point-min) (point-max) output-file))
+            (kill-buffer (current-buffer)))))))
+  ;; No useful return value has been settled on, so return nil.
+  nil)
+
+;;;###autoload
+(defun htmlize-many-files (files &optional target-directory)
+  "Run `htmlize-file' on every file named in the list FILES.
+
+Each element of FILES is converted and saved by a separate call to
+`htmlize-file', which see.  In interactive use the file names are read
+from the minibuffer one at a time until an empty answer (plain RET)
+ends the list.
+
+With TARGET-DIRECTORY, all HTML files are written to that directory;
+an error is signaled if it does not name a directory.  Without it,
+each HTML file ends up in the directory of its source file."
+  (interactive
+   (let ((chosen nil)
+         (answer nil))
+     ;; DEFAULT is the empty string because a nil DEFAULT would make
+     ;; `read-file-name' fall back to the directory name, which is not
+     ;; wanted here.  After the first file, prompt in the directory of
+     ;; the most recently entered one.
+     (while (progn
+              (setq answer (read-file-name
+                            "HTML-ize file (RET to finish): "
+                            (and chosen (file-name-directory (car chosen)))
+                            "" t))
+              (not (equal answer "")))
+       (push answer chosen))
+     (list (nreverse chosen))))
+  ;; A TARGET-DIRECTORY that is not a directory would be taken by
+  ;; `htmlize-file' as the output file name, which makes no sense for
+  ;; several files, so reject it up front.
+  (when (and target-directory
+             (not (file-directory-p target-directory)))
+    (error "target-directory must name a directory: %s" target-directory))
+  (dolist (source files)
+    (htmlize-file source target-directory)))
+
+;;;###autoload
+(defun htmlize-many-files-dired (arg &optional target-directory)
+  "HTMLize the files marked in Dired.
+
+ARG is the raw prefix argument; it is handed to
+`dired-get-marked-files' to select the files.  TARGET-DIRECTORY, if
+given, is passed on to `htmlize-many-files'."
+  (interactive "P")
+  (let ((marked (dired-get-marked-files nil arg)))
+    (htmlize-many-files marked target-directory)))
+
+(provide 'htmlize)
+
+;; Local Variables:
+;; byte-compile-warnings: (not unresolved obsolete)
+;; End:
+
+;;; htmlize.el ends here
diff --git a/src/org.css b/src/org.css
new file mode 100644
index 0000000..18ebe3f
--- /dev/null
+++ b/src/org.css
@@ -0,0 +1,137 @@
+@import url("https://fonts.googleapis.com/css2?family=Lora:wght@500&display=swap");
+@import url("https://fonts.googleapis.com/css2?family=Inconsolata&display=swap");
+
+body {
+        /* Layout: a centred column, 700px wide unless the viewport is narrower. */
+        margin: 40px auto;
+        width: 700px;
+        max-width: 100%;
+        padding: 0 10px;
+        background: #fffff4;
+        color: #3a1616;
+        /* Typography.  The generic serif family is the fallback used when the
+         Lora web font cannot be loaded. */
+        font-size: 18px;
+        line-height: 28px;
+        font-family: "Lora", serif;
+        font-weight: 500;
+        /* Smooth the font a little; it is a bit too bold on retina screens. */
+        -webkit-font-smoothing: antialiased;
+}
+
+/* --------------------------------------------------- */
+/* Make a nice input form with rounded corners and an animated
+ focus border */
+/* --------------------------------------------------- */
+input {
+        box-sizing: border-box;
+        margin: 2px 0;
+        padding: 10px 16px;
+        font-size: 16px;
+        color: #3a1616;
+        background: #fffff4;
+        border: 2px solid #dabebe;
+        border-radius: 6px;
+        outline: none; /* focus is shown by the darker border below */
+        -webkit-transition: 0.5s; /* prefixed form first, standard form last */
+        transition: 0.5s;
+}
+input:focus {
+        border: 2px solid #3a1616;
+}
+
+/* --------------------------------------- */
+/* The button is very similar to the input */
+/* --------------------------------------- */
+.button {
+        display: inline-block;
+        margin: 4px 2px;
+        padding: 6px 14px;
+        font-size: 16px;
+        text-align: center;
+        text-decoration: none;
+        color: black;
+        background-color: #fffff4;
+        /* Single border declaration; it matches the focused input's border. */
+        border: 2px solid #3a1616;
+        border-radius: 6px;
+        cursor: pointer;
+        transition-duration: 0.4s;
+}
+.button:hover {
+        background-color: #3a1616;
+        color: white;
+}
+
+/* ----------------------------------------------- */
+/* Various classes for messages of different kinds */
+/* ----------------------------------------------- */
+.isa_info,
+.isa_success,
+.isa_warning,
+.isa_error {
+        margin: 10px 0;
+        padding: 12px;
+        width: 90%;
+}
+.isa_info { /* blue */
+        background-color: #bde5f8;
+        color: #00529b;
+}
+.isa_success { /* green */
+        background-color: #dff2bf;
+        color: #4f8a10;
+}
+.isa_warning { /* amber */
+        background-color: #feefb3;
+        color: #9f6000;
+}
+.isa_error { /* red */
+        background-color: #ffd2d2;
+        color: #d8000c;
+}
+
+h1,
+h2,
+h3 {
+        line-height: 1.2; /* unitless: scales with each heading's own font size */
+        font-family: "Lora", serif; /* serif is the fallback if Lora cannot load */
+}
+
+img {
+        border-radius: 10px;
+        max-width: 100%;
+        width: 500px;
+        text-align: center; /* NOTE(review): text-align acts on block containers; it may not centre the image itself - confirm intent */
+}
+
+/* --------------------------------------------- */
+/* Bold hover animations on links and clickables */
+/* --------------------------------------------- */
+a {
+        color: #217ab7;
+        cursor: pointer;
+        line-height: inherit;
+        transition: 0.14s;
+}
+a:hover {
+        background-color: #3297d3;
+        color: white;
+}
+a:visited {
+        border-color: #43458b;
+        color: #43458b;
+}
+a:visited:hover { /* more specific than a:visited, so the hover colours win */
+        background-color: #9251ac;
+        color: white;
+}
+
+pre {
+        font-family: "Inconsolata", monospace; /* Inconsolata is requested by the @import at the top */
+}
+
+::selection {
+        background: #ff4081;
+        color: white;
+}