堆是一种抽象数据结构,可以看作是一颗完全二叉树。堆包括最大堆和最小堆,对于最大堆,父元素的值总是大于子元素,最小堆则相反。堆的这种特性特别适合用来做优先级队列
在C中,使用数组来保存堆时,对于元素i,数组的下标有如下关系:
#define HEAP_PARENT_INDEX(i) ((i – 1) / 2)
#define HEAP_LEFT_INDEX(i) (2 * i + 1)
#define HEAP_RIGHT_INDEX(i) (2 * i + 2)
BTW,别把这个数据结构的堆跟操作系统的堆混淆了,那个堆是用来动态分配内存空间的,虽然这种说法可能也不是太准确
下面以最大堆为例:
最大堆的插入很简单,直接在数组最后插入新的元素,然后依次跟父元素对比,把较大的元素交换到合适的位置。
当要移除最大的元素时,为了维护最大堆的性质,首先把数组最后一个元素移到开头填补空白,然后对这个堆执行heapify操作
代码中提供了使用尾递归和循环两种方式的heapify操作(尾递归能够用循环的方式来表现),以递归方式为例:heapify的操作是从root开始,对于每个节点i和它的两个子元素,选择一个最大值,然后交换节点i和最大值(有可能是同一个元素,那就是白干活呗……)
。然后以原最大值的下标为基准,递归的执行heapify操作,直到叶子节点。
利用heapify的性质,能够实现堆排序的算法,最简单的方式就是把整个数组插入堆中,然后依次取root元素。不过我们完全可以做到in place排序:
当对一个数组从尾到头执行heapify操作后,我们就得到一个合法的堆。那么要怎么排序呢?神奇的地方来了,直接交换数组首尾元素,然后告诉堆说最后一个元素你就别管了,然后对root元素执行heapify。重复这个步骤,直到堆认为自己已经没有元素了……其实它们都在后面排好队了,假如用的是最大堆就是从小到大排列,最小堆则是从大到小
注意代码中136-143, 179-183, 以及194-198这几行,这几行看起来莫名其妙……我回忆了很久,看头文件才发现是为了防止外部直接调用heap_update_key()接口时,插入了一个非法的值破坏了堆的性质。
heap.h
#ifndef _HEAP_H #define _HEAP_H #include <stdio.h> #include <stdbool.h> typedef struct _heap Heap; typedef enum _heap_type { MAX_HEAP = 0, MIN_HEAP, } HeapType; /* use heap_free() to free the return value */ Heap *heap_new(HeapType type, int size); void heap_free (Heap *heap); void heap_dump (Heap *heap, FILE *file); bool heap_is_empty (Heap *heap); HeapType heap_get_type (Heap *heap); int heap_get_size (Heap *heap); int heap_get_card (Heap *heap); int heap_get_growing_factor (Heap *heap); void heap_set_growing_factor (Heap *heap, int value); /* prority queue */ /** * @brief insert a value to heap * @note Complexity O( lg(n) ) */ void heap_insert (Heap *heap, int value); /** * @breif return the root element * @note Complexity O( 1 ) * @return for MAX_HEAP, return the largest; for MIN_HEAP, return the minimum. */ int heap_root (Heap *heap); /** * @breif delete and return the root element * @note Complexity O( lg(n) ) * @return for MAX_HEAP, delete & return the largest in heap; * for MIN_HEAP, delete & return the minimum in heap. * @see heap_root(). */ int heap_extract_root (Heap *heap); /** * @brief set an element in heap wiht another value; * * for MAX_HEAP, require key > current value in index i; * for MIN_HEAP, require key < current value in index i; * * @note Complexity O( lg(n) ) * @param heap * @param i index of the element to set. * @param key new value set to element. */ void heap_update_key (Heap *heap, int i, int key); /* heap sort func */ /** * @brief Heap sort * * Swap the first elem which is the largest in heap with last elem of heap, * decrease heap size by 1, loop until heap size is 0. * * @note Complexity O( n*lg(n) ) * @param array the array need to sort. * @param len length of array. * @return sorted array. */ int *heap_sort (int *array, int len); #endif /* _HEAP_H */
heap.c
#include <stdio.h> #include <stdlib.h> #include <assert.h> #include <stdlib.h> #include <limits.h> #include "heap.h" #include "base.h" typedef void (*heapify_func_t) (Heap *heap, int index); struct _heap { int *data; HeapType type; int size; int card; int growing_factor; heapify_func_t heapify_func; }; #define HEAP_PARENT_INDEX(i) ((i - 1) / 2) #define HEAP_LEFT_INDEX(i) (2 * i + 1) #define HEAP_RIGHT_INDEX(i) (2 * i + 2) #ifndef USE_LOOP static void max_heapify (Heap *heap, int index); static void min_heapify (Heap *heap, int index); #else static void max_heapify_loop (Heap *h, int i); static void min_heapify_loop (Heap *h, int i); #endif /* USE_LOOP */ static Heap *heap_new_from_array (HeapType heap_type, int *array, int size); Heap *heap_new (HeapType heap_type, int size) { Heap *h; h = (Heap *)malloc (sizeof (Heap)); if (h == NULL) { return NULL; } h->data = (int *)malloc (sizeof(int) *size); if (h->data == NULL) { free (h); return NULL; } h->growing_factor = 10; h->card = 0; h->size = size; h->type = heap_type; #ifndef USE_LOOP h->heapify_func = (heap_type == MAX_HEAP) ? max_heapify : min_heapify; #else h->heapify_func = (heap_type == MAX_HEAP) ? max_heapify_loop : min_heapify_loop; #endif /* USE_LOOP */ return h; } void heap_free (Heap *h) { free (h->data); free (h); } void heap_dump (Heap *h, FILE *file) { int i; assert (h != NULL); fprintf (file, "<HEAP TYPE=\"%s\" SIZE=\"%d\" CARD=\"%d\" GROWING_FACTOR=\"%d\">", ((h->type == MAX_HEAP) ? "MAX-HEAP" : "MIN-HEAP"), h->size, h->card, h->growing_factor); for (i = 0; i < h->card; i++) { fprintf (file, "<NODE>%d</NODE>", h->data[i]); } fprintf (file, "</HEAP>\n"); } bool heap_is_empty (Heap *h) { return (h->card == 0); } HeapType heap_get_type (Heap *heap) { assert (heap != NULL); return heap->type; } int heap_get_size (Heap *heap) { assert (heap != NULL); return heap->size; } int heap_get_card (Heap *heap) { assert (heap != NULL); return heap->card; } int heap_get_growing_factor (Heap *heap) { assert (heap != NULL); return heap->growing_factor; } void heap_set_growing_factor (Heap *heap, int value) { assert (heap != NULL); heap->growing_factor = value; } void heap_insert (Heap *h, int v) { assert (h != NULL); (h->card)++; /* avoiding array bounds exceeded*/ if (h->card > h->size) { h->data = realloc (h->data, (h->size + h->growing_factor)*sizeof(int)); if (h->data == NULL) { fprintf (stderr, "realloc error"); return; } h->size += h->growing_factor; } if (h->type == MAX_HEAP) { h->data[h->card - 1] = INT_MIN; } else { h->data[h->card - 1] = INT_MAX; } heap_update_key (h, h->card - 1, v); } /* priority queue*/ int heap_root (Heap *h) { assert (h != NULL); return h->data[0]; } int heap_extract_root (Heap *h) { int val; assert (h != NULL); if (h->card < 0) { fprintf (stderr, "error: heap empty\n"); exit (-1); } val = h->data[0]; h->data[0] = h->data[(h->card) - 1]; (h->card)--; h->heapify_func (h, 0); return val; } void heap_update_key (Heap *h, int i, int key) { if (h->type == MAX_HEAP) { if (key < h->data[i]) { fprintf (stderr, "Max-Heap: new key %d is samller than current key %d\n", key, h->data[i]); return; } h->data[i] = key; while ((i > 0) && (h->data[HEAP_PARENT_INDEX(i)] < h->data[i])) { swap (&(h->data[HEAP_PARENT_INDEX(i)]), &(h->data[i])); i = HEAP_PARENT_INDEX (i); } } else { if (key > h->data[i]) { fprintf (stderr, "Min-Heap: new key %d is larger than current key %d\n", key, h->data[i]); return; } h->data[i] = key; while ((i > 0) && (h->data[HEAP_PARENT_INDEX(i)] > h->data[i])) { swap (&(h->data[HEAP_PARENT_INDEX(i)]), &(h->data[i])); i = HEAP_PARENT_INDEX (i); } } } int *heap_sort (int *array, int len) { Heap *h; int i; assert (array != NULL); h = heap_new_from_array (MAX_HEAP, array, len); for (i = len - 1; i > 0; i--) { swap (&array[0], &array[i]); (h->card)--; h->heapify_func (h, 0); } free (h); return array; } /******************************************************* * private func ******************************************************/ #ifndef USE_LOOP /** * @breaif heapify a heap * * Assuming the two child tree rooted at HEAP_LEFT_INDEX (i) and * HEAP_RIGHT_INDEX (i) are max-heaps. * * @note Complexity: O( lg(n) ) * @param h the heap need to heapify * @param i the position need to heapify */ static void max_heapify (Heap *h, int i) { int l, r, largest; assert (h != NULL); l = HEAP_LEFT_INDEX (i); r = HEAP_RIGHT_INDEX (i); if ((l < h->card) && (h->data[l] > h->data[i])) { largest = l; } else { largest = i; } if ((r < h->card) && (h->data[r] > h->data[largest])) { largest = r; } if (largest != i) { swap (&(h->data[i]), &(h->data[largest])); max_heapify (h, largest); } } /** * @see max_heapify */ static void min_heapify (Heap *h, int i) { int l, r, minimum; assert (h != NULL); l = HEAP_LEFT_INDEX (i); r = HEAP_RIGHT_INDEX (i); if ((l < h->card) && (h->data[l] < h->data[i])) { minimum = l; } else { minimum = i; } if ((r < h->card) && (h->data[r] < h->data[minimum])) { minimum = r; } if (minimum != i) { swap (&(h->data[i]), &(h->data[minimum])); min_heapify (h, minimum); } } #else /** * @brief loop version of max_heapify, should be faster. * @see max_heapify(). */ static void max_heapify_loop (Heap *h, int i) { int l, r, largest; int tmp = i; /* save the previous value of tmp */ int p; do { p = tmp; l = HEAP_LEFT_INDEX (tmp); r = HEAP_RIGHT_INDEX (tmp); if ((l < h->card) && (h->data[l] > h->data[tmp])) { largest = l; } else { largest = tmp; } if ((r < h->card) && (h->data[r] > h->data[largest])) { largest = r; } if (largest != tmp) { swap (&(h->data[tmp]), &(h->data[largest])); tmp = largest; } } while (largest != p); } /** * @brief loop version of min_heapify, should be faster * @see min_heapify(). */ static void min_heapify_loop (Heap *h, int i) { int l, r, minimum; int tmp = i; int p; do { p = tmp; l = HEAP_LEFT_INDEX (tmp); r = HEAP_RIGHT_INDEX (tmp); if ((l < h->card) && (h->data[l] < h->data[tmp])) { minimum = l; } else { minimum = tmp; } if ((r < h->card) && (h->data[r] < h->data[minimum])) { minimum = r; } if (minimum != tmp) { swap (&(h->data[minimum]), &(h->data[tmp])); tmp = minimum; } } while (p != minimum); } #endif /* USE_LOOP */ static Heap *heap_new_from_array (HeapType heap_type, int *array, int size) { Heap *h; int i; assert (array != NULL); h = (Heap *)malloc (sizeof (Heap)); if (h == NULL) { return NULL; } h->data = array; h->growing_factor = 0; h->size = size; h->type = heap_type; #ifndef USE_LOOP h->heapify_func = (heap_type == MAX_HEAP) ? max_heapify : min_heapify; #else h->heapify_func = (heap_type == MAX_HEAP) ? max_heapify_loop : min_heapify_loop; #endif /* USE_LOOP */ h->card = size; for (i = HEAP_PARENT_INDEX (h->card); i >= 0; i--) { h->heapify_func (h, i); } return h; }