堆是一种抽象数据结构,可以看作是一颗完全二叉树。堆包括最大堆和最小堆,对于最大堆,父元素的值总是大于子元素,最小堆则相反。堆的这种特性特别适合用来做优先级队列
在C中,使用数组来保存堆时,对于元素i,数组的下标有如下关系:
#define HEAP_PARENT_INDEX(i) ((i – 1) / 2)
#define HEAP_LEFT_INDEX(i) (2 * i + 1)
#define HEAP_RIGHT_INDEX(i) (2 * i + 2)
BTW,别把这个数据结构的堆跟操作系统的堆混淆了,那个堆是用来动态分配内存空间的,虽然这种说法可能也不是太准确
下面以最大堆为例:
最大堆的插入很简单,直接在数组最后插入新的元素,然后依次跟父元素对比,把较大的元素交换到合适的位置。
当要移除最大的元素时,为了维护最大堆的性质,首先把数组最后一个元素移到开头填补空白,然后对这个堆执行heapify操作
代码中提供了使用尾递归和循环两种方式的heapify操作(尾递归能够用循环的方式来表现),以递归方式为例:heapify的操作是从root开始,对于每个节点i和它的两个子元素,选择一个最大值,然后交换节点i和最大值(有可能是同一个元素,那就是白干活呗……)
。然后以原最大值的下标为基准,递归的执行heapify操作,直到叶子节点。
利用heapify的性质,能够实现堆排序的算法,最简单的方式就是把整个数组插入堆中,然后依次取root元素。不过我们完全可以做到in place排序:
当对一个数组从尾到头执行heapify操作后,我们就得到一个合法的堆。那么要怎么排序呢?神奇的地方来了,直接交换数组首尾元素,然后告诉堆说最后一个元素你就别管了,然后对root元素执行heapify。重复这个步骤,直到堆认为自己已经没有元素了……其实它们都在后面排好队了,假如用的是最大堆就是从小到大排列,最小堆则是从大到小
注意代码中136-143, 179-183, 以及194-198这几行,这几行看起来莫名其妙……我回忆了很久,看头文件才发现是为了防止外部直接调用heap_update_key()接口时,插入了一个非法的值破坏了堆的性质。
heap.h
#ifndef _HEAP_H
#define _HEAP_H
#include <stdio.h>
#include <stdbool.h>
typedef struct _heap Heap;
typedef enum _heap_type
{
MAX_HEAP = 0,
MIN_HEAP,
} HeapType;
/* use heap_free() to free the return value */
Heap *heap_new(HeapType type, int size);
void heap_free (Heap *heap);
void heap_dump (Heap *heap, FILE *file);
bool heap_is_empty (Heap *heap);
HeapType heap_get_type (Heap *heap);
int heap_get_size (Heap *heap);
int heap_get_card (Heap *heap);
int heap_get_growing_factor (Heap *heap);
void heap_set_growing_factor (Heap *heap, int value);
/* prority queue */
/**
* @brief insert a value to heap
* @note Complexity O( lg(n) )
*/
void heap_insert (Heap *heap, int value);
/**
* @breif return the root element
* @note Complexity O( 1 )
* @return for MAX_HEAP, return the largest; for MIN_HEAP, return the minimum.
*/
int heap_root (Heap *heap);
/**
* @breif delete and return the root element
* @note Complexity O( lg(n) )
* @return for MAX_HEAP, delete & return the largest in heap;
* for MIN_HEAP, delete & return the minimum in heap.
* @see heap_root().
*/
int heap_extract_root (Heap *heap);
/**
* @brief set an element in heap wiht another value;
*
* for MAX_HEAP, require key > current value in index i;
* for MIN_HEAP, require key < current value in index i;
*
* @note Complexity O( lg(n) )
* @param heap
* @param i index of the element to set.
* @param key new value set to element.
*/
void heap_update_key (Heap *heap, int i, int key);
/* heap sort func */
/**
* @brief Heap sort
*
* Swap the first elem which is the largest in heap with last elem of heap,
* decrease heap size by 1, loop until heap size is 0.
*
* @note Complexity O( n*lg(n) )
* @param array the array need to sort.
* @param len length of array.
* @return sorted array.
*/
int *heap_sort (int *array, int len);
#endif /* _HEAP_H */heap.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdlib.h>
#include <limits.h>
#include "heap.h"
#include "base.h"
typedef void (*heapify_func_t) (Heap *heap, int index);
struct _heap
{
int *data;
HeapType type;
int size;
int card;
int growing_factor;
heapify_func_t heapify_func;
};
#define HEAP_PARENT_INDEX(i) ((i - 1) / 2)
#define HEAP_LEFT_INDEX(i) (2 * i + 1)
#define HEAP_RIGHT_INDEX(i) (2 * i + 2)
#ifndef USE_LOOP
static void max_heapify (Heap *heap, int index);
static void min_heapify (Heap *heap, int index);
#else
static void max_heapify_loop (Heap *h, int i);
static void min_heapify_loop (Heap *h, int i);
#endif /* USE_LOOP */
static Heap *heap_new_from_array (HeapType heap_type, int *array, int size);
Heap *heap_new (HeapType heap_type, int size)
{
Heap *h;
h = (Heap *)malloc (sizeof (Heap));
if (h == NULL)
{
return NULL;
}
h->data = (int *)malloc (sizeof(int) *size);
if (h->data == NULL)
{
free (h);
return NULL;
}
h->growing_factor = 10;
h->card = 0;
h->size = size;
h->type = heap_type;
#ifndef USE_LOOP
h->heapify_func = (heap_type == MAX_HEAP) ? max_heapify : min_heapify;
#else
h->heapify_func = (heap_type == MAX_HEAP) ? max_heapify_loop : min_heapify_loop;
#endif /* USE_LOOP */
return h;
}
void heap_free (Heap *h)
{
free (h->data);
free (h);
}
void heap_dump (Heap *h, FILE *file)
{
int i;
assert (h != NULL);
fprintf (file, "<HEAP TYPE=\"%s\" SIZE=\"%d\" CARD=\"%d\" GROWING_FACTOR=\"%d\">",
((h->type == MAX_HEAP) ? "MAX-HEAP" : "MIN-HEAP"), h->size, h->card, h->growing_factor);
for (i = 0; i < h->card; i++)
{
fprintf (file, "<NODE>%d</NODE>", h->data[i]);
}
fprintf (file, "</HEAP>\n");
}
bool heap_is_empty (Heap *h)
{
return (h->card == 0);
}
HeapType heap_get_type (Heap *heap)
{
assert (heap != NULL);
return heap->type;
}
int heap_get_size (Heap *heap)
{
assert (heap != NULL);
return heap->size;
}
int heap_get_card (Heap *heap)
{
assert (heap != NULL);
return heap->card;
}
int heap_get_growing_factor (Heap *heap)
{
assert (heap != NULL);
return heap->growing_factor;
}
void heap_set_growing_factor (Heap *heap, int value)
{
assert (heap != NULL);
heap->growing_factor = value;
}
void heap_insert (Heap *h, int v)
{
assert (h != NULL);
(h->card)++; /* avoiding array bounds exceeded*/
if (h->card > h->size)
{
h->data = realloc (h->data, (h->size + h->growing_factor)*sizeof(int));
if (h->data == NULL)
{
fprintf (stderr, "realloc error");
return;
}
h->size += h->growing_factor;
}
if (h->type == MAX_HEAP)
{
h->data[h->card - 1] = INT_MIN;
}
else
{
h->data[h->card - 1] = INT_MAX;
}
heap_update_key (h, h->card - 1, v);
}
/* priority queue*/
int heap_root (Heap *h)
{
assert (h != NULL);
return h->data[0];
}
int heap_extract_root (Heap *h)
{
int val;
assert (h != NULL);
if (h->card < 0)
{
fprintf (stderr, "error: heap empty\n");
exit (-1);
}
val = h->data[0];
h->data[0] = h->data[(h->card) - 1];
(h->card)--;
h->heapify_func (h, 0);
return val;
}
void heap_update_key (Heap *h, int i, int key)
{
if (h->type == MAX_HEAP)
{
if (key < h->data[i])
{
fprintf (stderr, "Max-Heap: new key %d is samller than current key %d\n", key, h->data[i]);
return;
}
h->data[i] = key;
while ((i > 0) && (h->data[HEAP_PARENT_INDEX(i)] < h->data[i]))
{
swap (&(h->data[HEAP_PARENT_INDEX(i)]), &(h->data[i]));
i = HEAP_PARENT_INDEX (i);
}
}
else
{
if (key > h->data[i])
{
fprintf (stderr, "Min-Heap: new key %d is larger than current key %d\n", key, h->data[i]);
return;
}
h->data[i] = key;
while ((i > 0) && (h->data[HEAP_PARENT_INDEX(i)] > h->data[i]))
{
swap (&(h->data[HEAP_PARENT_INDEX(i)]), &(h->data[i]));
i = HEAP_PARENT_INDEX (i);
}
}
}
int *heap_sort (int *array, int len)
{
Heap *h;
int i;
assert (array != NULL);
h = heap_new_from_array (MAX_HEAP, array, len);
for (i = len - 1; i > 0; i--)
{
swap (&array[0], &array[i]);
(h->card)--;
h->heapify_func (h, 0);
}
free (h);
return array;
}
/*******************************************************
* private func
******************************************************/
#ifndef USE_LOOP
/**
* @breaif heapify a heap
*
* Assuming the two child tree rooted at HEAP_LEFT_INDEX (i) and
* HEAP_RIGHT_INDEX (i) are max-heaps.
*
* @note Complexity: O( lg(n) )
* @param h the heap need to heapify
* @param i the position need to heapify
*/
static void max_heapify (Heap *h, int i)
{
int l, r, largest;
assert (h != NULL);
l = HEAP_LEFT_INDEX (i);
r = HEAP_RIGHT_INDEX (i);
if ((l < h->card) && (h->data[l] > h->data[i]))
{
largest = l;
}
else
{
largest = i;
}
if ((r < h->card) && (h->data[r] > h->data[largest]))
{
largest = r;
}
if (largest != i)
{
swap (&(h->data[i]), &(h->data[largest]));
max_heapify (h, largest);
}
}
/**
* @see max_heapify
*/
static void min_heapify (Heap *h, int i)
{
int l, r, minimum;
assert (h != NULL);
l = HEAP_LEFT_INDEX (i);
r = HEAP_RIGHT_INDEX (i);
if ((l < h->card) && (h->data[l] < h->data[i]))
{
minimum = l;
}
else
{
minimum = i;
}
if ((r < h->card) && (h->data[r] < h->data[minimum]))
{
minimum = r;
}
if (minimum != i)
{
swap (&(h->data[i]), &(h->data[minimum]));
min_heapify (h, minimum);
}
}
#else
/**
* @brief loop version of max_heapify, should be faster.
* @see max_heapify().
*/
static void max_heapify_loop (Heap *h, int i)
{
int l, r, largest;
int tmp = i;
/* save the previous value of tmp */
int p;
do {
p = tmp;
l = HEAP_LEFT_INDEX (tmp);
r = HEAP_RIGHT_INDEX (tmp);
if ((l < h->card) && (h->data[l] > h->data[tmp]))
{
largest = l;
}
else
{
largest = tmp;
}
if ((r < h->card) && (h->data[r] > h->data[largest]))
{
largest = r;
}
if (largest != tmp)
{
swap (&(h->data[tmp]), &(h->data[largest]));
tmp = largest;
}
} while (largest != p);
}
/**
* @brief loop version of min_heapify, should be faster
* @see min_heapify().
*/
static void min_heapify_loop (Heap *h, int i)
{
int l, r, minimum;
int tmp = i;
int p;
do {
p = tmp;
l = HEAP_LEFT_INDEX (tmp);
r = HEAP_RIGHT_INDEX (tmp);
if ((l < h->card) && (h->data[l] < h->data[tmp]))
{
minimum = l;
}
else
{
minimum = tmp;
}
if ((r < h->card) && (h->data[r] < h->data[minimum]))
{
minimum = r;
}
if (minimum != tmp)
{
swap (&(h->data[minimum]), &(h->data[tmp]));
tmp = minimum;
}
} while (p != minimum);
}
#endif /* USE_LOOP */
static Heap *heap_new_from_array (HeapType heap_type, int *array, int size)
{
Heap *h;
int i;
assert (array != NULL);
h = (Heap *)malloc (sizeof (Heap));
if (h == NULL)
{
return NULL;
}
h->data = array;
h->growing_factor = 0;
h->size = size;
h->type = heap_type;
#ifndef USE_LOOP
h->heapify_func = (heap_type == MAX_HEAP) ? max_heapify : min_heapify;
#else
h->heapify_func = (heap_type == MAX_HEAP) ? max_heapify_loop : min_heapify_loop;
#endif /* USE_LOOP */
h->card = size;
for (i = HEAP_PARENT_INDEX (h->card); i >= 0; i--)
{
h->heapify_func (h, i);
}
return h;
}