堆/堆排序

堆是一种抽象数据结构,可以看作是一颗完全二叉树。堆包括最大堆和最小堆,对于最大堆,父元素的值总是大于子元素,最小堆则相反。堆的这种特性特别适合用来做优先级队列

在C中,使用数组来保存堆时,对于元素i,数组的下标有如下关系:

#define HEAP_PARENT_INDEX(i) ((i – 1) / 2)
#define HEAP_LEFT_INDEX(i) (2 * i + 1)
#define HEAP_RIGHT_INDEX(i) (2 * i + 2)

BTW,别把这个数据结构的堆跟操作系统的堆混淆了,那个堆是用来动态分配内存空间的,虽然这种说法可能也不是太准确

下面以最大堆为例:
最大堆的插入很简单,直接在数组最后插入新的元素,然后依次跟父元素对比,把较大的元素交换到合适的位置。

当要移除最大的元素时,为了维护最大堆的性质,首先把数组最后一个元素移到开头填补空白,然后对这个堆执行heapify操作

代码中提供了使用尾递归和循环两种方式的heapify操作(尾递归能够用循环的方式来表现),以递归方式为例:heapify的操作是从root开始,对于每个节点i和它的两个子元素,选择一个最大值,然后交换节点i和最大值(有可能是同一个元素,那就是白干活呗……)
。然后以原最大值的下标为基准,递归的执行heapify操作,直到叶子节点。

利用heapify的性质,能够实现堆排序的算法,最简单的方式就是把整个数组插入堆中,然后依次取root元素。不过我们完全可以做到in place排序:

当对一个数组从尾到头执行heapify操作后,我们就得到一个合法的堆。那么要怎么排序呢?神奇的地方来了,直接交换数组首尾元素,然后告诉堆说最后一个元素你就别管了,然后对root元素执行heapify。重复这个步骤,直到堆认为自己已经没有元素了……其实它们都在后面排好队了,假如用的是最大堆就是从小到大排列,最小堆则是从大到小

注意代码中136-143, 179-183, 以及194-198这几行,这几行看起来莫名其妙……我回忆了很久,看头文件才发现是为了防止外部直接调用heap_update_key()接口时,插入了一个非法的值破坏了堆的性质。

heap.h

#ifndef _HEAP_H
#define _HEAP_H

#include <stdio.h>
#include <stdbool.h>

typedef struct _heap Heap;

typedef enum _heap_type
{
	MAX_HEAP = 0,
	MIN_HEAP,
} HeapType;

/* use heap_free() to free the return value */
Heap *heap_new(HeapType type, int size);
void heap_free (Heap *heap);
void heap_dump (Heap *heap, FILE *file);
bool heap_is_empty (Heap *heap);

HeapType heap_get_type (Heap *heap);
int heap_get_size (Heap *heap);
int heap_get_card (Heap *heap);
int heap_get_growing_factor (Heap *heap);
void heap_set_growing_factor (Heap *heap, int value);

/* prority queue */
/**
 * @brief insert a value to heap
 * @note Complexity O( lg(n) )
 */
void heap_insert (Heap *heap, int value);
/**
 * @breif return the root element
 * @note Complexity O( 1 )
 * @return for MAX_HEAP, return the largest; for MIN_HEAP, return the minimum.
 */
int heap_root (Heap *heap);

/**
 * @breif delete and return the root element
 * @note Complexity O( lg(n) )
 * @return for MAX_HEAP, delete & return the largest in heap;
 *		   for MIN_HEAP, delete & return the minimum in heap.
 * @see heap_root().
 */
int heap_extract_root (Heap *heap);

/**
 * @brief set an element in heap wiht another value;
 *
 * for MAX_HEAP, require key > current value in index i;
 * for MIN_HEAP, require key < current value in index i;
 *
 * @note Complexity O( lg(n) )
 * @param heap 
 * @param i index of the element to set.
 * @param key new value set to element.
 */
void heap_update_key (Heap *heap, int i, int key);

/* heap sort func */
/**
 * @brief Heap sort
 *
 * Swap the first elem which is the largest in heap with last elem of heap,
 * decrease heap size by 1, loop until heap size is 0.
 * 
 * @note Complexity O( n*lg(n) )
 * @param array the array need to sort.
 * @param len length of array.
 * @return sorted array.
 */
int *heap_sort (int *array, int len);
#endif /* _HEAP_H */

heap.c

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdlib.h>
#include <limits.h>
#include "heap.h"
#include "base.h"

typedef void (*heapify_func_t) (Heap *heap, int index);

struct _heap
{
	int *data;
	HeapType type;
	int size;
	int card;
	int growing_factor;
	heapify_func_t heapify_func;
};

#define HEAP_PARENT_INDEX(i) ((i - 1) / 2)
#define HEAP_LEFT_INDEX(i) (2 * i + 1)
#define HEAP_RIGHT_INDEX(i) (2 * i + 2)

#ifndef USE_LOOP
static void max_heapify (Heap *heap, int index);
static void min_heapify (Heap *heap, int index);
#else
static void max_heapify_loop (Heap *h, int i);
static void min_heapify_loop (Heap *h, int i);
#endif /* USE_LOOP */
static Heap *heap_new_from_array (HeapType heap_type, int *array, int size);

Heap *heap_new (HeapType heap_type, int size)
{
	Heap *h;

	h = (Heap *)malloc (sizeof (Heap));
	if (h == NULL)
	{
		return NULL;
	}

	h->data = (int *)malloc (sizeof(int) *size);
	if (h->data == NULL)
	{
		free (h);
		return NULL;
	}

	h->growing_factor = 10;
	h->card = 0;
	h->size = size;
	h->type = heap_type;

#ifndef USE_LOOP
	h->heapify_func = (heap_type == MAX_HEAP) ? max_heapify : min_heapify;
#else
	h->heapify_func = (heap_type == MAX_HEAP) ? max_heapify_loop : min_heapify_loop;
#endif /* USE_LOOP */

	return h;
}

void heap_free (Heap *h)
{
	free (h->data);
	free (h);
}

void heap_dump (Heap *h, FILE *file)
{
	int i;
	assert (h != NULL);

	fprintf (file, "<HEAP TYPE=\"%s\" SIZE=\"%d\" CARD=\"%d\" GROWING_FACTOR=\"%d\">",
			((h->type == MAX_HEAP) ? "MAX-HEAP" : "MIN-HEAP"), h->size, h->card, h->growing_factor);
	for (i = 0; i < h->card; i++)
	{
		fprintf (file, "<NODE>%d</NODE>", h->data[i]);
	}
	fprintf (file, "</HEAP>\n");
}

bool heap_is_empty (Heap *h)
{
	return (h->card == 0);
}

HeapType heap_get_type (Heap *heap)
{
	assert (heap != NULL);
	return heap->type;
}

int heap_get_size (Heap *heap)
{
	assert (heap != NULL);
	return heap->size;
}

int heap_get_card (Heap *heap)
{
	assert (heap != NULL);
	return heap->card;
}

int heap_get_growing_factor (Heap *heap)
{
	assert (heap != NULL);
	return heap->growing_factor;
}

void heap_set_growing_factor (Heap *heap, int value)
{
	assert (heap != NULL);
	heap->growing_factor = value;
}

void heap_insert (Heap *h, int v)
{
	assert (h != NULL);

	(h->card)++; /* avoiding array bounds exceeded*/
	if (h->card > h->size)
	{
		h->data = realloc (h->data, (h->size + h->growing_factor)*sizeof(int));
		if (h->data == NULL)
		{
			fprintf (stderr, "realloc error");
			return;
		}
		h->size += h->growing_factor;
	}

	if (h->type == MAX_HEAP)
	{
		h->data[h->card - 1] = INT_MIN;
	}
	else
	{
		h->data[h->card - 1] = INT_MAX;
	}
	heap_update_key (h, h->card - 1, v);
}

/* priority queue*/

int heap_root (Heap *h)
{
	assert (h != NULL);
	return h->data[0];
}

int heap_extract_root (Heap *h)
{
	int val;
	assert (h != NULL);

	if (h->card < 0)
	{
		fprintf (stderr, "error: heap empty\n");
		exit (-1);
	}

	val = h->data[0];
	h->data[0] = h->data[(h->card) - 1];
	(h->card)--;
	h->heapify_func (h, 0);

	return val;
}
void heap_update_key (Heap *h, int i, int key)
{
	if (h->type == MAX_HEAP)
	{
		if (key < h->data[i])
		{
			fprintf (stderr, "Max-Heap: new key %d is samller than current key %d\n", key, h->data[i]);
			return;
		}

		h->data[i] = key;
		while ((i > 0) && (h->data[HEAP_PARENT_INDEX(i)] < h->data[i]))
		{
			swap (&(h->data[HEAP_PARENT_INDEX(i)]), &(h->data[i]));
			i = HEAP_PARENT_INDEX (i);
		}
	}
	else
	{
		if (key > h->data[i])
		{
			fprintf (stderr, "Min-Heap: new key %d is larger than current key %d\n", key, h->data[i]);
			return;
		}
		h->data[i] = key;
		while ((i > 0) && (h->data[HEAP_PARENT_INDEX(i)] > h->data[i]))
		{
			swap (&(h->data[HEAP_PARENT_INDEX(i)]), &(h->data[i]));
			i = HEAP_PARENT_INDEX (i);
		}
	}
}

int *heap_sort (int *array, int len)
{
	Heap *h;
	int i;

	assert (array != NULL);

	h = heap_new_from_array (MAX_HEAP, array, len);
	for (i = len - 1; i > 0; i--)
	{
		swap (&array[0], &array[i]);
		(h->card)--;
		h->heapify_func (h, 0);
	}

	free (h);
	return array;
}

/*******************************************************
 * private func
 ******************************************************/

#ifndef USE_LOOP
/**
 * @breaif heapify a heap
 *
 * Assuming the two child tree rooted at HEAP_LEFT_INDEX (i) and
 * HEAP_RIGHT_INDEX (i) are max-heaps.
 *
 * @note Complexity: O( lg(n) )
 * @param h the heap need to heapify
 * @param i the position need to heapify
 */
static void max_heapify (Heap *h, int i)
{
	int l, r, largest;

	assert (h != NULL);

	l = HEAP_LEFT_INDEX (i);
	r = HEAP_RIGHT_INDEX (i);

	if ((l < h->card) && (h->data[l] > h->data[i]))
	{
		largest = l;
	}
	else
	{
		largest = i;
	}

	if ((r < h->card) && (h->data[r] > h->data[largest]))
	{
		largest = r;
	}

	if (largest != i)
	{
		swap (&(h->data[i]), &(h->data[largest]));
		max_heapify (h, largest);
	}
}

/**
 * @see max_heapify
 */
static void min_heapify (Heap *h, int i)
{
	int l, r, minimum;

	assert (h != NULL);

	l = HEAP_LEFT_INDEX (i);
	r = HEAP_RIGHT_INDEX (i);
	if ((l < h->card) && (h->data[l] < h->data[i]))
	{
		minimum = l;
	}
	else
	{
		minimum = i;
	}

	if ((r < h->card) && (h->data[r] < h->data[minimum]))
	{
		minimum = r;
	}

	if (minimum != i)
	{
		swap (&(h->data[i]), &(h->data[minimum]));
		min_heapify (h, minimum);
	}
}

#else
/**
 * @brief loop version of max_heapify, should be faster.
 * @see max_heapify().
 */
static void max_heapify_loop (Heap *h, int i)
{
	int l, r, largest;
	int tmp = i;

	/* save the previous value of tmp */
	int p;

	do {
		p = tmp;
		l = HEAP_LEFT_INDEX (tmp);
		r = HEAP_RIGHT_INDEX (tmp);

		if ((l < h->card) && (h->data[l] > h->data[tmp]))
		{
			largest = l;
		}
		else
		{
			largest = tmp;
		}

		if ((r < h->card) && (h->data[r] > h->data[largest]))
		{
			largest = r;
		}

		if (largest != tmp)
		{
			swap (&(h->data[tmp]), &(h->data[largest]));
			tmp = largest;
		}
	} while (largest != p);
}

/**
 * @brief loop version of min_heapify, should be faster
 * @see min_heapify().
 */
static void min_heapify_loop (Heap *h, int i)
{
	int l, r, minimum;
	int tmp = i;
	int p;

	do {
		p = tmp;
		l = HEAP_LEFT_INDEX (tmp);
		r = HEAP_RIGHT_INDEX (tmp);

		if ((l < h->card) && (h->data[l] < h->data[tmp]))
		{
			minimum = l;
		}
		else
		{
			minimum = tmp;
		}

		if ((r < h->card) && (h->data[r] < h->data[minimum]))
		{
			minimum = r;
		}

		if (minimum != tmp)
		{
			swap (&(h->data[minimum]), &(h->data[tmp]));
			tmp = minimum;
		}
	} while (p != minimum);
}
#endif /* USE_LOOP */

static Heap *heap_new_from_array (HeapType heap_type, int *array, int size)
{
	Heap *h;
	int i;

	assert (array != NULL);

	h = (Heap *)malloc (sizeof (Heap));
	if (h == NULL)
	{
		return NULL;
	}

	h->data = array;
	h->growing_factor = 0;
	h->size = size;
	h->type = heap_type;
#ifndef USE_LOOP
	h->heapify_func = (heap_type == MAX_HEAP) ? max_heapify : min_heapify;
#else
	h->heapify_func = (heap_type == MAX_HEAP) ? max_heapify_loop : min_heapify_loop;
#endif /* USE_LOOP */

	h->card = size;
	for (i = HEAP_PARENT_INDEX (h->card); i >= 0; i--)
	{
		h->heapify_func (h, i);
	}

	return h;
}
updatedupdated2022-02-222022-02-22