05-树9 Huffman Codes

05-树9 Huffman Codes （30分）

题目描述

In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification

Each input file contains one test case. For each case, the first line gives an integer N (2 ≤ N ≤ 63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is an non-empty string of no more than 63 '0’s and '1’s.

Output Specification

For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output

Yes
Yes
No
No

解题思路

本题的大意是：由于哈夫曼编码的不唯一性，从 WPL 的角度，最优编码的个数可以有多个，我们现在需要判断输入的编码是否是可能的一种最优编码类型。
例如，对于题目中所给出的测试案例，以下两种哈夫曼编码类型都是可能的：

两棵树的 WPL 均为53.

最小堆的实现

哈夫曼树的创建的前提是需要一个最小堆，代码如下：

// 哈夫曼树结点定义
typedef struct HuffmanTree {int frequency; // 表示每个字符出现的频率struct HuffmanTree *left;struct HuffmanTree *right;
} HuffmanTree;#define root 0 // 初始化堆的根结点为0
#define parent(idx) ( idx / 2 ) // 查找idx的父结点索引
#define lchild(idx) ( idx * 2 + 1 ) // 查找idx的左孩子结点索引
#define rchild(idx) ( idx * 2 + 2 ) // 查找idx的右孩子结点索引// 最小堆的定义
typedef struct Heap {HuffmanTree **data; // 存储值类型为HuffmanTree的数组int size; // 结点的个数int capacity; // 堆的最大容量
} MinHeap;// 创建及初始化最小堆
MinHeap *CreateHeap( int size ) {MinHeap *heap = ( MinHeap *) malloc(sizeof(MinHeap) );heap->data = ( HuffmanTree **) malloc(sizeof(HuffmanTree *) * size );heap->size = 0;heap->capacity = size;return heap;
}// 父子结点的交换,直接进行结点的交换
void swap( MinHeap *heap, int idx1, int idx2 ) {HuffmanTree *temp = heap->data[idx1];heap->data[idx1] = heap->data[idx2];heap->data[idx2] = temp;
}// 父子结点的比较，比较的依据是frequency
bool compare(MinHeap *heap, int idx1, int idx2 ) {return heap->data[idx1]->frequency < heap->data[idx2]->frequency;
}// 堆的上浮操作，通常用于向堆中插入元素，进行结点的调整
void Heap_Float(MinHeap *heap, int idx ) {int par = parent(idx);while ( par >= root ) {// 如果当前结点大于其父结点，进行交换if ( compare(heap, idx, par) ) {swap(heap, par, idx);idx = par;par = parent(idx);}else break;}
}// 判断堆是否已满
bool isFull(MinHeap *heap ) {return heap->size == heap->capacity;
}// 向堆中插入元素,与堆栈类似,不过需要进行值的上浮操作
bool Heap_Push(MinHeap *heap, HuffmanTree *val ) {if ( isFull(heap) ) {return false;}heap->data[heap->size ++] = val;Heap_Float(heap, heap->size - 1);return true;
}// 堆的下沉操作，通常用于向堆中删除元素，进行结点的调整
void Heap_Sink(MinHeap *heap, int idx ) {int child = lchild(idx);while ( child < heap->size ) {// 查找左右孩子中值最小的if ( rchild(idx) < heap->size ) {// 如果右孩子的值更小,应取右孩子的值if ( compare(heap, rchild(idx), child) ) {child = rchild(idx);}}// 如果当前孩子结点大于当前结点，进行交换if ( compare(heap, child, idx) ) {swap(heap, child, idx);idx = child;child = lchild(idx);}else {break;}}
}// 判断堆是否为空
bool isEmpty(MinHeap *heap ) {return heap->size == 0;
}// 在堆中删除元素,与堆栈类似,不过需要进行值的下沉操作
HuffmanTree *Heap_Popped(MinHeap *heap ) {if ( isEmpty(heap) ) {return NULL;}HuffmanTree *temp = heap->data[root];heap->data[root] = heap->data[-- heap->size];Heap_Sink(heap, root);return temp;
}// 释放堆
void Heap_Free(MinHeap *heap ) {free(heap->data);free(heap);
}

哈夫曼树的创建

整体思路较为简单，哈夫曼树创建完成的同时，堆就为空了，其作用就完成了，可以进行空间的释放。

HuffmanTree *CreateHuffmanTree( MinHeap *heap ) {HuffmanTree *HT = NULL;int size = heap->size;for ( int i = 1; i < size; i ++ ) {HT = ( HuffmanTree *) malloc( sizeof(HuffmanTree) );HT->left = Heap_Popped(heap);HT->right = Heap_Popped(heap);HT->frequency = HT->left->frequency + HT->right->frequency;Heap_Push(heap, HT);}return Heap_Popped(heap);
}

最优编码的判定

从哈夫曼编码的特点进行判断：

WPL 最小
满足前缀码
没有度唯一的结点

WPL 最小

在进行判断前，需要先计算最优编码的 WPL 的值，这也是前面创建哈夫曼树的目的。结合 WPL 的计算公式与递归，下面的代码应该好理解。

int getWPL( HuffmanTree *HT, int depth ) {if ( HT->left == NULL && HT->right == NULL ) {return depth * HT->frequency;}return getWPL(HT->left, depth + 1 ) + getWPL(HT->right, depth + 1);
}

满足前缀码及度不为1

我们没有办法直接通过输入直接看出其是否是前缀码以及度是否为1，最直接的办法就是，根据输入直接创建一棵树，不过与哈夫曼树的创建相比要轻松许多，因为此时创建树其实就是普通的二叉树的创建，与二叉树的层序生成过程类似。

#define NULLVALUE -1bool isPrefixCode( HuffmanTree *T, const char *code, int fre ) {HuffmanTree *temp = T;for ( int i = 0; code[i]; i ++ ) {// 当前code与上一次的code具有同一个前缀码，返回falseif ( temp->frequency != NULLVALUE ) {return false;}if ( code[i] == '0' ) {if ( temp->left == NULL ) {temp->left = ( HuffmanTree *) malloc( sizeof(HuffmanTree) );temp->left->frequency = NULLVALUE;temp->left->left = temp->left->right = NULL;}temp = temp->left;}else if ( code[i] == '1' ) {if ( temp->right == NULL ) {temp->right = ( HuffmanTree *) malloc( sizeof(HuffmanTree) );temp->right->frequency = NULLVALUE;temp->right->left = temp->right->right = NULL;}temp = temp->right;}}// 叶子结点且为空值if ( temp->left == NULL && temp->right == NULL && temp->frequency == NULLVALUE ) {temp->frequency = fre;return true;}return false;
}

主函数的实现

整个程序框架的逻辑如下：

int main(void) {int M, N;scanf("%d", &N); // 读取字符集的大小Ngetchar(); // 清除输入缓冲区的换行符MinHeap *heap = CreateHeap(N); // 创建一个最小堆，用于构建霍夫曼树char in; // 用于存储输入的字符int fre; // 用于存储字符的频率HuffmanTree *temp = NULL; // 临时变量，用于创建新的霍夫曼树节点int frequency[N]; // 数组，存储每个字符的频率// 读取每个字符及其频率，并将其压入堆中for (int i = 0; i < N; i++) {scanf("%c %d", &in, &fre);frequency[i] = fre; // 存储频率temp = (HuffmanTree *) malloc(sizeof(HuffmanTree)); // 分配新的霍夫曼树节点temp->frequency = fre; // 设置频率temp->left = temp->right = NULL; // 初始化左右子树为空Heap_Push(heap, temp); // 将新节点压入堆中getchar(); // 清除输入缓冲区的换行符}scanf("%d", &M); // 读取要检查的编码方案的数量Mgetchar(); // 清除输入缓冲区的换行符// 构建霍夫曼树，并计算最优WPLHuffmanTree *HT = CreateHuffmanTree(heap);int trueWPL = getWPL(HT, root); char code[SIZE]; // 用于存储输入的编码// 对于每个要检查的编码方案for (int i = 0; i < M; i++) {bool flag = true; // 标志，用于判断编码方案是否有效int WPL = 0; // 当前编码方案的WPLHuffmanTree *T = (HuffmanTree *) malloc(sizeof(HuffmanTree)); // 创建一个临时霍夫曼树节点T->frequency = NULLVALUE; // 设置为空值T->left = T->right = NULL; // 初始化左右子树为空// 读取每个字符的编码，并验证编码方案for (int j = 0; j < N; j++) {scanf("%c %s", &in, code);getchar(); // 清除输入缓冲区的换行符// 如果编码不是前缀码或者长度超过限制，则标记为无效if (flag && (strlen(code) > N - 1 || !isPrefixCode(T, code, frequency[j]))) {flag = false; // 标记编码方案无效}WPL += strlen(code) * frequency[j]; // 计算当前编码方案的WPL}// 如果编码方案有效且WPL与最优WPL相等，则输出"Yes"，否则输出"No"if (flag && WPL == trueWPL) {printf("Yes\n");}else {printf("No\n");}HuffmanTreeFree(T); // 释放临时霍夫曼树节点}Heap_Free(heap); // 释放堆内存return 0; // 程序结束
}

全部代码

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>#define SIZE 64
#define NULLVALUE -1typedef struct HuffmanTree {int frequency;struct HuffmanTree *left;struct HuffmanTree *right;
} HuffmanTree;#define root 0
#define parent(idx) ( idx / 2 )
#define lchild(idx) ( idx * 2 + 1 )
#define rchild(idx) ( idx * 2 + 2 )typedef struct Heap {HuffmanTree **data;int size;int capacity;
} MinHeap;MinHeap *CreateHeap( int size ) {MinHeap *heap = ( MinHeap *) malloc(sizeof(MinHeap) );heap->data = ( HuffmanTree **) malloc(sizeof(HuffmanTree *) * size );heap->size = 0;heap->capacity = size;return heap;
}void swap( MinHeap *heap, int idx1, int idx2 ) {HuffmanTree *temp = heap->data[idx1];heap->data[idx1] = heap->data[idx2];heap->data[idx2] = temp;
}bool compare(MinHeap *heap, int idx1, int idx2 ) {return heap->data[idx1]->frequency < heap->data[idx2]->frequency;
}void Heap_Float(MinHeap *heap, int idx ) {int par = parent(idx);while ( par >= root ) {if ( compare(heap, idx, par) ) {swap(heap, par, idx);idx = par;par = parent(idx);}else break;}
}bool isFull(MinHeap *heap ) {return heap->size == heap->capacity;
}bool Heap_Push(MinHeap *heap, HuffmanTree *val ) {if ( isFull(heap) ) {return false;}heap->data[heap->size ++] = val;Heap_Float(heap, heap->size - 1);return true;
}void Heap_Sink(MinHeap *heap, int idx ) {int child = lchild(idx);while ( child < heap->size ) {if ( rchild(idx) < heap->size ) {if ( compare(heap, rchild(idx), child) ) {child = rchild(idx);}}if ( compare(heap, child, idx) ) {swap(heap, child, idx);idx = child;child = lchild(idx);}else {break;}}
}bool isEmpty(MinHeap *heap ) {return heap->size == 0;
}HuffmanTree *Heap_Popped(MinHeap *heap ) {if ( isEmpty(heap) ) {return NULL;}HuffmanTree *temp = heap->data[root];heap->data[root] = heap->data[-- heap->size];Heap_Sink(heap, root);return temp;
}void Heap_Free(MinHeap *heap ) {free(heap->data);free(heap);
}HuffmanTree *CreateHuffmanTree( MinHeap *heap ) {HuffmanTree *HT = NULL;int size = heap->size;for ( int i = 1; i < size; i ++ ) {HT = ( HuffmanTree *) malloc( sizeof(HuffmanTree) );HT->left = Heap_Popped(heap);HT->right = Heap_Popped(heap);HT->frequency = HT->left->frequency + HT->right->frequency;Heap_Push(heap, HT);}return Heap_Popped(heap);
}int getWPL( HuffmanTree *HT, int depth ) {if ( HT->left == NULL && HT->right == NULL ) {return depth * HT->frequency;}return getWPL(HT->left, depth + 1 ) + getWPL(HT->right, depth + 1);
}bool isPrefixCode( HuffmanTree *T, const char *code, int fre ) {HuffmanTree *temp = T;for ( int i = 0; code[i]; i ++ ) {if ( temp->frequency != NULLVALUE ) {return false;}if ( code[i] == '0' ) {if ( temp->left == NULL ) {temp->left = ( HuffmanTree *) malloc( sizeof(HuffmanTree) );temp->left->frequency = NULLVALUE;temp->left->left = temp->left->right = NULL;}temp = temp->left;}else if ( code[i] == '1' ) {if ( temp->right == NULL ) {temp->right = ( HuffmanTree *) malloc( sizeof(HuffmanTree) );temp->right->frequency = NULLVALUE;temp->right->left = temp->right->right = NULL;}temp = temp->right;}}if ( temp->left == NULL && temp->right == NULL && temp->frequency == NULLVALUE ) {temp->frequency = fre;return true;}return false;
}void HuffmanTreeFree( HuffmanTree *HT ) {if ( HT ) {HuffmanTreeFree(HT->left);HuffmanTreeFree(HT->right);free(HT);}
}int main(void)
{int M, N;scanf("%d", &N);getchar();MinHeap *heap = CreateHeap(N);char in; int fre;HuffmanTree *temp = NULL;int frequency[N];for ( int i = 0; i < N; i ++ ) {scanf("%c %d", &in, &fre);frequency[i] = fre;temp = ( HuffmanTree * ) malloc( sizeof(HuffmanTree) );temp->frequency = fre;temp->left = temp->right = NULL;Heap_Push(heap, temp);getchar();}scanf("%d", &M);getchar();HuffmanTree *HT = CreateHuffmanTree(heap);int trueWPL = getWPL(HT, root);char code[SIZE];for ( int i = 0; i < M; i ++) {bool flag = true;int WPL = 0;HuffmanTree *T = ( HuffmanTree *) malloc( sizeof(HuffmanTree) );T->frequency = NULLVALUE;T->left = T->right = NULL;for ( int j = 0; j < N; j ++ ) {scanf("%c %s", &in, code);getchar();if ( flag && ( strlen(code) > N - 1 || !isPrefixCode(T, code, frequency[j]) ) ) {flag = false;}WPL += strlen(code) * frequency[j];}if ( flag && WPL == trueWPL ) {printf("Yes\n");}else {printf("No\n");}HuffmanTreeFree(T);}Heap_Free(heap);return 0;
}