代碼:
#include<fstream>
#include<iostream>
#include<iterator>
#include<vector>
#define ASCLL_NUMBER 128
// Node type that doubles as a linked-list node (via `next`) and a
// binary-tree node (via `left` / `right`).
typedef char ElementType;
typedef int Frequency;
typedef char Code;
struct Element{
ElementType e; // the character held by this node (HuffmanCode stores -1 in merged nodes)
Frequency f; // occurrence count; for a merged node, the sum of its two children
Element* next; // next node in the same hash bucket chain
Element* left; // left child in the tree, NULL for a leaf
Element* right; // right child in the tree, NULL for a leaf
};
// Fill A[0..n-1] with freshly allocated nodes whose character, frequency
// and all three links are zero / NULL.
void initElements(Element** A, int n){
    int idx = 0;
    while(idx < n){
        Element* node = new Element;
        node->e = 0;
        node->f = 0;
        node->next = NULL;
        node->left = NULL;
        node->right = NULL;
        A[idx] = node;
        ++idx;
    }
}
// 掃描并獲取每種字符的頻率, 以ASCLL碼作為索引
void scanAndGetFrequency(char* s, Element** A, Frequency* fmax, long* count){
int i = 0;
for(char c = s[i]; c != '\0'; c = s[++i]){
(*count)++;
A[c]->e = c;
A[c]->f++;
if(*fmax < A[c]->f)
*fmax = A[c]->f;
}
}
// Return the smallest prime that is >= p/2 + 1, i.e. a prime larger than
// half of the argument. Primality is tested by trial division, trying
// divisors from candidate/2 down to 2.
int Prime(int p){
    for(int candidate = p/2 + 1; ; ++candidate){
        int divisor = candidate/2;
        while(divisor >= 2 && candidate % divisor != 0)
            --divisor;
        // The countdown reaches exactly 1 only when no divisor was found
        // (and the candidate is at least 2).
        if(divisor == 1)
            return candidate;
    }
}
// Insert ele into the hash table, in bucket (ele->f % p). Each bucket is a
// chain hanging off a sentinel node hash[n] and is kept sorted by
// ascending frequency; ele is placed before the first node whose
// frequency is not smaller than its own.
//   hash - array of p sentinel nodes
//   p    - number of buckets
//   ele  - node to insert; ignored when NULL or when its frequency is 0
void insert(Element** hash, int p, Element* ele){
    if(!ele || ele->f == 0) return;
    // Walk with a local cursor. Advancing hash[n] itself would overwrite
    // the bucket head and drop every node that was stepped over.
    Element* prev = hash[ele->f % p];
    while(prev->next && prev->next->f < ele->f)
        prev = prev->next;
    ele->next = prev->next;
    prev->next = ele;
}
// Order the nodes by frequency in preparation for Huffman coding: every
// one of the n entries of A is handed to insert() for the p-bucket table
// hash.
void sortWithFrequency(Element** A, int n, Element** hash, int p){
    Element** const end = A + n;
    for(Element** it = A; it < end; ++it)
        insert(hash, p, *it);
}
// Detach and return the node with the smallest frequency.
// *now is the frequency currently being searched for. The head of bucket
// (*now % p) is taken when its frequency does not exceed *now; otherwise
// *now is incremented and the search continues. Returns NULL once *now
// has grown past count.
Element* minEle(Element** hash, int p, int* now, long count){
    for(;;){
        Element* bucket = hash[*now % p];
        Element* head = bucket->next;
        if(head && head->f <= *now){
            bucket->next = head->next; // unlink the head from its chain
            return head;
        }
        ++(*now);
        if(*now > count)
            return NULL;
    }
}
// Merge two nodes under a parent: tree gets ele2[0] as its left child,
// ele2[1] as its right child, and the sum of their frequencies.
void buildTree(Element** ele2, Element* tree){
    Element* const leftChild = ele2[0];
    Element* const rightChild = ele2[1];
    tree->left = leftChild;
    tree->right = rightChild;
    tree->f = leftChild->f + rightChild->f;
}
// Build the Huffman tree from the nodes stored in the hash table.
// Repeatedly removes the two lowest-frequency nodes, joins them under a
// new parent (character -1) and inserts the parent back, until a single
// node remains.
//   hash  - bucket table filled by sortWithFrequency / insert
//   p     - number of buckets
//   count - upper bound for the frequency search in minEle
// Returns the root of the tree, or NULL when the table holds no nodes.
// (Returning NULL rather than a dummy leaf lets showCode and decode skip
// an empty input through their own NULL checks.)
Element* HuffmanCode(Element** hash, int p, long count){
    int now = 1;
    while(1){
        Element* first = minEle(hash, p, &now, count);
        if(!first)
            return NULL; // nothing to encode
        Element* second = minEle(hash, p, &now, count);
        if(!second)
            return first; // only one node left: it is the root
        // Allocate the parent only when a pair really exists, so no node
        // is created just to be thrown away.
        Element* tree = new Element;
        tree->e = -1;
        tree->f = 0;
        tree->next = NULL;
        tree->left = NULL;
        tree->right = NULL;
        Element* ele2[2] = { first, second };
        buildTree(ele2, tree);
        insert(hash, p, tree);
    }
}
// Print the code table: one line per leaf, containing the character, its
// frequency and its bit string, separated by tabs.
//   tree - current subtree (NULL is ignored)
//   code - scratch buffer holding the path from the root so far
//   i    - depth of tree, i.e. the index in code of the next bit
void showCode(Element* tree, char* code, int i){
    if(tree == NULL) return;
    if(tree->left == NULL){
        // Leaf: terminate the accumulated path and print it.
        code[i] = '\0';
        std::cout<<tree->e<<'\t'<<tree->f<<'\t'<<code<<'\n';
        return;
    }
    const int depth = i + 1;
    code[i] = '0'; // left branch
    showCode(tree->left, code, depth);
    code[i] = '1'; // right branch
    showCode(tree->right, code, depth);
}
// 解碼并顯示
void decode(char* s, Element* tree){
if(!tree) return;
Element* t = tree;
int i = 0;
while(1){
char c = s[i];
if(t->left){
if(c == '0')
t = t->left;
if(c == '1')
t = t->right;
i++;
if((c = s[i]) == '\0'){
std::cout<<std::endl;
return;
}
}else{
std::cout<<t->e;
t = tree;
}
}
}
int main(){
// 讀待編碼文件, 將其存入到字符串s1中
std::ifstream q1;
int length;
q1.open("q1.txt");
q1.seekg(0, std::ios::end);
length = q1.tellg();
q1.seekg(0, std::ios::beg);
char s1[length];
q1.read(s1, length);
q1.close();
// 掃描s1, 獲取每種字符的頻率, 儲存到數組A中
Element* A[ASCLL_NUMBER];
initElements(A, ASCLL_NUMBER);
int fmax = 0;
long count = 0;
scanAndGetFrequency(s1, A, &fmax, &count);
// 按頻率排序
int p = Prime(fmax);
Element* hash[p];
initElements(hash, p);
sortWithFrequency(A, ASCLL_NUMBER, hash, p);
// 進行編碼
Element* tree = HuffmanCode(hash, p, count);
// 輸出編碼結果
char code[ASCLL_NUMBER/2];
int i = 0;
showCode(tree, code, i);
// 讀待解碼文件, 將其存入到字符串s2中
std::ifstream q2;
q2.open("q2.txt");
q2.seekg(0, std::ios::end);
length = q2.tellg();
q2.seekg(0, std::ios::beg);
char s2[length];
q2.read(s2, length);
q2.close();
// 解碼, 并輸出結果
decode(s2, tree);
return 0;
}
測試文件:
原始文件 q1.txt:
We could represent a priority queue as a sorted contiguous list, in which case removal of an entry is immediate, but insertion would take time proportional to n, the number of entries in the queue. Or we could represent it as an unsorted list, in which case insertion is rpid but removal is slow.
根據 q1 的編碼結果,寫出對 and 的編碼,q2.txt:
111000011110
運行結果:
有任何問題請回復提出。然后歡迎關注微信公眾號格物致愚:
格物致愚