
数据结构二叉树实验报告——统计英文小说《百年孤独》字符频率并构建哈夫曼树
#include<bits/stdc++.h>
// Size of the symbol alphabet: character values 0..NUM-1 each get a frequency
// counter, a leaf node and a row in the code table.
#define NUM 127
// Path of the text file that is read and encoded.
#define SOURCE_ADDRESS "C:\\Users\\XXX\\Desktop\\Input-HuffmanCode-Solitude.txt"
// Path of the file that receives the encoded output.
#define DESTINATION_ADDRESS "C:\\Users\\XXX\\Desktop\\Output-HuffmanCode-Solitude.bin"
using namespace std;
// Code table indexed by character value. makeTable copies each leaf's code
// into its row as a NUL-terminated string; OutPut writes those strings out.
char table[NUM][1000] = {0};
// One node of the Huffman tree. makeTable treats a node whose left and right
// are both NULL as a leaf; every other node joins two subtrees.
struct HuffmanNode{
// Character value for a leaf; main stores the placeholder '-' in merged nodes.
char name;
// Occurrence count for a leaf, or the sum of both children's counts for a merged node.
int fre;
// Child whose codes get '1' appended by makeTable.
HuffmanNode * left;
// Child whose codes get '0' appended by makeTable.
HuffmanNode * right;
};
// Reads the whole file at `address` into a freshly malloc'ed buffer.
//
// address: path of the file to read (opened in text mode).
// Returns a NUL-terminated buffer that the caller must release with free(),
// or NULL after reporting the reason with perror when the file cannot be
// opened, measured, allocated for, or read.
char * Input(char * address){
    FILE * f = fopen(address, "r");
    if(!f){
        perror("error opening file");
        return NULL;
    }
    // Measure the file by seeking to its end; both calls can fail
    // (for example on a non-seekable stream), so check them.
    long fsize = -1;
    if(fseek(f, 0, SEEK_END) == 0){
        fsize = ftell(f);
    }
    if(fsize < 0 || fseek(f, 0, SEEK_SET) != 0){
        perror("error measuring file");
        fclose(f);
        return NULL;
    }
    // One extra byte for the terminating NUL.
    char * input = static_cast<char *>(malloc(static_cast<size_t>(fsize) + 1));
    if(!input){
        perror("memory error");
        fclose(f);
        return NULL;
    }
    // In text mode fread may deliver fewer bytes than ftell reported
    // (line-ending translation), so terminate at the count actually read
    // instead of leaving uninitialised bytes in front of the terminator.
    const size_t got = fread(input, 1, static_cast<size_t>(fsize), f);
    if(ferror(f)){
        perror("error reading file");
        free(input);
        fclose(f);
        return NULL;
    }
    input[got] = 0;
    fclose(f);
    return input;
}
int cmp(const void * a, const void * b){
return (*(HuffmanNode **)b)->fre - (*(HuffmanNode **)a)->fre;
}
// Walks the Huffman tree depth-first and stores every leaf's code in the
// global table, indexed by the leaf's character.
//
// root: subtree to walk; it is dereferenced unconditionally, and a non-leaf
//       node is expected to have both children.
// pre:  code accumulated on the way down to root, as a NUL-terminated string.
// Descending left appends '1' to the code, descending right appends '0'.
void makeTable(HuffmanNode * root, char pre[1000]){
    const bool isLeaf = (root->left == NULL && root->right == NULL);
    if(isLeaf){
        // A leaf stands for a character: its code is the path taken so far.
        strcpy(table[root->name], pre);
        return;
    }
    // One scratch buffer serves both children: the recursive call only reads
    // its prefix, so the last symbol can simply be swapped between the calls.
    char childCode[1000];
    strcpy(childCode, pre);
    const size_t tail = strlen(childCode);
    childCode[tail + 1] = '\0';
    childCode[tail] = '1';
    makeTable(root->left, childCode);
    childCode[tail] = '0';
    makeTable(root->right, childCode);
}
// Allocates and initialises one Huffman tree node with malloc.
//
// name: character stored in the node.
// fre:  frequency stored in the node (default 0).
// l, r: left and right children (default NULL, which makes the node a leaf).
// Returns the new node, to be released with free(), or NULL after reporting
// the failure with perror when the allocation fails.
HuffmanNode * creatNode(char name, int fre = 0, HuffmanNode * l = NULL, HuffmanNode * r = NULL){
    HuffmanNode * tem = static_cast<HuffmanNode *>(malloc(sizeof(HuffmanNode)));
    if(!tem){
        // Same reporting style as Input; avoids writing through a null pointer.
        perror("memory error");
        return NULL;
    }
    tem->name = name;
    tem->fre = fre;
    tem->left = l;
    tem->right = r;
    return tem;
}
void OutPut(char * adress, char input[], int n){
//输出到目标文件
FILE * f = fopen(adress, "wb"); //open
if(!f){
perror("error opening output file");
return;
}
for(int i = 0; i < n; i++){ //output
fprintf(f, "%s", table[input[i]]);
}
fclose(f); //close
}
int main(){
//文件读取
char address[100] = SOURCE_ADDRESS;
char * input;
int inputLength;
input = Input(address);
inputLength = strlen(input);
//频数统计
int frequency[NUM] = {0};
for(int i = 0; i < inputLength; i++){
if(input[i] == '\n') continue;
frequency[input[i]]++;
}
//树初始化
HuffmanNode * trees[NUM];
for(int i = 0; i < NUM; i++){
HuffmanNode * t = (HuffmanNode *)malloc(sizeof(HuffmanNode));
t->name = i;
t->fre = frequency[i];
t->left = t->right = NULL;
trees[i] = t;
}
//排序
qsort(trees, NUM, sizeof(HuffmanNode *), cmp);
//构建哈夫曼树
int size = NUM;
while(size > 1){
HuffmanNode * t1 = trees[size-1];
HuffmanNode * t2 = trees[size-2];
HuffmanNode * tem = creatNode('-', t1->fre + t2->fre, t1, t2);
trees[size-2] = tem;
size--;
qsort(trees, size, sizeof(HuffmanNode *), cmp);
}
//编码
char pre[1000] = "";
makeTable(trees[0], pre);
//编码输出
char address_output[] = DESTINATION_ADDRESS;
OutPut(address_output, input, inputLength);
return 0;
}