利用哈弗曼编码进行压缩-创新互联
// sZipDemo.cpp : 定义控制台应用程序的入口点。 // #include "stdafx.h" #include "HuffmanTree.cpp" #include "sZip.h" #include#include using namespace std; int _tmain(int argc, _TCHAR* argv[]) { char str1[10000]; ifstream in; in.open("text.txt",ios::in|ios::binary); in>>str1; cout<<"成功读取text.doc!"< Tree; Tree.creat(tempstr,data,count); char charCount[256]; for(int i = 0;count[i] != 0;i++){ static int n = -1; if(count[i] < 10){ //个位 charCount[++n] = count[i]+48; charCount[++n] = '#'; } else if(count[i] >= 10 && count[i] < 100){ //十位 charCount[++n] = count[i]/10+48; charCount[++n] = count[i]%10+48; charCount[++n] = '#'; } else if(count[i] >= 100 && count[i] < 1000){ //百位 charCount[++n] = count[i]/100+48; charCount[++n] = count[i]%100/10+48; charCount[++n] = count[i]%10+48; charCount[++n] = '#'; } else{ //千位 charCount[++n] = count[i]/1000+48; charCount[++n] = count[i]%1000/100+48; charCount[++n] = count[i]%100/10+48; charCount[++n] = count[i]%10+48; charCount[++n] = '#'; } charCount[n+1] = 0; } ofstream out; out.open("text11.txt",ios::out|ios::binary); out< #pragma once templatestruct HuffmanTreeNode{ T data; //数据 unsigned int count; //权值 char code[128]; //结点的哈弗曼编码 HuffmanTreeNode *leftChild; //左子女 HuffmanTreeNode *rightChild; //右子女 HuffmanTreeNode *parent; //父节点 HuffmanTreeNode():leftChild(NULL),rightChild(NULL),parent(NULL){} //构造函数 bool operator <=(HuffmanTreeNode &R){return count <= R.count;} //重载<=和>运算符 bool operator >(HuffmanTreeNode &R){return count > R.count;} }; template class HuffmanTree { public: HuffmanTree(); HuffmanTree(HuffmanTree &t); ~HuffmanTree(void); void printData(); int getWPL(); //获取WPL //打开文件 void creat(char str[],char data[],unsigned int count[]); //哈弗曼的建立 void creat(char data[],unsigned int count[]); //重载 template friend void findKey(HuffmanTreeNode *subTree,T key,char code[]){ //寻找结点的code static T firstNodeData = subTree->data; if(subTree != NULL){ if(subTree->data != key){ findKey(subTree->leftChild,key,code); findKey(subTree->rightChild,key,code); } else{ int i = 0; for(;subTree->code[i] != 0;i++) code[i] = subTree->code[i]; code[i] = 0; } } } HuffmanTreeNode *root; protected: private: int WPL; void census(unsigned int count[],char data[],char str[],unsigned int &size); //建立哈弗曼树时统计各字符出现的次数 void deleteTree(HuffmanTreeNode *subTree); //删除subTree结点的所有子结点 void encode(HuffmanTreeNode *subTree,char code[] ,char m = 0); //编码 void calculateWPL(HuffmanTreeNode *subTree,int i = 0); //计算WPL void printCode(HuffmanTreeNode *subTree); //输出哈弗曼编码的子函数 void printData(HuffmanTreeNode *subTree); //输出所有结点data和权值的子函数 }; #pragma once #include "StdAfx.h" #include "HuffmanTree.h" #include "MinHeap.cpp" #includetemplate HuffmanTree ::HuffmanTree(){ WPL = 0; root = new HuffmanTreeNode ; }; template HuffmanTree ::~HuffmanTree(){ deleteTree(root); //删除哈弗曼树 } template void HuffmanTree ::creat(char str[],char data[],unsigned int count[]){ unsigned int size; //字符的个数 for(unsigned int i = 0; i < 128; i++) //count的初始化 count[i] = 0; census(count,data,str,size); minHeap > hp; HuffmanTreeNode *parent, *first, *second; HuffmanTreeNode *work; for(unsigned int i = 0; i < size; i++){ //把每个元素都插入最小堆中 work = new HuffmanTreeNode ; work->data = data[i]; work->count = count[i]; work->leftChild = NULL; work->rightChild = NULL; work->parent = NULL; hp.insert(*work); } for(unsigned int i = 0; i < size-1; i++){ parent = new HuffmanTreeNode ; first = new HuffmanTreeNode ; second = new HuffmanTreeNode ; hp.removeMin(*first); //每次从最小堆中取出两个最小的数 hp.removeMin(*second); parent->leftChild = first; //parent作为他们的父节点 parent->rightChild = second; parent->count = first->count + second->count; parent->data = '#'; //parent不是根结点所以把它的data设为'#' first->parent = parent; second->parent = parent; hp.insert(*parent); //再把parent插入最小堆中,进行循环判断 } root = parent; //最后一个parent就是根结点 char code[128]; for(unsigned int i = 0;i < 128; i++) code[i] = 0; encode(root,code); //对结点进行哈弗曼编码(空的地方取0) }; template void HuffmanTree ::creat(char data[],unsigned int count[]){ unsigned int size = 0; for(unsigned int i = 0; count[i] != 0; i++) size++; minHeap > hp; HuffmanTreeNode *parent, *first, *second; HuffmanTreeNode *work; for(unsigned int i = 0; i < size; i++){ //把每个元素都插入最小堆中 work = new HuffmanTreeNode ; work->data = data[i]; work->count = count[i]; work->leftChild = NULL; work->rightChild = NULL; work->parent = NULL; hp.insert(*work); } for(unsigned int i = 0; i < size-1; i++){ parent = new HuffmanTreeNode ; first = new HuffmanTreeNode ; second = new HuffmanTreeNode ; hp.removeMin(*first); //每次从最小堆中取出两个最小的数 hp.removeMin(*second); parent->leftChild = first; //parent作为他们的父节点 parent->rightChild = second; parent->count = first->count + second->count; parent->data = '#'; //parent不是根结点所以把它的data设为'#' first->parent = parent; second->parent = parent; hp.insert(*parent); //再把parent插入最小堆中,进行循环判断 } root = parent; //最后一个parent就是根结点 char code[128]; for(unsigned int i = 0;i < 128; i++) code[i] = 0; encode(root,code); //对结点进行哈弗曼编码(空的地方取0) } template void HuffmanTree ::deleteTree(HuffmanTreeNode *subTree){ if(subTree != NULL){ deleteTree(subTree->leftChild); //后序遍历来删除结点 deleteTree(subTree->rightChild); delete subTree; } } template void HuffmanTree ::census(unsigned int count[],char data[],char str[],unsigned int &size){ //用于统计字符出现的次数 for(int i = 0; str[i] != 0;i++){ if(str[i] == '#') //当出现的是已出现过的字符时就执行下次循环 continue; static int n = 0; count[n]++; //第一次出现时加一 data[n] = str[i]; for(int j = 0; str[j] != 0;j++){ if(str[i] == str[j] && i != j){ count[n]++; //每次出现重复的时候加一并且 str[j] = '#'; //表明已出现过 } } data[++n] = 0; size = n; } } template void HuffmanTree ::encode(HuffmanTreeNode *subTree,char code[] ,char m = 0){ if(subTree != NULL){ int j; for(j = 0;code[j] != 0;j++){ subTree->code[j] = code[j]; } for(j = 0;code[j] != 0;j++){ } subTree->code[j] = m; subTree->code[j+1] = 0; encode(subTree->leftChild,subTree->code,'0'); encode(subTree->rightChild,subTree->code,'1'); } } template void HuffmanTree ::calculateWPL(HuffmanTreeNode *subTree,int i = 0){ if(subTree != NULL){ if(subTree->data != '#') WPL += i*subTree->count; //i为当前层数,count为结点权值 calculateWPL(subTree->leftChild,++i); //前序遍历 i--; calculateWPL(subTree->rightChild,++i); i--; } } template int HuffmanTree ::getWPL(){ return WPL; } #pragma once #define DafaultSize 50 templateclass minHeap{ public: minHeap(int size = DafaultSize); ~minHeap(); bool insert(const E& x); bool removeMin(E& x); private: E *heap; int currentSize; int maxHeapSize; void siftDown(int start, int m); void siftUp(int start); }; #pragma once #include "StdAfx.h" #include "MinHeap.h" templateminHeap ::minHeap(int size){ maxHeapSize = (DafaultSize minHeap ::~minHeap(){ delete heap; } template void minHeap ::siftDown(int start, int m){ int i = start; //初始位置 int j = 2*i+1; //j是i的左子女位置 E temp = heap[i]; while(j <= m){ if(j heap[j+1]) //左子女大于右子女时,j指向右子女 j++; if(temp <= heap[j]) break; else{ //大则向上移 heap[i] = heap[j]; i = j; j = 2*j+1; } } heap[i] = temp; }; template void minHeap ::siftUp(int start){ int j = start; int i = (j-1)/2; //i的左子女是j E temp = heap[j]; while(j>0){ if(heap[i] <= temp) //如果父节点大 break; else{ //如果父节点小,上滑 heap[j] = heap[i]; j = i; i = (i-1)/2; } } heap[j] = temp; }; template bool minHeap ::insert(const E& x){ if(currentSize == maxHeapSize){ //堆满时 // cout<<"堆已满"< bool minHeap ::removeMin(E& x){ if(currentSize == 0){ //堆空时 // cout<<"堆为空!!"< #pragma once #include "HuffmanTree.cpp" class sZip { public: sZip(void); ~sZip(void); void zip(char str1[],char str2[],HuffmanTreeNode* subTree,int &size,char data[]); void unzip(char str1[],char str2[]); private: bool codeEqual(char code1[],char code2[][128],int &num); void getHuffCode(HuffmanTreeNode * subTree,char code[][128],char data[]); void strBinary(char str[],int &size); void binaryStr(char str1[]); }; #include "StdAfx.h" #include "sZip.h" #include网站建设哪家好,找创新互联公司!专注于网页设计、网站建设、微信开发、重庆小程序开发、集团企业网站建设等服务项目。为回馈新老客户创新互联还提供了曲松免费建站欢迎大家使用!using namespace std; sZip::sZip(void){ } sZip::~sZip(void) { } void sZip::zip(char str1[],char str2[],HuffmanTreeNode * subTree,int &size,char data[]){ char code[128][128]; getHuffCode(subTree,code,data); //获取subTree的哈弗曼编码 unsigned int i = 0; unsigned int n = -1; for(; str1[i] != 0 ;i++){ //遍历str1,把里面的字符转化为哈弗曼编码存在str2中 int j = 0; while(data[j] != str1[i]){ j++; if(data[j] == 0) break; } if(data[j] == str1[i]){ for(int count = 0;code[j][count] != 0;count++) str2[++n] = code[j][count]; str2[n+1]=0; } } strBinary(str2,size); //把str2的每一个字符变成每一个bit,8个bit合成一个字符 } void sZip::unzip(char str1[],char str2[]){ unsigned int count[128]; for(int i = 0;i < 127;i++) count[i] = 0; char code[128][128]; char data[128]; for(int i = 0;i < 128;i++) code[i][0] = 0; int i = 0; for(;str1[i] != '#';i++) data[i] = str1[i]; data[i] = 0; int j = i+2; i = -1; while(1){ if(str1[j] != '#' && str1[j+1] == '#') //个位 count[++i] = str1[j]-48; else if(str1[j+1] != '#' && str1[j+2] == '#'){ //十位 count[++i] = int(str1[j]-48)*10+int(str1[j+1]-48); j++; } else if(str1[j+1] != '#' && str1[j+2] != '#' && str1[j+3] == '#'){ //百位 count[++i] = int(str1[j]-48)*100+int(str1[j+1]-48)*10+int(str1[j+2]-48); j = j+2; } else if(str1[j+1] != '#' && str1[j+2] != '#' && str1[j+3] != '#'&& str1[j+4] =='#'){ //千位 count[++i] = int(str1[j]-48)*1000+int(str1[j+1]-48)*100+int(str1[j+2]-48)*10+int(str1[j+3]-48); } else if(str1[j] == '#' && str1[j+1] == '#') //读完 break; else cout<<"读取错误"< tree; tree.creat(data,count); getHuffCode(tree.root,code,data); j = j+2; i = -1; char tempchar[100000]; for(;str1[j] != 0;j++) tempchar[++i] = str1[j]; tempchar[i+1] = 0; binaryStr(tempchar); //把压缩文件转化为二进制编码 char tempcode[128]; j = -1; int num; int n = -1; i = 0; for(;tempchar[i] != 0;i++){ //遍历tempchar,把它从哈弗曼编码转化为对应的字符 tempcode[++n] = tempchar[i]; tempcode[n+1] = 0; if(codeEqual(tempcode,code,num)){ str2[++j] = data[num]; for(n = 0;tempcode[n] != 0;n++) //重置tempcode tempcode[n] = 0; n = -1; } else continue; } str2[++j] = 0; } void sZip::getHuffCode(HuffmanTreeNode * subTree,char code[][128],char data[]){ for(int i = 0;data[i] != 0;i++) findKey(subTree,data[i],code[i]); } void sZip::strBinary(char str[],int &size){ char str2[100000]; char bit[8]; int n = 0; int count = 0; while(str[n] == '1' || str[n] == '0'){ for(int i = 0;i < 8 ;i++){ if(str[n] == 0){ str[n] = '0'; str[n+1] = 0; } bit[i] = str[n]; n++; } str2[count] = 0; for(int i = 0;i < 8 ;i++){ if(bit[i] == '1'){ char temp = 1; str2[count] = (str2[count]<<1)|temp; //给它最后一位加上一即:左移一位,然后和00000001求或 } else str2[count] = (str2[count]<<1); } count++; } for(int i = 0;i <= count;i++) str[i] = str2[i]; for(int i = count;str[i] != 0;i++) str[i] = 0; size = count; } void sZip::binaryStr(char str1[]){ char str2[100000]; int i = -1; int n = 0; while(str1[n] != 0){ int temp[8]; int tempchar = abs(str1[n]); for(int count = 0;count < 8;count++){ temp[count] = tempchar%2; tempchar /= 2; } if(str1[n]<0 && str1[n] > -128){ //当为负数时,二进制为正数第一位为1(符号位)的反码最后一位加一(补码) temp[7] = 1; for(int count = 0;count < 7;count++){ //求反码 if(temp[count] == 0) temp[count] = 1; else temp[count] = 0; } int x = 1; //进位数 for(int count = 0;count < 8;count++){ //末位加一 if(temp[count] == 0){ if(x == 1){ temp[count] = 1; x = 0; } } else if(x == 1) temp[count] = 0; } } for(int count = 7;count != -1;count--) str2[++i] = char(temp[count]+48); n++; } str2[++i] = 0; for(int count = 0;count <= i;count++) str1[count] = str2[count]; } bool sZip::codeEqual(char code1[],char code2 [][128],int &num){ unsigned int size1 = 0; unsigned int size2 = 0; for(int i = 0;code1[i] != 0;i++) size1++; for(int i = 0;code2[i][0] != 0;i++) size2++; for(int i = 0;i < size2;i++){ int j = 0; int size3 = 0; for(int n = 0;code2[i][n] != 0;n++) size3++; for(;j < size1;j++){ if(code1[j] != code2[i][j]) break; //有一位不等于就直接跳出 } if(j == size1 && j == size3){ num = i; return true; } } return false; } 另外有需要云服务器可以了解下创新互联scvps.cn,海内外云服务器15元起步,三天无理由+7*72小时售后在线,公司持有idc许可证,提供“云服务器、裸金属服务器、高防服务器、香港服务器、美国服务器、虚拟主机、免备案服务器”等云主机租用服务以及企业上云的综合解决方案,具有“安全稳定、简单易用、服务可用性高、性价比高”等特点与优势,专为企业上云打造定制,能够满足用户丰富、多元化的应用场景需求。
本文名称:利用哈弗曼编码进行压缩-创新互联
网址分享:http://pcwzsj.com/article/depdio.html