C++实现文件压缩及解压缩
原理:Huffman树的应用:Huffman编码,为出现频率较高的字符指定较短的码字,而为出现频率较低的字符指定较短的码字,可以实现二进制文件的压缩。
Heap.h
#pragmaonce#include<vector>//仿函数template<classT>structLesser{booloperator()(constT&l,constT&r){returnl<r;}};template<classT>structGreater{booloperator()(constT&l,constT&r){returnl>r;}};template<classT,classCompare=Lesser<T>>classHeap{public:Heap(){}Heap(constT*a,size_tsize){for(size_ti=0;i<size;++i){_a.push_back(a[i]);}for(inti=(_a.size()-2)/2;i>=0;--i){_AdjustDown(i);}}voidPush(constT&x){_a.push_back(x);_AdjustUp(_a.size()-1);}voidPop(){assert(!_a.empty());swap(_a[0],_a[_a.size()-1]);_a.pop_back();_AdjustDown(0);}T&Top(){assert(!_a.empty());return_a[0];}boolEmpty(){return_a.empty();}size_tSize(){return_a.size();}protected:void_AdjustUp(intchild){Comparecmp;intparent=(child-1)/2;while(child>0)//parent>=0?{if(cmp(_a[child],_a[parent])){swap(_a[child],_a[parent]);child=parent;parent=(child-1)/2;}else{break;}}}void_AdjustDown(intparent){Comparecmp;intchild=parent*2+1;while(child<_a.size()){if(child+1<_a.size()&&cmp(_a[child+1],_a[child])){++child;}if(cmp(_a[child],_a[parent])){swap(_a[child],_a[parent]);parent=child;child=parent*2+1;}else{break;}}}protected:vector<T>_a;};
HuffmanTree.h
#pragmaonce#include"Heap.h"template<classT>structHuffmanTreeNode{HuffmanTreeNode<T>*_left;HuffmanTreeNode<T>*_right;HuffmanTreeNode<T>*_parent;T_weight;HuffmanTreeNode(constT&weight):_left(NULL),_right(NULL),_parent(NULL),_weight(weight){}};template<classT>classHuffmanTree{typedefHuffmanTreeNode<T>Node;public:HuffmanTree():_root(NULL){}~HuffmanTree(){_Destory(_root);}HuffmanTree(constT*a,size_tsize,constT&invalid){_root=_CreateTree(a,size,invalid);}Node*GetRoot(){return_root;}protected:Node*_CreateTree(constT*a,size_tsize,constT&invalid){assert(a);structCompare{booloperator()(constNode*l,constNode*r){returnl->_weight<r->_weight;}};Heap<Node*,Compare>minHeap;for(size_ti=0;i<size;++i){if(a[i]!=invalid){minHeap.Push(newNode(a[i]));}}while(minHeap.Size()>1){Node*left=minHeap.Top();minHeap.Pop();Node*right=minHeap.Top();minHeap.Pop();Node*parent=newNode(left->_weight+right->_weight);parent->_left=left;parent->_right=right;left->_parent=parent;right->_parent=parent;minHeap.Push(parent);}returnminHeap.Top();}void_Destory(Node*root){if(root==NULL)return;_Destory(root->_left);_Destory(root->_right);}protected:Node*_root;};voidHuffmanTreeTest(){inta[]={0,1,2,3,4,5,6,7,8,9};HuffmanTree<int>ht(a,10,-1);}
FileCompress.h
#pragmaonce#include"HuffmanTree.h"#include<Windows.h>structCharInfo{char_ch;int_count;string_code;CharInfo(unsignedcharch=0):_ch(ch),_count(0){}CharInfooperator+(constCharInfo&x){CharInfotmp;tmp._count=_count+x._count;returntmp;}booloperator!=(constCharInfo&x)const{return_count!=x._count;}booloperator<(constCharInfo&x)const{return_count<x._count;}};template<classT>classFileCompress{public:FileCompress(){for(size_ti=0;i<256;++i){_infos[i]=i;}}voidCompress(constchar*filename){assert(filename);FILE*fOutFile=fopen(filename,"rb");assert(fOutFile);charch=fgetc(fOutFile);intcharCount=0;//统计字符总数while(!feof(fOutFile)){++charCount;_infos[(unsignedchar)ch]._count++;ch=fgetc(fOutFile);}//创建Huffman树CharInfoinvalid(0);HuffmanTree<CharInfo>t(_infos,256,invalid);//由Huffman树生成Huffman编码_GenerateHuffmanCode(t.GetRoot());//压缩stringcompressFilename=filename;compressFilename+=".compress";FILE*fInCompress=fopen(compressFilename.c_str(),"wb");assert(fInCompress);fseek(fOutFile,0,SEEK_SET);ch=fgetc(fOutFile);charvalue=0;intsize=0;while(!feof(fOutFile)){string&code=_infos[(unsignedchar)ch]._code;for(size_ti=0;i<code.size();++i){value<<=1;if(code[i]=='1'){value|=1;}++size;if(size==8){fputc(value,fInCompress);size=0;value=0;}}ch=fgetc(fOutFile);}if(size>0)//补位{value<<=(8-size);fputc(value,fInCompress);}//写配置文件,方便解压缩时读取stringconfigFilename=filename;configFilename+=".config";FILE*fInConfig=fopen(configFilename.c_str(),"wb");assert(fInConfig);stringline;charbuffer[128];//将字符总数写入配置文件第一行line+=itoa(charCount,buffer,10);line+="\n";fputs(line.c_str(),fInConfig);line.clear();for(size_ti=0;i<256;++i){if(_infos[i]._count){line+=_infos[i]._ch;line+=',';line+=itoa(_infos[i]._count,buffer,10);line+='\n';fputs(line.c_str(),fInConfig);}line.clear();}fclose(fOutFile);fclose(fInCompress);fclose(fInConfig);}voidUnCompress(constchar*filename){//读取配置文件stringconfigFilename=filename;configFilename+=".config";FILE*fOutConfig=fopen(configFilename.c_str(),"rb");assert(fOutConfig);stringline;//读取字符总数_ReadLine(fOutConfig,line);intcharCount=atoi(line.c_str());line.clear();while(_ReadLine(fOutConfig,line)){if(!line.empty()){unsignedcharch=line[0];_infos[ch]._count=atoi(line.substr(2).c_str());line.clear();}else{line.clear();_ReadLine(fOutConfig,line);unsignedcharch='\n';_infos[ch]._count=atoi(line.substr(1).c_str());line.clear();}}//重建Huffman树CharInfoinvalid(0);HuffmanTree<CharInfo>t(_infos,256,invalid);//读.compress文件,写.uncompress文件stringcompressFilename=filename;compressFilename+=".compress";FILE*fOutCompress=fopen(compressFilename.c_str(),"rb");assert(fOutCompress);stringuncompressFilename=filename;uncompressFilename+=".uncompress";FILE*fInUncompress=fopen(uncompressFilename.c_str(),"wb");assert(fInUncompress);HuffmanTreeNode<CharInfo>*root=t.GetRoot();HuffmanTreeNode<CharInfo>*cur=root;intpos=7;charch=fgetc(fOutCompress);while(1){if(ch&(1<<pos))cur=cur->_right;elsecur=cur->_left;if(cur->_left==NULL&&cur->_right==NULL){fputc(cur->_weight._ch,fInUncompress);cur=root;if(--charCount==0)//字符已读取完,遇到补位的0不再读取{break;}}--pos;if(pos==-1){ch=fgetc(fOutCompress);pos=7;}}fclose(fOutCompress);fclose(fInUncompress);}protected:void_GenerateHuffmanCode(HuffmanTreeNode<CharInfo>*root){if(root==NULL)return;_GenerateHuffmanCode(root->_left);_GenerateHuffmanCode(root->_right);if(root->_left==NULL&&root->_right==NULL){HuffmanTreeNode<CharInfo>*cur=root;HuffmanTreeNode<CharInfo>*parent=root->_parent;string&code=_infos[(unsignedchar)cur->_weight._ch]._code;while(parent){if(parent->_left==cur)code+='0';if(parent->_right==cur)code+='1';cur=parent;parent=cur->_parent;}reverse(code.begin(),code.end());}}bool_ReadLine(FILE*filename,string&line){charch=fgetc(filename);if(ch==EOF)returnfalse;while(ch!=EOF&&ch!='\n'){line+=ch;ch=fgetc(filename);}returntrue;}protected:CharInfo_infos[256];};voidCompressTest(){//压缩FileCompress<CharInfo>compress;intCompressBegin=GetTickCount();compress.Compress("Input.BIG");intCompressEnd=GetTickCount();cout<<"压缩用时:"<<CompressEnd-CompressBegin<<"ms"<<endl;}voidUncompressTest(){//解压缩FileCompress<CharInfo>uncompress;intUncompressBegin=GetTickCount();uncompress.UnCompress("Input.BIG");intUncompressEnd=GetTickCount();cout<<"解压缩用时:"<<UncompressEnd-UncompressBegin<<"ms"<<endl;}
Test.cpp
#include<iostream>usingnamespacestd;#include<assert.h>#include"FileCompress.h"intmain(){CompressTest();UncompressTest();return0;}
下面是对一个大小为8.04MB文件的测试:
结果成功压缩、解压缩:
压缩后的文件大小:
压缩后的文件:
配置文件:
用Beyond Compare文本比较工具检查原文件与解压后的文件:
无差异
声明:本站所有文章资源内容,如无特殊说明或标注,均为采集网络资源。如若本站内容侵犯了原著者的合法权益,可联系本站删除。