利用哈弗曼编码进行压缩
//sZipDemo.cpp:定义控制台应用程序的入口点。//#include"stdafx.h"#include"HuffmanTree.cpp"#include"sZip.h"#include<fstream>#include<iostream>usingnamespacestd;int_tmain(intargc,_TCHAR*argv[]){charstr1[10000];ifstreamin;in.open("text.txt",ios::in|ios::binary);in>>str1;cout<<"成功读取text.doc!"<<endl;intnum=in.gcount();in.close();chartempstr[100000];for(inti=0;i<=num;i++)tempstr[i]=str1[i];unsignedintcount[128];chardata[128];HuffmanTree<char>Tree;Tree.creat(tempstr,data,count);charcharCount[256];for(inti=0;count[i]!=0;i++){staticintn=-1;if(count[i]<10){//个位charCount[++n]=count[i]+48;charCount[++n]='#';}elseif(count[i]>=10&&count[i]<100){//十位charCount[++n]=count[i]/10+48;charCount[++n]=count[i]%10+48;charCount[++n]='#';}elseif(count[i]>=100&&count[i]<1000){//百位charCount[++n]=count[i]/100+48;charCount[++n]=count[i]%100/10+48;charCount[++n]=count[i]%10+48;charCount[++n]='#';}else{//千位charCount[++n]=count[i]/1000+48;charCount[++n]=count[i]%1000/100+48;charCount[++n]=count[i]%100/10+48;charCount[++n]=count[i]%10+48;charCount[++n]='#';}charCount[n+1]=0;}ofstreamout;out.open("text11.txt",ios::out|ios::binary);out<<data<<'#'<<'#'<<charCount;out<<'#'<<'#';sZipzip;charstr2[100000];intsize=0;zip.zip(str1,str2,Tree.root,size,data);out<<str2;out.close();cout<<"成功压缩text.doc文件,压缩文件为text11.doc"<<endl;cout<<"------------------------------------------------------------------------"<<endl;charstr3[100000];charstr4[10000];in.open("text11.txt",ios::in|ios::binary);in.read(str3,80000);in.close();str3[in.gcount()]=0;zip.unzip(str3,str4);out.open("text22.txt",ios::out|ios::binary);out<<str4;out.close();return0;}
#pragmaoncetemplate<classT>structHuffmanTreeNode{Tdata;//数据unsignedintcount;//权值charcode[128];//结点的哈弗曼编码HuffmanTreeNode<T>*leftChild;//左子女HuffmanTreeNode<T>*rightChild;//右子女HuffmanTreeNode<T>*parent;//父节点HuffmanTreeNode():leftChild(NULL),rightChild(NULL),parent(NULL){}//构造函数booloperator<=(HuffmanTreeNode&R){returncount<=R.count;}//重载<=和>运算符booloperator>(HuffmanTreeNode&R){returncount>R.count;}};template<classT>classHuffmanTree{public:HuffmanTree();HuffmanTree(HuffmanTree<T>&t);~HuffmanTree(void);voidprintData();intgetWPL();//获取WPL//打开文件voidcreat(charstr[],chardata[],unsignedintcount[]);//哈弗曼的建立voidcreat(chardata[],unsignedintcount[]);//重载template<classT>friendvoidfindKey(HuffmanTreeNode<T>*subTree,Tkey,charcode[]){//寻找结点的codestaticTfirstNodeData=subTree->data;if(subTree!=NULL){if(subTree->data!=key){findKey(subTree->leftChild,key,code);findKey(subTree->rightChild,key,code);}else{inti=0;for(;subTree->code[i]!=0;i++)code[i]=subTree->code[i];code[i]=0;}}}HuffmanTreeNode<T>*root;protected:private:intWPL;voidcensus(unsignedintcount[],chardata[],charstr[],unsignedint&size);//建立哈弗曼树时统计各字符出现的次数voiddeleteTree(HuffmanTreeNode<T>*subTree);//删除subTree结点的所有子结点voidencode(HuffmanTreeNode<T>*subTree,charcode[],charm=0);//编码voidcalculateWPL(HuffmanTreeNode<T>*subTree,inti=0);//计算WPLvoidprintCode(HuffmanTreeNode<T>*subTree);//输出哈弗曼编码的子函数voidprintData(HuffmanTreeNode<T>*subTree);//输出所有结点data和权值的子函数};
#pragmaonce#include"StdAfx.h"#include"HuffmanTree.h"#include"MinHeap.cpp"#include<fstream>template<classT>HuffmanTree<T>::HuffmanTree(){WPL=0;root=newHuffmanTreeNode<T>;};template<classT>HuffmanTree<T>::~HuffmanTree(){deleteTree(root);//删除哈弗曼树}template<classT>voidHuffmanTree<T>::creat(charstr[],chardata[],unsignedintcount[]){unsignedintsize;//字符的个数for(unsignedinti=0;i<128;i++)//count的初始化count[i]=0;census(count,data,str,size);minHeap<HuffmanTreeNode<T>>hp;HuffmanTreeNode<T>*parent,*first,*second;HuffmanTreeNode<T>*work;for(unsignedinti=0;i<size;i++){//把每个元素都插入最小堆中work=newHuffmanTreeNode<T>;work->data=data[i];work->count=count[i];work->leftChild=NULL;work->rightChild=NULL;work->parent=NULL;hp.insert(*work);}for(unsignedinti=0;i<size-1;i++){parent=newHuffmanTreeNode<T>;first=newHuffmanTreeNode<T>;second=newHuffmanTreeNode<T>;hp.removeMin(*first);//每次从最小堆中取出两个最小的数hp.removeMin(*second);parent->leftChild=first;//parent作为他们的父节点parent->rightChild=second;parent->count=first->count+second->count;parent->data='#';//parent不是根结点所以把它的data设为'#'first->parent=parent;second->parent=parent;hp.insert(*parent);//再把parent插入最小堆中,进行循环判断}root=parent;//最后一个parent就是根结点charcode[128];for(unsignedinti=0;i<128;i++)code[i]=0;encode(root,code);//对结点进行哈弗曼编码(空的地方取0)};template<classT>voidHuffmanTree<T>::creat(chardata[],unsignedintcount[]){unsignedintsize=0;for(unsignedinti=0;count[i]!=0;i++)size++;minHeap<HuffmanTreeNode<T>>hp;HuffmanTreeNode<T>*parent,*first,*second;HuffmanTreeNode<T>*work;for(unsignedinti=0;i<size;i++){//把每个元素都插入最小堆中work=newHuffmanTreeNode<T>;work->data=data[i];work->count=count[i];work->leftChild=NULL;work->rightChild=NULL;work->parent=NULL;hp.insert(*work);}for(unsignedinti=0;i<size-1;i++){parent=newHuffmanTreeNode<T>;first=newHuffmanTreeNode<T>;second=newHuffmanTreeNode<T>;hp.removeMin(*first);//每次从最小堆中取出两个最小的数hp.removeMin(*second);parent->leftChild=first;//parent作为他们的父节点parent->rightChild=second;parent->count=first->count+second->count;parent->data='#';//parent不是根结点所以把它的data设为'#'first->parent=parent;second->parent=parent;hp.insert(*parent);//再把parent插入最小堆中,进行循环判断}root=parent;//最后一个parent就是根结点charcode[128];for(unsignedinti=0;i<128;i++)code[i]=0;encode(root,code);//对结点进行哈弗曼编码(空的地方取0)}template<classT>voidHuffmanTree<T>::deleteTree(HuffmanTreeNode<T>*subTree){if(subTree!=NULL){deleteTree(subTree->leftChild);//后序遍历来删除结点deleteTree(subTree->rightChild);deletesubTree;}}template<classT>voidHuffmanTree<T>::census(unsignedintcount[],chardata[],charstr[],unsignedint&size){//用于统计字符出现的次数for(inti=0;str[i]!=0;i++){if(str[i]=='#')//当出现的是已出现过的字符时就执行下次循环continue;staticintn=0;count[n]++;//第一次出现时加一data[n]=str[i];for(intj=0;str[j]!=0;j++){if(str[i]==str[j]&&i!=j){count[n]++;//每次出现重复的时候加一并且str[j]='#';//表明已出现过}}data[++n]=0;size=n;}}template<classT>voidHuffmanTree<T>::encode(HuffmanTreeNode<T>*subTree,charcode[],charm=0){if(subTree!=NULL){intj;for(j=0;code[j]!=0;j++){subTree->code[j]=code[j];}for(j=0;code[j]!=0;j++){}subTree->code[j]=m;subTree->code[j+1]=0;encode(subTree->leftChild,subTree->code,'0');encode(subTree->rightChild,subTree->code,'1');}}template<classT>voidHuffmanTree<T>::calculateWPL(HuffmanTreeNode<T>*subTree,inti=0){if(subTree!=NULL){if(subTree->data!='#')WPL+=i*subTree->count;//i为当前层数,count为结点权值calculateWPL(subTree->leftChild,++i);//前序遍历i--;calculateWPL(subTree->rightChild,++i);i--;}}template<classT>intHuffmanTree<T>::getWPL(){returnWPL;}
#pragmaonce#defineDafaultSize50template<classE>classminHeap{public:minHeap(intsize=DafaultSize);~minHeap();boolinsert(constE&x);boolremoveMin(E&x);private:E*heap;intcurrentSize;intmaxHeapSize;voidsiftDown(intstart,intm);voidsiftUp(intstart);};
#pragmaonce#include"StdAfx.h"#include"MinHeap.h"template<classE>minHeap<E>::minHeap(intsize){maxHeapSize=(DafaultSize<size)?size:DafaultSize;heap=newE[maxHeapSize];if(heap==NULL){//cout<<"堆存储分配失败"<<endl;}currentSize=0;};template<classE>minHeap<E>::~minHeap(){deleteheap;}template<classE>voidminHeap<E>::siftDown(intstart,intm){inti=start;//初始位置intj=2*i+1;//j是i的左子女位置Etemp=heap[i];while(j<=m){if(j<m&&heap[j]>heap[j+1])//左子女大于右子女时,j指向右子女j++;if(temp<=heap[j])break;else{//大则向上移heap[i]=heap[j];i=j;j=2*j+1;}}heap[i]=temp;};template<classE>voidminHeap<E>::siftUp(intstart){intj=start;inti=(j-1)/2;//i的左子女是jEtemp=heap[j];while(j>0){if(heap[i]<=temp)//如果父节点大break;else{//如果父节点小,上滑heap[j]=heap[i];j=i;i=(i-1)/2;}}heap[j]=temp;};template<classE>boolminHeap<E>::insert(constE&x){if(currentSize==maxHeapSize){//堆满时//cout<<"堆已满"<<endl;returnfalse;}heap[currentSize]=x;//在heap尾插入xsiftUp(currentSize);//对x进行上滑判断currentSize++;returntrue;};template<classE>boolminHeap<E>::removeMin(E&x){if(currentSize==0){//堆空时//cout<<"堆为空!!"<<endl;returnfalse;}x=heap[0];//从heap头获取remove的数据heap[0]=heap[currentSize-1];//从heap尾获取元素补到heap头的位置currentSize--;//元素个数减一siftDown(0,currentSize-1);//再对heap从头到尾进行下滑判断returntrue;};
#pragmaonce#include"HuffmanTree.cpp"classsZip{public:sZip(void);~sZip(void);voidzip(charstr1[],charstr2[],HuffmanTreeNode<char>*subTree,int&size,chardata[]);voidunzip(charstr1[],charstr2[]);private:boolcodeEqual(charcode1[],charcode2[][128],int&num);voidgetHuffCode(HuffmanTreeNode<char>*subTree,charcode[][128],chardata[]);voidstrBinary(charstr[],int&size);voidbinaryStr(charstr1[]);};
#include"StdAfx.h"#include"sZip.h"#include<iostream>usingnamespacestd;sZip::sZip(void){}sZip::~sZip(void){}voidsZip::zip(charstr1[],charstr2[],HuffmanTreeNode<char>*subTree,int&size,chardata[]){charcode[128][128];getHuffCode(subTree,code,data);//获取subTree的哈弗曼编码unsignedinti=0;unsignedintn=-1;for(;str1[i]!=0;i++){//遍历str1,把里面的字符转化为哈弗曼编码存在str2中intj=0;while(data[j]!=str1[i]){j++;if(data[j]==0)break;}if(data[j]==str1[i]){for(intcount=0;code[j][count]!=0;count++)str2[++n]=code[j][count];str2[n+1]=0;}}strBinary(str2,size);//把str2的每一个字符变成每一个bit,8个bit合成一个字符}voidsZip::unzip(charstr1[],charstr2[]){unsignedintcount[128];for(inti=0;i<127;i++)count[i]=0;charcode[128][128];chardata[128];for(inti=0;i<128;i++)code[i][0]=0;inti=0;for(;str1[i]!='#';i++)data[i]=str1[i];data[i]=0;intj=i+2;i=-1;while(1){if(str1[j]!='#'&&str1[j+1]=='#')//个位count[++i]=str1[j]-48;elseif(str1[j+1]!='#'&&str1[j+2]=='#'){//十位count[++i]=int(str1[j]-48)*10+int(str1[j+1]-48);j++;}elseif(str1[j+1]!='#'&&str1[j+2]!='#'&&str1[j+3]=='#'){//百位count[++i]=int(str1[j]-48)*100+int(str1[j+1]-48)*10+int(str1[j+2]-48);j=j+2;}elseif(str1[j+1]!='#'&&str1[j+2]!='#'&&str1[j+3]!='#'&&str1[j+4]=='#'){//千位count[++i]=int(str1[j]-48)*1000+int(str1[j+1]-48)*100+int(str1[j+2]-48)*10+int(str1[j+3]-48);}elseif(str1[j]=='#'&&str1[j+1]=='#')//读完break;elsecout<<"读取错误"<<endl;j=j+2;}HuffmanTree<char>tree;tree.creat(data,count);getHuffCode(tree.root,code,data);j=j+2;i=-1;chartempchar[100000];for(;str1[j]!=0;j++)tempchar[++i]=str1[j];tempchar[i+1]=0;binaryStr(tempchar);//把压缩文件转化为二进制编码chartempcode[128];j=-1;intnum;intn=-1;i=0;for(;tempchar[i]!=0;i++){//遍历tempchar,把它从哈弗曼编码转化为对应的字符tempcode[++n]=tempchar[i];tempcode[n+1]=0;if(codeEqual(tempcode,code,num)){str2[++j]=data[num];for(n=0;tempcode[n]!=0;n++)//重置tempcodetempcode[n]=0;n=-1;}elsecontinue;}str2[++j]=0;}voidsZip::getHuffCode(HuffmanTreeNode<char>*subTree,charcode[][128],chardata[]){for(inti=0;data[i]!=0;i++)findKey(subTree,data[i],code[i]);}voidsZip::strBinary(charstr[],int&size){charstr2[100000];charbit[8];intn=0;intcount=0;while(str[n]=='1'||str[n]=='0'){for(inti=0;i<8;i++){if(str[n]==0){str[n]='0';str[n+1]=0;}bit[i]=str[n];n++;}str2[count]=0;for(inti=0;i<8;i++){if(bit[i]=='1'){chartemp=1;str2[count]=(str2[count]<<1)|temp;//给它最后一位加上一即:左移一位,然后和00000001求或}elsestr2[count]=(str2[count]<<1);}count++;}for(inti=0;i<=count;i++)str[i]=str2[i];for(inti=count;str[i]!=0;i++)str[i]=0;size=count;}voidsZip::binaryStr(charstr1[]){charstr2[100000];inti=-1;intn=0;while(str1[n]!=0){inttemp[8];inttempchar=abs(str1[n]);for(intcount=0;count<8;count++){temp[count]=tempchar%2;tempchar/=2;}if(str1[n]<0&&str1[n]>-128){//当为负数时,二进制为正数第一位为1(符号位)的反码最后一位加一(补码)temp[7]=1;for(intcount=0;count<7;count++){//求反码if(temp[count]==0)temp[count]=1;elsetemp[count]=0;}intx=1;//进位数for(intcount=0;count<8;count++){//末位加一if(temp[count]==0){if(x==1){temp[count]=1;x=0;}}elseif(x==1)temp[count]=0;}}for(intcount=7;count!=-1;count--)str2[++i]=char(temp[count]+48);n++;}str2[++i]=0;for(intcount=0;count<=i;count++)str1[count]=str2[count];}boolsZip::codeEqual(charcode1[],charcode2[][128],int&num){unsignedintsize1=0;unsignedintsize2=0;for(inti=0;code1[i]!=0;i++)size1++;for(inti=0;code2[i][0]!=0;i++)size2++;for(inti=0;i<size2;i++){intj=0;intsize3=0;for(intn=0;code2[i][n]!=0;n++)size3++;for(;j<size1;j++){if(code1[j]!=code2[i][j])break;//有一位不等于就直接跳出}if(j==size1&&j==size3){num=i;returntrue;}}returnfalse;}
声明:本站所有文章资源内容,如无特殊说明或标注,均为采集网络资源。如若本站内容侵犯了原著者的合法权益,可联系本站删除。