自己写的一个后缀树算法查找一个字符串的最长重复子串
在上个星期面试一家公司的笔试题上面的最后一道题就是写程序查找一个字符串的最长重复子串。当时想了很长时间没想出什么好方法,就把一个算法复杂度比较高的算法写上去了。回来上机把那个算法写了一遍测试没问题,然后自己又到网上面查查还有什么方法,然后发现好像有种叫做后缀树的方法,然后看那个方法,当时没给出代码,看图看了老半天加之自己想了好几个小时终于知道后缀树是个什么东西。然后自己萌生了一个自己写一个后缀树算法解决那个重复子串的问题。然后写了一天终于写出来了。后续有做了一些测试,发现自己写的一个只有几十个字母的字符串比之前的那个算法慢了好几十倍,想了很久想不出原因,后来自己随机生成了一个10000个字符的字符串使用后缀树算法比旧的方法快了4倍,所以后缀树算法还是一个比较优秀的算法的。但是为了以后能够回来看下自己写的东西,所以就写这篇博客记录一下自己写的后缀树算法的源代码。一下是代码
classSuffixTreeNode;typedefSuffixTreeNode*SuffixTreeNodePtr;classSuffixTreeNode{public:SuffixTreeNode();voidinitNode();SuffixTreeNodePtr&returnChildsAt(inti);intcmpSameLength(constchar*start);voidsetHead(constchar*start);constchar*getHead();voidsetLen(intlength);intgetLen();voidsetStartPos(intpos);intgetStartPos();private:constchar*pHead;intlen;intstart;SuffixTreeNode*childs[256];};classSuffixTree{public:SuffixTree();~SuffixTree();intinsert(constchar*start,intpos);private:SuffixTreeNode*allocNode();boolallocBufferNode(intsize=1024);intinnerInsert(SuffixTreeNode*pNode,constchar*start,intpos,intpreSameLength);SuffixTreeNode*root;SuffixTreeNode*freeNode;intmaxStrLen;std::vector<SuffixTreeNode*>nodeBuff;};SuffixTreeNode::SuffixTreeNode(){initNode();}voidSuffixTreeNode::initNode(){memset(this,0,sizeof(SuffixTreeNode));}SuffixTreeNodePtr&SuffixTreeNode::returnChildsAt(inti){returnchilds[i];}intSuffixTreeNode::cmpSameLength(constchar*start){intlength=0;if(pHead!=NULL)for(;(length<len)&&(pHead[length]==start[length]);length++);elsereturn0;returnlength;}voidSuffixTreeNode::setHead(constchar*start){pHead=start;}constchar*SuffixTreeNode::getHead(){returnpHead;}voidSuffixTreeNode::setLen(intlength){len=length;}intSuffixTreeNode::getLen(){returnlen;}voidSuffixTreeNode::setStartPos(intpos){start=pos;}intSuffixTreeNode::getStartPos(){returnstart;}SuffixTree::SuffixTree():root(NULL),freeNode(NULL){}SuffixTree::~SuffixTree(){for(size_ti=0;i<nodeBuff.size();i++){SuffixTreeNode*pNode=nodeBuff.at(i);if(pNode!=NULL){free(pNode);}}}boolSuffixTree::allocBufferNode(intsize){SuffixTreeNode*pNode=(SuffixTreeNode*)malloc(sizeof(SuffixTreeNode)*size);if(pNode==NULL){returnfalse;}nodeBuff.push_back(pNode);for(inti=0;i<size;i++){pNode->returnChildsAt(0)=freeNode;freeNode=pNode;pNode++;}returntrue;}SuffixTreeNode*SuffixTree::allocNode(){if(freeNode==NULL){if(!allocBufferNode())returnNULL;}assert(freeNode!=NULL);SuffixTreeNode*pNode=freeNode;freeNode=freeNode->returnChildsAt(0);returnpNode;}intSuffixTree::insert(constchar*start,intpos){if(root==NULL){root=allocNode();if(root==NULL){return0;}root->initNode();maxStrLen=strlen(start);}returninnerInsert(root,start,pos,0);}intSuffixTree::innerInsert(SuffixTreeNode*pNode,constchar*start,intpos,intpreSameLength){if(pNode==NULL)return0;intsameLength=pNode->cmpSameLength(start);if(sameLength<pNode->getLen()){SuffixTreeNode*pRetNode=allocNode();if(pRetNode==NULL){return0;}pRetNode->initNode();pRetNode->setHead(pNode->getHead()+sameLength);pRetNode->setLen(pNode->getLen()-sameLength);pRetNode->setStartPos(pNode->getStartPos());pNode->setLen(sameLength);for(inti=0;pNode->returnChildsAt(i)!=NULL;i++){pRetNode->returnChildsAt(i)=pNode->returnChildsAt(i);pNode->returnChildsAt(i)=NULL;}pNode->returnChildsAt(0)=pRetNode;pRetNode=allocNode();if(pRetNode==NULL){return0;}pRetNode->initNode();pRetNode->setHead(start+sameLength);pRetNode->setLen(maxStrLen-(pos+preSameLength+sameLength));pRetNode->setStartPos(pos);pNode->returnChildsAt(1)=pRetNode;}elseif(sameLength==pNode->getLen()){intindex=0;for(;pNode->returnChildsAt(index)!=NULL;index++){if((pNode->returnChildsAt(index)->getHead())[0]==start[sameLength]){returnsameLength+innerInsert(pNode->returnChildsAt(index),start+sameLength,pos,preSameLength+sameLength);}}SuffixTreeNode*pRetNode=allocNode();if(pRetNode==NULL){return0;}pRetNode->initNode();pRetNode->setHead(start+sameLength);pRetNode->setLen(maxStrLen-(pos+preSameLength+sameLength));pRetNode->setStartPos(pos);pNode->returnChildsAt(index)=pRetNode;}returnsameLength;}stringfindMax(string&ret){intmaxLen=0;intmaxPos=0;SuffixTreetree;constchar*str=ret.c_str();for(inti=0;str[i]!='\0';i++){intcurLen=tree.insert(str+i,i);if(curLen>maxLen){maxLen=curLen;maxPos=i;}}returnret.substr(maxPos,maxLen);}
findMax函数就是那个找到最长重复子串那个函数了。
声明:本站所有文章资源内容,如无特殊说明或标注,均为采集网络资源。如若本站内容侵犯了原著者的合法权益,可联系本站删除。