中文分词技术
// Dictionary-based Chinese word segmentation using maximum matching.
//
// NOTE: the original description called this "forward maximum matching", but the
// code always looks for the longest dictionary word at the END of the remaining
// text. Tokens are therefore produced right-to-left, i.e. in the reverse of
// reading order (the original author noted the reversed output as well).
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;

namespace ClusterCharater
{
    /// <summary>
    /// Splits a string into words from a fixed built-in dictionary.
    /// Construct with the text, call <see cref="start"/>, then read <see cref="Reslut"/>.
    /// </summary>
    public class SplitChineseCharacter
    {
        // Built-in word list. Static because it is identical for every instance.
        private static readonly String[] dictionary = { "今天", "是", "星期", "六", "星期六" };

        // No suffix longer than the longest dictionary word can ever match,
        // so the scan in start() skips those candidates entirely.
        private static readonly int maxWordLength = dictionary.Max(word => word.Length);

        // Text still to be segmented; start() consumes it from the right.
        private String input = null;

        /// <summary>
        /// Tokens found by <see cref="start"/>, in right-to-left order of appearance.
        /// (The misspelled name is kept so existing callers continue to compile.)
        /// </summary>
        public List<String> Reslut = new List<string>();

        /// <summary>Creates a segmenter for the given text.</summary>
        /// <param name="input">Text to segment. May be null or empty, in which case
        /// <see cref="start"/> produces no tokens.</param>
        public SplitChineseCharacter(String input)
        {
            this.input = input;
        }

        /// <summary>
        /// Segments the text and appends each token to <see cref="Reslut"/>.
        /// At every step the longest dictionary word that ends the remaining text is
        /// taken; if no word matches, the last character is discarded and the search
        /// continues. The text is consumed, so calling this again adds nothing.
        /// </summary>
        public void start()
        {
            // Previously a null input crashed with NullReferenceException here.
            if (string.IsNullOrEmpty(this.input))
            {
                return;
            }

            // Iterative form of the original recursion: no call-stack growth when
            // many characters have to be dropped.
            while (this.input.Length > 0)
            {
                int matchStart = -1;

                // Longest candidate first: the smaller the index, the longer the suffix.
                int firstCandidate = Math.Max(0, this.input.Length - maxWordLength);
                for (int i = firstCandidate; i < this.input.Length; i++)
                {
                    if (this.isInDictionary(this.input.Substring(i)))
                    {
                        matchStart = i;
                        break;
                    }
                }

                if (matchStart >= 0)
                {
                    Reslut.Add(this.input.Substring(matchStart));

                    // Cut off only the matched suffix. The previous
                    // Replace(word, "") deleted EVERY occurrence of the word,
                    // silently losing repeated words elsewhere in the text.
                    this.input = this.input.Substring(0, matchStart);
                }
                else
                {
                    // Nothing in the dictionary ends here: drop the last character.
                    this.input = this.input.Substring(0, this.input.Length - 1);
                }
            }
        }

        /// <summary>Checks whether a candidate word is in the built-in dictionary.</summary>
        /// <param name="temp">Candidate word; null is treated as "not found".</param>
        /// <returns>true if <paramref name="temp"/> exactly equals a dictionary entry.</returns>
        public Boolean isInDictionary(String temp)
        {
            return temp != null && Array.IndexOf(dictionary, temp) >= 0;
        }
    }
}

namespace ClusterCharater
{
    /// <summary>
    /// Demo form for the segmenter. The remaining members and the closing braces
    /// of this class and namespace follow directly after this constructor.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }
/// <summary>
/// Click handler for button1: segments the trimmed text of <c>inputtext</c> and
/// appends every resulting token, each followed by "/", to <c>output</c>.
/// Text already present in <c>output</c> is kept, since the tokens are appended.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    SplitChineseCharacter segmenter = new SplitChineseCharacter(inputtext.Text.Trim());
    segmenter.start();

    // Tokens are appended one at a time, in the order they are stored in Reslut.
    segmenter.Reslut.ForEach(token =>
    {
        output.Text += token + "/";
    });
}
}
}
声明:本站所有文章资源内容,如无特殊说明或标注,均为采集网络资源。如若本站内容侵犯了原著者的合法权益,可联系本站删除。