Lucene 同义词的索引
/**
 * Source of synonyms for a single term.
 *
 * <p>Implementations map a term to the alternative terms that should be
 * treated as equivalent to it.
 */
public interface SynonymEngine {

    /**
     * Looks up the synonyms of the given term.
     *
     * @param key the term to look up
     * @return the synonyms of {@code key}; may be {@code null} or empty when
     *         none are known (callers should treat both cases as "no synonyms")
     */
    String[] getSynonyms(String key);
}
publicclassSynonymEngineImplimplementsSynonymEngine{privatestaticHashMap<String,String[]>map=newHashMap<String,String[]>();static{map.put("quick",newString[]{"fast","speedy"});map.put("jumps",newString[]{"leaps","hops"});map.put("over",newString[]{"above"});map.put("lazy",newString[]{"apathetic","sluggish"});map.put("dog",newString[]{"canine","pooch"});}@OverridepublicString[]getSynonyms(Stringkey){//TODOAuto-generatedmethodstubreturnmap.get(key);}}
publicclassSynonymFilterextendsTokenFilter{privateSynonymEngineengine;privateCharTermAttributect;privatePositionIncrementAttributept;privateStack<String>stack;privateAttributeSource.Statecurrent;protectedSynonymFilter(TokenStreaminput,SynonymEngineengine){super(input);this.engine=engine;ct=this.addAttribute(CharTermAttribute.class);pt=this.addAttribute(PositionIncrementAttribute.class);stack=newStack<String>();}@OverridepublicbooleanincrementToken()throwsIOException{if(stack.size()>0){this.restoreState(current);Stringp=stack.pop();ct.setEmpty();ct.append(p);pt.setPositionIncrement(0);returntrue;}System.out.println("++++++"+ct);if(!input.incrementToken())returnfalse;System.out.println("------"+ct);if(addSynonym(ct.toString())){current=this.captureState();}returntrue;}privatebooleanaddSynonym(Stringname){String[]sa=engine.getSynonyms(name);if(sa!=null&&sa.length>0){for(Strings:sa){stack.push(s);}returntrue;}else{returnfalse;}}}
publicclassSynonymAnalyzerextendsAnalyzer{privateSynonymEngineengine;publicSynonymAnalyzer(SynonymEngineengine){this.engine=engine;}@OverridepublicTokenStreamtokenStream(Strings,Readerreader){//TODOAuto-generatedmethodstubreturnnewSynonymFilter(newStopFilter(Version.LUCENE_35,newLowerCaseFilter(Version.LUCENE_35,newStandardFilter(Version.LUCENE_35,newStandardTokenizer(Version.LUCENE_35,reader))),StopAnalyzer.ENGLISH_STOP_WORDS_SET),engine);}}
publicclassTestSynonym{privateRAMDirectorydirectory;@Testpublicvoidinit(){directory=newRAMDirectory();SynonymEngineengine=newSynonymEngineImpl();IndexWriterConfigconfig=newIndexWriterConfig(Version.LUCENE_35,newSynonymAnalyzer(engine));Stringcontent="Thequickbrownfoxjumpsoverthelazydog";try{IndexWriterwriter=newIndexWriter(directory,config);Documentdoc=newDocument();doc.add(newField("content",content,Field.Store.YES,Field.Index.ANALYZED));writer.addDocument(doc);writer.close();IndexReaderreader=IndexReader.open(directory);IndexSearchersearcher=newIndexSearcher(reader);TopDocsdocs=searcher.search(newTermQuery(newTerm("content","pooch")),10);for(ScoreDocsd:docs.scoreDocs){Documentd=searcher.doc(sd.doc);System.out.println(d.get("content"));}}catch(CorruptIndexExceptione){//TODOAuto-generatedcatchblocke.printStackTrace();}catch(LockObtainFailedExceptione){//TODOAuto-generatedcatchblocke.printStackTrace();}catch(IOExceptione){//TODOAuto-generatedcatchblocke.printStackTrace();}}}
声明：本站所有文章资源内容，如无特殊说明或标注，均为采集网络资源。如若本站内容侵犯了原著者的合法权益，可联系本站删除。