如何将tfidf映射到[1,3] 区间内


(1)建立映射x→arctanx,此为一一映射,把全体实数R映射到开区间(-π/2,π/2)
(2)从开区间(-π/2,π/2)到开区间(1,3)只需要一个线性映射就可以,设为f(x)=ax+b,则有f(-π/2)=1且f(π/2)=3,解得a=2/π,b=2
综合一下,映射x→2(arctanx)/π+2,即把全体实数映射到(1,3)。注意:tf-idf得分通常非负,此时arctanx∈[0,π/2),所以实际得到的函数值落在[2,3)内。



所以表达式可以写成


"script_score": {
  "script": "return atan(_score.doubleValue())*2/3.14159+2"
}



为了只使用函数值(丢弃原始查询得分),需要设置 "boost_mode": "replace"。


所以整个请求如下


curl -XGET 'http://localhost:9200/*/*/_search?pretty&explain' -d '{
  "size": 1,
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "should": [
            {
              "match": {
                "_all": {
                  "query": "关键字",
                  "type": "boolean",
                  "operator": "AND",
                  "boost": 10
                }
              }
            },
            {
              "match": {
                "company_name": {
                  "query": "关键字",
                  "type": "boolean",
                  "operator": "AND",
                  "boost": 10
                }
              }
            }
          ],
          "minimum_should_match": "1"
        }
      },
      "functions": [
        {
          "script_score": {
            "script": "return atan(_score.doubleValue())*2/3.14159+2"
          }
        },
        {
          "filter": {
            "match": {
              "company_name": {
                "query": "关键字",
                "type": "boolean",
                "operator": "AND"
              }
            }
          },
          "weight": 2
        }
      ],
      "score_mode": "sum",
      "boost_mode": "replace"
    }
  },
  "aggregations": {
    "agg": {
      "terms": {
        "field": "member_id",
        "size": 0,
        "order": {
          "top_hit": "desc"
        }
      },
      "aggregations": {
        "top_hit": {
          "max": {
            "script": {
              "inline": "_score"
            }
          }
        }
      }
    }
  }
}'