这篇文章主要讲解了“PostgreSQL中match_unsorted_outer函数分析”,文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习“PostgreSQL中match_unsorted_outer函数分析”吧!

一、数据结构

Cost相关
注意:实际使用的参数值通过系统配置文件定义,而不是这里的常量定义!

typedefdoubleCost;/*executioncost(inpage-accessunits)*//*defaultsforcostsize.c'sCostparameters*//*NB:cost-estimationcodeshouldusethevariables,nottheseconstants!*//*注意:实际值通过系统配置文件定义,而不是这里的常量定义!*//*Ifyouchangethese,updatebackend/utils/misc/postgresql.sample.conf*/#defineDEFAULT_SEQ_PAGE_COST1.0//顺序扫描page的成本#defineDEFAULT_RANDOM_PAGE_COST4.0//随机扫描page的成本#defineDEFAULT_CPU_TUPLE_COST0.01//处理一个元组的CPU成本#defineDEFAULT_CPU_INDEX_TUPLE_COST0.005//处理一个索引元组的CPU成本#defineDEFAULT_CPU_OPERATOR_COST0.0025//执行一次操作或函数的CPU成本#defineDEFAULT_PARALLEL_TUPLE_COST0.1//并行执行,从一个worker传输一个元组到另一个worker的成本#defineDEFAULT_PARALLEL_SETUP_COST1000.0//构建并行执行环境的成本#defineDEFAULT_EFFECTIVE_CACHE_SIZE524288/*先前已有介绍,measuredinpages*/doubleseq_page_cost=DEFAULT_SEQ_PAGE_COST;doublerandom_page_cost=DEFAULT_RANDOM_PAGE_COST;doublecpu_tuple_cost=DEFAULT_CPU_TUPLE_COST;doublecpu_index_tuple_cost=DEFAULT_CPU_INDEX_TUPLE_COST;doublecpu_operator_cost=DEFAULT_CPU_OPERATOR_COST;doubleparallel_tuple_cost=DEFAULT_PARALLEL_TUPLE_COST;doubleparallel_setup_cost=DEFAULT_PARALLEL_SETUP_COST;inteffective_cache_size=DEFAULT_EFFECTIVE_CACHE_SIZE;Costdisable_cost=1.0e10;//1后面10个0,通过设置一个巨大的成本,让优化器自动放弃此路径intmax_parallel_workers_per_gather=2;//每次gather使用的worker数二、源码解读

nested loop join的算法实现伪代码如下:
FOR row#1 IN (select * from dataset#1) LOOP
FOR row#2 IN (select * from dataset#2 where row#1 is matched) LOOP
output values from row#1 and row#2
END LOOP
END LOOP

match_unsorted_outer函数通过在每个可能的外表访问路径上使用迭代替换或合并连接的方式尝试构造连接访问路径。

//------------------------------------------------match_unsorted_outer/**match_unsorted_outer*Createspossiblejoinpathsforprocessingasinglejoinrelation*'joinrel'byemployingeitheriterativesubstitutionor*mergejoiningoneachofitspossibleouterpaths(considering*onlyouterpathsthatarealreadyorderedwellenoughformerging).*通过在每个可能的外表访问路径上使用迭代替换或合并连接(只考虑已经排序得足够好,用于合并的外表访问路径),*为处理最终的连接关系“joinrel”创建可能的连接路径。**Wealwaysgenerateanestlooppathforeachavailableouterpath.*Infactwemaygenerateasmanyasfive:oneonthecheapest-total-cost*innerpath,oneonthesamewithmaterialization,oneonthe*cheapest-startup-costinnerpath(ifdifferent),oneonthe*cheapest-totalinner-indexscanpath(ifany),andoneonthe*cheapest-startupinner-indexscanpath(ifdifferent).*我们总是为每个可用的外表访问路径生成一个nestedloop访问路径。*实际上,可能会生成多达5个:*一个是内表访问成本最低的路径,*一个是物化但总成本最低的路径,*一个是启动成本最低的内表访问路径(如果不同的话),*一个是成本最低的内表索引扫描路径(如果有的话),*一个是启动成本最低的内表索引扫描路径(如果不同的话)。**Wealsoconsidermergejoinsifmergejoinclausesareavailable.See*detailedcommentsingenerate_mergejoin_paths.*如果mergejoin条件子句可用,也会考虑mergejoin。详细请参阅generate_mergejoin_paths中的注释。**'joinrel'isthejoinrelation*'outerrel'istheouterjoinrelation*'innerrel'istheinnerjoinrelation*'jointype'isthetypeofjointodo*'extra'containsadditionalinputvalues*/staticvoidmatch_unsorted_outer(PlannerInfo*root,//优化器信息RelOptInfo*joinrel,//连接生成的relationRelOptInfo*outerrel,//外表RelOptInfo*innerrel,//内表JoinTypejointype,//连接类型JoinPathExtraData*extra)//额外的信息{JoinTypesave_jointype=jointype;boolnestjoinOK;booluseallclauses;Path*inner_cheapest_total=innerrel->cheapest_total_path;Path*matpath=NULL;ListCell*lc1;/**Nestlooponlysupportsinner,left,semi,andantijoins.Also,ifwe*aredoingarightorfullmergejoin,wemustuse*all*themergeclauses*asjoinclauses,elsewewillnothaveavalidplan.(Althoughthese*twoflagsarecurrentlyinverses,keepthemseparateforclarityand*possiblefuturechanges.)*Nestloop只支持内连接、左连接、半连接和反连接。*此外,如果正在做一个右连接或全合并连接,我们必须使用所有的mergeclauses条件作为连接子句,*否则我们将没有一个有效的计划。*(尽管这两个标志目前是反向的,但为了清晰和可能的未来变化,请将它们分开。)*/switch(jointype){caseJOIN_INNER:caseJOIN_LEFT:caseJOIN_SEMI:caseJOIN_ANTI:nestjoinOK=true;useallclauses=false;break;caseJOIN_RIGHT:caseJOIN_FULL:nestjoinOK=false;useallclauses=true;break;caseJOIN_UNIQUE_OUTER:caseJOIN_UNIQUE_INNER:jointype=JOIN_INNER;nestjoinOK=true;useallclauses=false;break;default:elog(ERROR,"unrecognizedjointype:%d",(int)jointype);nestjoinOK=false;/*keepcompilerquiet*/useallclauses=false;break;}/**Ifinner_cheapest_totalisparameterizedbytheouterrel,ignoreit;*wewillconsideritbelowasamemberofcheapest_parameterized_paths,*buttheotherpossibilitiesconsideredinthisroutinearen'tusable.*如果inner_cheapest_total被外表rel参数化,则忽略它;*下面将把它看作cheapest_parameterized_paths的成员,但是这个处理过程中的其他尝试则不可用此信息。*/if(PATH_PARAM_BY_REL(inner_cheapest_total,outerrel))inner_cheapest_total=NULL;/**Ifweneedtounique-ifytheinnerpath,wewillconsideronlythe*cheapest-totalinner.*如果需要唯一化内部路径,将只考虑成本最低的内表访问路径。*/if(save_jointype==JOIN_UNIQUE_INNER){/*Nowaytodothiswithaninnerpathparameterizedbyouterrel*///除了使用外表rel参数化的内表访问路径,没有其他办法.if(inner_cheapest_total==NULL)return;inner_cheapest_total=(Path*)create_unique_path(root,innerrel,inner_cheapest_total,extra->sjinfo);Assert(inner_cheapest_total);}elseif(nestjoinOK){/**Considermaterializingthecheapestinnerpath,unless*enable_materialisofforthepathinquestionmaterializesits*outputanyway.*尝试实现成本最低的内表访问路径,除非enable_material关闭或该路径以其他方式物化其输出。*/if(enable_material&&inner_cheapest_total!=NULL&&!ExecMaterializesOutput(inner_cheapest_total->pathtype))matpath=(Path*)create_material_path(innerrel,inner_cheapest_total);}foreach(lc1,outerrel->pathlist)//遍历外表访问路径{Path*outerpath=(Path*)lfirst(lc1);List*merge_pathkeys;/**Wecannotuseanouterpaththatisparameterizedbytheinnerrel.*不能使用使用内表参数化的外表访问路径*/if(PATH_PARAM_BY_REL(outerpath,innerrel))continue;/**Ifweneedtounique-ifytheouterpath,it'spointlesstoconsider*anybutthecheapestouter.(XXXwedon'tconsiderparameterized*outers,norinners,forunique-ifiedcases.Shouldwe?)*如果需要唯一化外表访问路径,那么只考虑成本最低的外表访问路径是毫无意义的。*不考虑参数化的outers,也不考虑inners,因为它们是独特的情况。那我们应该做的是?)*/if(save_jointype==JOIN_UNIQUE_OUTER){if(outerpath!=outerrel->cheapest_total_path)continue;outerpath=(Path*)create_unique_path(root,outerrel,outerpath,extra->sjinfo);Assert(outerpath);}/**Theresultwillhavethissortorder(evenifitisimplementedas*anestloop,andevenifsomeofthemergeclausesareimplementedby*qpqualsratherthanastruemergeclauses):*最终结果将会有这样的排序顺序(即使它是作为nestloop实现的,*即使一些mergeclauses是由qpquals而不是作为真正的mergeclauses实现的):*/merge_pathkeys=build_join_pathkeys(root,joinrel,jointype,outerpath->pathkeys);if(save_jointype==JOIN_UNIQUE_INNER){/**Considernestloopjoin,butonlywiththeunique-ifiedcheapest*innerpath*尝试nestloopjoin,只是使用保证唯一性的成本最低的内表访问路径*/try_nestloop_path(root,joinrel,outerpath,inner_cheapest_total,merge_pathkeys,jointype,extra);}elseif(nestjoinOK){/**Considernestloopjoinsusingthisouterpathandvarious*availablepathsfortheinnerrelation.Weconsiderthe*cheapest-totalpathsforeachavailableparameterizationofthe*innerrelation,includingtheunparameterizedcase.*尝试使用这个外表访问路径和内表的各种可用路径的nestloop连接。*尝试内部关系的每个可用参数化(包括非参数化用例)的成本最低路径。*/ListCell*lc2;foreach(lc2,innerrel->cheapest_parameterized_paths)//遍历参数化路径{Path*innerpath=(Path*)lfirst(lc2);try_nestloop_path(root,joinrel,outerpath,innerpath,merge_pathkeys,jointype,extra);//尝试nestloopjoin}/*Alsoconsidermaterializedformofthecheapestinnerpath*/if(matpath!=NULL)try_nestloop_path(root,joinrel,outerpath,matpath,merge_pathkeys,jointype,extra);//也会尝试成本最低的物化内表访问路径}/*Can'tdoanythingelseifouterpathneedstobeunique'd*///如外表需保证唯一,则继续下一循环if(save_jointype==JOIN_UNIQUE_OUTER)continue;/*Can'tdoanythingelseifinnerrelisparameterizedbyouter*///内表是被外表参数化的,继续下一循环if(inner_cheapest_total==NULL)continue;/*Generatemergejoinpaths*///构造mergejoin路径generate_mergejoin_paths(root,joinrel,innerrel,outerpath,save_jointype,extra,useallclauses,inner_cheapest_total,merge_pathkeys,false);}/**Considerpartialnestloopandmergejoinplanifouterrelhasany*partialpathandthejoinrelisparallel-safe.However,wecan't*handleJOIN_UNIQUE_OUTER,becausetheouterpathwillbepartial,and*thereforewewon'tbeabletoproperlyguaranteeuniqueness.Norcan*wehandleextra_lateral_rels,sincepartialpathsmustnotbe*parameterized.Similarly,wecan'thandleJOIN_FULLandJOIN_RIGHT,*becausetheycanproducefalsenullextendedrows.*考虑并行nestloop和合并计划,如果外表有并行访问路径,并且joinrel是是并行安全的。*但是,不能处理JOIN_UNIQUE_OUTER,因为外部路径是并行的,因此不能正确地保证惟一性。*同时,也不能处理extra_lateralal_rels,因为部分路径必须不被参数化。*类似地,我们不能处理JOIN_FULL和JOIN_RIGHT,因为它们会产生假空扩展行。*/if(joinrel->consider_parallel&&save_jointype!=JOIN_UNIQUE_OUTER&&save_jointype!=JOIN_FULL&&save_jointype!=JOIN_RIGHT&&outerrel->partial_pathlist!=NIL&&bms_is_empty(joinrel->lateral_relids)){if(nestjoinOK)consider_parallel_nestloop(root,joinrel,outerrel,innerrel,save_jointype,extra);//尝试并行netstloopjoin/**Ifinner_cheapest_totalisNULLornonparallel-safethenfindthe*cheapesttotalparallelsafepath.IfdoingJOIN_UNIQUE_INNER,we*can'tuseanyalternativeinnerpath.*如果inner_cheapest_total为空或非并行安全,那么可以找到成本最低的总并行安全访问路径。*如果执行JOIN_UNIQUE_INNER,则不能使用任何替代的内表访问路径。*/if(inner_cheapest_total==NULL||!inner_cheapest_total->parallel_safe){if(save_jointype==JOIN_UNIQUE_INNER)return;inner_cheapest_total=get_cheapest_parallel_safe_total_inner(innerrel->pathlist);}if(inner_cheapest_total)consider_parallel_mergejoin(root,joinrel,outerrel,innerrel,save_jointype,extra,inner_cheapest_total);//尝试并行mergejoin}}//------------------------------------try_nestloop_path/**try_nestloop_path*Consideranestloopjoinpath;ifitappearsuseful,pushitinto*thejoinrel'spathlistviaadd_path().*尝试nestloopjoin访问路径;*如果该路径可行,则通过函数add_path把该路径加入到joinrel'spathlist链表中.*/staticvoidtry_nestloop_path(PlannerInfo*root,RelOptInfo*joinrel,Path*outer_path,Path*inner_path,List*pathkeys,JoinTypejointype,JoinPathExtraData*extra){Relidsrequired_outer;JoinCostWorkspaceworkspace;RelOptInfo*innerrel=inner_path->parent;RelOptInfo*outerrel=outer_path->parent;Relidsinnerrelids;Relidsouterrelids;Relidsinner_paramrels=PATH_REQ_OUTER(inner_path);Relidsouter_paramrels=PATH_REQ_OUTER(outer_path);/**Pathsareparameterizedbytop-levelparents,sorunparameterization*testsontheparentrelids.*路径由最顶层的父类参数化,因此在父类集上运行参数化测试。*/if(innerrel->top_parent_relids)innerrelids=innerrel->top_parent_relids;elseinnerrelids=innerrel->relids;if(outerrel->top_parent_relids)outerrelids=outerrel->top_parent_relids;elseouterrelids=outerrel->relids;/**Checktoseeifproposedpathisstillparameterized,andrejectifthe*parameterizationwouldn'tbesensible---unlessallow_star_schema_join*saystoallowitanyway.Also,wemustrejectifhave_dangerous_phv*doesn'tlikethelookofit,whichcouldonlyhappenifthenestloopis*stillparameterized.*检查建议的访问路径是否仍然是参数化的,如果参数化不合理,则丢弃——除非allow_star_schema_join设置为允许。*另外,如果have_dangerousous_phv不允许,则必须丢弃它,而这种情况只有在nestloop仍然被参数化时才会发生。*/required_outer=calc_nestloop_required_outer(outerrelids,outer_paramrels,innerrelids,inner_paramrels);if(required_outer&&((!bms_overlap(required_outer,extra->param_source_rels)&&!allow_star_schema_join(root,outerrelids,inner_paramrels))||have_dangerous_phv(root,outerrelids,inner_paramrels))){/*Wastenomemorywhenwerejectapathhere*/bms_free(required_outer);return;//退出}/**Doaprechecktoquicklyeliminateobviously-inferiorpaths.We*calculateacheaplowerboundonthepath'scostandthenuse*add_path_precheck()toseeifthepathisclearlygoingtobedominated*bysomeexistingpathforthejoinrel.Ifnot,dothefullpushupwith*creatingafullyvalidpathstructureandsubmittingittoadd_path().*Thelattertwostepsareexpensiveenoughtomakethistwo-phase*methodologyworthwhile.*参见mergejoin的注释*/initial_cost_nestloop(root,&workspace,jointype,outer_path,inner_path,extra);//初步计算nestloop的成本if(add_path_precheck(joinrel,workspace.startup_cost,workspace.total_cost,pathkeys,required_outer))//初步检查{/**Iftheinnerpathisparameterized,itisparameterizedbythe*topmostparentoftheouterrel,nottheouterrelitself.Fix*that.*如果内表访问路径是参数化的,那么它是由外层rel的最上层父元素参数化的,而不是外层rel本身。*需解决这个问题。*/if(PATH_PARAM_BY_PARENT(inner_path,outer_path->parent)){inner_path=reparameterize_path_by_child(root,inner_path,outer_path->parent);/**Ifwecouldnottranslatethepath,wecan'tcreatenestloop*path.*如果不能处理此情况,则不能创建nestloop访问路径*/if(!inner_path){bms_free(required_outer);return;}}add_path(joinrel,(Path*)create_nestloop_path(root,joinrel,jointype,&workspace,extra,outer_path,inner_path,extra->restrictlist,pathkeys,required_outer));//创建并添加nestloop路径}else{/*Wastenomemorywhenwerejectapathhere*/bms_free(required_outer);}}//-----------------------create_nestloop_path/**create_nestloop_path*Createsapathnodecorrespondingtoanestloopjoinbetweentwo*relations.*创建nestloop连接路径节点.**'joinrel'isthejoinrelation.*'jointype'isthetypeofjoinrequired*'workspace'istheresultfrominitial_cost_nestloop*'extra'containsvariousinformationaboutthejoin*'outer_path'istheouterpath*'inner_path'istheinnerpath*'restrict_clauses'aretheRestrictInfonodestoapplyatthejoin*'pathkeys'arethepathkeysofthenewjoinpath*'required_outer'isthesetofrequiredouterrels**Returnstheresultingpathnode.*/NestPath*create_nestloop_path(PlannerInfo*root,//优化器信息RelOptInfo*joinrel,//连接生成的relationJoinTypejointype,//连接类型JoinCostWorkspace*workspace,//成本workspaceJoinPathExtraData*extra,//额外的信息Path*outer_path,//外表访问路径Path*inner_path,//内部访问路径List*restrict_clauses,//约束条件List*pathkeys,//排序键Relidsrequired_outer)//需依赖的外部relids{NestPath*pathnode=makeNode(NestPath);Relidsinner_req_outer=PATH_REQ_OUTER(inner_path);/**Iftheinnerpathisparameterizedbytheouter,wemustdropany*restrict_clausesthatareduetobemovedintotheinnerpath.Wehave*todothisnow,ratherthanpostponetheworktillcreateplantime,*becausetherestrict_clauseslistcanaffectthesizeandcost*estimatesforthispath.*如果内表访问路径是由外部参数化的,那么必须删除任何将移动到内表访问路径的约束条件子句。*现在必须这样做,而不是将此项工作推迟到createplan的时候,因为限制条件链表会影响此路径的大小和成本估计。*/if(bms_overlap(inner_req_outer,outer_path->parent->relids))//内表需依赖的relids与外表父路径的relids存在交集{Relidsinner_and_outer=bms_union(inner_path->parent->relids,inner_req_outer);//合并内部父路径的relids和内部依赖的relidsList*jclauses=NIL;ListCell*lc;foreach(lc,restrict_clauses){RestrictInfo*rinfo=(RestrictInfo*)lfirst(lc);if(!join_clause_is_movable_into(rinfo,inner_path->parent->relids,inner_and_outer))jclauses=lappend(jclauses,rinfo);}restrict_clauses=jclauses;}//创建路径pathnode->path.pathtype=T_NestLoop;pathnode->path.parent=joinrel;pathnode->path.pathtarget=joinrel->reltarget;pathnode->path.param_info=get_joinrel_parampathinfo(root,joinrel,outer_path,inner_path,extra->sjinfo,required_outer,&restrict_clauses);pathnode->path.parallel_aware=false;pathnode->path.parallel_safe=joinrel->consider_parallel&&outer_path->parallel_safe&&inner_path->parallel_safe;/*Thisisafoolishwaytoestimateparallel_workers,butfornow...*/pathnode->path.parallel_workers=outer_path->parallel_workers;pathnode->path.pathkeys=pathkeys;pathnode->jointype=jointype;pathnode->inner_unique=extra->inner_unique;pathnode->outerjoinpath=outer_path;pathnode->innerjoinpath=inner_path;pathnode->joinrestrictinfo=restrict_clauses;final_cost_nestloop(root,pathnode,workspace,extra);returnpathnode;}三、跟踪分析

测试脚本如下

testdb=#explainverboseselectdw.*,grjf.grbh,grjf.xm,grjf.ny,grjf.jetestdb-#fromt_dwxxdw,lateral(selectgr.grbh,gr.xm,jf.ny,jf.jetestdb(#fromt_grxxgrinnerjoint_jfxxjftestdb(#ongr.dwbh=dw.dwbhtestdb(#andgr.grbh=jf.grbh)grjftestdb-#wheredwbh='1001'testdb-#orderbydw.dwbh;QUERYPLAN----------------------------------------------------------------------------------------------NestedLoop(cost=0.87..112.00rows=10width=47)Output:dw.dwmc,dw.dwbh,dw.dwdz,gr.grbh,gr.xm,jf.ny,jf.je->NestedLoop(cost=0.58..28.80rows=10width=32)Output:dw.dwmc,dw.dwbh,dw.dwdz,gr.grbh,gr.xm->IndexScanusingt_dwxx_pkeyonpublic.t_dwxxdw(cost=0.29..8.30rows=1width=20)Output:dw.dwmc,dw.dwbh,dw.dwdzIndexCond:((dw.dwbh)::text='1001'::text)->IndexScanusingidx_t_grxx_dwbhonpublic.t_grxxgr(cost=0.29..20.40rows=10width=16)Output:gr.dwbh,gr.grbh,gr.xm,gr.xb,gr.nlIndexCond:((gr.dwbh)::text='1001'::text)->IndexScanusingidx_t_jfxx_grbhonpublic.t_jfxxjf(cost=0.29..8.31rows=1width=20)Output:jf.grbh,jf.ny,jf.jeIndexCond:((jf.grbh)::text=(gr.grbh)::text)(13rows)

启动gdb,设置断点跟踪

(gdb)bmatch_unsorted_outerBreakpoint1at0x7aff2c:filejoinpath.c,line1338.(gdb)cContinuing.Breakpoint1,match_unsorted_outer(root=0x2a987a8,joinrel=0x2aef8a0,outerrel=0x2aa20f0,innerrel=0x2aa3620,jointype=JOIN_INNER,extra=0x7ffc343da930)atjoinpath.c:13381338JoinTypesave_jointype=jointype;(gdb)

joinrel是1号RTE和3号RTE的连接,即t_dwxx和t_grxx

(gdb)p*joinrel->relids->words$3=10

内表成本最低的访问路径是索引扫描

(gdb)n1341Path*inner_cheapest_total=innerrel->cheapest_total_path;(gdb)1342Path*matpath=NULL;(gdb)p*inner_cheapest_total$1={type=T_IndexPath,pathtype=T_IndexScan,parent=0x2aa3620,pathtarget=0x2aa3858,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=10,startup_cost=0.29249999999999998,total_cost=20.399882614957409,pathkeys=0x0}

连接类型为JOIN_INNER,设置nestjoinOK为T,useallclauses为F

(gdb)psave_jointype$4=JOIN_INNER(gdb)n1352switch(jointype)(gdb)1358nestjoinOK=true;(gdb)1359useallclauses=false;(gdb)1360break;(gdb)

创建物化内表访问路径

(gdb)n1408if(enable_material&&inner_cheapest_total!=NULL&&(gdb)1409!ExecMaterializesOutput(inner_cheapest_total->pathtype))(gdb)1408if(enable_material&&inner_cheapest_total!=NULL&&(gdb)1410matpath=(Path*)(gdb)1414foreach(lc1,outerrel->pathlist)(gdb)p*matpath$5={type=T_MaterialPath,pathtype=T_Material,parent=0x2aa3620,pathtarget=0x2aa3858,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=10,startup_cost=0.29249999999999998,total_cost=20.44988261495741,pathkeys=0x0}

开始遍历外表访问路径

(gdb)n1416Path*outerpath=(Path*)lfirst(lc1);

尝试使用这个外表访问路径和内表的各种可用路径构建nestloop连接
尝试内部关系的每个可用的参数化(包括非参数化用例)成本最低路径

...1461elseif(nestjoinOK)(gdb)1471foreach(lc2,innerrel->cheapest_parameterized_paths)(gdb)n1473Path*innerpath=(Path*)lfirst(lc2);(gdb)n1475try_nestloop_path(root,(gdb)p*innerpath$6={type=T_IndexPath,pathtype=T_IndexScan,parent=0x2aa3620,pathtarget=0x2aa3858,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=10,startup_cost=0.29249999999999998,total_cost=20.399882614957409,pathkeys=0x0}(gdb)n1471foreach(lc2,innerrel->cheapest_parameterized_paths)(gdb)1473Path*innerpath=(Path*)lfirst(lc2);(gdb)1475try_nestloop_path(root,(gdb)1471foreach(lc2,innerrel->cheapest_parameterized_paths)(gdb)

同时,也会尝试成本最低的物化内表访问路径

1485if(matpath!=NULL)(gdb)1486try_nestloop_path(root,(gdb)1496if(save_jointype==JOIN_UNIQUE_OUTER)

完成match_unsorted_outer的调用

1522save_jointype!=JOIN_RIGHT&&(gdb)1550}(gdb)add_paths_to_joinrel(root=0x2a987a8,joinrel=0x2aef8a0,outerrel=0x2aa20f0,innerrel=0x2aa3620,jointype=JOIN_INNER,sjinfo=0x7ffc343daa20,restrictlist=0x0)atjoinpath.c:306306if(enable_hashjoin||jointype==JOIN_FULL)

结果joinrel的访问路径

(gdb)p*joinrel->pathlist$9={type=T_List,length=1,head=0x2aefde8,tail=0x2aefde8}(gdb)p*(Node*)joinrel->pathlist->head->data.ptr_value$10={type=T_NestPath}(gdb)p*(NestPath*)joinrel->pathlist->head->data.ptr_value$11={path={type=T_NestPath,pathtype=T_NestLoop,parent=0x2aef8a0,pathtarget=0x2aefad8,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=10,startup_cost=0.57750000000000001,total_cost=28.802382614957409,pathkeys=0x0},jointype=JOIN_INNER,inner_unique=false,outerjoinpath=0x2aeb0d8,innerjoinpath=0x2aeb408,joinrestrictinfo=0x0}

下面考察try_nestloop_path函数的处理逻辑,设置断点,进入try_nestloop_path函数

(gdb)btry_nestloop_pathBreakpoint1at0x7ae950:filejoinpath.c,line373.(gdb)cContinuing.Breakpoint1,try_nestloop_path(root=0x2a987a8,joinrel=0x2aef8a0,outer_path=0x2aeb0d8,inner_path=0x2aeb408,pathkeys=0x0,jointype=JOIN_INNER,extra=0x7ffc343da930)atjoinpath.c:373373RelOptInfo*innerrel=inner_path->parent;

joinrel初始时,pathlist为NULL

(gdb)p*(Node*)joinrel->pathlist->head->data.ptr_valueCannotaccessmemoryataddress0x8

外表访问路径为索引扫描(t_dwxx),内表访问路径为索引扫描(t_grxx)

(gdb)p*outer_path$2={type=T_IndexPath,pathtype=T_IndexScan,parent=0x2aa20f0,pathtarget=0x2aa2328,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=1,startup_cost=0.28500000000000003,total_cost=8.3025000000000002,pathkeys=0x0}(gdb)p*inner_path$3={type=T_IndexPath,pathtype=T_IndexScan,parent=0x2aa3620,pathtarget=0x2aa3858,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=10,startup_cost=0.29249999999999998,total_cost=20.399882614957409,pathkeys=0x0}

初步计算nestloop的成本

...(gdb)prequired_outer$4=(Relids)0x0(gdb)n422initial_cost_nestloop(root,&workspace,jointype,

创建nestloop path并添加到joinrel中

425if(add_path_precheck(joinrel,(gdb)434if(PATH_PARAM_BY_PARENT(inner_path,outer_path->parent))(gdb)451create_nestloop_path(root,(gdb)450add_path(joinrel,(Path*)(gdb)

生成的第一条路径

(gdb)p*(NestPath*)joinrel->pathlist->head->data.ptr_value$6={path={type=T_NestPath,pathtype=T_NestLoop,parent=0x2aef8a0,pathtarget=0x2aefad8,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=10,startup_cost=0.57750000000000001,total_cost=28.802382614957409,pathkeys=0x0},jointype=JOIN_INNER,inner_unique=false,outerjoinpath=0x2aeb0d8,innerjoinpath=0x2aeb408,joinrestrictinfo=0x0}

继续执行,尝试第二条路径

(gdb)cContinuing.Breakpoint1,try_nestloop_path(root=0x2a987a8,joinrel=0x2aef8a0,outer_path=0x2aeb0d8,inner_path=0x2aecf28,pathkeys=0x0,jointype=JOIN_INNER,extra=0x7ffc343da930)atjoinpath.c:373373RelOptInfo*innerrel=inner_path->parent;

查看inner_path,即内表的cheapest_parameterized_paths链表中的第二个元素

(gdb)p*inner_path$9={type=T_IndexPath,pathtype=T_IndexScan,parent=0x2aa3620,pathtarget=0x2aa3858,param_info=0x2aed6f8,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=1,startup_cost=0.29249999999999998,total_cost=0.35258,pathkeys=0x2aeceb0}

使用此路径尝试nest loop访问路径,但该路径依赖外部relids(4号RTE),被丢弃

403if(required_outer&&(gdb)405!allow_star_schema_join(root,outerrelids,inner_paramrels))||(gdb)404((!bms_overlap(required_outer,extra->param_source_rels)&&(gdb)409bms_free(required_outer);(gdb)p*required_outer$12={nwords=1,words=0x2aefe4c}(gdb)p*required_outer->words$13=16(gdb)n410return;(gdb)

感谢各位的阅读,以上就是“PostgreSQL中match_unsorted_outer函数分析”的内容了,经过本文的学习后,相信大家对PostgreSQL中match_unsorted_outer函数分析这一问题有了更深刻的体会,具体使用情况还需要大家实践验证。这里是亿速云,小编将为大家推送更多相关知识点的文章,欢迎关注!