PostgreSQL中hash_inner_and_outer函数分析
这篇文章主要讲解了“PostgreSQL中hash_inner_and_outer函数分析”,文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习“PostgreSQL中hash_inner_and_outer函数分析”吧!
一、数据结构Cost相关
注意:实际使用的参数值通过系统配置文件定义,而不是这里的常量定义!
typedefdoubleCost;/*executioncost(inpage-accessunits)*//*defaultsforcostsize.c'sCostparameters*//*NB:cost-estimationcodeshouldusethevariables,nottheseconstants!*//*注意:实际值通过系统配置文件定义,而不是这里的常量定义!*//*Ifyouchangethese,updatebackend/utils/misc/postgresql.sample.conf*/#defineDEFAULT_SEQ_PAGE_COST1.0//顺序扫描page的成本#defineDEFAULT_RANDOM_PAGE_COST4.0//随机扫描page的成本#defineDEFAULT_CPU_TUPLE_COST0.01//处理一个元组的CPU成本#defineDEFAULT_CPU_INDEX_TUPLE_COST0.005//处理一个索引元组的CPU成本#defineDEFAULT_CPU_OPERATOR_COST0.0025//执行一次操作或函数的CPU成本#defineDEFAULT_PARALLEL_TUPLE_COST0.1//并行执行,从一个worker传输一个元组到另一个worker的成本#defineDEFAULT_PARALLEL_SETUP_COST1000.0//构建并行执行环境的成本#defineDEFAULT_EFFECTIVE_CACHE_SIZE524288/*先前已有介绍,measuredinpages*/doubleseq_page_cost=DEFAULT_SEQ_PAGE_COST;doublerandom_page_cost=DEFAULT_RANDOM_PAGE_COST;doublecpu_tuple_cost=DEFAULT_CPU_TUPLE_COST;doublecpu_index_tuple_cost=DEFAULT_CPU_INDEX_TUPLE_COST;doublecpu_operator_cost=DEFAULT_CPU_OPERATOR_COST;doubleparallel_tuple_cost=DEFAULT_PARALLEL_TUPLE_COST;doubleparallel_setup_cost=DEFAULT_PARALLEL_SETUP_COST;inteffective_cache_size=DEFAULT_EFFECTIVE_CACHE_SIZE;Costdisable_cost=1.0e10;//1后面10个0,通过设置一个巨大的成本,让优化器自动放弃此路径intmax_parallel_workers_per_gather=2;//每次gather使用的worker数二、源码解读
hash join的算法实现伪代码如下:
Step 1
FOR small_table_row IN (SELECT * FROM small_table)
LOOP
slot := HASH(small_table_row.join_key);
INSERT_HASH_TABLE(slot,small_table_row);
END LOOP;
Step 2
FOR large_table_row IN (SELECT * FROM large_table) LOOP
slot := HASH(large_table_row.join_key);
small_table_row = LOOKUP_HASH_TABLE(slot,large_table_row.join_key);
IF small_table_row FOUND THEN
output small_table_row + large_table_row;
END IF;
END LOOP;
hash_inner_and_outer
该函数创建hash join访问路径。
//------------------------------------------------hash_inner_and_outer/**hash_inner_and_outer*Createhashjoinjoinpathsbyexplicitlyhashingboththeouterand*innerkeysofeachavailablehashclause.*通过显式对外表和内表(应用每个可用的hash条件)进行hash操作,创建hashjoin访问路径**'joinrel'isthejoinrelation*'outerrel'istheouterjoinrelation*'innerrel'istheinnerjoinrelation*'jointype'isthetypeofjointodo*'extra'containsadditionalinputvalues*/staticvoidhash_inner_and_outer(PlannerInfo*root,RelOptInfo*joinrel,RelOptInfo*outerrel,RelOptInfo*innerrel,JoinTypejointype,JoinPathExtraData*extra){JoinTypesave_jointype=jointype;boolisouterjoin=IS_OUTER_JOIN(jointype);List*hashclauses;ListCell*l;/**Weneedtobuildonlyonehashclauseslistforanygivenpairofouter*andinnerrelations;allofthehashableclauseswillbeusedaskeys.*只需要为给定的外表和内表对构建一个hashclauses条件链表;所有的hashable子句将用作hash键。**Scanthejoin'srestrictinfolisttofindhashjoinableclausesthatare*usablewiththispairofsub-relations.*扫描连接的约束条件restrictinfo链表,找到可用于这对子关系的hash连接hashjoinable子句。*/hashclauses=NIL;foreach(l,extra->restrictlist){RestrictInfo*restrictinfo=(RestrictInfo*)lfirst(l);/**Ifprocessinganouterjoin,onlyuseitsownjoinclausesfor*hashing.Forinnerjoinsweneednotbesopicky.*如果处理外连接,则仅使用其自己的连接子句进行哈希操作。对于内连接,则无需如此操作。*/if(isouterjoin&&RINFO_IS_PUSHED_DOWN(restrictinfo,joinrel->relids))continue;if(!restrictinfo->can_join||restrictinfo->hashjoinoperator==InvalidOid)continue;/*不能被hash.nothashjoinable*//**Checkifclausehastheform"outeropinner"or"inneropouter".*检查条件是否有形如outeropinner或者inneropouter的形式*/if(!clause_sides_match_join(restrictinfo,outerrel,innerrel))continue;/*nogoodfortheseinputrelations*/hashclauses=lappend(hashclauses,restrictinfo);//加入到hash条件中}/*Ifwefoundanyusablehashclauses,makepaths*///如发现可用于hash连接的条件,则构建hash连接访问路径,如无则无法构建if(hashclauses){/**Weconsiderboththecheapest-total-costandcheapest-startup-cost*outerpaths.There'snoneedtoconsideranybutthe*cheapest-total-costinnerpath,however.*外表:既考虑了成本最低的总成本,也考虑了外表启动成本最低的访问路径。*内表:除了成本最低的内部路径之外,不需要考虑任何其他路径。*/Path*cheapest_startup_outer=outerrel->cheapest_startup_path;Path*cheapest_total_outer=outerrel->cheapest_total_path;Path*cheapest_total_inner=innerrel->cheapest_total_path;/**Ifeithercheapest-totalpathisparameterizedbytheotherrel,we*can'tuseahashjoin.(There'snouselookingforalternative*inputpaths,sincetheseshouldalreadybetheleast-parameterized*availablepaths.)*如果其中一个关系参数化了其中一个成本最低的访问路径,那么不能使用hashjoin。*(没有必要寻找替代的输入路径,因为这些路径应该已经是参数化最少的可用路径了。)*/if(PATH_PARAM_BY_REL(cheapest_total_outer,innerrel)||PATH_PARAM_BY_REL(cheapest_total_inner,outerrel))return;//直接退出/*Unique-ifyifneedbe;weignoreparameterizedpossibilities*///如果需要保证唯一性,丢弃参数化if(jointype==JOIN_UNIQUE_OUTER){cheapest_total_outer=(Path*)create_unique_path(root,outerrel,cheapest_total_outer,extra->sjinfo);Assert(cheapest_total_outer);jointype=JOIN_INNER;try_hashjoin_path(root,joinrel,cheapest_total_outer,cheapest_total_inner,hashclauses,jointype,extra);/*nopossibilityofcheapstartuphere*/}elseif(jointype==JOIN_UNIQUE_INNER){cheapest_total_inner=(Path*)create_unique_path(root,innerrel,cheapest_total_inner,extra->sjinfo);Assert(cheapest_total_inner);jointype=JOIN_INNER;try_hashjoin_path(root,joinrel,cheapest_total_outer,cheapest_total_inner,hashclauses,jointype,extra);if(cheapest_startup_outer!=NULL&&cheapest_startup_outer!=cheapest_total_outer)try_hashjoin_path(root,joinrel,cheapest_startup_outer,cheapest_total_inner,hashclauses,jointype,extra);}else//其他连接类型{/**Forotherjointypes,weconsiderthecheapeststartupouter*togetherwiththecheapesttotalinner,andthenconsider*pairingsofcheapest-totalpathsincludingparameterizedones.*Thereisnouseingeneratingparameterizedpathsonthebasis*ofpossiblycheapstartupcost,sothisissufficient.*对于其他连接类型,我们考虑成本最低的的外表启动和内表启动访问路径,*然后考虑包括参数化路径在内的成本最低的访问路径对。*在基于可能较低的启动成本的基础上生成参数化路径是没有用的,上面的做法就足够了。*/ListCell*lc1;ListCell*lc2;if(cheapest_startup_outer!=NULL)//启动成本最低的外表访问路径try_hashjoin_path(root,joinrel,cheapest_startup_outer,cheapest_total_inner,hashclauses,jointype,extra);//构建hashjoin访问路径foreach(lc1,outerrel->cheapest_parameterized_paths)//遍历外表参数化路径{Path*outerpath=(Path*)lfirst(lc1);/**Wecannotuseanouterpaththatisparameterizedbythe*innerrel.*不能使用被内表参数化使用的外表访问路径*/if(PATH_PARAM_BY_REL(outerpath,innerrel))continue;foreach(lc2,innerrel->cheapest_parameterized_paths)//遍历内表参数化路径{Path*innerpath=(Path*)lfirst(lc2);/**Wecannotuseaninnerpaththatisparameterizedby*theouterrel,either.*同样的,不能使用被外表参数化使用的内表访问路径*/if(PATH_PARAM_BY_REL(innerpath,outerrel))continue;if(outerpath==cheapest_startup_outer&&innerpath==cheapest_total_inner)continue;/*alreadytriedit*/try_hashjoin_path(root,joinrel,outerpath,innerpath,hashclauses,jointype,extra);//构建hash连接访问路径}}}/**Ifthejoinrelisparallel-safe,wemaybeabletoconsidera*partialhashjoin.However,wecan'thandleJOIN_UNIQUE_OUTER,*becausetheouterpathwillbepartial,andthereforewewon'tbe*abletoproperlyguaranteeuniqueness.Similarly,wecan'thandle*JOIN_FULLandJOIN_RIGHT,becausetheycanproducefalsenull*extendedrows.Also,theresultingpathmustnotbeparameterized.*WewouldbeabletosupportJOIN_FULLandJOIN_RIGHTforParallel*Hash,sinceinthatcasewe'rebacktoasinglehashtablewitha*singlesetofmatchbitsforeachbatch,butthatwillrequire*figuringoutadeadlock-freewaytowaitfortheprobetofinish.*如果连接是并行安全的,可以考虑并行哈希连接。*但是,我们不能处理JOIN_UNIQUE_OUTER,因为外部路径是部分的,因此我们不能正确地保证惟一性。*类似地,我们不能处理JOIN_FULL和JOIN_RIGHT,因为它们会产生假空扩展行。*此外,生成的路径不能被参数化。*我们将能够支持JOIN_FULL和JOIN_RIGHT用于并行哈希,*因为在这种情况下,我们将返回到一个哈希表,每个批处理只有一组匹配位,*但这需要找到一种没有死锁的方式来等待探测完成。*/if(joinrel->consider_parallel&&save_jointype!=JOIN_UNIQUE_OUTER&&save_jointype!=JOIN_FULL&&save_jointype!=JOIN_RIGHT&&outerrel->partial_pathlist!=NIL&&bms_is_empty(joinrel->lateral_relids)){Path*cheapest_partial_outer;Path*cheapest_partial_inner=NULL;Path*cheapest_safe_inner=NULL;cheapest_partial_outer=(Path*)linitial(outerrel->partial_pathlist);/**Canweuseapartialinnerplantoo,sothatwecanbuilda*sharedhashtableinparallel?*我们是否也可以使用部分内表访问路径,以便并行构建共享哈希表?*/if(innerrel->partial_pathlist!=NIL&&enable_parallel_hash){cheapest_partial_inner=(Path*)linitial(innerrel->partial_pathlist);try_partial_hashjoin_path(root,joinrel,cheapest_partial_outer,cheapest_partial_inner,hashclauses,jointype,extra,true/*parallel_hash*/);}/**Normally,giventhatthejoinrelisparallel-safe,thecheapest*totalinnerpathwillalsobeparallel-safe,butifnot,we'll*havetosearchforthecheapestsafe,unparameterizedinner*path.IfdoingJOIN_UNIQUE_INNER,wecan'tuseanyalternative*innerpath.*通常,假设连接是并行安全的,最便宜的总内表访问路径也是并行安全的,*但如果不是,我们将不得不寻找成本最低的安全的、非参数化的内表访问路径。*如果执行JOIN_UNIQUE_INNER,则不能使用任何替代的内表访问路径。*/if(cheapest_total_inner->parallel_safe)cheapest_safe_inner=cheapest_total_inner;elseif(save_jointype!=JOIN_UNIQUE_INNER)cheapest_safe_inner=get_cheapest_parallel_safe_total_inner(innerrel->pathlist);if(cheapest_safe_inner!=NULL)try_partial_hashjoin_path(root,joinrel,cheapest_partial_outer,cheapest_safe_inner,hashclauses,jointype,extra,false/*parallel_hash*/);}}}//-----------------------------try_hashjoin_path/**try_hashjoin_path*Considerahashjoinpath;ifitappearsuseful,pushitinto*thejoinrel'spathlistviaadd_path().*尝试构造hashjoin访问路径.*如果该访问路径可用,通过add_path函数添加到连接新生成的关系joinrel中的pathlist链表中*/staticvoidtry_hashjoin_path(PlannerInfo*root,RelOptInfo*joinrel,Path*outer_path,Path*inner_path,List*hashclauses,JoinTypejointype,JoinPathExtraData*extra){Relidsrequired_outer;JoinCostWorkspaceworkspace;/**Checktoseeifproposedpathisstillparameterized,andrejectifthe*parameterizationwouldn'tbesensible.*检查建议的路径是否仍然是参数化的,如果参数化不合理,则拒绝。**/required_outer=calc_non_nestloop_required_outer(outer_path,inner_path);if(required_outer&&!bms_overlap(required_outer,extra->param_source_rels)){/*Wastenomemorywhenwerejectapathhere*/bms_free(required_outer);return;}/**Seecommentsintry_nestloop_path().Alsonotethathashjoinpaths*neverhaveanyoutputpathkeys,percommentsincreate_hashjoin_path.*参见try_nestloop_path()中的注释。*还要注意,hashjoin访问路径从来没有任何输出路径键,参见create_hashjoin_path中的注释.*/initial_cost_hashjoin(root,&workspace,jointype,hashclauses,outer_path,inner_path,extra,false);//初步估算成本if(add_path_precheck(joinrel,workspace.startup_cost,workspace.total_cost,NIL,required_outer))//初始判断{add_path(joinrel,(Path*)create_hashjoin_path(root,joinrel,jointype,&workspace,extra,outer_path,inner_path,false,/*parallel_hash*/extra->restrictlist,required_outer,hashclauses));//创建hashjoin访问路径,并添加}else{/*Wastenomemorywhenwerejectapathhere*/bms_free(required_outer);}}//------------------create_hashjoin_path/**create_hashjoin_path*Createsapathnodecorrespondingtoahashjoinbetweentworelations.*创建hashjoin访问路径Node**'joinrel'isthejoinrelation*'jointype'isthetypeofjoinrequired*'workspace'istheresultfrominitial_cost_hashjoin*'extra'containsvariousinformationaboutthejoin*'outer_path'isthecheapestouterpath*'inner_path'isthecheapestinnerpath*'parallel_hash'toselectParallelHashofinnerpath(sharedhashtable)*'restrict_clauses'aretheRestrictInfonodestoapplyatthejoin*'required_outer'isthesetofrequiredouterrels*'hashclauses'aretheRestrictInfonodestouseashashclauses*(thisshouldbeasubsetoftherestrict_clauseslist)*/HashPath*create_hashjoin_path(PlannerInfo*root,RelOptInfo*joinrel,JoinTypejointype,JoinCostWorkspace*workspace,JoinPathExtraData*extra,Path*outer_path,Path*inner_path,boolparallel_hash,List*restrict_clauses,Relidsrequired_outer,List*hashclauses){HashPath*pathnode=makeNode(HashPath);pathnode->jpath.path.pathtype=T_HashJoin;pathnode->jpath.path.parent=joinrel;pathnode->jpath.path.pathtarget=joinrel->reltarget;pathnode->jpath.path.param_info=get_joinrel_parampathinfo(root,joinrel,outer_path,inner_path,extra->sjinfo,required_outer,&restrict_clauses);pathnode->jpath.path.parallel_aware=joinrel->consider_parallel&¶llel_hash;pathnode->jpath.path.parallel_safe=joinrel->consider_parallel&&outer_path->parallel_safe&&inner_path->parallel_safe;/*Thisisafoolishwaytoestimateparallel_workers,butfornow...*/pathnode->jpath.path.parallel_workers=outer_path->parallel_workers;/**Ahashjoinneverhaspathkeys,sinceitsoutputorderingis*unpredictableduetopossiblebatching.XXXIftheinnerrelationis*smallenough,wecouldinstructtheexecutorthatitmustnotbatch,*andthenwecouldassumethattheoutputinheritstheouterrelation's*ordering,whichmightsaveasortstep.Howeverthereisconsiderable*downsideifourestimateoftheinnerrelationsizeisbadlyoff.For*themomentwedon'triskit.(Notealsothatifwewantedtotakethis*seriously,joinpath.cwouldhavetoconsidermanymorepathsforthe*outerrelthanitdoesnow.)*hashjoin从来没有路径键,因为由于可能的批处理,其输出顺序不可预测。*如果内部关系足够小,可以指示执行器它不执行批处理,然后可以假设输出继承外部关系的顺序,这样可以节省排序步骤。*然而,如果对内部关系大小的估计严重不足,就会有相当大的负面影响。*(还要注意,如果我们想认真对待这个问题,那就是joinpath.c将不得不考虑比现在更多的外表访问路径。)*/pathnode->jpath.path.pathkeys=NIL;pathnode->jpath.jointype=jointype;pathnode->jpath.inner_unique=extra->inner_unique;pathnode->jpath.outerjoinpath=outer_path;pathnode->jpath.innerjoinpath=inner_path;pathnode->jpath.joinrestrictinfo=restrict_clauses;pathnode->path_hashclauses=hashclauses;/*final_cost_hashjoinwillfillinpathnode->num_batches*/final_cost_hashjoin(root,pathnode,workspace,extra);//最终的成本估算returnpathnode;}三、跟踪分析
测试脚本如下
testdb=#explainverboseselectdw.*,grjf.grbh,grjf.xm,grjf.ny,grjf.jetestdb-#fromt_dwxxdw,lateral(selectgr.grbh,gr.xm,jf.ny,jf.jetestdb(#fromt_grxxgrinnerjoint_jfxxjftestdb(#ongr.dwbh=dw.dwbhtestdb(#andgr.grbh=jf.grbh)grjftestdb-#orderbydw.dwbh;QUERYPLAN-------------------------------------------------------------------------------------------------Sort(cost=20070.93..20320.93rows=100000width=47)Output:dw.dwmc,dw.dwbh,dw.dwdz,gr.grbh,gr.xm,jf.ny,jf.jeSortKey:dw.dwbh->HashJoin(cost=3754.00..8689.61rows=100000width=47)Output:dw.dwmc,dw.dwbh,dw.dwdz,gr.grbh,gr.xm,jf.ny,jf.jeInnerUnique:trueHashCond:((gr.dwbh)::text=(dw.dwbh)::text)->HashJoin(cost=3465.00..8138.00rows=100000width=31)Output:gr.grbh,gr.xm,gr.dwbh,jf.ny,jf.jeHashCond:((jf.grbh)::text=(gr.grbh)::text)->SeqScanonpublic.t_jfxxjf(cost=0.00..1637.00rows=100000width=20)Output:jf.ny,jf.je,jf.grbh->Hash(cost=1726.00..1726.00rows=100000width=16)Output:gr.grbh,gr.xm,gr.dwbh->SeqScanonpublic.t_grxxgr(cost=0.00..1726.00rows=100000width=16)Output:gr.grbh,gr.xm,gr.dwbh->Hash(cost=164.00..164.00rows=10000width=20)Output:dw.dwmc,dw.dwbh,dw.dwdz->SeqScanonpublic.t_dwxxdw(cost=0.00..164.00rows=10000width=20)Output:dw.dwmc,dw.dwbh,dw.dwdz(20rows)
启动gdb,设置断点跟踪
(gdb)bhash_inner_and_outerBreakpoint1at0x7b066b:filejoinpath.c,line1684.(gdb)cContinuing.Breakpoint1,hash_inner_and_outer(root=0x2676078,joinrel=0x26d2bc0,outerrel=0x26814e0,innerrel=0x2682a10,jointype=JOIN_INNER,extra=0x7ffd6ea6b9d0)atjoinpath.c:16841684JoinTypesave_jointype=jointype;
连接类型为JOIN_INNER
(gdb)pjointype$1=JOIN_INNER
1号和3号RTE的连接(即t_dwxx和t_grxx)
(gdb)p*joinrel->relids->words$3=10
开始遍历连接条件,获取hash连接条件
1697foreach(l,extra->restrictlist)(gdb)1699RestrictInfo*restrictinfo=(RestrictInfo*)lfirst(l);
成功获取,t_dwxx.dwbh = t_grxx.dwbh
(gdb)1697foreach(l,extra->restrictlist)(gdb)1722if(hashclauses)(gdb)p*hashclauses$4={type=T_List,length=1,head=0x26d4068,tail=0x26d4068}
获取成本最低的外表启动路径/成本最低的外表访问路径/成本最低的内部访问路径
分别是外表顺序扫描/外表顺序扫描/内部顺序扫描
(gdb)n1729Path*cheapest_startup_outer=outerrel->cheapest_startup_path;(gdb)1730Path*cheapest_total_outer=outerrel->cheapest_total_path;(gdb)1731Path*cheapest_total_inner=innerrel->cheapest_total_path;(gdb)p*cheapest_startup_outer$5={type=T_Path,pathtype=T_SeqScan,parent=0x26814e0,pathtarget=0x2681718,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=10000,startup_cost=0,total_cost=164,pathkeys=0x0}(gdb)p*cheapest_total_outer$6={type=T_Path,pathtype=T_SeqScan,parent=0x26814e0,pathtarget=0x2681718,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=10000,startup_cost=0,total_cost=164,pathkeys=0x0}(gdb)p*cheapest_total_inner$7={type=T_Path,pathtype=T_SeqScan,parent=0x2682a10,pathtarget=0x2682c48,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=100000,startup_cost=0,total_cost=1726,pathkeys=0x0}
如外表成本最低的启动路径不为NULL,则尝试hash连接
(gdb)n1740PATH_PARAM_BY_REL(cheapest_total_inner,outerrel))(gdb)1739if(PATH_PARAM_BY_REL(cheapest_total_outer,innerrel)||(gdb)1744if(jointype==JOIN_UNIQUE_OUTER)(gdb)1760elseif(jointype==JOIN_UNIQUE_INNER)(gdb)1796if(cheapest_startup_outer!=NULL)(gdb)1797try_hashjoin_path(root,
进入try_hashjoin_path
(gdb)steptry_hashjoin_path(root=0x2676078,joinrel=0x26d2bc0,outer_path=0x26853b8,inner_path=0x26cf610,hashclauses=0x26d4090,jointype=JOIN_INNER,extra=0x7ffd6ea6b9d0)atjoinpath.c:737737required_outer=calc_non_nestloop_required_outer(outer_path,
try_hashjoin_path->初步估算成本
...751initial_cost_hashjoin(root,&workspace,jointype,hashclauses,(gdb)pworkspace$9={startup_cost=3465,total_cost=4261,run_cost=796,inner_run_cost=0,inner_rescan_run_cost=6.9528109284473596e-310,outer_rows=3.7882102964330281e-317,inner_rows=2.0115578425988515e-316,outer_skip_rows=2.0115578425988515e-316,inner_skip_rows=6.9528109284331305e-310,numbuckets=131072,numbatches=2,inner_rows_total=100000}
try_hashjoin_path->进入函数create_hashjoin_path
(gdb)n759create_hashjoin_path(root,(gdb)stepcreate_hashjoin_path(root=0x2676078,joinrel=0x26d2bc0,jointype=JOIN_INNER,workspace=0x7ffd6ea6b850,extra=0x7ffd6ea6b9d0,outer_path=0x26853b8,inner_path=0x26cf610,parallel_hash=false,restrict_clauses=0x26d3098,required_outer=0x0,hashclauses=0x26d4090)atpathnode.c:23302330HashPath*pathnode=makeNode(HashPath);
try_hashjoin_path->create_hashjoin_path->计算成本并返回
(gdb)2370final_cost_hashjoin(root,pathnode,workspace,extra);(gdb)2372returnpathnode;(gdb)2373}(gdb)p*pathnode$10={jpath={path={type=T_HashPath,pathtype=T_HashJoin,parent=0x26d2bc0,pathtarget=0x26d2df8,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=100000,startup_cost=3465,total_cost=5386,pathkeys=0x0},jointype=JOIN_INNER,inner_unique=false,outerjoinpath=0x26853b8,innerjoinpath=0x26cf610,joinrestrictinfo=0x26d3098},path_hashclauses=0x26d4090,num_batches=2,inner_rows_total=100000}
try_hashjoin_path->添加路径
(gdb)ntry_hashjoin_path(root=0x2676078,joinrel=0x26d2bc0,outer_path=0x26853b8,inner_path=0x26cf610,hashclauses=0x26d4090,jointype=JOIN_INNER,extra=0x7ffd6ea6b9d0)atjoinpath.c:758758add_path(joinrel,(Path*)(gdb)776}(gdb)
回到hash_inner_and_outer,继续循环
(gdb)hash_inner_and_outer(root=0x2676078,joinrel=0x26d2bc0,outerrel=0x26814e0,innerrel=0x2682a10,jointype=JOIN_INNER,extra=0x7ffd6ea6b9d0)atjoinpath.c:18051805foreach(lc1,outerrel->cheapest_parameterized_paths)
结束函数调用
1904}(gdb)add_paths_to_joinrel(root=0x2676078,joinrel=0x26d2bc0,outerrel=0x26814e0,innerrel=0x2682a10,jointype=JOIN_INNER,sjinfo=0x7ffd6ea6bac0,restrictlist=0x26d3098)atjoinpath.c:315315if(joinrel->fdwroutine&&(gdb)p*joinrel->pathlist$11={type=T_List,length=2,head=0x26d4160,tail=0x26d3e30}
查看joinrel的路径链表
(gdb)p*(Node*)joinrel->pathlist->head->data.ptr_value$12={type=T_HashPath}(gdb)p*(Node*)joinrel->pathlist->head->next->data.ptr_value$13={type=T_MergePath}(gdb)p*(HashPath*)joinrel->pathlist->head->data.ptr_value$14={jpath={path={type=T_HashPath,pathtype=T_HashJoin,parent=0x26d2bc0,pathtarget=0x26d2df8,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=100000,startup_cost=3465,total_cost=5386,pathkeys=0x0},jointype=JOIN_INNER,inner_unique=false,outerjoinpath=0x26853b8,innerjoinpath=0x26cf610,joinrestrictinfo=0x26d3098},path_hashclauses=0x26d4090,num_batches=2,inner_rows_total=100000}(gdb)p*(MergePath*)joinrel->pathlist->head->next->data.ptr_value$15={jpath={path={type=T_MergePath,pathtype=T_MergeJoin,parent=0x26d2bc0,pathtarget=0x26d2df8,param_info=0x0,parallel_aware=false,parallel_safe=true,parallel_workers=0,rows=100000,startup_cost=10035.66023721841,total_cost=11955.396048959938,pathkeys=0x2685928},jointype=JOIN_INNER,inner_unique=false,outerjoinpath=0x26ce070,innerjoinpath=0x26cf610,joinrestrictinfo=0x26d3098},path_mergeclauses=0x26d3eb8,outersortkeys=0x0,innersortkeys=0x26d3f18,skip_mark_restore=false,materialize_inner=false}
感谢各位的阅读,以上就是“PostgreSQL中hash_inner_and_outer函数分析”的内容了,经过本文的学习后,相信大家对PostgreSQL中hash_inner_and_outer函数分析这一问题有了更深刻的体会,具体使用情况还需要大家实践验证。这里是亿速云,小编将为大家推送更多相关知识点的文章,欢迎关注!
声明:本站所有文章资源内容,如无特殊说明或标注,均为采集网络资源。如若本站内容侵犯了原著者的合法权益,可联系本站删除。