这篇文章主要讲解了“PostgreSQL绍聚合函数实现中怎么使用的simplehash”,文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习“PostgreSQL绍聚合函数实现中怎么使用的simplehash”吧!

//src/backend/executor/execGrouping.c#defineSH_HASH_KEY(tb,key)TupleHashTableHash(tb,key)//SH_HASH_KEY-->TupleHashTableHash#defineSH_EQUAL(tb,a,b)TupleHashTableMatch(tb,a,b)==0//SH_EQUAL-->TupleHashTableMatch一、数据结构

TupleHashTable
哈希表定义

typedefstructTupleHashTableData*TupleHashTable;typedefstructTupleHashTableData{//底层Hash表tuplehash_hash*hashtab;/*underlyinghashtable*///在检索键中的列数intnumCols;/*numberofcolumnsinlookupkey*///键列中的属性格式AttrNumber*keyColIdx;/*attrnumbersofkeycolumns*///数据类型的哈希函数FmgrInfo*tab_hash_funcs;/*hashfunctionsfortabledatatype(s)*///数据类型比较器ExprState*tab_eq_func;/*comparatorfortabledatatype(s)*///包含数据表的内存上下文MemoryContexttablecxt;/*memorycontextcontainingtable*///函数解析上下文MemoryContexttempcxt;/*contextforfunctionevaluations*///构造每个哈希条目的实际大小Sizeentrysize;/*actualsizetomakeeachhashentry*///依赖数据表条目的slotTupleTableSlot*tableslot;/*slotforreferencingtableentries*//*Thefollowingfieldsaresettransientlyforeachtablesearch:*///下面字段为每一个表检索时临时设置//当前输入tupleslotTupleTableSlot*inputslot;/*currentinputtuple'sslot*///输入数据类型的哈希函数FmgrInfo*in_hash_funcs;/*hashfunctionsforinputdatatype(s)*///inputvstable的比较器ExprState*cur_eq_func;/*comparatorforinputvs.table*///哈希函数IVuint32hash_iv;/*hash-functionIV*///表达式上下文ExprContext*exprcontext;/*expressioncontext*/}TupleHashTableData;typedeftuplehash_iteratorTupleHashIterator;/*typedefinitions*///哈希表类型定义typedefstructSH_TYPE//tuplehash_hash{/**Sizeofdata/bucketarray,64bitstohandleUINT32_MAXsizedhash*tables.Notethatthemaximumnumberofelementsislower*(SH_MAX_FILLFACTOR)*数据/桶数组大小,64bit用于处理UINT32_MAX哈希表.*注意元素最大格式小于(SH_MAX_FILLFACTOR)*/uint64size;/*howmanyelementshavevalidcontents*///有多少个元素具有有效内容uint32members;/*maskforbucketandsizecalculations,basedonsize*///基于大小,用于计算桶和大小的掩码uint32sizemask;/*boundaryafterwhichtogrowhashtable*///哈希表增长的阈值uint32grow_threshold;/*hashbuckets*///哈希桶SH_ELEMENT_TYPE*data;/*memorycontexttouseforallocations*///用于分配的内存上下文MemoryContextctx;/*userdefineddata,usefulforcallbacks*///用户自定义的数据,通常用于回调函数void*private_data;}SH_TYPE;//实际是tuplehash_hash

TupleHashEntryData
哈希表条目

typedefstructTupleHashEntryData*TupleHashEntry;typedefstructTupleHashTableData*TupleHashTable;typedefstructTupleHashEntryData{//该组第一个元组的拷贝MinimalTuplefirstTuple;/*copyoffirsttupleinthisgroup*///用户数据void*additional;/*userdata*///状态(见SH_STATUS)uint32status;/*hashstatus*///哈希值(已缓存)uint32hash;/*hashvalue(cached)*/}TupleHashEntryData;typedefenumSH_STATUS{SH_STATUS_EMPTY=0x00,SH_STATUS_IN_USE=0x01}SH_STATUS;

MinimalTuple
最小化的元组定义

/**MinimalTupleisanalternativerepresentationthatisusedfortransient*tuplesinsidetheexecutor,inplaceswheretransactionstatusinformation*isnotrequired,thetuplerowtypeisknown,andshavingoffafewbytes*isworthwhilebecauseweneedtostoremanytuples.Therepresentation*ischosensothattupleaccessroutinescanworkwitheitherfullor*minimaltuplesviaaHeapTupleDatapointerstructure.Theaccessroutines*seenodifference,exceptthattheymustnotaccessthetransactionstatus*ort_ctidfieldsbecausethosearen'tthere.**Forthemostpart,MinimalTuplesshouldbeaccessedviaTupleTableSlot*routines.Theseroutineswillpreventaccesstothe"systemcolumns"*andtherebypreventaccidentaluseofthenonexistentfields.**MinimalTupleDatacontainsalengthword,somepadding,andfieldsmatching*HeapTupleHeaderDatabeginningwitht_infomask2.Thepaddingischosenso*thatoffsetof(t_infomask2)isthesamemoduloMAXIMUM_ALIGNOFinboth*structs.Thismakesdataalignmentrulesequivalentinbothcases.**WhenaminimaltupleisaccessedviaaHeapTupleDatapointer,t_datais*settopointMINIMAL_TUPLE_OFFSETbytesbeforetheactualstartofthe*minimaltuple---thatis,whereafulltuplematchingtheminimaltuple's*datawouldstart.Thistrickiswhatmakesthestructsseemequivalent.**Notethatt_hoffiscomputedthesameasinafulltuple,henceitincludes*theMINIMAL_TUPLE_OFFSETdistance.t_lendoesnotincludethat,however.**MINIMAL_TUPLE_DATA_OFFSETistheoffsettothefirstuseful(non-pad)data*otherthanthelengthword.tuplesort.candtuplestore.cusethistoavoid*writingthepaddingtodisk.*/#defineMINIMAL_TUPLE_OFFSET\((offsetof(HeapTupleHeaderData,t_infomask2)-sizeof(uint32))/MAXIMUM_ALIGNOF*MAXIMUM_ALIGNOF)#defineMINIMAL_TUPLE_PADDING\((offsetof(HeapTupleHeaderData,t_infomask2)-sizeof(uint32))%MAXIMUM_ALIGNOF)#defineMINIMAL_TUPLE_DATA_OFFSET\offsetof(MinimalTupleData,t_infomask2)structMinimalTupleData{uint32t_len;/*actuallengthofminimaltuple*/charmt_padding[MINIMAL_TUPLE_PADDING];/*FieldsbelowheremustmatchHeapTupleHeaderData!*/uint16t_infomask2;/*numberofattributes+variousflags*/uint16t_infomask;/*variousflagbits,seebelow*/uint8t_hoff;/*sizeofheaderincl.bitmap,padding*//*^-23bytes-^*/bits8t_bits[FLEXIBLE_ARRAY_MEMBER];/*bitmapofNULLs*//*MOREDATAFOLLOWSATENDOFSTRUCT*/};/*typedefappearsinhtup.h*/#defineSizeofMinimalTupleHeaderoffsetof(MinimalTupleData,t_bits)typedefstructMinimalTupleDataMinimalTupleData;typedefMinimalTupleData*MinimalTuple;二、源码解读

TupleHashTableHash
TupleHashTableHash用于计算tuple的哈希值(分组列值)

/**Computethehashvalueforatuple*计算tuple的哈希值**Thepassed-inkeyisapointertoTupleHashEntryData.Inanactualhash*tableentry,thefirstTuplefieldpointstoatuple(inMinimalTuple*format).LookupTupleHashEntrysetsupadummyTupleHashEntryDatawitha*NULLfirstTuplefield---thatcuesustolookattheinputslotinstead.*Thisconventionavoidstheneedtomaterializevirtualinputtuplesunless*theyactuallyneedtogetcopiedintothetable.*传入的key是指向TupleHashEntryData结构体的指针.*在实际的哈希表条目中,firstTuple字段指向一个元组(以MinimalTuple格式保存).*LookupTupleHashEntry会使用NULLfirstTuple字段设置一个虚拟的TupleHashEntryData.*---这可以提示我们转而查看inputslot.*这个转换避免了物化虚拟输入元组,除非它们需要实际拷贝到数据表中.**Also,thecallermustselectanappropriatememorycontextforrunning*thehashfunctions.(dynahash.cdoesn'tchangeCurrentMemoryContext.)*同时,调用者必须选择合适的内存上下文用于运行哈希函数.*(dynahash.c不会改变CurrentMemoryContext)*/staticuint32TupleHashTableHash(structtuplehash_hash*tb,constMinimalTupletuple){//Tuple哈希表TupleHashTablehashtable=(TupleHashTable)tb->private_data;//列数intnumCols=hashtable->numCols;//属性编号AttrNumber*keyColIdx=hashtable->keyColIdx;//哈希keyuint32hashkey=hashtable->hash_iv;//元组slotTupleTableSlot*slot;//哈希函数指针FmgrInfo*hashfunctions;inti;if(tuple==NULL)//元组为NULL{/*Processthecurrentinputtupleforthetable*///处理当前输入元组slot=hashtable->inputslot;hashfunctions=hashtable->in_hash_funcs;}else{/**Processatuplealreadystoredinthetable.*处理已存储在数据表中的元组.**(thiscaseneveractuallyoccursduetothewaysimplehash.his*used,asthehash-valueisstoredintheentries)*(这种情况因为simplehash.h的使用,实际上不会发生,因为哈希值存储在条目中)*/slot=hashtable->tableslot;//存储MinimalTupleExecStoreMinimalTuple(tuple,slot,false);hashfunctions=hashtable->tab_hash_funcs;}for(i=0;i<numCols;i++){//-------循环遍历列数//获取属性编号AttrNumberatt=keyColIdx[i];Datumattr;//属性boolisNull;//是否为NULL?/*rotatehashkeyleft1bitateachstep*///每一步向左移动一位hashkey=(hashkey<<1)|((hashkey&0x80000000)?1:0);//获取属性值attr=slot_getattr(slot,att,&isNull);//如为null,则哈希key设置为0if(!isNull)/*treatnullsashavinghashkey0*/{//不为NULLuint32hkey;//调用哈希函数hkey=DatumGetUInt32(FunctionCall1(&hashfunctions[i],attr));hashkey^=hkey;}}/**Thewayhashesarecombinedabove,amongeachotherandwiththeIV,*doesn'tleadtogoodbitperturbation.AstheIV'sgoalistoleadto*achievethat,performaroundofhashingofthecombinedhash-*resultinginnearperfectperturbation.*上面哈希的的组合方式,彼此之间以及与IV的组合方式,都不会导致位扰动.*因为IV存在的目的是实现该目标,执行组合哈希的hashing取整--结果是完美的扰动.*/returnmurmurhash42(hashkey);}

TupleHashTableMatch
TupleHashTableMatch用于判断两个tuples是否匹配(有相同的hash值)

/**Seewhethertwotuples(presumablyofthesamehashvalue)match*检查两个tuples是否匹配(有相同的hash值)**Asabove,thepassedpointersarepointerstoTupleHashEntryData.*如上所述,传入的指针指向TupleHashEntryData*/staticintTupleHashTableMatch(structtuplehash_hash*tb,constMinimalTupletuple1,constMinimalTupletuple2){TupleTableSlot*slot1;TupleTableSlot*slot2;TupleHashTablehashtable=(TupleHashTable)tb->private_data;ExprContext*econtext=hashtable->exprcontext;/**Weassumethatsimplehash.hwillonlyevercalluswiththefirst*argumentbeinganactualtableentry,andthesecondargumentbeing*LookupTupleHashEntry'sdummyTupleHashEntryData.Theotherdirection*couldbesupportedtoo,butisnotcurrentlyrequired.*/Assert(tuple1!=NULL);slot1=hashtable->tableslot;ExecStoreMinimalTuple(tuple1,slot1,false);Assert(tuple2==NULL);slot2=hashtable->inputslot;/*Forcrosstypecomparisons,theinputslotmustbefirst*/econtext->ecxt_innertuple=slot2;econtext->ecxt_outertuple=slot1;return!ExecQualAndReset(hashtable->cur_eq_func,econtext);}三、跟踪分析

测试脚本

--禁用并行setmax_parallel_workers_per_gather=0;selectbh,avg(c1),min(c1),max(c2)fromt_agg_simplegroupbybh;

跟踪分析

(gdb)bTupleHashTableHashBreakpoint1at0x6d3b2b:fileexecGrouping.c,line379.(gdb)bTupleHashTableMatchBreakpoint2at0x6d3c79:fileexecGrouping.c,line446.(gdb)(gdb)cContinuing.Breakpoint1,TupleHashTableHash(tb=0x2dd2720,tuple=0x0)atexecGrouping.c:379379TupleHashTablehashtable=(TupleHashTable)tb->private_data;(gdb)

输入参数

(gdb)p*tb$1={size=256,members=0,sizemask=255,grow_threshold=230,data=0x2ddca00,ctx=0x2db5310,private_data=0x2dd2890}(gdb)p*tb->data$2={firstTuple=0x0,additional=0x0,status=0,hash=0}

获取分组列数

(gdb)n380intnumCols=hashtable->numCols;(gdb)p*hashtable$3={hashtab=0x2dd2720,numCols=1,keyColIdx=0x2dd2680,tab_hash_funcs=0x2db72d0,tab_eq_func=0x2ddea18,tablecxt=0x2dcc370,tempcxt=0x2db7320,entrysize=24,tableslot=0x2dd2928,inputslot=0x2db7238,in_hash_funcs=0x2db72d0,cur_eq_func=0x2ddea18,hash_iv=0,exprcontext=0x2ddf338}(gdb)ptb->private_data$4=(void*)0x2dd2890

获取分组列属性编号

(gdb)n381AttrNumber*keyColIdx=hashtable->keyColIdx;(gdb)382uint32hashkey=hashtable->hash_iv;(gdb)p*keyColIdx$5=1

如输入tuple为NULL,设置slot和哈希函数

(gdb)n387if(tuple==NULL)(gdb)phashkey$6=0(gdb)n390slot=hashtable->inputslot;(gdb)391hashfunctions=hashtable->in_hash_funcs;

开始遍历分组列
获取hashkey

(gdb)n406for(i=0;i<numCols;i++)(gdb)pnumCols$8=1(gdb)n408AttrNumberatt=keyColIdx[i];(gdb)413hashkey=(hashkey<<1)|((hashkey&0x80000000)?1:0);(gdb)patt$9=1(gdb)phashkey$10=0

获取属性值

(gdb)n415attr=slot_getattr(slot,att,&isNull);(gdb)phashkey$11=0(gdb)n417if(!isNull)/*treatnullsashavinghashkey0*/(gdb)pattr$12=140535426168416(gdb)x\16cattrInvalidcharacter'\'inexpression.(gdb)x/16cattr0x7fd0f427b660:11'\v'71'G'90'Z'48'0'49'1'0'\000'0'\000'0'\000'0x7fd0f427b668:1'\001'0'\000'0'\000'0'\000'1'\001'0'\000'0'\000'0'\000'

计算哈希值

(gdb)n421hkey=DatumGetUInt32(FunctionCall1(&hashfunctions[i],(gdb)phashfunctions[i]$13={fn_addr=0x4c8a31<hashtext>,fn_oid=400,fn_nargs=1,fn_strict=true,fn_retset=false,fn_stats=2'\002',fn_extra=0x0,fn_mcxt=0x2db5310,fn_expr=0x0}(gdb)pi$14=0(gdb)n423hashkey^=hkey;(gdb)phkey$15=3431319292(gdb)n406for(i=0;i<numCols;i++)(gdb)phashkey$16=3431319292

返回结果

(gdb)n433returnmurmurhash42(hashkey);(gdb)pmurmurhash42(hashkey)$17=443809650(gdb)n434}(gdb)tuplehash_insert(tb=0x2dd2720,key=0x0,found=0x7fff585be487)at../../../src/include/lib/simplehash.h:497497insertdist=0;(gdb)

TupleHashTableMatch
进入TupleHashTableMatch

(gdb)cContinuing.Breakpoint2,TupleHashTableMatch(tb=0x2dd2720,tuple1=0x2dcc488,tuple2=0x0)atexecGrouping.c:446446TupleHashTablehashtable=(TupleHashTable)tb->private_data;(gdb)

输入参数

(gdb)p*tb$18={size=256,members=1,sizemask=255,grow_threshold=230,data=0x2ddca00,ctx=0x2db5310,private_data=0x2dd2890}(gdb)p*tuple1$19={t_len=21,mt_padding="\000\000\000\000\000",t_infomask2=1,t_infomask=2,t_hoff=24'\030',t_bits=0x2dcc497""}

对比是否匹配

(gdb)n447ExprContext*econtext=hashtable->exprcontext;(gdb)455Assert(tuple1!=NULL);(gdb)456slot1=hashtable->tableslot;(gdb)457ExecStoreMinimalTuple(tuple1,slot1,false);(gdb)458Assert(tuple2==NULL);(gdb)459slot2=hashtable->inputslot;(gdb)462econtext->ecxt_innertuple=slot2;(gdb)463econtext->ecxt_outertuple=slot1;(gdb)464return!ExecQualAndReset(hashtable->cur_eq_func,econtext);(gdb)phashtable->cur_eq_func$20=(ExprState*)0x2ddea18(gdb)p*hashtable->cur_eq_func$21={tag={type=T_ExprState},flags=7'\a',resnull=false,resvalue=0,resultslot=0x0,steps=0x2ddeab0,evalfunc=0x6cd882<ExecInterpExprStillValid>,expr=0x0,evalfunc_private=0x6cb43e<ExecInterpExpr>,steps_len=7,steps_alloc=16,parent=0x0,ext_params=0x0,innermost_caseval=0x0,innermost_casenull=0x0,innermost_domainval=0x0,innermost_domainnull=0x0}

返回值

$22=true(gdb)n465}(gdb)tuplehash_insert(tb=0x2dd2720,key=0x0,found=0x7fff585be487)at../../../src/include/lib/simplehash.h:556556Assert(entry->status==SH_STATUS_IN_USE);(gdb)

感谢各位的阅读,以上就是“PostgreSQL绍聚合函数实现中怎么使用的simplehash”的内容了,经过本文的学习后,相信大家对PostgreSQL绍聚合函数实现中怎么使用的simplehash这一问题有了更深刻的体会,具体使用情况还需要大家实践验证。这里是亿速云,小编将为大家推送更多相关知识点的文章,欢迎关注!