PostgreSQL中Old Master节点分析
本篇内容介绍了“PostgreSQL中Old Master节点分析”的有关知识,在实际案例的操作过程中,不少人都会遇到这样的困境,接下来就让小编带领大家学习一下如何处理这些情况吧!希望大家仔细阅读,能够学有所成!
基于streaming replication搭建的PostgreSQL HA环境,如出现网络访问/硬件故障等原因导致Standby节点升级为Master节点,但Old Master节点数据库并未损坏,在排除故障后Old Master节点可以通过pg_rewind工具而不需要通过备份的方式成为New Master节点的Standby节点.
在执行命令pg_rewind时,到底做了什么?
在PostgreSQL HA环境中,Standby节点升级为Master节点后,时间线会切换为新的时间线,比如从1变为2.而Old Master节点的时间线仍然为原来的时间线,比如仍为1,那么使用pg_rewind工具,Old Master节点如何从New Master节点读取相关的数据成为新的Standby节点?
简单来说,有以下几步:
1.确定New Master和Old Master数据一致性的Checkpoint位置.在该位置上,New Master和Old Master数据完全一致.这可以通过读取New Master和Old Master节点的时间线历史文件获得,该文件位于$PGDATA/pg_wal/目录下,文件名称为XX.history(XX为时间线ID,如00000002.history).
2.Old Master节点根据上一步获取的Checkpoint读取本机日志文件WAL Record,获取在此Checkpoint之后出现变化的Block,并以链表的方式存储Block编号等信息
3.根据第2步获取的Block信息从New Master节点拷贝相应的Block,替换Old Master节点相应的Block
4.拷贝New Master节点上除数据文件外的所有其他文件,包括配置文件等(如果拷贝数据文件,与备份方式搭建区别不大)
5.Old Master启动数据库,应用从Checkpoint开始后的WAL Record.
在执行主备切换后,New Master节点的时间线切换为n + 1,通过pg_rewind可使Old Master在分叉点开始与New Master同步,成为New Standby节点.
XLogRecPtr
64bit的WAL Record寻址空间地址.
/**PointertoalocationintheXLOG.Thesepointersare64bitswide,*becausewedon'twantthemevertooverflow.*指向XLOG中的位置.*这些指针大小为64bit,以确保指针不会溢出.*/typedefuint64XLogRecPtr;
TimeLineID
时间线ID
typedef uint32 TimeLineID;
二、源码解读
pg_rewind的源码较为简单,详细请参考注释.
intmain(intargc,char**argv){staticstructoptionlong_options[]={{"help",no_argument,NULL,'?'},{"target-pgdata",required_argument,NULL,'D'},{"source-pgdata",required_argument,NULL,1},{"source-server",required_argument,NULL,2},{"version",no_argument,NULL,'V'},{"dry-run",no_argument,NULL,'n'},{"no-sync",no_argument,NULL,'N'},{"progress",no_argument,NULL,'P'},{"debug",no_argument,NULL,3},{NULL,0,NULL,0}};//命令选项intoption_index;//选项编号intc;//字符ASCII码XLogRecPtrdivergerec;//分支点intlastcommontliIndex;XLogRecPtrchkptrec;//checkpointRecord位置TimeLineIDchkpttli;//时间线XLogRecPtrchkptredo;checkpointREDO位置size_tsize;char*buffer;//缓冲区boolrewind_needed;//是否需要rewindXLogRecPtrendrec;//结束点TimeLineIDendtli;//结束时间线ControlFileDataControlFile_new;//新的控制文件set_pglocale_pgservice(argv[0],PG_TEXTDOMAIN("pg_rewind"));progname=get_progname(argv[0]);/*Processcommand-linearguments*///处理命令行参数if(argc>1){if(strcmp(argv[1],"--help")==0||strcmp(argv[1],"-?")==0){usage(progname);exit(0);}if(strcmp(argv[1],"--version")==0||strcmp(argv[1],"-V")==0){puts("pg_rewind(PostgreSQL)"PG_VERSION);exit(0);}}while((c=getopt_long(argc,argv,"D:nNP",long_options,&option_index))!=-1){switch(c){case'?':fprintf(stderr,_("Try\"%s--help\"formoreinformation.\n"),progname);exit(1);case'P':showprogress=true;break;case'n':dry_run=true;break;case'N':do_sync=false;break;case3:debug=true;break;case'D':/*-Dor--target-pgdata*/datadir_target=pg_strdup(optarg);break;case1:/*--source-pgdata*/datadir_source=pg_strdup(optarg);break;case2:/*--source-server*/connstr_source=pg_strdup(optarg);break;}}if(datadir_source==NULL&&connstr_source==NULL){fprintf(stderr,_("%s:nosourcespecified(--source-pgdataor--source-server)\n"),progname);fprintf(stderr,_("Try\"%s--help\"formoreinformation.\n"),progname);exit(1);}if(datadir_source!=NULL&&connstr_source!=NULL){fprintf(stderr,_("%s:onlyoneof--source-pgdataor--source-servercanbespecified\n"),progname);fprintf(stderr,_("Try\"%s--help\"formoreinformation.\n"),progname);exit(1);}if(datadir_target==NULL){fprint
f(stderr,_("%s:notargetdatadirectoryspecified(--target-pgdata)\n"),progname);fprintf(stderr,_("Try\"%s--help\"formoreinformation.\n"),progname);exit(1);}if(optind<argc){fprintf(stderr,_("%s:toomanycommand-linearguments(firstis\"%s\")\n"),progname,argv[optind]);fprintf(stderr,_("Try\"%s--help\"formoreinformation.\n"),progname);exit(1);}/**Don'tallowpg_rewindtoberunasroot,toavoidoverwritingthe*ownershipoffilesinthedatadirectory.Weneedonlycheckforroot*--anyotheruserwon'thavesufficientpermissionstomodifyfilesin*thedatadirectory.*不需要以root用户运行pg_rewind,避免覆盖数据目录中的文件owner.*只需要检查root用户,其他用户没有足够的权限更新数据目录中的文件.*/#ifndefWIN32if(geteuid()==0){//root用户fprintf(stderr,_("cannotbeexecutedby\"root\"\n"));fprintf(stderr,_("Youmustrun%sasthePostgreSQLsuperuser.\n"),progname);exit(1);}#endifget_restricted_token(progname);/*SetmaskbasedonPGDATApermissions*///根据PGDATA的权限设置权限maskif(!GetDataDirectoryCreatePerm(datadir_target)){fprintf(stderr,_("%s:couldnotreadpermissionsofdirectory\"%s\":%s\n"),progname,datadir_target,strerror(errno));exit(1);}umask(pg_mode_mask);/*Connecttoremoteserver*///连接到远程服务器if(connstr_source)libpqConnect(connstr_source);/**Ok,wehavealltheoptionsandwe'rereadytostart.Readinallthe*informationweneedfrombothclusters.*现在,我们有了相关的执行运行,准备开始运行.*从两个dbclusters中读取所有需要的信息.*///读取目标控制文件buffer=slurpFile(datadir_target,"global/pg_control",&size);digestControlFile(&ControlFile_target,buffer,size);pg_free(buffer);//读取源控制文件buffer=fetchFile("global/pg_control",&size);digestControlFile(&ControlFile_source,buffer,size);pg_free(buffer);sanityChecks();/**Ifbothclustersarealreadyonthesametimeline,there'snothingto*do.*如果两个clusters已经是同一个时间线,没有什么好做的了,报错.*/if(ControlFile_target.checkPointCopy.ThisTimeLineID==ControlFile_source.checkPointCopy.ThisTimeLineID){printf(_("sourceandtargetclusterareonthesametimeline\n"));rewind_needed=false;}else{//找到分叉点findCommonAncestorTimeline(&divergerec,&lastcommontliIndex);printf(_("serversdivergedatWALlocation%X/%Xontimeline%u\n"),(uint32)(divergerec>>32),(uint32)d
ivergerec,targetHistory[lastcommontliIndex].tli);/**Checkforthepossibilitythatthetargetisinfactadirect*ancestorofthesource.Inthatcase,thereisnodivergenthistory*inthetargetthatneedsrewinding.*检查目标是源的直接祖先的可能性.*在这种情况下,在需要调整的目标中就没有不同的历史.*/if(ControlFile_target.checkPoint>=divergerec){//如果目标的checkpoint>分叉点,则需要rewindrewind_needed=true;}else{//目标的checkpoint<=分叉点XLogRecPtrchkptendrec;/*Readthecheckpointrecordonthetargettoseewhereitends.*///读取目标的checkpoint记录,检查在哪结束?chkptendrec=readOneRecord(datadir_target,ControlFile_target.checkPoint,targetNentries-1);/**Ifthehistoriesdivergedexactlyattheendoftheshutdown*checkpointrecordonthetarget,therearenoWALrecordsin*thetargetthatdon'tbelonginthesource'shistory,andno*rewindisneeded.*如果正好在shutdowncheckpointRecord处出现分叉,*那么在目标cluster中没有WALRecord属于源cluster历史,*不需要进行rewind操作,否则需要rewind.*/if(chkptendrec==divergerec)rewind_needed=false;elserewind_needed=true;}}if(!rewind_needed){//不需要rewind,退出printf(_("norewindrequired\n"));exit(0);}//找到目标cluster最后的checkpoint点findLastCheckpoint(datadir_target,divergerec,lastcommontliIndex,&chkptrec,&chkpttli,&chkptredo);printf(_("rewindingfromlastcommoncheckpointat%X/%Xontimeline%u\n"),(uint32)(chkptrec>>32),(uint32)chkptrec,chkpttli);/**Buildthefilemap,bycomparingthesourceandtargetdatadirectories.*通过对比源和目标数据目录构建filemap*///创建filemapfilemap_create();pg_log(PG_PROGRESS,"readingsourcefilelist\n");fetchSourceFileList();pg_log(PG_PROGRESS,"readingtargetfilelist\n");traverse_datadir(datadir_target,&process_target_file);/**ReadthetargetWALfromlastcheckpointbeforethepointoffork,to*extractallthepagesthatweremodifiedonthetargetclusterafter*thefork.Wecanstopreadingafterreachingthefinalshutdownrecord.*XXX:Ifwesupportedrewindingaserverthatwasnotshutdowncleanly,*wewouldneedtoreplayuntiltheendofWALhere.*从在分叉点之前的最后一个checkpoint开始读取目标WALRecord,*提取目标cluster上在分叉后所有被修改的pages.*在到达最后一个shutdownrecord时停止读取.*XXX:如果我们支持非正常关闭的数据库rewind,需要在这里重放WALRecord到WAL的末尾.*///构造filemappg_log(PG_PROGRESS,"readingWALintarget\n");extractPageMap(datadir_t
arget,chkptrec,lastcommontliIndex,ControlFile_target.checkPoint);filemap_finalize();if(showprogress)calculate_totals();/*thisistooverboseevenforverbosemode*///如为debug模式,则打印filemapif(debug)print_filemap();/**Ok,we'rereadytostartcopyingthingsover.*现在可以开始拷贝了.*/if(showprogress){pg_log(PG_PROGRESS,"needtocopy%luMB(totalsourcedirectorysizeis%luMB)\n",(unsignedlong)(filemap->fetch_size/(1024*1024)),(unsignedlong)(filemap->total_size/(1024*1024)));fetch_size=filemap->fetch_size;fetch_done=0;}/**Thisisthepointofnoreturn.Oncewestartcopyingthings,wehave*modifiedthetargetdirectoryandthereisnoturningback!*到了这里,已无回头路可走了.*一旦开始拷贝,就必须更新目标路径,无法回头!*///executeFileMap();progress_report(true);//创建backup_label文件并更新控制文件pg_log(PG_PROGRESS,"\ncreatingbackuplabelandupdatingcontrolfile\n");createBackupLabel(chkptredo,chkpttli,chkptrec);/**Updatecontrolfileoftarget.Makeitreadytoperformarchive*recoverywhenrestarting.*更新目标控制文件.在重启时可执行归档恢复.**minRecoveryPointissettothecurrentWALinsertlocationinthe*sourceserver.Likeinanonlinebackup,it'simportantthatwerecover*alltheWALthatwasgeneratedwhilewecopiedthefilesover.*minRecoveryPoint设置为目标服务器上当前WAL插入的位置.*与在线backup类似,在拷贝和覆盖文件时根据所有生成的WAL日志进行恢复是很重要的.*///更新控制文件memcpy(&ControlFile_new,&ControlFile_source,sizeof(ControlFileData));if(connstr_source){//获取源WAL插入的位置endrec=libpqGetCurrentXlogInsertLocation();//获取时间线endtli=ControlFile_source.checkPointCopy.ThisTimeLineID;}else{endrec=ControlFile_source.checkPoint;endtli=ControlFile_source.checkPointCopy.ThisTimeLineID;}//更新控制文件ControlFile_new.minRecoveryPoint=endrec;ControlFile_new.minRecoveryPointTLI=endtli;ControlFile_new.state=DB_IN_ARCHIVE_RECOVERY;update_controlfile(datadir_target,progname,&ControlFile_new,do_sync);pg_log(PG_PROGRESS,"syncingtargetdatadirectory\n");//同步数据目录(除数据文件之外)syncTargetDirectory();printf(_("Done!\n"));return0;}
“PostgreSQL中Old Master节点分析”的内容就介绍到这里了,感谢大家的阅读。如果想了解更多行业相关的知识可以关注亿速云网站,小编将为大家输出更多高质量的实用文章!
声明:本站所有文章资源内容,如无特殊说明或标注,均为采集网络资源。如若本站内容侵犯了原著者的合法权益,可联系本站删除。