/*
 * Provenance: retrieved from the Gitee repository HighgoSoftware/hg_repmgr,
 * file repmgr-action-standby.c (205.10 KB).
 * Last commit shown: movead, 2020-04-20 09:32, "change some built item".
 *
 * (Web-page navigation text and the line-number gutter from the page
 * export were removed; they were not part of the source file.)
 */
/*
* repmgr-action-standby.c
*
* Implements standby actions for the repmgr command line utility
*
* Copyright (c) 2009-2020, HighGo Software Co.,Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include <sys/stat.h>
#include "repmgr.h"
#include "dirutil.h"
#include "compat.h"
#include "controldata.h"
#include "repmgr-client-global.h"
#include "repmgr-action-standby.h"
/*
 * One cell of a singly linked list of tablespace entries (see
 * TablespaceDataList, which keeps pointers to cells of this type).
 *
 * Each cell carries a tablespace's name, OID and location as C strings,
 * plus an optional FILE handle.
 *
 * NOTE(review): who allocates/frees the strings and who opens/closes "f"
 * is not visible in this section - confirm against the code that appends
 * to and tears down the list.
 */
typedef struct TablespaceDataListCell
{
/* link to the following cell in the list */
struct TablespaceDataListCell *next;
/* tablespace name */
char *name;
/* tablespace OID, kept in string form */
char *oid;
/* tablespace location (presumably a filesystem path - TODO confirm) */
char *location;
/* optional payload */
FILE *f;
} TablespaceDataListCell;
/*
 * Container for a linked list of TablespaceDataListCell entries.
 *
 * Holds a pointer to the first cell (head) and to the last cell (tail).
 * NOTE(review): presumably both are NULL for an empty list and "tail" is
 * kept so new cells can be appended without walking the list - verify
 * against tablespace_data_append().
 */
typedef struct TablespaceDataList
{
TablespaceDataListCell *head;
TablespaceDataListCell *tail;
} TablespaceDataList;
/*
 * File-scope state. Everything here is "static", i.e. private to this
 * translation unit.
 *
 * NOTE(review): the code that populates and consumes these variables lies
 * outside this section; the short descriptions below are inferred from
 * names, types and initial values only and should be confirmed.
 */
/* database connection handles; both start out unset (NULL) */
static PGconn *primary_conn = NULL;
static PGconn *source_conn = NULL;
/* local data directory path (initially empty) and whether one was provided */
static char local_data_directory[MAXPGPATH] = "";
static bool local_data_directory_provided = false;
/* upstream node details; the node id starts as UNKNOWN_NODE_ID */
static bool upstream_conninfo_found = false;
static int upstream_node_id = UNKNOWN_NODE_ID;
static char upstream_data_directory[MAXPGPATH] = "";
/* recovery connection info, as a parameter list and as a string buffer */
static t_conninfo_param_list recovery_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
static char recovery_conninfo_str[MAXLEN] = "";
/* upstream user names, sized to NAMEDATALEN */
static char upstream_repluser[NAMEDATALEN] = "";
static char upstream_user[NAMEDATALEN] = "";
/* source server version number; starts as UNKNOWN_SERVER_VERSION_NUM */
static int source_server_version_num = UNKNOWN_SERVER_VERSION_NUM;
/* list of configuration files, initially empty */
static t_configfile_list config_files = T_CONFIGFILE_LIST_INITIALIZER;
/* clone mode; defaults to pg_basebackup */
static standy_clone_mode mode = pg_basebackup;
/* used by barman mode */
static char local_repmgr_tmp_directory[MAXPGPATH] = "";
static char datadir_list_filename[MAXLEN] = "";
static char barman_command_buf[MAXLEN] = "";
/*
 * Forward declarations of helper functions private to this file (all are
 * declared "static"). Their definitions are not part of this section.
 */
static void _do_standby_promote_internal(PGconn *conn, int server_version_num);
static void _do_create_recovery_conf(void);
static void check_barman_config(void);
static void check_source_server(void);
static void check_source_server_via_barman(void);
static bool check_upstream_config(PGconn *conn, int server_version_num, t_node_info *node_info, bool exit_on_error);
static void check_primary_standby_version_match(PGconn *conn, PGconn *primary_conn);
static void check_recovery_type(PGconn *conn);
static void initialise_direct_clone(t_node_info *node_record);
static int run_basebackup(t_node_info *node_record);
static int run_file_backup(t_node_info *node_record);
static void copy_configuration_files(bool delete_after_copy);
/* tablespace list helpers (operate on TablespaceDataList) */
static void tablespace_data_append(TablespaceDataList *list, const char *name, const char *oid, const char *location);
static void get_barman_property(char *dst, char *name, char *local_repmgr_directory);
static int get_tablespace_data_barman(char *, TablespaceDataList *);
static char *make_barman_ssh_command(char *buf);
/* recovery configuration output helpers */
static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_conninfo, int server_version_num, char *dest, bool as_file);
static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list);
static bool write_standby_signal(void);
/* parsers that take command output text (a const char * argument) */
static NodeStatus parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint);
static CheckStatus parse_node_check_archiver(const char *node_check_output, int *files, int *threshold);
static ConnectionStatus parse_remote_node_replication_connection(const char *node_check_output);
static bool parse_data_directory_config(const char *node_check_output);
/*
 * STANDBY CLONE
 *
 * Clone a standby, either directly from a source node with pg_basebackup
 * or from a Barman backup, then write the replication configuration
 * ("recovery.conf", or its equivalent from PostgreSQL 12) into the new data
 * directory.
 *
 * With --recovery-conf-only the replication configuration alone is
 * (re)generated and the function returns to the caller. On every other path
 * the function terminates the process via exit().
 *
 * Event(s):
 *  - standby_clone
 *
 * Parameters:
 *  --upstream-conninfo
 *  --upstream-node-id
 *  --no-upstream-connection
 *  -F/--force
 *  --dry-run
 *  -c/--fast-checkpoint
 *  --copy-external-config-files
 *  --recovery-min-apply-delay
 *  --replication-user (only required if no upstream record)
 *  --without-barman
 *  --recovery-conf-only
 */
void
do_standby_clone(void)
{
    PQExpBufferData event_details;
    int         r = 0;

    /* dummy node record */
    t_node_info local_node_record = T_NODE_INFO_INITIALIZER;

    /*
     * conninfo params for the actual upstream node (which might be different
     * to the node we're cloning from) to write to recovery.conf
     */
    initialize_conninfo_params(&recovery_conninfo, false);

    /*
     * --recovery-conf-only provided - we'll handle that separately
     *
     * (ISO C does not permit "return <expression>;" in a void function, so
     * the call and the return are separate statements.)
     */
    if (runtime_options.recovery_conf_only == true)
    {
        _do_create_recovery_conf();
        return;
    }

    mode = get_standby_clone_mode();

    /*
     * Copy the provided data directory; if a configuration file was provided,
     * use the (mandatory) value from that; if -D/--pgdata was provided, use
     * that; otherwise repmgr will default to using the same directory path as
     * on the source host. The last case will only ever occur when executing
     * "repmgr standby clone" with no configuration file.
     *
     * Note that barman mode requires -D/--pgdata.
     *
     * If no data directory is explicitly provided, and we're not cloning from
     * barman, the source host's data directory will be fetched later, after
     * we've connected to it, in check_source_server().
     */
    get_node_data_directory(local_data_directory);

    if (local_data_directory[0] != '\0')
    {
        local_data_directory_provided = true;
        log_notice(_("destination directory \"%s\" provided"),
                   local_data_directory);
    }
    else if (mode == barman)
    {
        /*
         * XXX in Barman mode it's still possible to connect to the upstream,
         * so only fail if that's not available.
         */
        log_error(_("Barman mode requires a data directory"));
        log_hint(_("use -D/--pgdata to explicitly specify a data directory"));
        exit(ERR_BAD_CONFIG);
    }

    /* Sanity-check barman connection and installation */
    if (mode == barman)
    {
        /* this will exit with ERR_BARMAN if problems found */
        check_barman_config();
    }

    init_node_record(&local_node_record);
    local_node_record.type = STANDBY;

    /*
     * Initialise list of conninfo parameters which will later be used to
     * create the `primary_conninfo` recovery parameter.
     *
     * We'll initialise it with the host settings specified on the command
     * line. As it's possible the standby will be cloned from a node different
     * to its intended upstream, we'll later attempt to fetch the upstream
     * node record and overwrite the values set here with those from the
     * upstream node record (excluding that record's application_name)
     */
    copy_conninfo_params(&recovery_conninfo, &source_conninfo);

    /* Set the default application name to this node's name */
    if (config_file_options.node_id != UNKNOWN_NODE_ID)
    {
        char        application_name[MAXLEN] = "";

        param_set(&recovery_conninfo, "application_name", config_file_options.node_name);

        get_conninfo_value(config_file_options.conninfo, "application_name", application_name);

        if (strlen(application_name) && strncmp(application_name, config_file_options.node_name, sizeof(config_file_options.node_name)) != 0)
        {
            log_notice(_("\"application_name\" is set in repmgr.conf but will be replaced by the node name"));
        }
    }
    else
    {
        /*
         * this will only happen in corner cases where the node is being
         * cloned without a configuration file; fall back to "repmgr" if no
         * application_name provided
         */
        char       *application_name = param_get(&source_conninfo, "application_name");

        if (application_name == NULL)
            param_set(&recovery_conninfo, "application_name", "repmgr");
    }

    /*
     * Do some sanity checks on the proposed data directory; if it exists:
     *  - check it's openable
     *  - check if there's an instance running
     *
     * We do this here so the check can be part of a --dry-run.
     */
    switch (check_dir(local_data_directory))
    {
        case DIR_ERROR:
            log_error(_("unable to access specified data directory \"%s\""), local_data_directory);
            log_detail("%s", strerror(errno));
            exit(ERR_BAD_CONFIG);
            break;
        case DIR_NOENT:
            /*
             * directory doesn't exist
             * TODO: in --dry-run mode, attempt to create and delete?
             */
            break;
        case DIR_EMPTY:
            /* Present but empty */
            break;
        case DIR_NOT_EMPTY:
            /* Present but not empty */
            if (is_pg_dir(local_data_directory))
            {
                /* even -F/--force is not enough to overwrite an active directory... */
                if (is_pg_running(local_data_directory))
                {
                    log_error(_("specified data directory \"%s\" appears to contain a running PostgreSQL instance"),
                              local_data_directory);
                    log_hint(_("ensure the target data directory does not contain a running PostgreSQL instance"));
                    exit(ERR_BAD_CONFIG);
                }
            }
            break;
        default:
            break;
    }

    /*
     * By default attempt to connect to the source node. This will fail if no
     * connection is possible, unless in Barman mode, in which case we can
     * fall back to connecting to the source node via Barman.
     */
    if (runtime_options.no_upstream_connection == false)
    {
        /*
         * This connects to the source node and performs sanity checks, also
         * sets "recovery_conninfo_str", "upstream_repluser", "upstream_user" and
         * "upstream_node_id".
         *
         * Will error out if source connection not possible and not in
         * "barman" mode.
         */
        check_source_server();
    }
    else
    {
        upstream_node_id = runtime_options.upstream_node_id;
    }

    /*
     * if --upstream-conninfo was supplied, use that (will overwrite value set
     * by check_source_server(), but that's OK)
     */
    if (runtime_options.upstream_conninfo[0] != '\0')
    {
        /*
         * snprintf() rather than strncpy(): the latter leaves the destination
         * without a terminating NUL if the source fills the whole buffer.
         */
        snprintf(recovery_conninfo_str, sizeof(recovery_conninfo_str),
                 "%s", runtime_options.upstream_conninfo);
        upstream_conninfo_found = true;
    }
    else if (mode == barman && PQstatus(source_conn) != CONNECTION_OK)
    {
        /*
         * Here we don't have a connection to the upstream node (either
         * because --no-upstream-connection was supplied, or
         * check_source_server() was unable to make a connection), and
         * --upstream-conninfo wasn't supplied.
         *
         * As we're executing in Barman mode we can try and connect via the
         * Barman server to extract the upstream node's conninfo string.
         *
         * To do this we need to extract Barman's conninfo string, replace the
         * database name with the repmgr one (they could well be different)
         * and remotely execute psql.
         *
         * This attempts to set "recovery_conninfo_str".
         */
        check_source_server_via_barman();
    }

    if (recovery_conninfo_str[0] == '\0')
    {
        log_error(_("unable to determine a connection string to use as \"primary_conninfo\""));
        log_hint(_("use \"--upstream-conninfo\" to explicitly provide a value for \"primary_conninfo\""));
        if (PQstatus(source_conn) == CONNECTION_OK)
            PQfinish(source_conn);
        exit(ERR_BAD_CONFIG);
    }

    if (upstream_conninfo_found == true)
    {
        /*
         * parse returned upstream conninfo string to recovery
         * primary_conninfo params
         */
        char       *errmsg = NULL;
        bool        parse_success = false;

        log_verbose(LOG_DEBUG, "parsing upstream conninfo string \"%s\"", recovery_conninfo_str);

        /*
         * parse_conninfo_string() here will remove the upstream's
         * `application_name`, if set
         */
        parse_success = parse_conninfo_string(recovery_conninfo_str, &recovery_conninfo, &errmsg, true);

        if (parse_success == false)
        {
            log_error(_("unable to parse conninfo string \"%s\" for upstream node:\n %s"),
                      recovery_conninfo_str, errmsg);
            if (PQstatus(source_conn) == CONNECTION_OK)
                PQfinish(source_conn);
            exit(ERR_BAD_CONFIG);
        }

        if (upstream_repluser[0] != '\0')
        {
            /* Write the replication user from the node's upstream record */
            param_set(&recovery_conninfo, "user", upstream_repluser);
        }
    }
    else
    {
        /*
         * If no upstream node record found, we'll abort with an error here,
         * unless -F/--force is used, in which case we'll use the parameters
         * provided on the command line (and assume the user knows what
         * they're doing).
         */
        if (upstream_node_id == UNKNOWN_NODE_ID)
        {
            log_error(_("unable to determine upstream node"));
            if (PQstatus(source_conn) == CONNECTION_OK)
                PQfinish(source_conn);
            exit(ERR_BAD_CONFIG);
        }

        if (!runtime_options.force)
        {
            log_error(_("no record found for upstream node (upstream_node_id: %i)"),
                      upstream_node_id);
            log_hint(_("use -F/--force to create \"primary_conninfo\" based on command-line parameters"));
            if (PQstatus(source_conn) == CONNECTION_OK)
                PQfinish(source_conn);
            exit(ERR_BAD_CONFIG);
        }
    }

    /*
     * If copying of external configuration files requested, and any are
     * detected, perform sanity checks
     */
    if (PQstatus(source_conn) == CONNECTION_OK && runtime_options.copy_external_config_files == true)
    {
        PGconn     *superuser_conn = NULL;
        PGconn     *privileged_conn = NULL;
        bool        external_config_files = false;
        int         i = 0;

        /*
         * Obtain configuration file locations
         *
         * We'll check to see whether the configuration files are in the data
         * directory - if not we'll have to copy them via SSH, if copying
         * requested.
         *
         * This will require superuser permissions, so we'll attempt to
         * connect as -S/--superuser (if provided), otherwise check the
         * current connection user has superuser rights.
         *
         * XXX: if configuration files are symlinks to targets outside the
         * data directory, they won't be copied by pg_basebackup, but we can't
         * tell this from the below query; we'll probably need to add a check
         * for their presence and if missing force copy by SSH
         */
        get_superuser_connection(&source_conn, &superuser_conn, &privileged_conn);

        if (get_configuration_file_locations(privileged_conn, &config_files) == false)
        {
            log_notice(_("unable to proceed without establishing configuration file locations"));
            PQfinish(source_conn);
            if (superuser_conn != NULL)
                PQfinish(superuser_conn);
            exit(ERR_BAD_CONFIG);
        }

        /* check if any files actually outside the data directory */
        for (i = 0; i < config_files.entries; i++)
        {
            t_configfile_info *file = config_files.files[i];

            if (file->in_data_directory == false)
            {
                external_config_files = true;
                break;
            }
        }

        if (external_config_files == true)
        {
            /* named distinctly so it does not shadow the function-level "r" */
            int         ssh_rc;
            PQExpBufferData msg;

            initPQExpBuffer(&msg);
            appendPQExpBuffer(&msg,
                              _("external configuration files detected, checking SSH connection to host \"%s\""),
                              runtime_options.host);

            if (runtime_options.dry_run == true)
            {
                log_notice("%s", msg.data);
            }
            else
            {
                log_verbose(LOG_INFO, "%s", msg.data);
            }

            termPQExpBuffer(&msg);

            ssh_rc = test_ssh_connection(runtime_options.host, runtime_options.remote_user);

            if (ssh_rc != 0)
            {
                log_error(_("remote host \"%s\" is not reachable via SSH - unable to copy external configuration files"),
                          runtime_options.host);
                if (superuser_conn != NULL)
                    PQfinish(superuser_conn);
                PQfinish(source_conn);
                exit(ERR_BAD_CONFIG);
            }

            initPQExpBuffer(&msg);
            appendPQExpBuffer(&msg,
                              _("SSH connection to host \"%s\" succeeded"),
                              runtime_options.host);

            if (runtime_options.dry_run == true)
            {
                log_info("%s", msg.data);
            }
            else
            {
                log_verbose(LOG_INFO, "%s", msg.data);
            }

            termPQExpBuffer(&msg);

            /*
             * Here we'll attempt an initial test copy of the detected external
             * files, to detect any issues before we run the base backup.
             *
             * Note this will exit with an error, unless -F/--force supplied.
             *
             * We don't do this during a --dry-run as it may introduce unexpected changes
             * on the local node; during an actual clone operation, any problems with
             * copying files will be detected early and the operation aborted before
             * the actual database cloning commences.
             *
             * TODO: put the files in a temporary directory and move to their final
             * destination once the database has been cloned.
             */
            if (runtime_options.dry_run == false)
            {
                if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_SAMEPATH)
                {
                    /*
                     * Files will be placed in the same path as on the source server;
                     * don't delete after copying.
                     */
                    copy_configuration_files(false);
                }
                else
                {
                    /*
                     * Files will be placed in the data directory - delete after copying.
                     * They'll be copied again later; see TODO above.
                     */
                    copy_configuration_files(true);
                }
            }
        }

        if (superuser_conn != NULL)
            PQfinish(superuser_conn);
    }

    if (runtime_options.dry_run == true)
    {
        if (upstream_node_id != UNKNOWN_NODE_ID)
        {
            log_notice(_("standby will attach to upstream node %i"), upstream_node_id);
        }
        else
        {
            log_warning(_("unable to determine a valid upstream node id"));
        }

        if (mode == pg_basebackup && runtime_options.fast_checkpoint == false)
        {
            log_hint(_("consider using the -c/--fast-checkpoint option"));
        }

        log_info(_("all prerequisites for \"standby clone\" are met"));

        PQfinish(source_conn);
        exit(SUCCESS);
    }

    if (mode != barman)
    {
        initialise_direct_clone(&local_node_record);
    }

    switch (mode)
    {
        case pg_basebackup:
            log_notice(_("starting backup (using pg_basebackup)..."));
            break;
        case barman:
            log_notice(_("retrieving backup from Barman..."));
            break;
        default:
            /* should never reach here */
            log_error(_("unknown clone mode"));
    }

    if (mode == pg_basebackup)
    {
        if (runtime_options.fast_checkpoint == false)
        {
            log_hint(_("this may take some time; consider using the -c/--fast-checkpoint option"));
        }
    }

    switch (mode)
    {
        case pg_basebackup:
            r = run_basebackup(&local_node_record);
            break;
        case barman:
            r = run_file_backup(&local_node_record);
            break;
        default:
            /* should never reach here */
            log_error(_("unknown clone mode"));
    }

    /* If the backup failed then exit */
    if (r != SUCCESS)
    {
        /* If a replication slot was previously created, drop it */
        if (config_file_options.use_replication_slots == true)
        {
            drop_replication_slot(source_conn, local_node_record.slot_name);
        }

        log_error(_("unable to take a base backup of the primary server"));
        log_hint(_("data directory (\"%s\") may need to be cleaned up manually"),
                 local_data_directory);

        PQfinish(source_conn);
        exit(r);
    }

    /*
     * If `--copy-external-config-files` was provided, copy any configuration
     * files detected to the appropriate location. Any errors encountered will
     * not be treated as fatal.
     *
     * This won't run in Barman mode as "config_files" is only populated in
     * "initialise_direct_clone()", which isn't called in Barman mode.
     */
    if (runtime_options.copy_external_config_files == true && config_files.entries > 0)
    {
        /*
         * If "--copy-external-config-files=samepath" was used, the files will already
         * have been copied.
         */
        if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_PGDATA)
            copy_configuration_files(false);
    }

    /* Write the recovery.conf file */
    if (create_recovery_file(&local_node_record,
                             &recovery_conninfo,
                             source_server_version_num,
                             local_data_directory,
                             true) == false)
    {
        /* create_recovery_file() will log an error */
        if (source_server_version_num >= 120000)
        {
            log_notice(_("unable to write replication configuration; see preceding error messages"));
        }
        else
        {
            log_notice(_("unable to create recovery.conf; see preceding error messages"));
        }

        log_hint(_("data directory (\"%s\") may need to be cleaned up manually"),
                 local_data_directory);

        PQfinish(source_conn);
        exit(ERR_BAD_CONFIG);
    }

    switch (mode)
    {
        case pg_basebackup:
            log_notice(_("standby clone (using pg_basebackup) complete"));
            break;
        case barman:
            log_notice(_("standby clone (from Barman) complete"));
            break;
        default:
            break;
    }

    /*
     * TODO: It might be nice to provide an option to have repmgr start the
     * PostgreSQL server automatically
     */
    log_notice(_("you can now start your PostgreSQL server"));

    if (config_file_options.service_start_command[0] != '\0')
    {
        log_hint(_("for example: %s"),
                 config_file_options.service_start_command);
    }
    else if (local_data_directory_provided)
    {
        log_hint(_("for example: pg_ctl -D %s start"),
                 local_data_directory);
    }
    else
    {
        log_hint(_("for example: /etc/init.d/postgresql start"));
    }

    /*
     * XXX forgetting to (re) register the standby is a frequent cause of
     * error; we should consider having repmgr automatically register the
     * standby, either by default with an option "--no-register", or an option
     * "--register".
     *
     * Note that "repmgr standby register" requires the standby to be running
     * - if not, and we just update the node record, we'd have an incorrect
     * representation of the replication cluster. Best combined with an
     * automatic start of the server (see note above)
     */

    /*
     * Check for an existing node record, and output the appropriate command
     * for registering or re-registering.
     */
    {
        t_node_info node_record = T_NODE_INFO_INITIALIZER;
        RecordStatus record_status = RECORD_NOT_FOUND;

        record_status = get_node_record(primary_conn,
                                        config_file_options.node_id,
                                        &node_record);

        if (record_status == RECORD_FOUND)
        {
            log_hint(_("after starting the server, you need to re-register this standby with \"repmgr standby register --force\" to update the existing node record"));
        }
        else
        {
            log_hint(_("after starting the server, you need to register this standby with \"repmgr standby register\""));
        }
    }

    /* Log the event */
    initPQExpBuffer(&event_details);

    /* Add details about relevant runtime options used */
    appendPQExpBuffer(&event_details,
                      _("cloned from host \"%s\", port %s"),
                      runtime_options.host,
                      runtime_options.port);

    appendPQExpBufferStr(&event_details,
                         _("; backup method: "));

    switch (mode)
    {
        case pg_basebackup:
            appendPQExpBufferStr(&event_details, "pg_basebackup");
            break;
        case barman:
            appendPQExpBufferStr(&event_details, "barman");
            break;
        default:
            break;
    }

    appendPQExpBuffer(&event_details,
                      _("; --force: %s"),
                      runtime_options.force ? "Y" : "N");

    create_event_notification(primary_conn,
                              &config_file_options,
                              config_file_options.node_id,
                              "standby_clone",
                              true,
                              event_details.data);

    /* the event text has been handed over; release the buffer */
    termPQExpBuffer(&event_details);

    if (primary_conn != source_conn && PQstatus(primary_conn) == CONNECTION_OK)
        PQfinish(primary_conn);

    if (PQstatus(source_conn) == CONNECTION_OK)
        PQfinish(source_conn);

    exit(r);
}
/*
 * check_barman_config()
 *
 * Sanity checks carried out before cloning from Barman:
 *
 *  - confirm the Barman catalogue holds a backup for the configured server
 *  - prepare the local data directory and a "repmgr" subdirectory within it
 *  - store the output of "barman show-server" in that subdirectory
 *
 * Terminates the process with ERR_BARMAN if one of the Barman commands
 * fails, or with ERR_BAD_CONFIG if a directory cannot be prepared.
 */
void
check_barman_config(void)
{
    char        barman_cmd[MAXLEN];

    /*
     * Check that there is at least one valid backup
     */
    log_info(_("connecting to Barman server to verify backup for %s"), config_file_options.barman_server);

    maxlen_snprintf(barman_cmd, "%s show-backup %s latest > /dev/null",
                    make_barman_ssh_command(barman_command_buf),
                    config_file_options.barman_server);

    if (local_command(barman_cmd, NULL) == false)
    {
        log_error(_("no valid backup for server %s was found in the Barman catalogue"),
                  config_file_options.barman_server);
        log_hint(_("refer to the Barman documentation for more information"));
        exit(ERR_BARMAN);
    }

    /* the data directory itself must be usable (or overwritable with --force) */
    if (create_pg_dir(local_data_directory, runtime_options.force) == false)
    {
        log_error(_("unable to use directory %s"),
                  local_data_directory);
        log_hint(_("use -F/--force option to force this directory to be overwritten"));
        exit(ERR_BAD_CONFIG);
    }

    /*
     * Work out the location of the local repmgr subdirectory, and of the
     * data directory list file within it, then create the subdirectory
     */
    maxlen_snprintf(local_repmgr_tmp_directory,
                    "%s/repmgr", local_data_directory);

    maxlen_snprintf(datadir_list_filename,
                    "%s/data.txt", local_repmgr_tmp_directory);

    if (create_pg_dir(local_repmgr_tmp_directory, runtime_options.force) == false)
    {
        log_error(_("unable to create directory \"%s\""),
                  local_repmgr_tmp_directory);
        exit(ERR_BAD_CONFIG);
    }

    /*
     * Retrieve the server parameters from Barman and keep them in the
     * repmgr subdirectory
     */
    log_info(_("connecting to Barman server to fetch server parameters"));

    maxlen_snprintf(barman_cmd, "%s show-server %s > %s/show-server.txt",
                    make_barman_ssh_command(barman_command_buf),
                    config_file_options.barman_server,
                    local_repmgr_tmp_directory);

    if (local_command(barman_cmd, NULL) == false)
    {
        log_error(_("unable to fetch server parameters from Barman server"));
        exit(ERR_BARMAN);
    }
}
/*
 * _do_create_recovery_conf()
 *
 * Create recovery.conf (PostgreSQL 11 and earlier), or write the equivalent
 * replication configuration plus "standby.signal" (PostgreSQL 12 and later),
 * for a previously cloned instance.
 *
 * Prerequisites:
 *
 *  - data directory must be provided
 *  - the instance should not be running (override with -F/--force)
 *  - an existing "recovery.conf" file can only be overwritten with
 *    -F/--force
 *  - connection parameters for an existing, running node must be provided
 *  - the node identified by --upstream-node-id, if provided, is used to
 *    build "primary_conninfo", otherwise the primary node is used; a record
 *    for the node must exist, and unless -F/--force is provided, a
 *    connection to it must be possible
 *  - if replication slots are in use and no slot exists for this node,
 *    one is created (respects --dry-run)
 *
 * Not compatible with --no-upstream-connection.
 *
 * Fatal problems terminate the process with ERR_BAD_CONFIG; otherwise the
 * function returns normally.
 */
static void
_do_create_recovery_conf(void)
{
    t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
    t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
    RecordStatus record_status = RECORD_NOT_FOUND;
    char        recovery_file_path[MAXPGPATH] = "";
    struct stat st;
    bool        node_is_running = false;
    bool        slot_creation_required = false;
    int         upstream_version_num = 0;
    PGconn     *upstream_conn = NULL;
    PGconn     *upstream_repl_conn = NULL;

    get_node_data_directory(local_data_directory);

    if (local_data_directory[0] == '\0')
    {
        log_error(_("no data directory provided"));
        log_hint(_("provide the node's \"repmgr.conf\" file with -f/--config-file or the data directory with -D/--pgdata"));
        exit(ERR_BAD_CONFIG);
    }

    /*
     * Do some sanity checks on the data directory to make sure
     * it contains a valid but dormant instance
     */
    switch (check_dir(local_data_directory))
    {
        case DIR_ERROR:
            log_error(_("unable to access specified data directory \"%s\""), local_data_directory);
            log_detail("%s", strerror(errno));
            exit(ERR_BAD_CONFIG);
            break;
        case DIR_NOENT:
            log_error(_("specified data directory \"%s\" does not exist"), local_data_directory);
            exit(ERR_BAD_CONFIG);
            break;
        case DIR_EMPTY:
            log_error(_("specified data directory \"%s\" is empty"), local_data_directory);
            exit(ERR_BAD_CONFIG);
            break;
        case DIR_NOT_EMPTY:
            /* Present but not empty */
            if (!is_pg_dir(local_data_directory))
            {
                log_error(_("specified data directory \"%s\" does not contain a PostgreSQL instance"), local_data_directory);
                exit(ERR_BAD_CONFIG);
            }

            if (is_pg_running(local_data_directory))
            {
                if (runtime_options.force == false)
                {
                    log_error(_("specified data directory \"%s\" appears to contain a running PostgreSQL instance"),
                              local_data_directory);
                    log_hint(_("use -F/--force to create \"recovery.conf\" anyway"));
                    exit(ERR_BAD_CONFIG);
                }

                node_is_running = true;

                if (runtime_options.dry_run == true)
                {
                    log_warning(_("\"recovery.conf\" would be created in an active data directory"));
                }
                else
                {
                    log_warning(_("creating \"recovery.conf\" in an active data directory"));
                }
            }
            break;
        default:
            break;
    }

    /* check connection */
    source_conn = establish_db_connection_by_params(&source_conninfo, true);

    /* determine node for primary_conninfo */
    if (runtime_options.upstream_node_id != UNKNOWN_NODE_ID)
    {
        upstream_node_id = runtime_options.upstream_node_id;
    }
    else
    {
        /* if --upstream-node-id not specifically supplied, get primary node id */
        upstream_node_id = get_primary_node_id(source_conn);

        if (upstream_node_id == NODE_NOT_FOUND)
        {
            log_error(_("unable to determine primary node for this replication cluster"));
            PQfinish(source_conn);
            exit(ERR_BAD_CONFIG);
        }

        log_debug("primary node determined as: %i", upstream_node_id);
    }

    /* attempt to retrieve upstream node record */
    record_status = get_node_record(source_conn,
                                    upstream_node_id,
                                    &upstream_node_record);

    if (record_status != RECORD_FOUND)
    {
        log_error(_("unable to retrieve node record for upstream node %i"), upstream_node_id);

        if (record_status == RECORD_ERROR)
        {
            log_detail("%s", PQerrorMessage(source_conn));
        }

        PQfinish(source_conn);
        exit(ERR_BAD_CONFIG);
    }

    /* attempt to retrieve local node record */
    record_status = get_node_record(source_conn,
                                    config_file_options.node_id,
                                    &local_node_record);

    if (record_status != RECORD_FOUND)
    {
        log_error(_("unable to retrieve node record for local node %i"), config_file_options.node_id);

        if (record_status == RECORD_ERROR)
        {
            log_detail("%s", PQerrorMessage(source_conn));
        }
        else
        {
            log_hint(_("standby must be registered before a new recovery.conf file can be created"));
        }

        PQfinish(source_conn);
        exit(ERR_BAD_CONFIG);
    }

    PQfinish(source_conn);

    /* connect to upstream (which could be different to source) */
    upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);

    if (PQstatus(upstream_conn) != CONNECTION_OK)
    {
        log_error(_("unable to connect to upstream node \"%s\" (ID: %i)"),
                  upstream_node_record.node_name,
                  upstream_node_id);
        /* a failed connection object still has to be released */
        PQfinish(upstream_conn);
        exit(ERR_BAD_CONFIG);
    }

    /* the version is consulted several times below; read it once */
    upstream_version_num = PQserverVersion(upstream_conn);

    /*
     * (Re)initialise the parameter list and populate it from the upstream
     * node's conninfo string. We ignore any application_name set in the
     * primary's conninfo.
     *
     * NOTE(review): the caller has already initialised "recovery_conninfo";
     * the call is retained here in case this function has other callers.
     */
    initialize_conninfo_params(&recovery_conninfo, false);
    parse_conninfo_string(upstream_node_record.conninfo, &recovery_conninfo, NULL, true);

    /* check that a replication connection can be made (--force = override) */
    upstream_repl_conn = establish_db_connection_by_params(&recovery_conninfo, false);

    if (PQstatus(upstream_repl_conn) != CONNECTION_OK && runtime_options.force == false)
    {
        log_error(_("unable to initiate replication connection to upstream node \"%s\" (ID: %i)"),
                  upstream_node_record.node_name,
                  upstream_node_id);
        PQfinish(upstream_repl_conn);
        PQfinish(upstream_conn);
        exit(ERR_BAD_CONFIG);
    }

    /* the test connection has served its purpose, whether it succeeded or not */
    PQfinish(upstream_repl_conn);
    upstream_repl_conn = NULL;

    /*
     * Apply the node-specific settings only now. Setting them before the
     * list is (re)initialised would discard them, as initialisation resets
     * the list. They are applied after the connectivity check so that the
     * check uses exactly the upstream node's own conninfo.
     */

    /* Set the application name to this node's name */
    if (config_file_options.node_name[0] != '\0')
        param_set(&recovery_conninfo, "application_name", config_file_options.node_name);

    /* Set the replication user from the primary node record */
    if (upstream_node_record.repluser[0] != '\0')
        param_set(&recovery_conninfo, "user", upstream_node_record.repluser);

    /* if replication slots are in use, perform some checks */
    if (config_file_options.use_replication_slots == true)
    {
        PQExpBufferData msg;
        t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;

        record_status = get_slot_record(upstream_conn, local_node_record.slot_name, &slot_info);

        /* check if replication slot exists */
        if (record_status == RECORD_FOUND)
        {
            if (slot_info.active == true)
            {
                initPQExpBuffer(&msg);
                appendPQExpBuffer(&msg,
                                  _("an active replication slot named \"%s\" already exists on upstream node \"%s\" (ID: %i)"),
                                  local_node_record.slot_name,
                                  upstream_node_record.node_name,
                                  upstream_node_id);

                if (runtime_options.force == false && runtime_options.dry_run == false)
                {
                    log_error("%s", msg.data);
                    log_hint(_("use -F/--force to continue anyway"));
                    termPQExpBuffer(&msg);
                    PQfinish(upstream_conn);
                    exit(ERR_BAD_CONFIG);
                }

                log_warning("%s", msg.data);
                termPQExpBuffer(&msg);
            }
            else
            {
                log_info(_("an inactive replication slot for this node exists on the upstream node"));
            }
        }
        /* if no slot exists, check whether one can be created */
        else
        {
            get_node_replication_stats(upstream_conn, &upstream_node_record);

            if (upstream_node_record.max_replication_slots > upstream_node_record.total_replication_slots)
            {
                slot_creation_required = true;
            }
            else
            {
                initPQExpBuffer(&msg);
                appendPQExpBuffer(&msg,
                                  _("insufficient free replication slots on upstream node \"%s\" (ID: %i)"),
                                  upstream_node_record.node_name,
                                  upstream_node_id);

                if (runtime_options.force == false && runtime_options.dry_run == false)
                {
                    log_error("%s", msg.data);
                    log_hint(_("use -F/--force to continue anyway"));
                    termPQExpBuffer(&msg);
                    PQfinish(upstream_conn);
                    exit(ERR_BAD_CONFIG);
                }

                log_warning("%s", msg.data);
                termPQExpBuffer(&msg);
            }
        }
    }

    /* check if recovery.conf exists (Pg11 and earlier only) */
    if (upstream_version_num < 120000)
    {
        snprintf(recovery_file_path, sizeof(recovery_file_path),
                 "%s/%s",
                 local_data_directory,
                 RECOVERY_COMMAND_FILE);

        if (stat(recovery_file_path, &st) == -1)
        {
            /* a missing file is the expected case; anything else is fatal */
            if (errno != ENOENT)
            {
                log_error(_("unable to check for existing \"recovery.conf\" file in \"%s\""),
                          local_data_directory);
                log_detail("%s", strerror(errno));
                PQfinish(upstream_conn);
                exit(ERR_BAD_CONFIG);
            }
        }
        else
        {
            if (runtime_options.force == false)
            {
                log_error(_("\"recovery.conf\" already exists in \"%s\""),
                          local_data_directory);
                log_hint(_("use -F/--force to overwrite an existing \"recovery.conf\" file"));
                PQfinish(upstream_conn);
                exit(ERR_BAD_CONFIG);
            }

            if (runtime_options.dry_run == true)
            {
                log_warning(_("the existing \"recovery.conf\" file would be overwritten"));
            }
            else
            {
                log_warning(_("the existing \"recovery.conf\" file will be overwritten"));
            }
        }
    }

    if (runtime_options.dry_run == true)
    {
        /* render the configuration into a buffer and show it, without writing a file */
        char        recovery_conf_contents[MAXLEN] = "";

        create_recovery_file(&local_node_record,
                             &recovery_conninfo,
                             upstream_version_num,
                             recovery_conf_contents, false);

        if (upstream_version_num >= 120000)
        {
            log_info(_("following items would be added to \"postgresql.auto.conf\" in \"%s\""), local_data_directory);
        }
        else
        {
            log_info(_("would create \"recovery.conf\" file in \"%s\""), local_data_directory);
        }

        log_detail(_("\n%s"), recovery_conf_contents);
    }
    else
    {
        /*
         * NOTE(review): a failure here is logged but execution continues
         * with the remaining steps, as it did previously - confirm whether
         * this should be treated as fatal.
         */
        if (!create_recovery_file(&local_node_record,
                                  &recovery_conninfo,
                                  upstream_version_num,
                                  local_data_directory, true))
        {
            if (upstream_version_num >= 120000)
            {
                log_error(_("unable to write replication configuration to \"postgresql.auto.conf\""));
            }
            else
            {
                log_error(_("unable to create \"recovery.conf\""));
            }
        }
        else
        {
            if (upstream_version_num >= 120000)
            {
                log_notice(_("replication configuration written to \"postgresql.auto.conf\""));
            }
            else
            {
                log_notice(_("\"recovery.conf\" created as \"%s\""), recovery_file_path);
            }

            if (node_is_running == true)
            {
                log_hint(_("node must be restarted for the new file to take effect"));
            }
        }
    }

    /* Pg12 and later: add standby.signal, if not already there */
    if (upstream_version_num >= 120000)
    {
        if (runtime_options.dry_run == true)
        {
            log_info(_("would write \"standby.signal\" file"));
        }
        else
        {
            if (write_standby_signal() == false)
            {
                log_error(_("unable to write \"standby.signal\" file"));
            }
        }
    }

    /* add replication slot, if required */
    if (slot_creation_required == true)
    {
        if (runtime_options.dry_run == true)
        {
            log_info(_("would create replication slot \"%s\" on upstream node \"%s\" (ID: %i)"),
                     local_node_record.slot_name,
                     upstream_node_record.node_name,
                     upstream_node_id);
        }
        else
        {
            PQExpBufferData msg;

            initPQExpBuffer(&msg);

            if (create_replication_slot(upstream_conn,
                                        local_node_record.slot_name,
                                        &msg) == false)
            {
                log_error("%s", msg.data);
                PQfinish(upstream_conn);
                termPQExpBuffer(&msg);
                exit(ERR_BAD_CONFIG);
            }

            termPQExpBuffer(&msg);

            log_notice(_("replication slot \"%s\" created on upstream node \"%s\" (ID: %i)"),
                       local_node_record.slot_name,
                       upstream_node_record.node_name,
                       upstream_node_id);
        }
    }

    PQfinish(upstream_conn);

    return;
}
/*
* do_standby_register()
*
* Event(s):
* - standby_register
* - standby_register_sync
*/
/* XXX check --upstream-node-id works when re-registering */
void
do_standby_register(void)
{
PGconn *conn = NULL;
PGconn *primary_conn = NULL;
bool record_created = false;
t_node_info node_record = T_NODE_INFO_INITIALIZER;
RecordStatus record_status = RECORD_NOT_FOUND;
PQExpBufferData details;
/* so we can pass info about the primary to event notification scripts */
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
t_node_info primary_node_record = T_NODE_INFO_INITIALIZER;
int primary_node_id = UNKNOWN_NODE_ID;
bool dry_run_ok = true;
log_info(_("connecting to local node \"%s\" (ID: %i)"),
config_file_options.node_name,
config_file_options.node_id);
conn = establish_db_connection_quiet(config_file_options.conninfo);
/*
* If unable to connect, and --force not provided, wait up to --wait-start
* seconds (default: 0) for the node to become reachable.
*
* Note that if --force is provided, we don't wait for the node to start, as
* the normal use case will be re-registering an existing node, or
* registering an inactive/not-yet-extant one; we'll do the
* error handling for those cases in the next code block
*/
if (PQstatus(conn) != CONNECTION_OK && runtime_options.force == false)
{
bool conn_ok = false;
int timer = 0;
for (;;)
{
if (timer == runtime_options.wait_start)
break;
sleep(1);
log_verbose(LOG_INFO, _("%i of %i connection attempts"),
timer + 1,
runtime_options.wait_start);
conn = establish_db_connection_quiet(config_file_options.conninfo);
if (PQstatus(conn) == CONNECTION_OK)
{
conn_ok = true;
break;
}
timer++;
}
if (conn_ok == true)
{
log_info(_("connected to local node \"%s\" (ID: %i) after %i seconds"),
config_file_options.node_name,
config_file_options.node_id,
timer);
}
}
/*
* If still unable to connect, continue only if -F/--force provided,
* and primary connection parameters provided.
*/
if (PQstatus(conn) != CONNECTION_OK)
{
if (runtime_options.force == false)
{
log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
config_file_options.node_name,
config_file_options.node_id);
log_detail("\n%s", PQerrorMessage(conn));
log_hint(_("to register a standby which is not running, provide primary connection parameters and use option -F/--force"));
exit(ERR_BAD_CONFIG);
}
if (runtime_options.connection_param_provided == false)
{
log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
config_file_options.node_name,
config_file_options.node_id);
log_hint(_("to register a standby which is not running, additionally provide the primary connection parameters"));
exit(ERR_BAD_CONFIG);
}
}
/* connection OK - check this is actually a standby */
else
{
if (runtime_options.connection_param_provided)
{
log_warning(_("database connection parameters not required when the standby to be registered is running"));
log_detail(_("repmgr uses the \"conninfo\" parameter in \"repmgr.conf\" to connect to the standby"));
}
check_recovery_type(conn);
}
/* check if there is a primary in this cluster */
log_info(_("connecting to primary database"));
/* Normal case - we can connect to the local node */
if (PQstatus(conn) == CONNECTION_OK)
{
primary_conn = get_primary_connection(conn, &primary_node_id, NULL);
}
/*
* otherwise user is forcing a registration of a (potentially) inactive (or
* not-yet-extant) node and must have supplied primary connection info
*/
else
{
primary_conn = establish_db_connection_by_params(&source_conninfo, false);
}
/*
* no amount of --force will make it possible to register the standby
* without a primary server to connect to
*/
if (PQstatus(primary_conn) != CONNECTION_OK)
{
log_error(_("unable to connect to the primary database"));
log_hint(_("a primary node must be configured before registering a standby node"));
exit(ERR_BAD_CONFIG);
}
/*
* Populate "event_info" with info about the primary for event notifications
*/
record_status = get_node_record(primary_conn,
primary_node_id,
&primary_node_record);
event_info.node_id = primary_node_id;
event_info.node_name = primary_node_record.node_name;
event_info.conninfo_str = primary_node_record.conninfo;
/*
* Verify that standby and primary are supported and compatible server
* versions
*
* If the user is registering an inactive standby, we'll trust they know
* what they're doing
*/
if (PQstatus(conn) == CONNECTION_OK)
{
check_primary_standby_version_match(conn, primary_conn);
}
/*
* Check that an active node with the same node_name doesn't exist already
*/
record_status = get_node_record_by_name(primary_conn,
config_file_options.node_name,
&node_record);
if (record_status == RECORD_FOUND)
{
if (node_record.active == true && node_record.node_id != config_file_options.node_id)
{
log_error(_("node %i exists already with node_name \"%s\""),
node_record.node_id,
config_file_options.node_name);
PQfinish(primary_conn);
if (PQstatus(conn) == CONNECTION_OK)
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
}
/* Check if node record exists */
record_status = get_node_record(primary_conn,
config_file_options.node_id,
&node_record);
if (record_status == RECORD_FOUND && !runtime_options.force)
{
log_error(_("node %i is already registered"),
config_file_options.node_id);
log_hint(_("use option -F/--force to overwrite an existing node record"));
PQfinish(primary_conn);
if (PQstatus(conn) == CONNECTION_OK)
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
/*
* If an upstream node is defined, check if that node exists and is active.
*
* If it doesn't exist, and --force set, create a minimal inactive record,
* in the assumption that the user knows what they are doing (usually some kind
* of provisioning where multiple servers are created in parallel) and will
* create the active record later.
*/
if (runtime_options.upstream_node_id != NO_UPSTREAM_NODE)
{
RecordStatus upstream_record_status = RECORD_NOT_FOUND;
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
if (runtime_options.upstream_node_id == config_file_options.node_id)
{
log_error(_("provided node ID for --upstream-node-id (%i) is the same as the configured local node ID (%i)"),
runtime_options.upstream_node_id,
config_file_options.node_id);
PQfinish(primary_conn);
if (PQstatus(conn) == CONNECTION_OK)
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
upstream_record_status = get_node_record(primary_conn,
runtime_options.upstream_node_id,
&upstream_node_record);
/* create placeholder upstream record if -F/--force set */
if (upstream_record_status != RECORD_FOUND)
{
t_node_info placeholder_upstream_node_record = T_NODE_INFO_INITIALIZER;
if (!runtime_options.force)
{
log_error(_("no record found for upstream node %i"),
runtime_options.upstream_node_id);
/* footgun alert - only do this if you know what you're doing */
log_hint(_("use option -F/--force to create a dummy upstream record"));
PQfinish(primary_conn);
if (PQstatus(conn) == CONNECTION_OK)
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
log_notice(_("creating placeholder record for upstream node %i"),
runtime_options.upstream_node_id);
placeholder_upstream_node_record.node_id = runtime_options.upstream_node_id;
placeholder_upstream_node_record.type = STANDBY;
placeholder_upstream_node_record.upstream_node_id = NO_UPSTREAM_NODE;
strncpy(placeholder_upstream_node_record.conninfo, runtime_options.upstream_conninfo, MAXLEN);
placeholder_upstream_node_record.active = false;
record_created = create_node_record(primary_conn,
"standby register",
&placeholder_upstream_node_record);
/*
* It's possible, in the kind of scenario this functionality is
* intended to support, that there's a race condition where the
* node's actual record gets inserted, causing the insert of the
* placeholder record to fail. If this is the case, we don't worry
* about this insert failing; if not we bail out.
*
* TODO: teach create_node_record() to use ON CONFLICT DO NOTHING
* for 9.5 and later.
*/
if (record_created == false)
{
upstream_record_status = get_node_record(primary_conn,
runtime_options.upstream_node_id,
&placeholder_upstream_node_record);
if (upstream_record_status != RECORD_FOUND)
{
log_error(_("unable to create placeholder record for upstream node %i"),
runtime_options.upstream_node_id);
PQfinish(primary_conn);
if (PQstatus(conn) == CONNECTION_OK)
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
log_info(_("a record for upstream node %i was already created"),
runtime_options.upstream_node_id);
}
}
else if (node_record.active == false)
{
/*
* upstream node is inactive and --force not supplied - refuse to
* register
*/
if (!runtime_options.force)
{
log_error(_("record for upstream node %i is marked as inactive"),
runtime_options.upstream_node_id);
log_hint(_("use option -F/--force to register a standby with an inactive upstream node"));
PQfinish(primary_conn);
if (PQstatus(conn) == CONNECTION_OK)
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
/*
* user is using the --force - notify about the potential footgun
*/
log_notice(_("registering node %i with inactive upstream node %i"),
config_file_options.node_id,
runtime_options.upstream_node_id);
}
/* check upstream node is accessible and this node is connected */
else
{
PGconn *upstream_conn = NULL;
upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
if (PQstatus(upstream_conn) != CONNECTION_OK)
{
if (!runtime_options.force)
{
log_error(_("unable to connect to upstream node \"%s\" (ID: %i)"),
upstream_node_record.node_name,
upstream_node_record.node_id);
log_hint(_("use -F/--force to continue anyway"));
PQfinish(primary_conn);
if (PQstatus(conn) == CONNECTION_OK)
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
log_warning(_("unable to connect to upstream node \"%s\" (ID: %i) but continuing anyway"),
upstream_node_record.node_name,
upstream_node_record.node_id);
}
else
{
/* check our standby is connected */
if (is_downstream_node_attached(upstream_conn, config_file_options.node_name) == true)
{
log_verbose(LOG_INFO, _("local node is attached to specified upstream node %i"), runtime_options.upstream_node_id);
}
else
{
if (!runtime_options.force)
{
log_error(_("this node does not appear to be attached to upstream node \"%s\" (ID: %i)"),
upstream_node_record.node_name,
upstream_node_record.node_id);
log_detail(_("no record for application name \"%s\" found in \"pg_stat_replication\""),
config_file_options.node_name);
log_hint(_("use -F/--force to continue anyway"));
PQfinish(primary_conn);
if (PQstatus(conn) == CONNECTION_OK)
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
log_warning(_("this node does not appear to be attached to upstream node \"%s\" (ID: %i)"),
upstream_node_record.node_name,
upstream_node_record.node_id);
}
PQfinish(upstream_conn);
}
}
}
/*
* populate node record structure with current values set in repmgr.conf
* and/or the command line (this will overwrite any existing values, which
* is what we want when updating the record)
*/
init_node_record(&node_record);
node_record.type = STANDBY;
/* if --upstream-node-id not provided, set to primary node id */
if (node_record.upstream_node_id == UNKNOWN_NODE_ID)
{
node_record.upstream_node_id = primary_node_id;
}
/*
* If --upstream-node-id not provided, we're defaulting to the primary as
* upstream node. If local node is available, double-check that it's attached
* to the primary, in case --upstream-node-id was an accidental ommission.
*
* Currently we'll only do this for newly registered nodes.
*/
if (runtime_options.upstream_node_id == NO_UPSTREAM_NODE && PQstatus(conn) == CONNECTION_OK)
{
/* only do this if record does not exist */
if (record_status != RECORD_FOUND)
{
log_warning(_("--upstream-node-id not supplied, assuming upstream node is primary (node ID %i)"),
primary_node_id);
/* check our standby is connected */
if (is_downstream_node_attached(primary_conn, config_file_options.node_name) == true)
{
log_verbose(LOG_INFO, _("local node is attached to primary"));
}
else if (runtime_options.force == false)
{
log_error(_("local node not attached to primary node %i"), primary_node_id);
/* TODO: 9.6 and later, display detail from pg_stat_wal_receiver */
log_hint(_("specify the actual upstream node id with --upstream-node-id, or use -F/--force to continue anyway"));
if (runtime_options.dry_run == true)
{
dry_run_ok = false;
}
else
{
PQfinish(primary_conn);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
}
else
{
log_warning(_("local node not attached to primary node %i"), primary_node_id);
log_notice(_("-F/--force supplied, continuing anyway"));
}
}
}
if (runtime_options.dry_run == true)
{
PQfinish(primary_conn);
if (PQstatus(conn) == CONNECTION_OK)
PQfinish(conn);
if (dry_run_ok == false)
{
log_warning(_("issue(s) encountered; see preceding log messages"));
exit(ERR_BAD_CONFIG);
}
log_info(_("all prerequisites for \"standby register\" are met"));
exit(SUCCESS);
}
/*
* node record exists - update it (at this point we have already
* established that -F/--force is in use)
*/
if (record_status == RECORD_FOUND)
{
record_created = update_node_record(primary_conn,
"standby register",
&node_record);
}
else
{
record_created = create_node_record(primary_conn,
"standby register",
&node_record);
}
initPQExpBuffer(&details);
if (record_created == false)
{
appendPQExpBufferStr(&details,
"standby registration failed");
if (runtime_options.force == true)
appendPQExpBufferStr(&details,
" (-F/--force option was used)");
create_event_notification_extended(
primary_conn,
&config_file_options,
config_file_options.node_id,
"standby_register",
false,
details.data,
&event_info);
termPQExpBuffer(&details);
PQfinish(primary_conn);
primary_conn = NULL;
if (PQstatus(conn) == CONNECTION_OK)
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
appendPQExpBufferStr(&details,
"standby registration succeeded");
if (runtime_options.force == true)
appendPQExpBufferStr(&details,
" (-F/--force option was used)");
/* Log the event */
create_event_notification_extended(
primary_conn,
&config_file_options,
config_file_options.node_id,
"standby_register",
true,
details.data,
&event_info);
termPQExpBuffer(&details);
/*
* If --wait-sync option set, wait for the records to synchronise
* (unless 0 seconds provided, which disables it, which is the same as
* not providing the option). The default value is -1, which means
* no timeout.
*/
if (PQstatus(conn) == CONNECTION_OK &&
runtime_options.wait_register_sync == true &&
runtime_options.wait_register_sync_seconds != 0)
{
bool sync_ok = false;
int timer = 0;
RecordStatus node_record_status = RECORD_NOT_FOUND;
t_node_info node_record_on_primary = T_NODE_INFO_INITIALIZER;
t_node_info node_record_on_standby = T_NODE_INFO_INITIALIZER;
node_record_status = get_node_record(primary_conn,
config_file_options.node_id,
&node_record_on_primary);
if (node_record_status != RECORD_FOUND)
{
log_error(_("unable to retrieve node record from primary"));
PQfinish(primary_conn);
PQfinish(conn);
exit(ERR_REGISTRATION_SYNC);
}
for (;;)
{
bool records_match = true;
/*
* If timeout set to a positive value, check if we've reached it and
* exit the loop
*/
if (runtime_options.wait_register_sync_seconds > 0 && runtime_options.wait_register_sync_seconds == timer)
break;
node_record_status = get_node_record(conn,
config_file_options.node_id,
&node_record_on_standby);
if (node_record_status == RECORD_NOT_FOUND)
{
/* no record available yet on standby */
records_match = false;
}
else if (node_record_status == RECORD_FOUND)
{
/* compare relevant fields */
if (node_record_on_standby.upstream_node_id != node_record_on_primary.upstream_node_id)
records_match = false;
if (node_record_on_standby.type != node_record_on_primary.type)
records_match = false;
if (node_record_on_standby.priority != node_record_on_primary.priority)
records_match = false;
if (node_record_on_standby.active != node_record_on_primary.active)
records_match = false;
if (strcmp(node_record_on_standby.node_name, node_record_on_primary.node_name) != 0)
records_match = false;
if (strcmp(node_record_on_standby.conninfo, node_record_on_primary.conninfo) != 0)
records_match = false;
if (strcmp(node_record_on_standby.slot_name, node_record_on_primary.slot_name) != 0)
records_match = false;
if (records_match == true)
{
sync_ok = true;
break;
}
}
sleep(1);
timer++;
}
/* Log the event */
initPQExpBuffer(&details);
if (sync_ok == false)
{
appendPQExpBuffer(&details,
_("node record was not synchronised after %i seconds"),
runtime_options.wait_register_sync_seconds);
}
else
{
appendPQExpBuffer(&details,
_("node record synchronised after %i seconds"),
timer);
}
create_event_notification_extended(
primary_conn,
&config_file_options,
config_file_options.node_id,
"standby_register_sync",
sync_ok,
details.data,
&event_info);
if (sync_ok == false)
{
log_error("%s", details.data);
termPQExpBuffer(&details);
PQfinish(primary_conn);
PQfinish(conn);
exit(ERR_REGISTRATION_SYNC);
}
log_info(_("node record on standby synchronised from primary"));
log_detail("%s", details.data);
termPQExpBuffer(&details);
}
PQfinish(primary_conn);
if (PQstatus(conn) == CONNECTION_OK)
PQfinish(conn);
log_info(_("standby registration complete"));
log_notice(_("standby node \"%s\" (id: %i) successfully registered"),
config_file_options.node_name, config_file_options.node_id);
return;
}
/*
 * do_standby_unregister()
 *
 * Delete a standby's node record via a connection to the primary. The
 * target is the node given with --node-id if supplied, otherwise the
 * node configured in repmgr.conf.
 *
 * Takes no parameters and returns nothing; calls exit(ERR_BAD_CONFIG) if
 * the primary cannot be reached, the record is missing, the node is not
 * a standby, or the record cannot be deleted.
 *
 * Event(s):
 *  - standby_unregister
 */
void
do_standby_unregister(void)
{
	PGconn *conn = NULL;
	PGconn *primary_conn = NULL;

	int target_node_id = UNKNOWN_NODE_ID;
	t_node_info node_info = T_NODE_INFO_INITIALIZER;

	bool node_record_deleted = false;

	log_info(_("connecting to local standby"));
	conn = establish_db_connection(config_file_options.conninfo, true);

	/* check if there is a primary in this cluster */
	log_info(_("connecting to primary database"));

	primary_conn = get_primary_connection(conn, NULL, NULL);

	if (PQstatus(primary_conn) != CONNECTION_OK)
	{
		log_error(_("unable to connect to primary server"));
		/*
		 * NOTE(review): this prints the error text of the local connection,
		 * not of the primary connection - confirm this is intended.
		 */
		log_detail("\n%s", PQerrorMessage(conn));

		/* close both connections, as every other exit path here does */
		PQfinish(primary_conn);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * if --node-id was specified, unregister that node rather than the
	 * current one - this enables inactive nodes to be unregistered.
	 */
	if (runtime_options.node_id != UNKNOWN_NODE_ID)
		target_node_id = runtime_options.node_id;
	else
		target_node_id = config_file_options.node_id;

	/* Check node exists and is really a standby */
	if (get_node_record(primary_conn, target_node_id, &node_info) != RECORD_FOUND)
	{
		log_error(_("no record found for node %i"), target_node_id);
		PQfinish(primary_conn);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	if (node_info.type != STANDBY)
	{
		log_error(_("node %i is not a standby server"), target_node_id);
		PQfinish(primary_conn);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/* Now unregister the standby */
	log_notice(_("unregistering node %i"), target_node_id);
	node_record_deleted = delete_node_record(primary_conn,
											 target_node_id);

	if (node_record_deleted == false)
	{
		PQfinish(primary_conn);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/* Log the event */
	create_event_notification(primary_conn,
							  &config_file_options,
							  target_node_id,
							  "standby_unregister",
							  true,
							  NULL);

	PQfinish(primary_conn);
	PQfinish(conn);

	log_info(_("standby unregistration complete"));

	return;
}
/*
 * do_standby_promote()
 *
 * Promote the local standby to primary. Before promoting, verify that the
 * local node is a standby, that WAL replay is not paused in a way which
 * would delay promotion, and that no active primary can be found. With
 * --dry-run only the checks are performed.
 *
 * Takes no parameters and returns nothing; calls exit() with an error code
 * if any check fails.
 *
 * Event(s):
 *  - standby_promote
 */
void
do_standby_promote(void)
{
	PGconn *conn = NULL;
	PGconn *current_primary_conn = NULL;

	RecoveryType recovery_type = RECTYPE_UNKNOWN;

	int existing_primary_id = UNKNOWN_NODE_ID;
	int server_version_num = UNKNOWN_SERVER_VERSION_NUM;

	conn = establish_db_connection(config_file_options.conninfo, true);

	log_verbose(LOG_INFO, _("connected to standby, checking its state"));

	/* Verify that standby is a supported server version */
	server_version_num = check_server_version(conn, "standby", true, NULL);

	/* Check we are in a standby node */
	recovery_type = get_recovery_type(conn);

	if (recovery_type != RECTYPE_STANDBY)
	{
		if (recovery_type == RECTYPE_PRIMARY)
		{
			log_error(_("STANDBY PROMOTE can only be executed on a standby node"));
			PQfinish(conn);
			exit(ERR_PROMOTION_FAIL);
		}
		else
		{
			log_error(_("connection to node lost"));
			PQfinish(conn);
			exit(ERR_DB_CONN);
		}
	}
	else if (runtime_options.dry_run == true)
	{
		log_info(_("node is a standby"));
	}

	/*
	 * Executing "pg_ctl ... promote" when WAL replay is paused and
	 * WAL is pending replay will mean the standby will not promote
	 * until replay is resumed.
	 *
	 * As that could happen at any time outside repmgr's control, we
	 * need to avoid leaving a "ticking timebomb" which might cause
	 * an unexpected status change in the replication cluster.
	 */
	{
		ReplInfo replication_info;
		bool replay_paused = false;

		init_replication_info(&replication_info);

		if (get_replication_info(conn, STANDBY, &replication_info) == false)
		{
			log_error(_("unable to retrieve replication information from local node"));
			PQfinish(conn);
			exit(ERR_PROMOTION_FAIL);
		}

		/*
		 * If the local node is recovering from archive, we can't tell
		 * whether there's still WAL which needs to be replayed, so
		 * we'll abort if WAL replay is paused.
		 */
		if (replication_info.receiving_streamed_wal == false)
		{
			/* just a simple check for paused WAL replay */
			replay_paused = is_wal_replay_paused(conn, false);
			if (replay_paused == true)
			{
				log_error(_("WAL replay is paused on this node"));
				log_detail(_("node is in archive recovery and is not safe to promote in this state"));
				log_detail(_("replay paused at %X/%X"),
						   format_lsn(replication_info.last_wal_replay_lsn));
			}
		}
		else
		{
			/* check that replay is paused *and* WAL is pending replay */
			replay_paused = is_wal_replay_paused(conn, true);
			if (replay_paused == true)
			{
				log_error(_("WAL replay is paused on this node but not all WAL has been replayed"));
				log_detail(_("replay paused at %X/%X; last WAL received is %X/%X"),
						   format_lsn(replication_info.last_wal_replay_lsn),
						   format_lsn(replication_info.last_wal_receive_lsn));
			}
		}

		if (replay_paused == true)
		{
			/* the resume function was renamed in PostgreSQL 10 */
			if (PQserverVersion(conn) >= 100000)
				log_hint(_("execute \"pg_wal_replay_resume()\" to unpause WAL replay"));
			else
				log_hint(_("execute \"pg_xlog_replay_resume()\" to unpause WAL replay"));

			PQfinish(conn);
			exit(ERR_PROMOTION_FAIL);
		}
	}

	/* check that there's no existing primary */
	current_primary_conn = get_primary_connection_quiet(conn, &existing_primary_id, NULL);

	if (PQstatus(current_primary_conn) == CONNECTION_OK)
	{
		log_error(_("this replication cluster already has an active primary server"));

		if (existing_primary_id != UNKNOWN_NODE_ID)
		{
			/*
			 * Initialise the record so "node_name" is well-defined even if
			 * the lookup below does not populate it.
			 */
			t_node_info primary_rec = T_NODE_INFO_INITIALIZER;

			get_node_record(conn, existing_primary_id, &primary_rec);

			log_detail(_("current primary is %s (node_id: %i)"),
					   primary_rec.node_name,
					   existing_primary_id);
		}

		PQfinish(current_primary_conn);
		PQfinish(conn);
		exit(ERR_PROMOTION_FAIL);
	}
	else if (runtime_options.dry_run == true)
	{
		log_info(_("no active primary server found in this replication cluster"));
	}

	PQfinish(current_primary_conn);

	if (runtime_options.dry_run == true)
	{
		log_info(_("prerequisites for executing STANDBY PROMOTE are met"));
		PQfinish(conn);
		exit(SUCCESS);
	}

	_do_standby_promote_internal(conn, server_version_num);

	/*
	 * highgo: after promoting local server to new primary, add the vip to local node
	 */
	if (check_vip_conf(config_file_options.virtual_ip, config_file_options.network_card))
	{
		bind_virtual_ip(config_file_options.virtual_ip, config_file_options.network_card);
	}

	/* the local connection is no longer needed */
	PQfinish(conn);
}
/*
 * _do_standby_promote_internal()
 *
 * Carry out the promotion of the local standby and record the result.
 *
 * conn:               open connection to the local node; it is closed here
 *                     only on the error paths which call exit(), and is
 *                     left open on successful return
 * server_version_num: server version number of the local node; values of
 *                     120000 and above select promotion via pg_promote()
 *
 * Returns nothing; calls exit() with an error code if the node record
 * cannot be read, promotion fails or times out, or the node record cannot
 * be updated afterwards.
 *
 * Event(s):
 *  - standby_promote
 */
static void
_do_standby_promote_internal(PGconn *conn, int server_version_num)
{
	int i;
	bool promote_success = false;
	PQExpBufferData details;

	RecoveryType recovery_type = RECTYPE_UNKNOWN;

	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
	RecordStatus record_status = RECORD_NOT_FOUND;
	char data_dir[MAXPGPATH];

	get_node_config_directory(data_dir);

	/* fetch local node record so we can add detail in log messages */
	record_status = get_node_record(conn,
									config_file_options.node_id,
									&local_node_record);

	if (record_status != RECORD_FOUND)
	{
		log_error(_("unable to retrieve record for node %i"),
				  config_file_options.node_id);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * Promote standby to primary.
	 *
	 * `pg_ctl promote` returns immediately and (prior to 10.0) has no -w
	 * option so we can't be sure when or if the promotion completes. For now
	 * we'll poll the server until the default timeout (60 seconds)
	 *
	 * For PostgreSQL 12+, use the pg_promote() function - note this is
	 * experimental
	 */
	log_notice(_("promoting standby to primary"));

	if (server_version_num >= 120000)
	{
		log_detail(_("promoting server \"%s\" (ID: %i) using pg_promote()"),
				   local_node_record.node_name,
				   local_node_record.node_id);

		/*
		 * We'll check for promotion success ourselves, but will abort
		 * if some unrecoverable error prevented the function from being
		 * executed.
		 */
		if (!promote_standby(conn, false, 0))
		{
			log_error(_("unable to promote server from standby to primary"));
			PQfinish(conn);
			exit(ERR_PROMOTION_FAIL);
		}
	}
	else
	{
		char script[MAXLEN];
		int r;

		get_server_action(ACTION_PROMOTE, script, (char *) data_dir);

		log_detail(_("promoting server \"%s\" (ID: %i) using \"%s\""),
				   local_node_record.node_name,
				   local_node_record.node_id,
				   script);

		r = system(script);
		if (r != 0)
		{
			log_error(_("unable to promote server from standby to primary"));
			PQfinish(conn);
			exit(ERR_PROMOTION_FAIL);
		}
	}

	log_notice(_("waiting up to %i seconds (parameter \"promote_check_timeout\") for promotion to complete"),
			   config_file_options.promote_check_timeout);

	/* poll the node until it reports itself as primary or the timeout elapses */
	for (i = 0; i < config_file_options.promote_check_timeout; i += config_file_options.promote_check_interval)
	{
		recovery_type = get_recovery_type(conn);

		if (recovery_type == RECTYPE_PRIMARY)
		{
			promote_success = true;
			break;
		}
		sleep(config_file_options.promote_check_interval);
	}

	if (promote_success == false)
	{
		if (recovery_type == RECTYPE_STANDBY)
		{
			log_error(_("STANDBY PROMOTE failed, node is still a standby"));
			log_detail(_("node still in recovery after %i seconds"), config_file_options.promote_check_timeout);
			log_hint(_("the node may need more time to promote itself, check the PostgreSQL log for details"));
			PQfinish(conn);
			exit(ERR_PROMOTION_FAIL);
		}
		else
		{
			log_error(_("connection to node lost"));
			PQfinish(conn);
			exit(ERR_DB_CONN);
		}
	}

	log_verbose(LOG_INFO, _("standby promoted to primary after %i second(s)"), i);

	/* update node information to reflect new status */
	if (update_node_record_set_primary(conn, config_file_options.node_id) == false)
	{
		initPQExpBuffer(&details);
		appendPQExpBuffer(&details,
						  _("unable to update node record for node %i"),
						  config_file_options.node_id);

		log_error("%s", details.data);

		/* no connection is passed to the notification call on this failure path */
		create_event_notification(NULL,
								  &config_file_options,
								  config_file_options.node_id,
								  "standby_promote",
								  false,
								  details.data);

		termPQExpBuffer(&details);
		PQfinish(conn);
		exit(ERR_DB_QUERY);
	}

	initPQExpBuffer(&details);
	appendPQExpBuffer(&details,
					  _("server \"%s\" (ID: %i) was successfully promoted to primary"),
					  local_node_record.node_name,
					  local_node_record.node_id);

	log_notice(_("STANDBY PROMOTE successful"));
	log_detail("%s", details.data);

	/* Log the event */
	create_event_notification(conn,
							  &config_file_options,
							  config_file_options.node_id,
							  "standby_promote",
							  true,
							  details.data);

	termPQExpBuffer(&details);

	return;
}
/*
* Follow a new primary.
*
* Node must be running. To start an inactive node and point it at a
* new primary, use "repmgr node rejoin".
*
* TODO: enable provision of new primary's conninfo parameters, which
* will be necessary if the primary's information has changed, but
* was not replicated to the current standby.
*/
void
do_standby_follow(void)
{
PGconn *local_conn = NULL;
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
PGconn *primary_conn = NULL;
int primary_node_id = UNKNOWN_NODE_ID;
PGconn *follow_target_conn = NULL;
int follow_target_node_id = UNKNOWN_NODE_ID;
t_node_info follow_target_node_record = T_NODE_INFO_INITIALIZER;
bool follow_target_is_primary = true;
RecordStatus record_status = RECORD_NOT_FOUND;
/* so we can pass info about the primary to event notification scripts */
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
int timer = 0;
PQExpBufferData follow_output;
bool success = false;
int follow_error_code = SUCCESS;
log_verbose(LOG_DEBUG, "do_standby_follow()");
local_conn = establish_db_connection(config_file_options.conninfo, false);
if (PQstatus(local_conn) != CONNECTION_OK)
{
log_hint(_("use \"repmgr node rejoin\" to re-add an inactive node to the replication cluster"));
exit(ERR_DB_CONN);
}
log_verbose(LOG_INFO, _("connected to local node"));
/* check this is a standby */
check_recovery_type(local_conn);
/* sanity-checks for 9.3 */
if (PQserverVersion(local_conn) < 90400)
check_93_config();
/* attempt to retrieve local node record */
record_status = get_node_record(local_conn,
config_file_options.node_id,
&local_node_record);
if (record_status != RECORD_FOUND)
{
log_error(_("unable to retrieve record for local node %i"),
config_file_options.node_id);
PQfinish(local_conn);
exit(ERR_BAD_CONFIG);
}
/*
* --upstream-node-id provided - attempt to follow that node
*/
if (runtime_options.upstream_node_id != UNKNOWN_NODE_ID)
{
/* we can't follow ourselves */
if (runtime_options.upstream_node_id == config_file_options.node_id)
{
log_error(_("provided --upstream-node-id %i is the current node"),
runtime_options.upstream_node_id);
PQfinish(local_conn);
exit(ERR_FOLLOW_FAIL);
}
follow_target_node_id = runtime_options.upstream_node_id;
record_status = get_node_record(local_conn,
follow_target_node_id,
&follow_target_node_record);
/* but we must follow a node which exists (=registered) */
if (record_status != RECORD_FOUND)
{
log_error(_("unable to find record for intended upstream node %i"),
runtime_options.upstream_node_id);
PQfinish(local_conn);
exit(ERR_FOLLOW_FAIL);
}
}
/*
* otherwise determine the current primary and attempt to follow that
*/
else
{
log_notice(_("attempting to find and follow current primary"));
}
/*
* Attempt to connect to follow target - if this was provided with --upstream-node-id,
* we'll connect to that, otherwise we'll attempt to find the current primary.
*
* If --wait provided, loop for up `primary_follow_timeout` seconds
* before giving up
*
* XXX add `upstream_follow_timeout` ?
*/
for (timer = 0; timer < config_file_options.primary_follow_timeout; timer++)
{
/* --upstream-node-id provided - connect to specified node*/
if (follow_target_node_id != UNKNOWN_NODE_ID)
{
follow_target_conn = establish_db_connection_quiet(follow_target_node_record.conninfo);
}
/* attempt to find current primary node */
else
{
follow_target_conn = get_primary_connection_quiet(local_conn,
&follow_target_node_id,
NULL);
}
if (PQstatus(follow_target_conn) == CONNECTION_OK || runtime_options.wait_provided == false)
{
break;
}
sleep(1);
}
/* unable to connect to the follow target */
if (PQstatus(follow_target_conn) != CONNECTION_OK)
{
if (follow_target_node_id == UNKNOWN_NODE_ID)
{
log_error(_("unable to find a primary node"));
}
else
{
log_error(_("unable to connect to target node %i"), follow_target_node_id);
}
if (runtime_options.wait_provided == true)
{
if (follow_target_node_id == UNKNOWN_NODE_ID)
{
log_detail(_("no primary appeared after %i seconds"),
config_file_options.primary_follow_timeout);
}
else
{
log_detail(_("unable to connect to target node %i after %i seconds"),
follow_target_node_id,
config_file_options.primary_follow_timeout);
}
log_hint(_("alter \"primary_follow_timeout\" in \"repmgr.conf\" to change this value"));
}
PQfinish(local_conn);
exit(ERR_FOLLOW_FAIL);
}
/* --upstream-node-id not provided - retrieve record for node determined as primary */
if (runtime_options.upstream_node_id == UNKNOWN_NODE_ID)
{
if (runtime_options.dry_run == true)
{
log_info(_("connected to node %i, checking for current primary"), follow_target_node_id);
}
else
{
log_verbose(LOG_INFO, _("connected to node %i, checking for current primary"), follow_target_node_id);
}
record_status = get_node_record(follow_target_conn,
follow_target_node_id,
&follow_target_node_record);
if (record_status != RECORD_FOUND)
{
log_error(_("unable to find record for follow target node %i"),
follow_target_node_id);
PQfinish(follow_target_conn);
exit(ERR_FOLLOW_FAIL);
}
}
/*
* Populate "event_info" with info about the node to follow for event notifications
*
* XXX need to differentiate between primary and non-primary?
*/
event_info.node_id = follow_target_node_id;
event_info.node_name = follow_target_node_record.node_name;
event_info.conninfo_str = follow_target_node_record.conninfo;
/*
* Check whether follow target is in recovery, so we know later whether
* we'll need to open a connection to the primary to update the metadata.
* Also emit an informative message.
*/
{
PQExpBufferData node_info_msg;
RecoveryType recovery_type = RECTYPE_UNKNOWN;
initPQExpBuffer(&node_info_msg);
recovery_type = get_recovery_type(follow_target_conn);
/*
* unlikely this will happen, but it's conceivable the follow target will
* have vanished since we last talked to it, or something
*/
if (recovery_type == RECTYPE_UNKNOWN)
{
log_error(_("unable to determine recovery type of follow target"));
PQfinish(follow_target_conn);
exit(ERR_FOLLOW_FAIL);
}
if (recovery_type == RECTYPE_PRIMARY)
{
follow_target_is_primary = true;
appendPQExpBuffer(&node_info_msg,
_("follow target is primary node \"%s\" (ID: %i)"),
follow_target_node_record.node_name,
follow_target_node_id);
}
else
{
follow_target_is_primary = false;
appendPQExpBuffer(&node_info_msg,
_("follow target is standby node \"%s\" (ID: %i)"),
follow_target_node_record.node_name,
follow_target_node_id);
}
if (runtime_options.dry_run == true)
{
log_info("%s", node_info_msg.data);
}
else
{
log_verbose(LOG_INFO, "%s", node_info_msg.data);
}
termPQExpBuffer(&node_info_msg);
}
/*
* if replication slots in use, check at least one free slot is available
* on the follow target
*/
if (config_file_options.use_replication_slots)
{
int free_slots = get_free_replication_slot_count(follow_target_conn);
if (free_slots < 0)
{
log_error(_("unable to determine number of free replication slots on node %i"),
follow_target_node_id);
PQfinish(follow_target_conn);
PQfinish(local_conn);
exit(ERR_FOLLOW_FAIL);
}
if (free_slots == 0)
{
log_error(_("no free replication slots available on node %i"), follow_target_node_id);
log_hint(_("consider increasing \"max_replication_slots\""));
PQfinish(follow_target_conn);
PQfinish(local_conn);
exit(ERR_FOLLOW_FAIL);
}
else if (runtime_options.dry_run == true)
{
log_info(_("replication slots in use, %i free slots on node %i"),
follow_target_node_id,
free_slots);
}
}
/* XXX check this is not current upstream anyway */
/* check if we can attach to the follow target */
{
t_conninfo_param_list local_repl_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
PGconn *local_repl_conn = NULL;
t_system_identification local_identification = T_SYSTEM_IDENTIFICATION_INITIALIZER;
bool can_follow;
XLogRecPtr local_xlogpos = get_node_current_lsn(local_conn);
/* Check local replication connection - we want to execute IDENTIFY_SYSTEM
* to get the current timeline ID, which might not yet be written to
* pg_control.
*
* TODO: from 9.6, query "pg_stat_wal_receiver" via the existing local connection
*/
initialize_conninfo_params(&local_repl_conninfo, false);
conn_to_param_list(local_conn, &local_repl_conninfo);
/* Set the replication user from the node record */
param_set(&local_repl_conninfo, "user", local_node_record.repluser);
param_set(&local_repl_conninfo, "replication", "1");
local_repl_conn = establish_db_connection_by_params(&local_repl_conninfo, false);
free_conninfo_params(&local_repl_conninfo);
if (PQstatus(local_repl_conn) != CONNECTION_OK)
{
log_error(_("unable to establish a replication connection to the local node"));
PQfinish(local_conn);
PQfinish(follow_target_conn);
exit(ERR_FOLLOW_FAIL);
}
else if (runtime_options.dry_run == true)
{
log_info(_("replication connection to the local node was successful"));
}
success = identify_system(local_repl_conn, &local_identification);
PQfinish(local_repl_conn);
if (success == false)
{
log_error(_("unable to query the local node's system identification"));
PQfinish(local_conn);
PQfinish(follow_target_conn);
exit(ERR_FOLLOW_FAIL);
}
can_follow = check_node_can_attach(local_identification.timeline,
local_xlogpos,
follow_target_conn,
&follow_target_node_record,
false);
if (can_follow == false)
{
PQfinish(local_conn);
PQfinish(follow_target_conn);
exit(ERR_FOLLOW_FAIL);
}
}
PQfinish(local_conn);
if (runtime_options.dry_run == true)
{
log_info(_("prerequisites for executing STANDBY FOLLOW are met"));
exit(SUCCESS);
}
/*
* Here we'll need a connection to the primary, if the upstream is not a primary.
*/
if (follow_target_is_primary == false)
{
/*
* We'll try and establish primary from follow target, in the assumption its node
* record is more up-to-date.
*/
primary_conn = get_primary_connection_quiet(follow_target_conn,
&primary_node_id,
NULL);
}
else
{
primary_conn = follow_target_conn;
}
initPQExpBuffer(&follow_output);
success = do_standby_follow_internal(
primary_conn,
follow_target_conn,
&follow_target_node_record,
&follow_output,
ERR_FOLLOW_FAIL,
&follow_error_code);
/* unable to restart the standby */
if (success == false)
{
create_event_notification_extended(
follow_target_conn,
&config_file_options,
config_file_options.node_id,
"standby_follow",
success,
follow_output.data,
&event_info);
PQfinish(follow_target_conn);
if (follow_target_is_primary == false)
PQfinish(primary_conn);
log_notice(_("STANDBY FOLLOW failed"));
if (strlen( follow_output.data ))
log_detail("%s", follow_output.data);
termPQExpBuffer(&follow_output);
exit(follow_error_code);
}
termPQExpBuffer(&follow_output);
initPQExpBuffer(&follow_output);
/*
* Wait up to "standby_follow_timeout" seconds for standby to connect to
* upstream.
* For 9.6 and later, we could check pg_stat_wal_receiver on the local node.
*/
/* assume success, necessary if standby_follow_timeout is zero */
success = true;
for (timer = 0; timer < config_file_options.standby_follow_timeout; timer++)
{
success = is_downstream_node_attached(follow_target_conn, config_file_options.node_name);
if (success == true)
break;
log_verbose(LOG_DEBUG, "sleeping %i of max %i seconds waiting for standby to attach to primary",
timer + 1,
config_file_options.standby_follow_timeout);
sleep(1);
}
if (success == true)
{
log_notice(_("STANDBY FOLLOW successful"));
appendPQExpBuffer(&follow_output,
"standby attached to upstream node \"%s\" (node ID: %i)",
follow_target_node_record.node_name,
follow_target_node_id);
}
else
{
log_error(_("STANDBY FOLLOW failed"));
appendPQExpBuffer(&follow_output,
"standby did not attach to upstream node \"%s\" (node ID: %i) after %i seconds",
follow_target_node_record.node_name,
follow_target_node_id,
config_file_options.standby_follow_timeout);
}
log_detail("%s", follow_output.data);
create_event_notification_extended(
primary_conn,
&config_file_options,
config_file_options.node_id,
"standby_follow",
success,
follow_output.data,
&event_info);
termPQExpBuffer(&follow_output);
PQfinish(follow_target_conn);
if (follow_target_is_primary == false)
PQfinish(primary_conn);
if (success == false)
exit(ERR_FOLLOW_FAIL);
return;
}
/*
 * Perform the actual "follow" operation; this is executed by
 * "node rejoin" too.
 *
 * For PostgreSQL 9.3, ensure check_93_config() was called before calling
 * this function.
 *
 * In order, this function:
 *  - fetches the local node's record via "primary_conn"
 *  - if replication slots are in use, makes sure the local node record has
 *    a slot name and creates that slot on "follow_target_conn"
 *  - calls create_recovery_file() with connection parameters derived from
 *    the follow target's conninfo
 *  - stops and starts, restarts, or just starts the local server
 *  - if applicable, drops this node's slot on the original upstream
 *  - updates the local node's record via "primary_conn" so it is marked as
 *    an active standby of the follow target
 *
 * Parameters:
 *  primary_conn              - connection used to read and update node records
 *  follow_target_conn        - connection to the node to follow
 *  follow_target_node_record - node record of the node to follow
 *  output                    - buffer receiving a success or failure message
 *  general_error_code        - value stored in "*error_code" for failures
 *                              which have no more specific code
 *  error_code                - out parameter, only written on failure
 *
 * Returns true on success. On failure returns false with "*error_code" set
 * to ERR_BAD_CONFIG (local node record not found), ERR_NO_RESTART (a server
 * stop/start/restart command failed) or "general_error_code" (anything else).
 */
bool
do_standby_follow_internal(PGconn *primary_conn, PGconn *follow_target_conn, t_node_info *follow_target_node_record, PQExpBufferData *output, int general_error_code, int *error_code)
{
	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
	int			original_upstream_node_id = UNKNOWN_NODE_ID;
	t_node_info original_upstream_node_record = T_NODE_INFO_INITIALIZER;

	RecordStatus record_status = RECORD_NOT_FOUND;
	char	   *errmsg = NULL;

	bool		remove_old_replication_slot = false;

	/*
	 * Fetch our node record so we can write application_name, if set, and to
	 * get the current upstream node ID, which we'll need to know if replication
	 * slots are in use and we want to delete this node's slot on the current
	 * upstream.
	 */
	record_status = get_node_record(primary_conn,
									config_file_options.node_id,
									&local_node_record);

	if (record_status != RECORD_FOUND)
	{
		log_error(_("unable to retrieve record for node %i"),
				  config_file_options.node_id);

		*error_code = ERR_BAD_CONFIG;
		return false;
	}

	/*
	 * If replication slots are in use, we'll need to create a slot on the
	 * follow target
	 */
	if (config_file_options.use_replication_slots)
	{
		/*
		 * Here we add a sanity check for the "slot_name" field - it's possible
		 * the node was initially registered with "use_replication_slots=false"
		 * but the configuration was subsequently changed, leaving the field NULL.
		 *
		 * To avoid annoying failures we can just update the node record and proceed.
		 */
		if (!strlen(local_node_record.slot_name))
		{
			create_slot_name(local_node_record.slot_name, config_file_options.node_id);

			log_notice(_("setting node %i's slot name to \"%s\""),
					   config_file_options.node_id,
					   local_node_record.slot_name);

			update_node_record_slot_name(primary_conn, config_file_options.node_id, local_node_record.slot_name);
		}

		if (create_replication_slot(follow_target_conn,
									local_node_record.slot_name,
									output) == false)
		{
			log_error("%s", output->data);

			/*
			 * Report a failure code; without this the caller would act on
			 * whatever value it happened to initialise its error code with.
			 */
			*error_code = general_error_code;
			return false;
		}
	}

	/* Initialise connection parameters to write as `primary_conninfo` */
	initialize_conninfo_params(&recovery_conninfo, false);

	/* We ignore any application_name set in the primary's conninfo */
	parse_conninfo_string(follow_target_node_record->conninfo, &recovery_conninfo, &errmsg, true);

	{
		t_conninfo_param_list local_node_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
		bool		parse_success;

		initialize_conninfo_params(&local_node_conninfo, false);

		parse_success = parse_conninfo_string(local_node_record.conninfo, &local_node_conninfo, &errmsg, false);

		if (parse_success == false)
		{
			/*
			 * this shouldn't happen, but if it does we'll plough on
			 * regardless
			 */
			log_warning(_("unable to parse conninfo string \"%s\":\n  %s"),
						local_node_record.conninfo, errmsg);
		}
		else
		{
			/*
			 * NOTE(review): "application_name" is set again unconditionally
			 * further down from config_file_options.node_name, so this value
			 * is presumably overridden - confirm whether that is intended.
			 */
			char	   *application_name = param_get(&local_node_conninfo, "application_name");

			if (application_name != NULL && strlen(application_name))
				param_set(&recovery_conninfo, "application_name", application_name);
		}

		free_conninfo_params(&local_node_conninfo);

		/*
		 * store the original upstream node id so we can delete the
		 * replication slot, if exists
		 */
		if (local_node_record.upstream_node_id != UNKNOWN_NODE_ID)
		{
			original_upstream_node_id = local_node_record.upstream_node_id;
		}
		else
		{
			original_upstream_node_id = follow_target_node_record->node_id;
		}

		if (config_file_options.use_replication_slots && runtime_options.host_param_provided == false && original_upstream_node_id != UNKNOWN_NODE_ID)
		{
			remove_old_replication_slot = true;
		}
	}

	/* Fetch original upstream's record */
	if (remove_old_replication_slot == true)
	{
		PGconn	   *local_conn = NULL;
		RecordStatus upstream_record_status = RECORD_NOT_FOUND;

		/* abort if local connection not available */
		local_conn = establish_db_connection(config_file_options.conninfo, true);

		upstream_record_status = get_node_record(local_conn,
												 original_upstream_node_id,
												 &original_upstream_node_record);
		PQfinish(local_conn);

		if (upstream_record_status != RECORD_FOUND)
		{
			log_warning(_("unable to retrieve node record for old upstream node %i"),
						original_upstream_node_id);
			log_detail(_("replication slot will need to be removed manually"));
		}
	}

	/* Set the application name to this node's name */
	param_set(&recovery_conninfo, "application_name", config_file_options.node_name);

	/* Set the replication user from the follow target node record */
	param_set(&recovery_conninfo, "user", follow_target_node_record->repluser);

	log_notice(_("setting node %i's upstream to node %i"),
			   config_file_options.node_id, follow_target_node_record->node_id);

	if (!create_recovery_file(&local_node_record,
							  &recovery_conninfo,
							  PQserverVersion(primary_conn),
							  config_file_options.data_directory, true))
	{
		*error_code = general_error_code;
		return false;
	}

	/*
	 * start/restart the service
	 */
	{
		char		server_command[MAXLEN] = "";
		bool		server_up = is_server_available(config_file_options.conninfo);
		PQExpBufferData output_buf;

		/*
		 * The server actions to execute, in order. "stem" is the prefix used
		 * to build the "...ing server" notice ("stopp" is deliberate, giving
		 * "stopping"); "verb" is used in the failure message.
		 */
		struct
		{
			int			type;
			const char *stem;
			const char *verb;
		}			steps[2];
		int			step_count = 0;
		int			i;

		if (server_up == true)
		{
			if (config_file_options.service_restart_command[0] == '\0')
			{
				/* no "service_restart_command" defined - stop, then start */
				steps[0].type = ACTION_STOP_WAIT;
				steps[0].stem = "stopp";	/* sic */
				steps[0].verb = "stop";

				steps[1].type = ACTION_START;
				steps[1].stem = "start";
				steps[1].verb = "start";

				step_count = 2;
			}
			else
			{
				steps[0].type = ACTION_RESTART;
				steps[0].stem = "restart";
				steps[0].verb = "restart";

				step_count = 1;
			}
		}
		else
		{
			/* server is not running, so it only needs to be started */
			steps[0].type = ACTION_START;
			steps[0].stem = "start";
			steps[0].verb = "start";

			step_count = 1;
		}

		initPQExpBuffer(&output_buf);

		for (i = 0; i < step_count; i++)
		{
			get_server_action(steps[i].type, server_command, config_file_options.data_directory);

			/* if translation needed, generate messages per step above */
			log_notice(_("%sing server using \"%s\""),
					   steps[i].stem,
					   server_command);

			if (local_command(server_command, &output_buf) == false)
			{
				log_error(_("unable to %s server"), steps[i].verb);

				/* release the command output buffer before bailing out */
				termPQExpBuffer(&output_buf);

				*error_code = ERR_NO_RESTART;
				return false;
			}
		}

		termPQExpBuffer(&output_buf);
	}

	/*
	 * If replication slots are in use, and an inactive one for this node
	 * exists on the former upstream, drop it.
	 *
	 * Note that if this function is called by do_standby_switchover(), the
	 * "repmgr node rejoin" command executed on the demotion candidate may already
	 * have removed the slot, so there may be nothing to do.
	 *
	 * XXX check if former upstream is current primary?
	 */
	if (remove_old_replication_slot == true)
	{
		/* node_id is still UNKNOWN_NODE_ID if the record lookup above failed */
		if (original_upstream_node_record.node_id != UNKNOWN_NODE_ID)
		{
			PGconn	   *old_upstream_conn = establish_db_connection_quiet(original_upstream_node_record.conninfo);

			if (PQstatus(old_upstream_conn) != CONNECTION_OK)
			{
				log_warning(_("unable to connect to old upstream node %i to remove replication slot"),
							original_upstream_node_id);
				log_hint(_("if reusing this node, you should manually remove any inactive replication slots"));
			}
			else
			{
				drop_replication_slot_if_exists(old_upstream_conn,
												original_upstream_node_id,
												local_node_record.slot_name);
				PQfinish(old_upstream_conn);
			}
		}
	}

	/*
	 * It's possible this node was an inactive primary - update the relevant
	 * fields to ensure it's marked as an active standby
	 */
	if (update_node_record_status(primary_conn,
								  config_file_options.node_id,
								  "standby",
								  follow_target_node_record->node_id,
								  true) == false)
	{
		appendPQExpBufferStr(output,
							 _("unable to update upstream node"));

		/* make sure the caller sees a failure code for this path too */
		*error_code = general_error_code;
		return false;
	}

	appendPQExpBuffer(output,
					  _("node %i is now attached to node %i"),
					  config_file_options.node_id,
					  follow_target_node_record->node_id);

	return true;
}
/*
* Perform a switchover by:
*
* - stopping current primary node
* - promoting this standby node to primary
* - forcing the previous primary node to follow this node
*
* Where running and not already paused, repmgrd will be paused (and
* subsequently unpaused), unless --repmgrd-no-pause provided.
*
* TODO:
* - make connection test timeouts/intervals configurable (see below)
*/
void
do_standby_switchover(void)
{
PGconn *local_conn = NULL;
PGconn *remote_conn = NULL;
int server_version_num = UNKNOWN_SERVER_VERSION_NUM;
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
/* the remote server is the primary to be demoted */
char remote_conninfo[MAXCONNINFO] = "";
char remote_host[MAXLEN] = "";
int remote_node_id = UNKNOWN_NODE_ID;
t_node_info remote_node_record = T_NODE_INFO_INITIALIZER;
RecordStatus record_status = RECORD_NOT_FOUND;
RecoveryType recovery_type = RECTYPE_UNKNOWN;
PQExpBufferData remote_command_str;
PQExpBufferData command_output;
PQExpBufferData node_rejoin_options;
int r,
i;
bool command_success = false;
bool shutdown_success = false;
/* this flag is used to decide which final message to emit */
bool switchover_success = true;
XLogRecPtr remote_last_checkpoint_lsn = InvalidXLogRecPtr;
ReplInfo replication_info;
/* store list of configuration files on the demotion candidate */
KeyValueList remote_config_files = {NULL, NULL};
/* store list of sibling nodes if --siblings-follow specified */
NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
int reachable_sibling_node_count = 0;
int reachable_sibling_nodes_with_slot_count = 0;
int unreachable_sibling_node_count = 0;
/* number of free walsenders required on promotion candidate */
int min_required_wal_senders = 1;
/* this will be calculated as max_wal_senders - COUNT(*) FROM pg_stat_replication */
int available_wal_senders = 0;
/* number of free replication slots required on promotion candidate */
int min_required_free_slots = 0;
/* highgo: virtual IP setting variables */
char virtual_ip[MAXLEN] = "";
char network_card[MAXLEN] = "";
char unbind_vip[MAXLEN] = "";
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
/* used for handling repmgrd pause/unpause */
NodeInfoList all_nodes = T_NODE_INFO_LIST_INITIALIZER;
RepmgrdInfo **repmgrd_info = NULL;
int repmgrd_running_count = 0;
/*
* SANITY CHECKS
*
* We'll be doing a bunch of operations on the remote server (primary to
* be demoted) - careful checks needed before proceeding.
*/
local_conn = establish_db_connection(config_file_options.conninfo, true);
/* Verify that standby is a supported server version */
server_version_num = check_server_version(local_conn, "standby", true, NULL);
record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_record);
if (record_status != RECORD_FOUND)
{
log_error(_("unable to retrieve node record for node %i"),
config_file_options.node_id);
PQfinish(local_conn);
exit(ERR_DB_QUERY);
}
if (!is_streaming_replication(local_node_record.type))
{
log_error(_("switchover can only performed with streaming replication"));
PQfinish(local_conn);
exit(ERR_BAD_CONFIG);
}
if (runtime_options.dry_run == true)
{
log_notice(_("checking switchover on node \"%s\" (ID: %i) in --dry-run mode"),
local_node_record.node_name,
local_node_record.node_id);
}
else
{
log_notice(_("executing switchover on node \"%s\" (ID: %i)"),
local_node_record.node_name,
local_node_record.node_id);
}
/* Check that this is a standby */
recovery_type = get_recovery_type(local_conn);
if (recovery_type != RECTYPE_STANDBY)
{
log_error(_("switchover must be executed from the standby node to be promoted"));
if (recovery_type == RECTYPE_PRIMARY)
{
log_detail(_("this node (ID: %i) is the primary"),
local_node_record.node_id);
}
PQfinish(local_conn);
exit(ERR_SWITCHOVER_FAIL);
}
/* check remote server connection and retrieve its record */
remote_conn = get_primary_connection(local_conn, &remote_node_id, remote_conninfo);
if (PQstatus(remote_conn) != CONNECTION_OK)
{
log_error(_("unable to connect to current primary node"));
log_hint(_("check that the cluster is correctly configured and this standby is registered"));
PQfinish(local_conn);
exit(ERR_DB_CONN);
}
record_status = get_node_record(remote_conn, remote_node_id, &remote_node_record);
if (record_status != RECORD_FOUND)
{
log_error(_("unable to retrieve node record for current primary (node %i)"),
remote_node_id);
PQfinish(local_conn);
PQfinish(remote_conn);
exit(ERR_DB_QUERY);
}
log_verbose(LOG_DEBUG, "remote node name is \"%s\"", remote_node_record.node_name);
/*
* Check this standby is attached to the demotion candidate
*/
if (local_node_record.upstream_node_id != remote_node_record.node_id)
{
log_error(_("local node \"%s\" (ID: %i) is not a downstream of demotion candidate primary \"%s\" (ID: %i)"),
local_node_record.node_name,
local_node_record.node_id,
remote_node_record.node_name,
remote_node_record.node_id);
if (local_node_record.upstream_node_id == UNKNOWN_NODE_ID)
log_detail(_("local node has no registered upstream node"));
else
log_detail(_("registered upstream node ID is %i"),
local_node_record.upstream_node_id);
log_hint(_("execute \"repmgr standby register --force\" to update the local node's metadata"));
PQfinish(local_conn);
PQfinish(remote_conn);
exit(ERR_BAD_CONFIG);
}
if (is_downstream_node_attached(remote_conn, local_node_record.node_name) == false)
{
log_error(_("local node \"%s\" (ID: %i) is not attached to demotion candidate \"%s\" (ID: %i)"),
local_node_record.node_name,
local_node_record.node_id,
remote_node_record.node_name,
remote_node_record.node_id);
PQfinish(local_conn);
PQfinish(remote_conn);
exit(ERR_BAD_CONFIG);
}
/*
* Check that WAL replay on the standby is *not* paused, as that could lead
* to unexpected behaviour when the standby is promoted.
*
* For switchover we'll mandate that WAL replay *must not* be paused.
* For a promote operation we can proceed if WAL replay is paused and
* there is no more available WAL to be replayed, as we can be sure the
* primary is down already, but in a switchover context there's
* potentially a window for more WAL to be received before we shut down
* the primary completely.
*/
if (is_wal_replay_paused(local_conn, false) == true)
{
ReplInfo replication_info;
init_replication_info(&replication_info);
if (get_replication_info(local_conn, STANDBY, &replication_info) == false)
{
log_error(_("unable to retrieve replication information from local node"));
PQfinish(local_conn);
exit(ERR_SWITCHOVER_FAIL);
}
log_error(_("WAL replay is paused on this node and it is not safe to proceed"));
log_detail(_("replay paused at %X/%X; last WAL received is %X/%X"),
format_lsn(replication_info.last_wal_replay_lsn),
format_lsn(replication_info.last_wal_receive_lsn));
if (PQserverVersion(local_conn) >= 100000)
log_hint(_("execute \"pg_wal_replay_resume()\" to unpause WAL replay"));
else
log_hint(_("execute \"pg_xlog_replay_resume()\" to unpause WAL replay"));
PQfinish(local_conn);
exit(ERR_SWITCHOVER_FAIL);
}
/*
* Check that there are no exclusive backups running on the primary.
* We don't want to end up damaging the backup and also leaving the server in a
* state where there's control data saying it's in backup mode but there's no
* backup_label in PGDATA.
* If the user wants to do the switchover anyway, they should first stop the
* backup that's running.
*/
if (server_in_exclusive_backup_mode(remote_conn) != BACKUP_STATE_NO_BACKUP)
{
log_error(_("unable to perform a switchover while primary server is in exclusive backup mode"));
log_hint(_("stop backup before attempting the switchover"));
PQfinish(local_conn);
PQfinish(remote_conn);
exit(ERR_SWITCHOVER_FAIL);
}
/* this will fill the %p event notification parameter */
event_info.node_id = remote_node_record.node_id;
/* keep a running total of how many nodes will require a replication slot */
if (remote_node_record.slot_name[0] != '\0')
{
min_required_free_slots++;
}
/*
* If --force-rewind specified, check pg_rewind can be used, and
* pre-emptively fetch the list of configuration files which should be
* archived
*/
if (runtime_options.force_rewind_used == true)
{
PQExpBufferData reason;
PQExpBufferData msg;
initPQExpBuffer(&reason);
if (can_use_pg_rewind(remote_conn, config_file_options.data_directory, &reason) == false)
{
log_error(_("--force-rewind specified but pg_rewind cannot be used"));
log_detail("%s", reason.data);
termPQExpBuffer(&reason);
PQfinish(local_conn);
PQfinish(remote_conn);
exit(ERR_BAD_CONFIG);
}
termPQExpBuffer(&reason);
initPQExpBuffer(&msg);
appendPQExpBufferStr(&msg,
_("prerequisites for using pg_rewind are met"));
if (runtime_options.dry_run == true)
{
log_info("%s", msg.data);
}
else
{
log_verbose(LOG_INFO, "%s", msg.data);
}
termPQExpBuffer(&msg);
get_datadir_configuration_files(remote_conn, &remote_config_files);
}
/*
* Check that we can connect by SSH to the remote (current primary) server
*/
get_conninfo_value(remote_conninfo, "host", remote_host);
r = test_ssh_connection(remote_host, runtime_options.remote_user);
if (r != 0)
{
log_error(_("unable to connect via SSH to host \"%s\", user \"%s\""),
remote_host, runtime_options.remote_user);
PQfinish(remote_conn);
PQfinish(local_conn);
exit(ERR_BAD_CONFIG);
}
else
{
PQExpBufferData msg;
initPQExpBuffer(&msg);
appendPQExpBuffer(&msg,
_("SSH connection to host \"%s\" succeeded"),
remote_host);
if (runtime_options.dry_run == true)
{
log_info("%s", msg.data);
}
else
{
log_verbose(LOG_INFO, "%s", msg.data);
}
termPQExpBuffer(&msg);
}
/* check remote repmgr binary can be found */
initPQExpBuffer(&remote_command_str);
make_remote_repmgr_path(&remote_command_str, &remote_node_record);
appendPQExpBufferStr(&remote_command_str, "--version 2>/dev/null && echo \"1\" || echo \"0\"");
initPQExpBuffer(&command_output);
command_success = remote_command(remote_host,
runtime_options.remote_user,
remote_command_str.data,
config_file_options.ssh_options,
&command_output);
termPQExpBuffer(&remote_command_str);
if (command_success == false || command_output.data[0] == '0')
{
PQExpBufferData hint;
log_error(_("unable to execute \"%s\" on \"%s\""),
progname(), remote_host);
if (strlen(command_output.data) > 2)
log_detail("%s", command_output.data);
termPQExpBuffer(&command_output);
initPQExpBuffer(&hint);
appendPQExpBufferStr(&hint,
_("check \"pg_bindir\" is set to the correct path in \"repmgr.conf\"; current value: "));
if (strlen(config_file_options.pg_bindir))
{
appendPQExpBuffer(&hint,
"\"%s\"", config_file_options.pg_bindir);
}
else
{
appendPQExpBufferStr(&hint,
"(not set)");
}
log_hint("%s", hint.data);
termPQExpBuffer(&hint);
PQfinish(remote_conn);
PQfinish(local_conn);
exit(ERR_BAD_CONFIG);
}
termPQExpBuffer(&command_output);
/*
* Sanity-check remote "data_directory" is correctly configured in repmgr.conf.
*
* This is important as we'll need to be able to run "repmgr node status" on the data
* directory after the remote (demotion candidate) has shut down.
*/
initPQExpBuffer(&remote_command_str);
make_remote_repmgr_path(&remote_command_str, &remote_node_record);
/*
* --data-directory-config is available from repmgr 4.3; it will fail
* if the remote repmgr is an earlier version, but the version should match
* anyway.
*/
appendPQExpBufferStr(&remote_command_str, "node check --data-directory-config --optformat -LINFO 2>/dev/null");
initPQExpBuffer(&command_output);
command_success = remote_command(remote_host,
runtime_options.remote_user,
remote_command_str.data,
config_file_options.ssh_options,
&command_output);
termPQExpBuffer(&remote_command_str);
if (command_success == false)
{
log_error(_("unable to execute \"%s node check --data-directory-config\" on \"%s\":"),
progname(), remote_host);
log_detail("%s", command_output.data);
PQfinish(remote_conn);
PQfinish(local_conn);
termPQExpBuffer(&command_output);
exit(ERR_BAD_CONFIG);
}
/* check remote repmgr has the data directory correctly configured */
if (parse_data_directory_config(command_output.data) == false)
{
log_error(_("\"data_directory\" parameter in repmgr.conf on \"%s\" is incorrectly configured"),
remote_node_record.node_name);
log_hint(_("execute \"repmgr node check --data-directory-config\" on \"%s\" to diagnose the issue"),
remote_node_record.node_name);
PQfinish(remote_conn);
PQfinish(local_conn);
termPQExpBuffer(&command_output);
exit(ERR_BAD_CONFIG);
}
termPQExpBuffer(&command_output);
if (runtime_options.dry_run == true)
{
log_info(_("able to execute \"%s\" on remote host \"localhost\""), progname());
}
/*
* populate local node record with current state of various replication-related
* values, so we can check for sufficient walsenders and replication slots
*/
get_node_replication_stats(local_conn, &local_node_record);
available_wal_senders = local_node_record.max_wal_senders -
local_node_record.attached_wal_receivers;
/*
* If --siblings-follow specified, get list and check they're reachable
* (if not just issue a warning)
*/
get_active_sibling_node_records(local_conn,
local_node_record.node_id,
local_node_record.upstream_node_id,
&sibling_nodes);
if (runtime_options.siblings_follow == false)
{
if (sibling_nodes.node_count > 0)
{
log_warning(_("%i sibling nodes found, but option \"--siblings-follow\" not specified"),
sibling_nodes.node_count);
log_detail(_("these nodes will remain attached to the current primary"));
}
}
else
{
char host[MAXLEN] = "";
NodeInfoListCell *cell;
log_verbose(LOG_INFO, _("%i active sibling nodes found"),
sibling_nodes.node_count);
if (sibling_nodes.node_count == 0)
{
log_warning(_("option \"--sibling-nodes\" specified, but no sibling nodes exist"));
}
else
{
/* include walsender for promotion candidate in total */
for (cell = sibling_nodes.head; cell; cell = cell->next)
{
/* get host from node record */
get_conninfo_value(cell->node_info->conninfo, "host", host);
r = test_ssh_connection(host, runtime_options.remote_user);
if (r != 0)
{
cell->node_info->reachable = false;
unreachable_sibling_node_count++;
}
else
{
cell->node_info->reachable = true;
reachable_sibling_node_count++;
min_required_wal_senders++;
if (cell->node_info->slot_name[0] != '\0')
{
reachable_sibling_nodes_with_slot_count++;
min_required_free_slots++;
}
}
}
if (unreachable_sibling_node_count > 0)
{
if (runtime_options.force == false)
{
log_error(_("%i of %i sibling nodes unreachable via SSH:"),
unreachable_sibling_node_count,
sibling_nodes.node_count);
}
else
{
log_warning(_("%i of %i sibling nodes unreachable via SSH:"),
unreachable_sibling_node_count,
sibling_nodes.node_count);
}
/* display list of unreachable sibling nodes */
for (cell = sibling_nodes.head; cell; cell = cell->next)
{
if (cell->node_info->reachable == true)
continue;
log_detail(" %s (ID: %i)",
cell->node_info->node_name,
cell->node_info->node_id);
}
if (runtime_options.force == false)
{
log_hint(_("use -F/--force to proceed in any case"));
PQfinish(local_conn);
exit(ERR_BAD_CONFIG);
}
if (runtime_options.dry_run == true)
{
log_detail(_("F/--force specified, would proceed anyway"));
}
else
{
log_detail(_("F/--force specified, proceeding anyway"));
}
}
else
{
char *msg = _("all sibling nodes are reachable via SSH");
if (runtime_options.dry_run == true)
{
log_info("%s", msg);
}
else
{
log_verbose(LOG_INFO, "%s", msg);
}
}
}
}
/*
* check there are sufficient free walsenders - obviously there's potential
* for a later race condition if some walsenders come into use before the
* switchover operation gets around to attaching the sibling nodes, but
* this should catch any actual existing configuration issue (and if anyone's
* performing a switchover in such an unstable environment, they only have
* themselves to blame).
*/
if (available_wal_senders < min_required_wal_senders)
{
if (runtime_options.force == false || runtime_options.dry_run == true)
{
log_error(_("insufficient free walsenders on promotion candidate"));
log_detail(_("at least %i walsenders required but only %i free walsenders on promotion candidate"),
min_required_wal_senders,
available_wal_senders);
log_hint(_("increase parameter \"max_wal_senders\" or use -F/--force to proceed in any case"));
if (runtime_options.dry_run == false)
{
PQfinish(local_conn);
exit(ERR_BAD_CONFIG);
}
}
else
{
log_warning(_("insufficient free walsenders on promotion candidate"));
log_detail(_("at least %i walsenders required but only %i free walsender(s) on promotion candidate"),
min_required_wal_senders,
available_wal_senders);
}
}
else
{
if (runtime_options.dry_run == true)
{
log_info(_("%i walsenders required, %i available"),
min_required_wal_senders,
available_wal_senders);
}
}
/* check demotion candidate can make replication connection to promotion candidate */
{
initPQExpBuffer(&remote_command_str);
make_remote_repmgr_path(&remote_command_str, &remote_node_record);
appendPQExpBuffer(&remote_command_str,
"node check --remote-node-id=%i --replication-connection",
local_node_record.node_id);
initPQExpBuffer(&command_output);
command_success = remote_command(remote_host,
runtime_options.remote_user,
remote_command_str.data,
config_file_options.ssh_options,
&command_output);
termPQExpBuffer(&remote_command_str);
if (command_success == true)
{
ConnectionStatus conn_status = parse_remote_node_replication_connection(command_output.data);
switch(conn_status)
{
case CONN_OK:
if (runtime_options.dry_run == true)
{
log_info(_("demotion candidate is able to make replication connection to promotion candidate"));
}
break;
case CONN_BAD:
log_error(_("demotion candidate is unable to make replication connection to promotion candidate"));
exit(ERR_BAD_CONFIG);
break;
default:
log_error(_("unable to determine whether demotion candidate is able to make replication connection to promotion candidate"));
exit(ERR_BAD_CONFIG);
break;
}
termPQExpBuffer(&command_output);
}
}
/* check archive/replication status */
{
int lag_seconds = 0;
CheckStatus status = CHECK_STATUS_UNKNOWN;
/* archive status - check when "archive_mode" is activated */
if (guc_set(remote_conn, "archive_mode", "!=", "off"))
{
int files = 0;
int threshold = 0;
initPQExpBuffer(&remote_command_str);
make_remote_repmgr_path(&remote_command_str, &remote_node_record);
appendPQExpBufferStr(&remote_command_str,
"node check --terse -LERROR --archive-ready --optformat");
initPQExpBuffer(&command_output);
command_success = remote_command(remote_host,
runtime_options.remote_user,
remote_command_str.data,
config_file_options.ssh_options,
&command_output);
termPQExpBuffer(&remote_command_str);
if (command_success == true)
{
status = parse_node_check_archiver(command_output.data, &files, &threshold);
}
termPQExpBuffer(&command_output);
switch (status)
{
case CHECK_STATUS_UNKNOWN:
{
if (runtime_options.force == false)
{
log_error(_("unable to check number of pending archive files on demotion candidate \"%s\""),
remote_node_record.node_name);
log_hint(_("use -F/--force to continue anyway"));
PQfinish(remote_conn);
PQfinish(local_conn);
exit(ERR_SWITCHOVER_FAIL);
}
log_warning(_("unable to check number of pending archive files on demotion candidate \"%s\""),
remote_node_record.node_name);
log_notice(_("-F/--force set, continuing with switchover"));
}
break;
case CHECK_STATUS_CRITICAL:
{
if (runtime_options.force == false)
{
log_error(_("number of pending archive files on demotion candidate \"%s\" is critical"),
remote_node_record.node_name);
log_detail(_("%i pending archive files (critical threshold: %i)"),
files, threshold);
log_hint(_("PostgreSQL will not shut down until all files are archived; use -F/--force to continue anyway"));
PQfinish(remote_conn);
PQfinish(local_conn);
exit(ERR_SWITCHOVER_FAIL);
}
log_warning(_("number of pending archive files on demotion candidate \"%s\" is critical"),
remote_node_record.node_name);
log_detail(_("%i pending archive files (critical threshold: %i)"),
files, threshold);
log_notice(_("-F/--force set, continuing with switchover"));
}
break;
case CHECK_STATUS_WARNING:
{
log_warning(_("number of pending archive files on demotion candidate \"%s\" is warning"),
remote_node_record.node_name);
log_detail(_("%i pending archive files (warning threshold: %i)"),
files, threshold);
log_hint(_("PostgreSQL will not shut down until all files are archived"));
}
break;
case CHECK_STATUS_OK:
{
PQExpBufferData msg;
initPQExpBuffer(&msg);
appendPQExpBuffer(&msg,
_("%i pending archive files"),
files);
if (runtime_options.dry_run == true)
{
log_info("%s", msg.data);
}
else
{
log_verbose(LOG_INFO, "%s", msg.data);
}
termPQExpBuffer(&msg);
}
}
}
else
{
char *msg = _("archive mode is \"off\"");
if (runtime_options.dry_run == true)
{
log_info("%s", msg);
}
else
{
log_verbose(LOG_INFO, "%s", msg);
}
}
/*
* check replication lag on promotion candidate (TODO: check on all
* nodes attached to demotion candidate)
*/
lag_seconds = get_replication_lag_seconds(local_conn);
log_debug("lag is %i ", lag_seconds);
if (lag_seconds >= config_file_options.replication_lag_critical)
{
if (runtime_options.force == false)
{
log_error(_("replication lag on this node is critical"));
log_detail(_("lag is %i seconds (critical threshold: %i)"),
lag_seconds, config_file_options.replication_lag_critical);
log_hint(_("PostgreSQL on the demotion candidate will not shut down until pending WAL is flushed to the standby; use -F/--force to continue anyway"));
PQfinish(remote_conn);
PQfinish(local_conn);
exit(ERR_SWITCHOVER_FAIL);
}
log_warning(_("replication lag on this node is critical"));
log_detail(_("lag is %i seconds (critical threshold: %i)"),
lag_seconds, config_file_options.replication_lag_critical);
log_notice(_("-F/--force set, continuing with switchover"));
}
else if (lag_seconds >= config_file_options.replication_lag_warning)
{
log_warning(_("replication lag on this node is warning"));
log_detail(_("lag is %i seconds (warning threshold: %i)"),
lag_seconds, config_file_options.replication_lag_warning);
}
else if (lag_seconds == UNKNOWN_REPLICATION_LAG)
{
if (runtime_options.force == false)
{
log_error(_("unable to check replication lag on local node"));
log_hint(_("use -F/--force to continue anyway"));
PQfinish(remote_conn);
PQfinish(local_conn);
exit(ERR_SWITCHOVER_FAIL);
}
log_warning(_("unable to check replication lag on local node"));
log_notice(_("-F/--force set, continuing with switchover"));
}
/* replication lag is below warning threshold */
else
{
PQExpBufferData msg;
initPQExpBuffer(&msg);
appendPQExpBuffer(&msg,
_("replication lag on this standby is %i seconds"),
lag_seconds);
if (runtime_options.dry_run == true)
{
log_info("%s", msg.data);
}
else
{
log_verbose(LOG_INFO, "%s", msg.data);
}
termPQExpBuffer(&msg);
}
}
PQfinish(remote_conn);
/*
* if replication slots are required by demotion candidate and/or siblings,
* check the promotion candidate has sufficient free slots
*/
if (min_required_free_slots > 0 )
{
int available_slots = local_node_record.max_replication_slots -
local_node_record.active_replication_slots;
log_debug("minimum of %i free slots (%i for siblings) required; %i available",
min_required_free_slots,
reachable_sibling_nodes_with_slot_count,
available_slots);
if (available_slots < min_required_free_slots)
{
if (runtime_options.force == false || runtime_options.dry_run == true)
{
log_error(_("insufficient free replication slots to attach all nodes"));
log_detail(_("at least %i additional replication slots required but only %i free slots available on promotion candidate"),
min_required_free_slots,
available_slots);
log_hint(_("increase parameter \"max_replication_slots\" or use -F/--force to proceed in any case"));
if (runtime_options.dry_run == false)
{
PQfinish(local_conn);
exit(ERR_BAD_CONFIG);
}
}
}
else
{
if (runtime_options.dry_run == true)
{
log_info(_("%i replication slots required, %i available"),
min_required_free_slots,
available_slots);
}
}
}
/*
* Attempt to pause all repmgrd instances, unless user explicitly
* specifies not to.
*/
if (runtime_options.repmgrd_no_pause == false)
{
NodeInfoListCell *cell = NULL;
ItemList repmgrd_connection_errors = {NULL, NULL};
int i = 0;
int unreachable_node_count = 0;
get_all_node_records(local_conn, &all_nodes);
repmgrd_info = (RepmgrdInfo **) pg_malloc0(sizeof(RepmgrdInfo *) * all_nodes.node_count);
for (cell = all_nodes.head; cell; cell = cell->next)
{
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
repmgrd_info[i]->node_id = cell->node_info->node_id;
repmgrd_info[i]->pid = UNKNOWN_PID;
repmgrd_info[i]->paused = false;
repmgrd_info[i]->running = false;
repmgrd_info[i]->pg_running = true;
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
{
/*
* unable to connect; treat this as an error
*/
repmgrd_info[i]->pg_running = false;
item_list_append_format(&repmgrd_connection_errors,
_("unable to connect to node \"%s\" (ID %i):\n%s"),
cell->node_info->node_name,
cell->node_info->node_id,
PQerrorMessage(cell->node_info->conn));
PQfinish(cell->node_info->conn);
cell->node_info->conn = NULL;
unreachable_node_count++;
i++;
continue;
}
repmgrd_info[i]->running = repmgrd_is_running(cell->node_info->conn);
repmgrd_info[i]->pid = repmgrd_get_pid(cell->node_info->conn);
repmgrd_info[i]->paused = repmgrd_is_paused(cell->node_info->conn);
if (repmgrd_info[i]->running == true)
repmgrd_running_count++;
i++;
}
if (unreachable_node_count > 0)
{
PQExpBufferData msg;
PQExpBufferData detail;
ItemListCell *cell;
initPQExpBuffer(&msg);
appendPQExpBuffer(&msg,
_("unable to connect to %i node(s), unable to pause all repmgrd instances"),
unreachable_node_count);
initPQExpBuffer(&detail);
for (cell = repmgrd_connection_errors.head; cell; cell = cell->next)
{
appendPQExpBuffer(&detail,
" %s\n",
cell->string);
}
if (runtime_options.force == false)
{
log_error("%s", msg.data);
}
else
{
log_warning("%s", msg.data);
}
log_detail(_("following node(s) unreachable:\n%s"), detail.data);
termPQExpBuffer(&msg);
termPQExpBuffer(&detail);
/* tell user about footgun */
if (runtime_options.force == false)
{
log_hint(_("use -F/--force to continue anyway"));
clear_node_info_list(&sibling_nodes);
clear_node_info_list(&all_nodes);
exit(ERR_SWITCHOVER_FAIL);
}
}
/* pause repmgrd on all reachable nodes */
if (repmgrd_running_count > 0)
{
i = 0;
for (cell = all_nodes.head; cell; cell = cell->next)
{
/*
* Skip if node was unreachable
*/
if (repmgrd_info[i]->pg_running == false)
{
log_warning(_("node %s (ID %i) unreachable, unable to pause repmgrd"),
cell->node_info->node_name,
cell->node_info->node_id);
i++;
continue;
}
/*
* Skip if repmgrd not running on node
*/
if (repmgrd_info[i]->running == false)
{
log_warning(_("repmgrd not running on node %s (ID %i)"),
cell->node_info->node_name,
cell->node_info->node_id);
i++;
continue;
}
/*
* Skip if node is already paused. Note we won't unpause these, to
* leave the repmgrd instances in the cluster in the same state they
* were before the switchover.
*/
if (repmgrd_info[i]->paused == true)
{
PQfinish(cell->node_info->conn);
cell->node_info->conn = NULL;
i++;
continue;
}
if (runtime_options.dry_run == true)
{
log_info(_("would pause repmgrd on node %s (ID %i)"),
cell->node_info->node_name,
cell->node_info->node_id);
}
else
{
/* XXX check result */
log_debug("pausing repmgrd on node %s (ID %i)",
cell->node_info->node_name,
cell->node_info->node_id);
(void) repmgrd_pause(cell->node_info->conn, true);
}
PQfinish(cell->node_info->conn);
cell->node_info->conn = NULL;
i++;
}
}
else
{
/* close all connections - we'll reestablish later */
for (cell = all_nodes.head; cell; cell = cell->next)
{
if (cell->node_info->conn != NULL)
{
PQfinish(cell->node_info->conn);
cell->node_info->conn = NULL;
}
}
}
}
/*
* Sanity checks completed - prepare for the switchover
*/
if (runtime_options.dry_run == true)
{
log_notice(_("local node \"%s\" (ID: %i) would be promoted to primary; "
"current primary \"%s\" (ID: %i) would be demoted to standby"),
local_node_record.node_name,
local_node_record.node_id,
remote_node_record.node_name,
remote_node_record.node_id);
}
else
{
log_notice(_("local node \"%s\" (ID: %i) will be promoted to primary; "
"current primary \"%s\" (ID: %i) will be demoted to standby"),
local_node_record.node_name,
local_node_record.node_id,
remote_node_record.node_name,
remote_node_record.node_id);
}
/*
* Stop the remote primary
*
* We'll issue the pg_ctl command but not force it not to wait; we'll
* check the connection from here - and error out if no shutdown is
* detected after a certain time.
*/
initPQExpBuffer(&remote_command_str);
initPQExpBuffer(&command_output);
make_remote_repmgr_path(&remote_command_str, &remote_node_record);
if (runtime_options.dry_run == true)
{
appendPQExpBufferStr(&remote_command_str,
"node service --terse -LERROR --list-actions --action=stop");
}
else
{
log_notice(_("stopping current primary node \"%s\" (ID: %i)"),
remote_node_record.node_name,
remote_node_record.node_id);
appendPQExpBufferStr(&remote_command_str,
"node service --action=stop --checkpoint");
}
/* XXX handle failure */
(void) remote_command(remote_host,
runtime_options.remote_user,
remote_command_str.data,
config_file_options.ssh_options,
&command_output);
termPQExpBuffer(&remote_command_str);
/*
* --dry-run ends here with display of command which would be used to shut
* down the remote server
*/
if (runtime_options.dry_run == true)
{
/* we use a buffer here as it will be modified by string_remove_trailing_newlines() */
char shutdown_command[MAXLEN] = "";
strncpy(shutdown_command, command_output.data, MAXLEN);
termPQExpBuffer(&command_output);
string_remove_trailing_newlines(shutdown_command);
log_info(_("following shutdown command would be run on node \"%s\":\n \"%s\""),
remote_node_record.node_name,
shutdown_command);
clear_node_info_list(&sibling_nodes);
key_value_list_free(&remote_config_files);
return;
}
termPQExpBuffer(&command_output);
shutdown_success = false;
/* loop for timeout waiting for current primary to stop */
for (i = 0; i < config_file_options.shutdown_check_timeout; i++)
{
/* Check whether primary is available */
PGPing ping_res;
log_info(_("checking for primary shutdown; %i of %i attempts (\"shutdown_check_timeout\")"),
i + 1, config_file_options.shutdown_check_timeout);
ping_res = PQping(remote_conninfo);
log_debug("ping status is: %s", print_pqping_status(ping_res));
/* database server could not be contacted */
if (ping_res == PQPING_NO_RESPONSE || ping_res == PQPING_NO_ATTEMPT)
{
bool command_success;
/*
* remote server can't be contacted at protocol level - that
* doesn't necessarily mean it's shut down, so we'll ask its
* repmgr to check at data directory level, and if shut down also
* return the last checkpoint LSN.
*/
initPQExpBuffer(&remote_command_str);
make_remote_repmgr_path(&remote_command_str, &remote_node_record);
appendPQExpBufferStr(&remote_command_str,
"node status --is-shutdown-cleanly");
initPQExpBuffer(&command_output);
command_success = remote_command(remote_host,
runtime_options.remote_user,
remote_command_str.data,
config_file_options.ssh_options,
&command_output);
termPQExpBuffer(&remote_command_str);
if (command_success == true)
{
NodeStatus status = parse_node_status_is_shutdown_cleanly(command_output.data, &remote_last_checkpoint_lsn);
log_verbose(LOG_DEBUG, "remote node status is: %s", print_node_status(status));
if (status == NODE_STATUS_DOWN && remote_last_checkpoint_lsn != InvalidXLogRecPtr)
{
shutdown_success = true;
log_notice(_("current primary has been cleanly shut down at location %X/%X"),
format_lsn(remote_last_checkpoint_lsn));
termPQExpBuffer(&command_output);
break;
}
/* remote node did not shut down cleanly */
else if (status == NODE_STATUS_UNCLEAN_SHUTDOWN)
{
if (!runtime_options.force)
{
log_error(_("current primary did not shut down cleanly, aborting"));
log_hint(_("use -F/--force to promote current standby"));
termPQExpBuffer(&command_output);
exit(ERR_SWITCHOVER_FAIL);
}
log_error(_("current primary did not shut down cleanly, continuing anyway"));
shutdown_success = true;
break;
}
else if (status == NODE_STATUS_SHUTTING_DOWN)
{
log_info(_("remote node is still shutting down"));
}
}
termPQExpBuffer(&command_output);
}
log_debug("sleeping 1 second until next check");
sleep(1);
}
if (shutdown_success == false)
{
log_error(_("shutdown of the primary server could not be confirmed"));
log_hint(_("check the primary server status before performing any further actions"));
exit(ERR_SWITCHOVER_FAIL);
}
/* this is unlikely to happen, but check and handle gracefully anyway */
if (PQstatus(local_conn) != CONNECTION_OK)
{
log_warning(_("connection to local node lost, reconnecting..."));
log_detail("\n%s", PQerrorMessage(local_conn));
PQfinish(local_conn);
local_conn = establish_db_connection(config_file_options.conninfo, false);
if (PQstatus(local_conn) != CONNECTION_OK)
{
log_error(_("unable to reconnect to local node \"%s\""),
local_node_record.node_name);
exit(ERR_DB_CONN);
}
log_verbose(LOG_INFO, _("successfully reconnected to local node"));
}
/*
* highgo: after stopping remote primary server, delete the vip of primary node
* tianbing
*/
if (get_virtual_ip(local_conn, remote_node_id, virtual_ip) && get_network_card(local_conn, remote_node_id, network_card))
{
int uid = getuid();
if(uid == 0) //root user
sprintf(unbind_vip, "ssh %s \"ip addr del %s dev %s\"", remote_host, virtual_ip, network_card);
else
sprintf(unbind_vip, "ssh %s \"sudo ip addr del %s dev %s\"", remote_host, virtual_ip, network_card);
system(unbind_vip);
}
init_replication_info(&replication_info);
/*
* Compare standby's last WAL receive location with the primary's last
* checkpoint LSN. We'll loop for a while as it's possible the standby's
* walreceiver has not yet flushed all received WAL to disk.
*/
{
bool notice_emitted = false;
for (i = 0; i < config_file_options.wal_receive_check_timeout; i++)
{
get_replication_info(local_conn, STANDBY, &replication_info);
if (replication_info.last_wal_receive_lsn >= remote_last_checkpoint_lsn)
break;
/*
* We'll only output this notice if it looks like we're going to have
* to wait for WAL to be flushed.
*/
if (notice_emitted == false)
{
log_notice(_("waiting up to %i seconds (parameter \"wal_receive_check_timeout\") for received WAL to flush to disk"),
config_file_options.wal_receive_check_timeout);
notice_emitted = true;
}
log_info(_("sleeping %i of maximum %i seconds waiting for standby to flush received WAL to disk"),
i + 1, config_file_options.wal_receive_check_timeout);
sleep(1);
}
}
if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn)
{
log_warning(_("local node \"%s\" is behind shutdown primary \"%s\""),
local_node_record.node_name,
remote_node_record.node_name);
log_detail(_("local node last receive LSN is %X/%X, primary shutdown checkpoint LSN is %X/%X"),
format_lsn(replication_info.last_wal_receive_lsn),
format_lsn(remote_last_checkpoint_lsn));
if (runtime_options.always_promote == false)
{
log_notice(_("aborting switchover"));
log_hint(_("use --always-promote to force promotion of standby"));
PQfinish(local_conn);
exit(ERR_SWITCHOVER_FAIL);
}
}
log_debug("local node last receive LSN is %X/%X, primary shutdown checkpoint LSN is %X/%X",
format_lsn(replication_info.last_wal_receive_lsn),
format_lsn(remote_last_checkpoint_lsn));
/* promote standby (local node) */
_do_standby_promote_internal(local_conn, server_version_num);
/*
* highgo: after promoting local server to new primary, add the vip to local node
* tianbing
*/
if(check_vip_conf(config_file_options.virtual_ip, config_file_options.network_card))
{
bind_virtual_ip(config_file_options.virtual_ip, config_file_options.network_card);
}
/*
* If pg_rewind is requested, issue a checkpoint immediately after promoting
* the local node, as pg_rewind compares timelines on the basis of the value
* in pg_control, which is written at the first checkpoint, which might not
* occur immediately.
*/
if (runtime_options.force_rewind_used == true)
{
log_notice(_("issuing CHECKPOINT"));
checkpoint(local_conn);
}
/*
* Execute "repmgr node rejoin" to create recovery.conf and start the
* remote server. Additionally execute "pg_rewind", if required and
* requested.
*/
initPQExpBuffer(&node_rejoin_options);
if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn)
{
KeyValueListCell *cell = NULL;
bool first_entry = true;
if (runtime_options.force_rewind_used == false)
{
log_error(_("new primary diverges from former primary and --force-rewind not provided"));
log_hint(_("the former primary will need to be restored manually, or use \"repmgr node rejoin\""));
termPQExpBuffer(&node_rejoin_options);
PQfinish(local_conn);
exit(ERR_SWITCHOVER_FAIL);
}
appendPQExpBufferStr(&node_rejoin_options,
" --force-rewind");
if (runtime_options.force_rewind_path[0] != '\0')
{
appendPQExpBuffer(&node_rejoin_options,
"=%s",
runtime_options.force_rewind_path);
}
appendPQExpBufferStr(&node_rejoin_options,
" --config-files=");
for (cell = remote_config_files.head; cell; cell = cell->next)
{
if (first_entry == false)
appendPQExpBufferChar(&node_rejoin_options, ',');
else
first_entry = false;
appendPQExpBufferStr(&node_rejoin_options, cell->key);
}
appendPQExpBufferChar(&node_rejoin_options, ' ');
}
key_value_list_free(&remote_config_files);
initPQExpBuffer(&remote_command_str);
make_remote_repmgr_path(&remote_command_str, &remote_node_record);
/*
* Here we'll coerce the local node's connection string into
* "param=value" format, in case it's configured in URI format,
* to simplify escaping issues when passing the string to the
* remote node.
*/
{
char *conninfo_normalized = normalize_conninfo_string(local_node_record.conninfo);
appendPQExpBuffer(&remote_command_str,
"%s-d ",
node_rejoin_options.data);
appendRemoteShellString(&remote_command_str,
conninfo_normalized);
appendPQExpBufferStr(&remote_command_str,
" node rejoin");
pfree(conninfo_normalized);
}
termPQExpBuffer(&node_rejoin_options);
log_debug("executing:\n %s", remote_command_str.data);
initPQExpBuffer(&command_output);
command_success = remote_command(remote_host,
runtime_options.remote_user,
remote_command_str.data,
config_file_options.ssh_options,
&command_output);
termPQExpBuffer(&remote_command_str);
/* TODO: verify this node's record was updated correctly */
if (command_success == false)
{
log_error(_("rejoin failed with error code %i"), r);
create_event_notification_extended(local_conn,
&config_file_options,
config_file_options.node_id,
"standby_switchover",
false,
command_output.data,
&event_info);
}
else
{
PQExpBufferData event_details;
initPQExpBuffer(&event_details);
appendPQExpBuffer(&event_details,
"node %i promoted to primary, node %i demoted to standby",
config_file_options.node_id,
remote_node_record.node_id);
create_event_notification_extended(local_conn,
&config_file_options,
config_file_options.node_id,
"standby_switchover",
true,
event_details.data,
&event_info);
termPQExpBuffer(&event_details);
}
termPQExpBuffer(&command_output);
/*
* If --siblings-follow specified, attempt to make them follow the new
* primary
*/
if (runtime_options.siblings_follow == true && sibling_nodes.node_count > 0)
{
int failed_follow_count = 0;
char host[MAXLEN] = "";
NodeInfoListCell *cell = NULL;
log_notice(_("executing STANDBY FOLLOW on %i of %i siblings"),
sibling_nodes.node_count - unreachable_sibling_node_count,
sibling_nodes.node_count);
for (cell = sibling_nodes.head; cell; cell = cell->next)
{
bool success = false;
t_node_info sibling_node_record = T_NODE_INFO_INITIALIZER;
/* skip nodes previously determined as unreachable */
if (cell->node_info->reachable == false)
continue;
record_status = get_node_record(local_conn,
cell->node_info->node_id,
&sibling_node_record);
initPQExpBuffer(&remote_command_str);
make_remote_repmgr_path(&remote_command_str, &sibling_node_record);
if (sibling_node_record.type == WITNESS)
{
PGconn *witness_conn = NULL;
/* TODO: create "repmgr witness resync" or similar */
appendPQExpBuffer(&remote_command_str,
"witness register -d \\'%s\\' --force 2>/dev/null && echo \"1\" || echo \"0\"",
local_node_record.conninfo);
/*
* Notify the witness repmgrd about the new primary, as at this point it will be assuming
* a failover situation is in place. It will detect the new primary at some point, this
* just speeds up the process.
*
* In the unlikely event repmgrd is not running or not in use, this will have no effect.
*/
witness_conn = establish_db_connection_quiet(cell->node_info->conninfo);
if (PQstatus(witness_conn) == CONNECTION_OK)
{
notify_follow_primary(witness_conn, local_node_record.node_id);
}
PQfinish(witness_conn);
}
else
{
appendPQExpBufferStr(&remote_command_str,
"standby follow 2>/dev/null && echo \"1\" || echo \"0\"");
}
get_conninfo_value(cell->node_info->conninfo, "host", host);
log_debug("executing:\n %s", remote_command_str.data);
initPQExpBuffer(&command_output);
success = remote_command(host,
runtime_options.remote_user,
remote_command_str.data,
config_file_options.ssh_options,
&command_output);
termPQExpBuffer(&remote_command_str);
if (success == false || command_output.data[0] == '0')
{
if (sibling_node_record.type == WITNESS)
{
log_warning(_("WITNESS REGISTER failed on node \"%s\""),
cell->node_info->node_name);
}
else
{
log_warning(_("STANDBY FOLLOW failed on node \"%s\""),
cell->node_info->node_name);
}
failed_follow_count++;
}
termPQExpBuffer(&command_output);
}
if (failed_follow_count == 0)
{
log_info(_("STANDBY FOLLOW successfully executed on all reachable sibling nodes"));
}
else
{
log_warning(_("execution of STANDBY FOLLOW failed on %i sibling nodes"),
failed_follow_count);
}
/*
* TODO: double-check all expected nodes are in pg_stat_replication
* and entries in repmgr.nodes match
*/
}
clear_node_info_list(&sibling_nodes);
PQfinish(local_conn);
/*
* Clean up remote node (primary demoted to standby). It's possible that the node is
* still starting up, so poll for a while until we get a connection.
*/
for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
{
remote_conn = establish_db_connection(remote_node_record.conninfo, false);
if (PQstatus(remote_conn) == CONNECTION_OK)
break;
log_info(_("sleeping 1 second; %i of %i attempts (\"standby_reconnect_timeout\") to reconnect to demoted primary"),
i + 1,
config_file_options.standby_reconnect_timeout);
sleep(1);
}
/* check new standby (old primary) is reachable */
if (PQstatus(remote_conn) != CONNECTION_OK)
{
switchover_success = false;
/* TODO: double-check whether new standby has attached */
log_warning(_("switchover did not fully complete"));
log_detail(_("node \"%s\" is now primary but node \"%s\" is not reachable"),
local_node_record.node_name,
remote_node_record.node_name);
if (config_file_options.use_replication_slots == true)
{
log_hint(_("any inactive replication slots on the old primary will need to be dropped manually"));
}
}
else
{
if (config_file_options.use_replication_slots == true)
{
drop_replication_slot_if_exists(remote_conn,
remote_node_record.node_id,
local_node_record.slot_name);
}
/* TODO warn about any inactive replication slots */
log_notice(_("switchover was successful"));
log_detail(_("node \"%s\" is now primary and node \"%s\" is attached as standby"),
local_node_record.node_name,
remote_node_record.node_name);
}
PQfinish(remote_conn);
/*
* Attempt to unpause all paused repmgrd instances, unless user explicitly
* specifies not to.
*/
if (runtime_options.repmgrd_no_pause == false)
{
if (repmgrd_running_count > 0)
{
ItemList repmgrd_unpause_errors = {NULL, NULL};
NodeInfoListCell *cell = NULL;
int i = 0;
int error_node_count = 0;
for (cell = all_nodes.head; cell; cell = cell->next)
{
if (repmgrd_info[i]->paused == true)
{
log_debug("repmgrd on node %s (ID %i) paused before switchover, not unpausing",
cell->node_info->node_name,
cell->node_info->node_id);
i++;
continue;
}
log_debug("unpausing repmgrd on node %s (ID %i)",
cell->node_info->node_name,
cell->node_info->node_id);
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
if (PQstatus(cell->node_info->conn) == CONNECTION_OK)
{
if (repmgrd_pause(cell->node_info->conn, false) == false)
{
item_list_append_format(&repmgrd_unpause_errors,
_("unable to unpause node \"%s\" (ID %i)"),
cell->node_info->node_name,
cell->node_info->node_id);
error_node_count++;
}
}
else
{
item_list_append_format(&repmgrd_unpause_errors,
_("unable to connect to node \"%s\" (ID %i):\n%s"),
cell->node_info->node_name,
cell->node_info->node_id,
PQerrorMessage(cell->node_info->conn));
error_node_count++;
}
i++;
}
if (error_node_count > 0)
{
PQExpBufferData detail;
ItemListCell *cell;
initPQExpBuffer(&detail);
for (cell = repmgrd_unpause_errors.head; cell; cell = cell->next)
{
appendPQExpBuffer(&detail,
" %s\n",
cell->string);
}
log_warning(_("unable to unpause repmgrd on %i node(s)"),
error_node_count);
log_detail(_("errors encountered for following node(s):\n%s"), detail.data);
log_hint(_("check node connection and status; unpause manually with \"repmgr daemon unpause\""));
termPQExpBuffer(&detail);
}
}
clear_node_info_list(&all_nodes);
}
if (switchover_success == true)
{
log_notice(_("STANDBY SWITCHOVER has completed successfully"));
}
else
{
log_notice(_("STANDBY SWITCHOVER has completed with issues"));
log_hint(_("see preceding log message(s) for details"));
exit(ERR_SWITCHOVER_INCOMPLETE);
}
return;
}
static void
check_source_server()
{
PGconn *superuser_conn = NULL;
PGconn *privileged_conn = NULL;
char cluster_size[MAXLEN];
char *connstr = NULL;
t_node_info node_record = T_NODE_INFO_INITIALIZER;
RecordStatus record_status = RECORD_NOT_FOUND;
ExtensionStatus extension_status = REPMGR_UNKNOWN;
/* Attempt to connect to the upstream server to verify its configuration */
log_verbose(LOG_DEBUG, "check_source_server()");
log_info(_("connecting to source node"));
connstr = param_list_to_string(&source_conninfo);
log_detail(_("connection string is: %s"), connstr);
pfree(connstr);
source_conn = establish_db_connection_by_params(&source_conninfo, false);
/*
* Unless in barman mode, exit with an error;
* establish_db_connection_by_params() will have already logged an error
* message
*/
if (PQstatus(source_conn) != CONNECTION_OK)
{
PQfinish(source_conn);
source_conn = NULL;
if (mode == barman)
return;
else
exit(ERR_DB_CONN);
}
/*
* If a connection was established, perform some sanity checks on the
* provided upstream connection
*/
source_server_version_num = check_server_version(source_conn, "primary", true, NULL);
if (get_cluster_size(source_conn, cluster_size) == false)
exit(ERR_DB_QUERY);
log_detail(_("current installation size is %s"),
cluster_size);
/*
* If the upstream node is a standby, try to connect to the primary too so
* we can write an event record
*/
if (get_recovery_type(source_conn) == RECTYPE_STANDBY)
{
primary_conn = get_primary_connection(source_conn, NULL, NULL);
if (PQstatus(primary_conn) != CONNECTION_OK)
{
log_error(_("unable to connect to primary node"));
exit(ERR_BAD_CONFIG);
}
}
else
{
primary_conn = source_conn;
}
/*
* Sanity-check that the primary node has a repmgr extension - if not
* present, fail with an error unless -F/--force is used (to enable repmgr
* to be used as a standalone clone tool)
*/
extension_status = get_repmgr_extension_status(primary_conn, NULL);
if (extension_status != REPMGR_INSTALLED)
{
if (!runtime_options.force)
{
/* this is unlikely to happen */
if (extension_status == REPMGR_UNKNOWN)
{
log_error(_("unable to determine status of \"repmgr\" extension"));
log_detail("%s", PQerrorMessage(primary_conn));
PQfinish(source_conn);
exit(ERR_DB_QUERY);
}
/* schema doesn't exist */
log_error(_("repmgr extension not found on source node"));
if (extension_status == REPMGR_AVAILABLE)
{
log_detail(_("repmgr extension is available but not installed in database \"%s\""),
param_get(&source_conninfo, "dbname"));
}
else if (extension_status == REPMGR_UNAVAILABLE)
{
log_detail(_("repmgr extension is not available on the upstream node"));
}
else if (extension_status == REPMGR_OLD_VERSION_INSTALLED)
{
log_error(_("an older version of the extension is installed on the upstream node"));
log_hint(_("upgrade \"repmgr\" on the source node first"));
}
log_hint(_("check that the upstream node is part of a repmgr cluster"));
PQfinish(source_conn);
exit(ERR_BAD_CONFIG);
}
log_warning(_("repmgr extension not found on source node"));
}
/* Fetch the source's data directory */
get_superuser_connection(&source_conn, &superuser_conn, &privileged_conn);
if (get_pg_setting(privileged_conn, "data_directory", upstream_data_directory) == false)
{
log_error(_("unable to retrieve source node's data directory"));
log_detail(_("STANDBY CLONE must be run with database superuser permissions"));
log_hint(_("provide a database superuser name with -S/--superuser"));
PQfinish(source_conn);
source_conn = NULL;
if (superuser_conn != NULL)
PQfinish(superuser_conn);
exit(ERR_BAD_CONFIG);
}
if (superuser_conn != NULL)
PQfinish(superuser_conn);
/*
* If no target data directory was explicitly provided, we'll default to
* the source host's data directory.
*/
if (local_data_directory_provided == false)
{
strncpy(local_data_directory, upstream_data_directory, MAXPGPATH);
log_notice(_("setting data directory to: \"%s\""), local_data_directory);
log_hint(_("use -D/--pgdata to explicitly specify a data directory"));
}
/*
* In the default pg_basebackup mode, we'll cowardly refuse to overwrite
* an existing data directory
*/
if (mode == pg_basebackup)
{
if (is_pg_dir(local_data_directory) && runtime_options.force != true)
{
log_error(_("target data directory appears to be a PostgreSQL data directory"));
log_detail(_("target data directory is \"%s\""), local_data_directory);
log_hint(_("use -F/--force to overwrite the existing data directory"));
PQfinish(source_conn);
exit(ERR_BAD_CONFIG);
}
}
/*
* Attempt to find the upstream node record
*/
if (runtime_options.upstream_node_id == NO_UPSTREAM_NODE)
upstream_node_id = get_primary_node_id(source_conn);
else
upstream_node_id = runtime_options.upstream_node_id;
log_debug("upstream_node_id determined as %i", upstream_node_id);
if (upstream_node_id != UNKNOWN_NODE_ID)
{
record_status = get_node_record(source_conn, upstream_node_id, &node_record);
if (record_status == RECORD_FOUND)
{
t_conninfo_param_list upstream_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
char *upstream_conninfo_user;
initialize_conninfo_params(&upstream_conninfo, false);
parse_conninfo_string(node_record.conninfo, &upstream_conninfo, NULL, false);
strncpy(recovery_conninfo_str, node_record.conninfo, MAXLEN);
strncpy(upstream_repluser, node_record.repluser, NAMEDATALEN);
upstream_conninfo_user = param_get(&upstream_conninfo, "user");
if (upstream_conninfo_user != NULL)
{
strncpy(upstream_user, upstream_conninfo_user, NAMEDATALEN);
}
else
{
get_conninfo_default_value("user", upstream_user, NAMEDATALEN);
}
log_verbose(LOG_DEBUG, "upstream_user is \"%s\"", upstream_user);
upstream_conninfo_found = true;
}
/*
* check that there's no existing node record with the same name but
* different ID
*/
record_status = get_node_record_by_name(source_conn, config_file_options.node_name, &node_record);
if (record_status == RECORD_FOUND && node_record.node_id != config_file_options.node_id)
{
log_error(_("another node (node_id: %i) already exists with node_name \"%s\""),
node_record.node_id,
config_file_options.node_name);
PQfinish(source_conn);
exit(ERR_BAD_CONFIG);
}
}
/* disable configuration file options incompatible with 9.3 */
if (source_server_version_num < 90400)
check_93_config();
check_upstream_config(source_conn, source_server_version_num, &node_record, true);
}
/*
 * check_source_server_via_barman()
 *
 * Obtain the upstream node's conninfo string by executing "psql" on the
 * Barman server (via ssh) against the repmgr metadata database.
 *
 * The connection string used on the Barman server is Barman's own
 * "conninfo" property with the database name replaced by
 * runtime_options.dbname. The node queried is "upstream_node_id" if known,
 * otherwise the active primary.
 *
 * On success the result is stored in "recovery_conninfo_str" and
 * "upstream_conninfo_found" is set to true.
 *
 * Exits with ERR_BARMAN if Barman's conninfo string cannot be parsed or
 * the remote command fails.
 */
static void
check_source_server_via_barman(void)
{
	char		buf[MAXLEN] = "";
	char		barman_conninfo_str[MAXLEN] = "";
	t_conninfo_param_list barman_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
	char	   *errmsg = NULL;
	bool		parse_success = false,
				command_success = false;
	char		where_condition[MAXLEN];
	PQExpBufferData command_output;
	PQExpBufferData repmgr_conninfo_buf;
	int			c = 0;

	get_barman_property(barman_conninfo_str, "conninfo", local_repmgr_tmp_directory);

	initialize_conninfo_params(&barman_conninfo, false);

	/*
	 * parse_conninfo_string() here will remove the upstream's
	 * `application_name`, if set
	 */
	parse_success = parse_conninfo_string(barman_conninfo_str, &barman_conninfo, &errmsg, true);

	if (parse_success == false)
	{
		log_error(_("Unable to parse barman conninfo string \"%s\":\n%s"),
				  barman_conninfo_str, errmsg);
		exit(ERR_BARMAN);
	}

	/* Overwrite database name in the parsed parameter list */
	param_set(&barman_conninfo, "dbname", runtime_options.dbname);

	/* Rebuild the Barman conninfo string */
	initPQExpBuffer(&repmgr_conninfo_buf);

	for (c = 0; c < barman_conninfo.size && barman_conninfo.keywords[c] != NULL; c++)
	{
		if (repmgr_conninfo_buf.len != 0)
			appendPQExpBufferChar(&repmgr_conninfo_buf, ' ');

		appendPQExpBuffer(&repmgr_conninfo_buf, "%s=",
						  barman_conninfo.keywords[c]);
		appendConnStrVal(&repmgr_conninfo_buf,
						 barman_conninfo.values[c]);
	}

	/*
	 * The parameter list has been serialised into repmgr_conninfo_buf and
	 * is not referenced again, so release it here.
	 */
	free_conninfo_params(&barman_conninfo);

	log_verbose(LOG_DEBUG,
				"repmgr database conninfo string on barman server: %s",
				repmgr_conninfo_buf.data);

	/* Select either the designated upstream node or the active primary */
	if (upstream_node_id == UNKNOWN_NODE_ID)
	{
		maxlen_snprintf(where_condition, "type='primary' AND active IS TRUE");
	}
	else
	{
		maxlen_snprintf(where_condition, "node_id=%i", upstream_node_id);
	}

	initPQExpBuffer(&command_output);

	/*
	 * The query is passed through two levels of quoting: the outer double
	 * quotes delimit the command given to ssh, the escaped inner ones
	 * delimit the arguments given to psql on the Barman server.
	 */
	maxlen_snprintf(buf,
					"ssh %s \"psql -Aqt \\\"%s\\\" -c \\\""
					" SELECT conninfo"
					" FROM repmgr.nodes"
					" WHERE %s"
					" AND active IS TRUE"
					"\\\"\"",
					config_file_options.barman_host,
					repmgr_conninfo_buf.data,
					where_condition);

	termPQExpBuffer(&repmgr_conninfo_buf);

	command_success = local_command(buf, &command_output);

	if (command_success == false)
	{
		log_error(_("unable to execute database query via Barman server"));
		exit(ERR_BARMAN);
	}

	/* psql output ends with a newline, which is not part of the conninfo */
	maxlen_snprintf(recovery_conninfo_str, "%s", command_output.data);
	string_remove_trailing_newlines(recovery_conninfo_str);

	upstream_conninfo_found = true;

	log_verbose(LOG_DEBUG,
				"upstream node conninfo string extracted via barman server: %s",
				recovery_conninfo_str);

	termPQExpBuffer(&command_output);
}
/*
 * check_upstream_config()
 *
 * Perform sanity checks on the upstream server's configuration before
 * starting the cloning process.
 *
 * For PostgreSQL 9.3, ensure check_93_config() is called before calling this.
 *
 * Checks performed:
 *  - "pg_basebackup_options" in repmgr.conf can be parsed
 *  - "wal_level" is set to a level usable for replication
 *  - if replication slots are requested, "max_replication_slots" > 0;
 *    otherwise (outside Barman mode) "wal_keep_segments" > 0 when WAL is
 *    not streamed and no "restore_command" is configured
 *  - "hot_standby" is "on"
 *  - "max_wal_senders" > 0
 *  - in pg_basebackup mode, the required number of replication
 *    connections can actually be opened
 *
 * Parameters:
 *  conn               - connection to the server being checked
 *  server_version_num - that server's version number (e.g. 90600)
 *  node_info          - node record; its "repluser" is a fallback for the
 *                       replication user name
 *  exit_on_error      - if true, exit with ERR_BAD_CONFIG on the first
 *                       failed check ("conn" is closed first, except when
 *                       "pg_basebackup_options" cannot be parsed); if
 *                       false, all checks are run
 *
 * Returns true if every check passed, false otherwise.
 *
 * TODO:
 *  - check user is qualified to perform base backup
 */
static bool
check_upstream_config(PGconn *conn, int server_version_num, t_node_info *node_info, bool exit_on_error)
{
	/*
	 * Result of the most recent GUC check. The checks below treat 0 as
	 * "condition not met" (an error is logged here) and -1 as "the check
	 * itself failed" (no additional message is logged here).
	 */
	int			i;
	bool		config_ok = true;
	char	   *wal_error_message = NULL;
	t_basebackup_options backup_options = T_BASEBACKUP_OPTIONS_INITIALIZER;
	bool		backup_options_ok = true;
	ItemList	backup_option_errors = {NULL, NULL};
	bool		xlog_stream = true;
	standy_clone_mode mode;

	/*
	 * Detecting the intended cloning mode
	 */
	mode = get_standby_clone_mode();

	/*
	 * Parse `pg_basebackup_options`, if set, to detect whether --xlog-method
	 * has been set to something other than `stream` (i.e. `fetch`), as this
	 * will influence some checks
	 */
	backup_options_ok = parse_pg_basebackup_options(
													config_file_options.pg_basebackup_options,
													&backup_options, server_version_num,
													&backup_option_errors);

	if (backup_options_ok == false)
	{
		if (exit_on_error == true)
		{
			log_error(_("error(s) encountered parsing \"pg_basebackup_options\""));
			print_error_list(&backup_option_errors, LOG_ERR);
			log_hint(_("\"pg_basebackup_options\" is: \"%s\""),
					 config_file_options.pg_basebackup_options);
			exit(ERR_BAD_CONFIG);
		}

		config_ok = false;
	}

	if (strlen(backup_options.xlog_method) && strcmp(backup_options.xlog_method, "stream") != 0)
		xlog_stream = false;

	/* Check that WAL level is set correctly */
	if (server_version_num < 90400)
	{
		i = guc_set(conn, "wal_level", "=", "hot_standby");
		wal_error_message = _("parameter \"wal_level\" must be set to \"hot_standby\"");
	}
	else
	{
		char	   *levels_pre96[] = {
			"hot_standby",
			"logical",
			NULL,
		};

		/*
		 * Note that in 9.6+, "hot_standby" and "archive" are accepted as
		 * aliases for "replica", but current_setting() will of course always
		 * return "replica"
		 */
		char	   *levels_96plus[] = {
			"replica",
			"logical",
			NULL,
		};

		char	  **levels;
		int			j = 0;

		if (server_version_num < 90600)
		{
			levels = (char **) levels_pre96;
			wal_error_message = _("parameter \"wal_level\" must be set to \"hot_standby\" or \"logical\"");
		}
		else
		{
			levels = (char **) levels_96plus;
			wal_error_message = _("parameter \"wal_level\" must be set to \"replica\" or \"logical\"");
		}

		/*
		 * Try each acceptable level in turn; stop at the first match or at
		 * the first failed check (any non-zero result).
		 */
		do
		{
			i = guc_set(conn, "wal_level", "=", levels[j]);
			if (i)
			{
				break;
			}
			j++;
		} while (levels[j] != NULL);
	}

	if (i == 0 || i == -1)
	{
		if (i == 0)
		{
			log_error("%s", wal_error_message);
		}

		if (exit_on_error == true)
		{
			PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}

		config_ok = false;
	}

	if (config_file_options.use_replication_slots)
	{
		i = guc_set_typed(conn, "max_replication_slots", ">",
						  "0", "integer");

		if (i == 0 || i == -1)
		{
			if (i == 0)
			{
				log_error(_("parameter \"max_replication_slots\" must be set to at least 1 to enable replication slots"));
				log_hint(_("\"max_replication_slots\" should be set to at least the number of expected standbys"));
			}

			/*
			 * A failed check (-1) is treated as a failure too, as for every
			 * other parameter checked in this function.
			 */
			if (exit_on_error == true)
			{
				PQfinish(conn);
				exit(ERR_BAD_CONFIG);
			}

			config_ok = false;
		}
	}

	/*
	 * physical replication slots not available or not requested - check if
	 * there are any circumstances where `wal_keep_segments` should be set
	 */
	else if (mode != barman)
	{
		bool		check_wal_keep_segments = false;

		/*
		 * A non-zero `wal_keep_segments` value will almost certainly be
		 * required if pg_basebackup is being used with --xlog-method=fetch,
		 * *and* no restore command has been specified
		 */
		if (xlog_stream == false
			&& strcmp(config_file_options.restore_command, "") == 0)
		{
			check_wal_keep_segments = true;
		}

		if (check_wal_keep_segments == true)
		{
			i = guc_set_typed(conn, "wal_keep_segments", ">", "0", "integer");

			if (i == 0 || i == -1)
			{
				if (i == 0)
				{
					log_error(_("parameter \"wal_keep_segments\" on the upstream server must be be set to a non-zero value"));
					log_hint(_("Choose a value sufficiently high enough to retain enough WAL "
							   "until the standby has been cloned and started.\n "
							   "Alternatively set up WAL archiving using e.g. PgBarman and configure "
							   "'restore_command' in repmgr.conf to fetch WALs from there."));

					if (server_version_num >= 90400)
					{
						log_hint(_("In PostgreSQL 9.4 and later, replication slots can be used, which "
								   "do not require \"wal_keep_segments\" to be set "
								   "(set parameter \"use_replication_slots\" in repmgr.conf to enable)\n"
								   ));
					}
				}

				if (exit_on_error == true)
				{
					PQfinish(conn);
					exit(ERR_BAD_CONFIG);
				}

				config_ok = false;
			}
		}
	}

	/*
	 * If archive_mode is enabled, check that 'archive_command' is non empty
	 * (however it's not practical to check that it actually represents a
	 * valid command).
	 *
	 * From PostgreSQL 9.5, archive_mode can be one of 'off', 'on' or 'always'
	 * so for ease of backwards compatibility, rather than explicitly check
	 * for an enabled mode, check that it's not "off".
	 *
	 * NOTE: this check is currently disabled; the code is kept for reference.
	 */
	/* do not check postgres config
	if (guc_set(conn, "archive_mode", "!=", "off"))
	{
		i = guc_set(conn, "archive_command", "!=", "");
		if (i == 0 || i == -1)
		{
			if (i == 0)
				log_error(_("parameter \"archive_command\" must be set to a valid command"));
			if (exit_on_error == true)
			{
				PQfinish(conn);
				exit(ERR_BAD_CONFIG);
			}
			config_ok = false;
		}
	}
	*/

	/*
	 * Check that 'hot_standby' is on. This isn't strictly necessary for the
	 * primary server, however the assumption is that we'll be cloning
	 * standbys and thus copying the primary configuration; this way the
	 * standby will be correctly configured by default.
	 */
	i = guc_set(conn, "hot_standby", "=", "on");

	if (i == 0 || i == -1)
	{
		if (i == 0)
		{
			log_error(_("parameter 'hot_standby' must be set to 'on'"));
		}

		if (exit_on_error == true)
		{
			PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}

		config_ok = false;
	}

	i = guc_set_typed(conn, "max_wal_senders", ">", "0", "integer");

	if (i == 0 || i == -1)
	{
		if (i == 0)
		{
			log_error(_("parameter \"max_wal_senders\" must be set to be at least 1"));
			log_hint(_("\"max_wal_senders\" should be set to at least the number of expected standbys"));
		}

		if (exit_on_error == true)
		{
			PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}

		config_ok = false;
	}

	/*
	 * If using pg_basebackup, ensure sufficient replication connections can
	 * be made. There's no guarantee they'll still be available by the time
	 * pg_basebackup is executed, but there's nothing we can do about that.
	 */
	if (mode == pg_basebackup)
	{
		PGconn	  **connections;
		int			c;
		int			min_replication_connections = 1,
					possible_replication_connections = 0;
		t_conninfo_param_list repl_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
		char	   *repl_conninfo_user = NULL;

		/*
		 * Make a copy of the connection parameter arrays, and append
		 * "replication"
		 */
		initialize_conninfo_params(&repl_conninfo, false);

		conn_to_param_list(conn, &repl_conninfo);

		param_set(&repl_conninfo, "replication", "1");

		/*
		 * Replication user, in order of precedence: command line, upstream
		 * node record, supplied node record. If none is set, the user copied
		 * from "conn" is retained.
		 */
		if (runtime_options.replication_user[0] != '\0')
		{
			param_set(&repl_conninfo, "user", runtime_options.replication_user);
		}
		else if (upstream_repluser[0] != '\0')
		{
			param_set(&repl_conninfo, "user", upstream_repluser);
		}
		else if (node_info->repluser[0] != '\0')
		{
			param_set(&repl_conninfo, "user", node_info->repluser);
		}

		/*
		 * If the replication user differs from the upstream user, connect
		 * with "dbname=replication". param_get() can return NULL, which is
		 * compared here as an empty user name rather than passed to strcmp().
		 */
		repl_conninfo_user = param_get(&repl_conninfo, "user");

		if (strcmp(repl_conninfo_user != NULL ? repl_conninfo_user : "", upstream_user) != 0)
		{
			param_set(&repl_conninfo, "dbname", "replication");
		}

		/*
		 * work out how many replication connections are required (1 or 2)
		 */
		if (xlog_stream == true)
			min_replication_connections += 1;

		log_verbose(LOG_NOTICE, "checking for available walsenders on source node (%i required)",
					min_replication_connections);

		connections = pg_malloc0(sizeof(PGconn *) * min_replication_connections);

		/*
		 * Attempt to create the minimum number of required concurrent
		 * connections
		 */
		for (c = 0; c < min_replication_connections; c++)
		{
			PGconn	   *replication_conn;

			replication_conn = establish_db_connection_by_params(&repl_conninfo, false);

			if (PQstatus(replication_conn) == CONNECTION_OK)
			{
				/*
				 * Store successful connections contiguously so the cleanup
				 * loop below closes all of them, even if an earlier attempt
				 * failed.
				 */
				connections[possible_replication_connections] = replication_conn;
				possible_replication_connections++;
			}
			else
			{
				/*
				 * libpq expects PQfinish() to be called for failed
				 * connection attempts as well; it is a no-op for NULL.
				 */
				PQfinish(replication_conn);
			}
		}

		/* Close previously created connections */
		for (c = 0; c < possible_replication_connections; c++)
		{
			PQfinish(connections[c]);
		}

		pfree(connections);
		free_conninfo_params(&repl_conninfo);

		if (possible_replication_connections < min_replication_connections)
		{
			config_ok = false;

			/*
			 * XXX at this point we could check
			 * current_setting('max_wal_senders) - COUNT(*) FROM
			 * pg_stat_replication; if >= min_replication_connections we could
			 * infer possible authentication error / lack of permissions.
			 *
			 * Alternatively call PQconnectStart() and poll for
			 * presence/absence of CONNECTION_AUTH_OK ?
			 */
			log_error(_("unable to establish necessary replication connections"));
			log_hint(_("increase \"max_wal_senders\" by at least %i"),
					 min_replication_connections - possible_replication_connections);

			if (exit_on_error == true)
			{
				PQfinish(conn);
				exit(ERR_BAD_CONFIG);
			}
		}
		else
		{
			/* only report success if the connections were actually made */
			log_verbose(LOG_INFO, "sufficient walsenders available on source node (%i required)",
						min_replication_connections);
		}
	}

	return config_ok;
}
/*
 * initialise_direct_clone()
 *
 * Preparation for a direct clone (pg_basebackup mode):
 *
 *  - make sure the target data directory can be used
 *  - verify that each tablespace named in a `tablespace_mapping`
 *    parameter exists on the source node
 *  - if replication slots were requested, create the slot on the source
 *    node
 *
 * Apart from the "not supported in 9.3" message, every failure here
 * terminates the process.
 *
 * Event(s):
 *  - standby_clone
 */
static void
initialise_direct_clone(t_node_info *node_record)
{
	const bool	mappings_configured = (config_file_options.tablespace_mapping.head != NULL);

	/*
	 * The destination data directory must be usable (in Barman mode it
	 * will already have been created)
	 */
	if (create_pg_dir(local_data_directory, runtime_options.force) == false)
	{
		log_error(_("unable to use directory \"%s\""),
				  local_data_directory);
		log_hint(_("use -F/--force to force this directory to be overwritten"));
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * pg_basebackup does not validate tablespace mappings itself, so
	 * confirm here that each mapped source location corresponds to a
	 * tablespace on the source node.
	 */
	if (mappings_configured && source_server_version_num < 90400)
	{
		log_error(_("tablespace mapping not supported in PostgreSQL 9.3, ignoring"));
	}
	else if (mappings_configured)
	{
		KeyValueList missing = {NULL, NULL};
		TablespaceListCell *mapping = config_file_options.tablespace_mapping.head;
		int			checked = 0;
		int			missing_count = 0;

		while (mapping != NULL)
		{
			char		tablespace_name[MAXLEN] = "";
			char	   *escaped_location = escape_string(source_conn, mapping->old_dir);
			bool		found = get_tablespace_name_by_location(source_conn,
																escaped_location,
																tablespace_name);

			pfree(escaped_location);

			/* remember each source location with no matching tablespace */
			if (found != true)
			{
				key_value_list_set(&missing, mapping->old_dir, "");
				missing_count++;
			}

			checked++;
			mapping = mapping->next;
		}

		if (missing.head != NULL)
		{
			PQExpBufferData missing_detail;
			KeyValueListCell *entry;

			log_error(_("%i of %i mapped tablespaces not found"),
					  missing_count, checked);

			/* list the unmatched locations, one per line */
			initPQExpBuffer(&missing_detail);

			for (entry = missing.head; entry != NULL; entry = entry->next)
				appendPQExpBuffer(&missing_detail, " %s\n", entry->key);

			log_detail(_("following tablespaces not found:\n%s"),
					   missing_detail.data);

			termPQExpBuffer(&missing_detail);
			exit(ERR_BAD_CONFIG);
		}
	}

	/*
	 * When replication slots are requested, the slot has to exist on the
	 * source node before pg_basebackup is called.
	 *
	 * Note: if the source node is different to the specified upstream node,
	 * we'll need to drop the slot and recreate it on the upstream.
	 *
	 * TODO: skip this for Pg10, and ensure temp slot option used
	 *
	 * Replication slots are not supported (and not very useful anyway) in
	 * Barman mode.
	 */
	if (config_file_options.use_replication_slots == true)
	{
		PQExpBufferData slot_error;
		PGconn	   *su_conn = NULL;
		PGconn	   *slot_conn = NULL;
		bool		slot_created;

		initPQExpBuffer(&slot_error);

		get_superuser_connection(&source_conn, &su_conn, &slot_conn);

		slot_created = create_replication_slot(slot_conn,
											   node_record->slot_name,
											   &slot_error);

		if (slot_created == false)
		{
			log_error("%s", slot_error.data);

			/* record the failed clone attempt before giving up */
			create_event_notification(primary_conn,
									  &config_file_options,
									  config_file_options.node_id,
									  "standby_clone",
									  false,
									  slot_error.data);

			PQfinish(source_conn);

			if (su_conn != NULL)
				PQfinish(su_conn);

			exit(ERR_DB_QUERY);
		}

		termPQExpBuffer(&slot_error);

		log_verbose(LOG_INFO,
					_("replication slot \"%s\" created on source node"),
					node_record->slot_name);

		/* the extra connection, if one was opened, is no longer needed */
		if (su_conn != NULL)
			PQfinish(su_conn);
	}
}
/*
 * run_basebackup()
 *
 * Assemble a pg_basebackup command line from the runtime and configuration
 * file options, and execute it to clone into "local_data_directory".
 *
 * If replication slots are in use and an upstream node ID is known, then
 * after a successful backup:
 *
 *  - an attempt is made to ensure the slot exists on the designated
 *    upstream node (creating it there if absent);
 *  - the slot is dropped from the source node, unless it was found to
 *    already exist on the upstream node or is reported as still active.
 *
 * Returns SUCCESS, or ERR_BAD_BASEBACKUP if the pg_basebackup command
 * returned a non-zero status. Exits with ERR_DB_QUERY if the replication
 * slot cannot be created on the upstream node.
 */
static int
run_basebackup(t_node_info *node_record)
{
	char		script[MAXLEN] = "";
	int			r = SUCCESS;
	PQExpBufferData params;
	TablespaceListCell *cell = NULL;
	t_basebackup_options backup_options = T_BASEBACKUP_OPTIONS_INITIALIZER;

	/*
	 * Parse the pg_basebackup_options provided in repmgr.conf - we'll want to
	 * check later whether certain options were set by the user
	 */
	parse_pg_basebackup_options(config_file_options.pg_basebackup_options,
								&backup_options,
								source_server_version_num,
								NULL);

	/* Create pg_basebackup command line options */
	initPQExpBuffer(&params);

	appendPQExpBuffer(&params, " -D %s", local_data_directory);

	/*
	 * conninfo string provided - pass it to pg_basebackup as the -d option
	 * (pg_basebackup doesn't require or want a database name, but for
	 * consistency with other applications accepts a conninfo string under
	 * -d/--dbname)
	 */
	if (runtime_options.conninfo_provided == true)
	{
		t_conninfo_param_list conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
		char	   *conninfo_str = NULL;

		initialize_conninfo_params(&conninfo, false);

		/* string will already have been parsed */
		(void) parse_conninfo_string(runtime_options.dbname, &conninfo, NULL, false);

		if (runtime_options.replication_user[0] != '\0')
		{
			param_set(&conninfo, "user", runtime_options.replication_user);
		}
		else if (upstream_repluser[0] != '\0')
		{
			param_set(&conninfo, "user", upstream_repluser);
		}
		else
		{
			param_set(&conninfo, "user", node_record->repluser);
		}

		conninfo_str = param_list_to_string(&conninfo);

		appendPQExpBuffer(&params, " -d '%s'", conninfo_str);

		pfree(conninfo_str);

		/* the parameter list has been serialised and is no longer needed */
		free_conninfo_params(&conninfo);
	}

	/*
	 * Connection parameters not passed to repmgr as conninfo string - provide
	 * them individually to pg_basebackup (-d/--dbname not required)
	 */
	else
	{
		if (strlen(runtime_options.host))
		{
			appendPQExpBuffer(&params, " -h %s", runtime_options.host);
		}

		if (strlen(runtime_options.port))
		{
			appendPQExpBuffer(&params, " -p %s", runtime_options.port);
		}

		/* first non-empty user name wins */
		if (strlen(runtime_options.replication_user))
		{
			appendPQExpBuffer(&params, " -U %s", runtime_options.replication_user);
		}
		else if (strlen(upstream_repluser))
		{
			appendPQExpBuffer(&params, " -U %s", upstream_repluser);
		}
		else if (strlen(node_record->repluser))
		{
			appendPQExpBuffer(&params, " -U %s", node_record->repluser);
		}
		else if (strlen(runtime_options.username))
		{
			appendPQExpBuffer(&params, " -U %s", runtime_options.username);
		}
	}

	if (runtime_options.fast_checkpoint)
	{
		appendPQExpBufferStr(&params, " -c fast");
	}

	if (config_file_options.tablespace_mapping.head != NULL)
	{
		for (cell = config_file_options.tablespace_mapping.head; cell; cell = cell->next)
		{
			appendPQExpBuffer(&params, " -T %s=%s", cell->old_dir, cell->new_dir);
		}
	}

	/*
	 * To ensure we have all the WALs needed during basebackup execution we
	 * stream them as the backup is taking place.
	 *
	 * From 9.6, if replication slots are in use, we'll have previously
	 * created a slot with reserved LSN, and will stream from that slot to
	 * avoid WAL buildup on the primary using the -S/--slot, which requires
	 * -X/--xlog-method=stream (from 10, -X/--wal-method=stream)
	 */
	if (!strlen(backup_options.xlog_method))
	{
		appendPQExpBufferStr(&params, " -X stream");
	}

	/*
	 * From 9.6, pg_basebackup accepts -S/--slot, which forces WAL streaming
	 * to use the specified replication slot. If replication slot usage is
	 * specified, the slot will already have been created.
	 *
	 * NOTE: currently there's no way of disabling the --slot option while
	 * using --xlog-method=stream - it's hard to imagine a use case for this,
	 * so no provision has been made for doing it.
	 *
	 * NOTE: It's possible to set 'pg_basebackup_options' with an invalid
	 * combination of values for --wal-method (--xlog-method) and --slot -
	 * we're not checking that, just that we're not overriding any
	 * user-supplied values
	 */
	if (source_server_version_num >= 90600 && config_file_options.use_replication_slots)
	{
		bool		slot_add = true;

		/*
		 * Check whether 'pg_basebackup_options' in repmgr.conf has the --slot
		 * option set, or if --wal-method (--xlog-method) is set to a value
		 * other than "stream" (in which case we can't use --slot).
		 */
		if (strlen(backup_options.slot) || (strlen(backup_options.xlog_method) && strcmp(backup_options.xlog_method, "stream") != 0))
		{
			slot_add = false;
		}

		if (slot_add == true)
		{
			appendPQExpBuffer(&params, " -S %s", node_record->slot_name);
		}
	}

	/* user-supplied options go last on the command line */
	maxlen_snprintf(script,
					"%s -l \"repmgr base backup\" %s %s",
					make_pg_path("pg_basebackup"),
					params.data,
					config_file_options.pg_basebackup_options);

	termPQExpBuffer(&params);

	log_info(_("executing:\n %s"), script);

	/*
	 * As of 9.4, pg_basebackup only ever returns 0 or 1
	 */
	r = system(script);

	if (r != 0)
		return ERR_BAD_BASEBACKUP;

	/* check connections are still available */
	(void) connection_ping_reconnect(primary_conn);

	if (source_conn != primary_conn)
		(void) connection_ping_reconnect(source_conn);

	/*
	 * If replication slots in use, check the created slot is on the correct
	 * node; the slot will initially get created on the source node, and will
	 * need to be dropped and recreated on the actual upstream node if these
	 * differ.
	 */
	if (config_file_options.use_replication_slots && upstream_node_id != UNKNOWN_NODE_ID)
	{
		PGconn	   *superuser_conn = NULL;
		PGconn	   *privileged_conn = NULL;
		t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
		t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
		RecordStatus record_status = RECORD_NOT_FOUND;
		bool		slot_exists_on_upstream = false;

		record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);

		/*
		 * if there's no upstream record, there's no point in trying to create
		 * a replication slot on the designated upstream, as the assumption is
		 * it won't exist at this point.
		 */
		if (record_status != RECORD_FOUND)
		{
			log_warning(_("no record exists for designated upstream node %i"),
						upstream_node_id);
			log_hint(_("you'll need to create the replication slot (\"%s\") manually"),
					 node_record->slot_name);
		}
		else
		{
			PGconn	   *upstream_conn = NULL;

			upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);

			/*
			 * It's possible the upstream node is not yet running, in which case we'll
			 * have to rely on the user taking action to create the slot
			 */
			if (PQstatus(upstream_conn) != CONNECTION_OK)
			{
				log_warning(_("unable to connect to upstream node to create replication slot"));

				/*
				 * TODO: if slot creation also handled by "standby register", update warning
				 */
				log_hint(_("you may need to create the replication slot manually"));
			}
			else
			{
				record_status = get_slot_record(upstream_conn, node_record->slot_name, &slot_info);

				if (record_status == RECORD_FOUND)
				{
					log_verbose(LOG_INFO,
								_("replication slot \"%s\" aleady exists on upstream node %i"),
								node_record->slot_name,
								upstream_node_id);
					slot_exists_on_upstream = true;
				}
				else
				{
					PQExpBufferData event_details;

					log_notice(_("creating replication slot \"%s\" on upstream node %i"),
							   node_record->slot_name,
							   upstream_node_id);

					get_superuser_connection(&upstream_conn, &superuser_conn, &privileged_conn);

					initPQExpBuffer(&event_details);

					if (create_replication_slot(privileged_conn, node_record->slot_name, &event_details) == false)
					{
						log_error("%s", event_details.data);

						create_event_notification(primary_conn,
												  &config_file_options,
												  config_file_options.node_id,
												  "standby_clone",
												  false,
												  event_details.data);

						PQfinish(source_conn);

						if (superuser_conn != NULL)
							PQfinish(superuser_conn);

						exit(ERR_DB_QUERY);
					}

					if (superuser_conn != NULL)
						PQfinish(superuser_conn);

					/*
					 * Both pointers are reused for the source node below;
					 * reset them so a connection closed here cannot be
					 * closed a second time at the end of this block.
					 */
					superuser_conn = NULL;
					privileged_conn = NULL;

					termPQExpBuffer(&event_details);
				}
			}

			/*
			 * Release the connection object whether or not the connection
			 * attempt succeeded (libpq expects PQfinish() in both cases).
			 */
			PQfinish(upstream_conn);
		}

		/* delete slot on source server */
		get_superuser_connection(&source_conn, &superuser_conn, &privileged_conn);

		/*
		 * NOTE(review): "slot_info" is only populated by the lookup on the
		 * upstream node above; if that lookup did not take place it retains
		 * its initialiser value.
		 */
		if (slot_info.active == false)
		{
			if (slot_exists_on_upstream == false)
			{
				if (drop_replication_slot(privileged_conn, node_record->slot_name) == true)
				{
					log_notice(_("replication slot \"%s\" deleted on source node"), node_record->slot_name);
				}
				else
				{
					log_error(_("unable to delete replication slot \"%s\" on source node"), node_record->slot_name);
				}
			}
		}

		/*
		 * if replication slot is still active (shouldn't happen), emit a
		 * warning
		 */
		else
		{
			log_warning(_("replication slot \"%s\" is still active on source node"), node_record->slot_name);
		}

		if (superuser_conn != NULL)
			PQfinish(superuser_conn);
	}

	return SUCCESS;
}
static int
run_file_backup(t_node_info *node_record)
{
int r = SUCCESS,
i;
char command[MAXLEN] = "";
char filename[MAXLEN] = "";
char buf[MAXLEN] = "";
char basebackups_directory[MAXLEN] = "";
char backup_id[MAXLEN] = "";
char *p = NULL,
*q = NULL;
TablespaceDataList tablespace_list = {NULL, NULL};
TablespaceDataListCell *cell_t = NULL;
PQExpBufferData tablespace_map;
bool tablespace_map_rewrite = false;
if (mode == barman)
{
/*
* Locate Barman's base backups directory
*/
get_barman_property(basebackups_directory, "basebackups_directory", local_repmgr_tmp_directory);
/*
* Read the list of backup files into a local file. In the process:
*
* - determine the backup ID; - check, and remove, the prefix; -
* detect tablespaces; - filter files in one list per tablespace;
*/
{
FILE *fi; /* input stream */
FILE *fd; /* output for data.txt */
char prefix[MAXLEN] = "";
char output[MAXLEN] = "";
int n = 0;
maxlen_snprintf(command, "%s list-files --target=data %s latest",
make_barman_ssh_command(barman_command_buf),
config_file_options.barman_server);
log_verbose(LOG_DEBUG, "executing:\n %s", command);
fi = popen(command, "r");
if (fi == NULL)
{
log_error("cannot launch command: %s", command);
exit(ERR_BARMAN);
}
fd = fopen(datadir_list_filename, "w");
if (fd == NULL)
{
log_error("cannot open file: %s", datadir_list_filename);
exit(ERR_BARMAN);
}
maxlen_snprintf(prefix, "%s/", basebackups_directory);
while (fgets(output, MAXLEN, fi) != NULL)
{
/*
* Remove prefix
*/
p = string_skip_prefix(prefix, output);
if (p == NULL)
{
log_error("unexpected output from \"barman list-files\"");
log_detail("%s", output);
exit(ERR_BARMAN);
}
/*
* Remove and note backup ID; copy backup.info
*/
if (!strcmp(backup_id, ""))
{
FILE *fi2;
n = strcspn(p, "/");
strncpy(backup_id, p, n);
strncat(prefix, backup_id, MAXLEN - 1);
strncat(prefix, "/", MAXLEN - 1);
p = string_skip_prefix(backup_id, p);
if (p == NULL)
{
log_error("unexpected output from \"barman list-files\"");
log_detail("%s", output);
exit(ERR_BARMAN);
}
p = string_skip_prefix("/", p);
/*
* Copy backup.info
*/
maxlen_snprintf(command,
"rsync -a %s:%s/%s/backup.info %s",
config_file_options.barman_host,
basebackups_directory,
backup_id,
local_repmgr_tmp_directory);
(void) local_command(command,
NULL);
/*
* Get tablespace data
*/
maxlen_snprintf(filename, "%s/backup.info",
local_repmgr_tmp_directory);
fi2 = fopen(filename, "r");
if (fi2 == NULL)
{
log_error("cannot open file: %s", filename);
exit(ERR_INTERNAL);
}
while (fgets(buf, MAXLEN, fi2) != NULL)
{
q = string_skip_prefix("tablespaces=", buf);
if (q != NULL && strncmp(q, "None\n", 5))
{
get_tablespace_data_barman(q, &tablespace_list);
}
q = string_skip_prefix("version=", buf);
if (q != NULL)
{
source_server_version_num = strtol(q, NULL, 10);
}
}
fclose(fi2);
unlink(filename);
continue;
}
/*
* Skip backup.info
*/
if (string_skip_prefix("backup.info", p))
continue;
/*
* Filter data directory files
*/
if ((q = string_skip_prefix("data/", p)) != NULL)
{
fputs(q, fd);
continue;
}
/*
* Filter other files (i.e. tablespaces)
*/
for (cell_t = tablespace_list.head; cell_t; cell_t = cell_t->next)
{
if ((q = string_skip_prefix(cell_t->oid, p)) != NULL && *q == '/')
{
if (cell_t->f == NULL)
{
maxlen_snprintf(filename, "%s/%s.txt", local_repmgr_tmp_directory, cell_t->oid);
cell_t->f = fopen(filename, "w");
if (cell_t->f == NULL)
{
log_error("cannot open file: %s", filename);
exit(ERR_INTERNAL);
}
}
fputs(q + 1, cell_t->f);
break;
}
}
}
fclose(fd);
pclose(fi);
}
/* For 9.5 and greater, create our own tablespace_map file */
if (source_server_version_num >= 90500)
{
initPQExpBuffer(&tablespace_map);
}
/*
* As of Barman version 1.6.1, the file structure of a backup is as
* follows:
*
* base/ - base backup wals/ - WAL files associated to the backup
*
* base/<ID> - backup files
*
* here ID has the standard timestamp form yyyymmddThhmmss
*
* base/<ID>/backup.info - backup metadata, in text format
* base/<ID>/data - data directory base/<ID>/<OID> -
* tablespace with the given oid
*/
/*
* Copy all backup files from the Barman server
*/
maxlen_snprintf(command,
"rsync --progress -a --files-from=%s %s:%s/%s/data %s",
datadir_list_filename,
config_file_options.barman_host,
basebackups_directory,
backup_id,
local_data_directory);
(void) local_command(
command,
NULL);
unlink(datadir_list_filename);
/*
* We must create some PGDATA subdirectories because they are not
* included in the Barman backup.
*
* See class RsyncBackupExecutor in the Barman source
* (barman/backup_executor.py) for a definitive list of excluded
* directories.
*/
{
const char *const dirs[] = {
/* Only from 10 */
"pg_wal",
/* Only from 9.5 */
"pg_commit_ts",
/* Only from 9.4 */
"pg_dynshmem", "pg_logical", "pg_logical/snapshots", "pg_logical/mappings", "pg_replslot",
/* Already in 9.3 */
"pg_notify", "pg_serial", "pg_snapshots", "pg_stat", "pg_stat_tmp",
"pg_subtrans", "pg_tblspc", "pg_twophase", "pg_xlog", 0
};
const int vers[] = {
100000,
90500,
90400, 90400, 90400, 90400, 90400,
0, 0, 0, 0, 0,
0, 0, 0, -100000
};
for (i = 0; dirs[i]; i++)
{
/* directory exists in newer versions than this server - skip */
if (vers[i] > 0 && source_server_version_num < vers[i])
continue;
/*
* directory existed in earlier versions than this server but
* has been removed/renamed - skip
*/
if (vers[i] < 0 && source_server_version_num >= abs(vers[i]))
continue;
maxlen_snprintf(filename, "%s/%s", local_data_directory, dirs[i]);
if (mkdir(filename, S_IRWXU) != 0 && errno != EEXIST)
{
log_error(_("unable to create the %s directory"), dirs[i]);
exit(ERR_INTERNAL);
}
}
}
}
for (cell_t = tablespace_list.head; cell_t; cell_t = cell_t->next)
{
bool mapping_found = false;
TablespaceListCell *cell = NULL;
char *tblspc_dir_dest = NULL;
/*
* Check if tablespace path matches one of the provided tablespace
* mappings
*/
if (config_file_options.tablespace_mapping.head != NULL)
{
for (cell = config_file_options.tablespace_mapping.head; cell; cell = cell->next)
{
if (strcmp(cell_t->location, cell->old_dir) == 0)
{
mapping_found = true;
break;
}
}
}
if (mapping_found == true)
{
tblspc_dir_dest = cell->new_dir;
log_debug(_("mapping source tablespace \"%s\" (OID %s) to \"%s\""),
cell_t->location, cell_t->oid, tblspc_dir_dest);
}
else
{
tblspc_dir_dest = cell_t->location;
}
/*
* Tablespace file copy
*/
if (mode == barman)
{
create_pg_dir(cell_t->location, false);
if (cell_t->f != NULL) /* cell_t->f == NULL iff the tablespace is
* empty */
{
maxlen_snprintf(command,
"rsync --progress -a --files-from=%s/%s.txt %s:%s/%s/%s %s",
local_repmgr_tmp_directory,
cell_t->oid,
config_file_options.barman_host,
basebackups_directory,
backup_id,
cell_t->oid,
tblspc_dir_dest);
(void) local_command(
command,
NULL);
fclose(cell_t->f);
maxlen_snprintf(filename,
"%s/%s.txt",
local_repmgr_tmp_directory,
cell_t->oid);
unlink(filename);
}
}
/*
* If a valid mapping was provide for this tablespace, arrange for it
* to be remapped (if no tablespace mapping was provided, the link
* will be copied as-is by pg_basebackup and no action is required)
*/
if (mapping_found == true || mode == barman)
{
/* 9.5 and later - append to the tablespace_map file */
if (source_server_version_num >= 90500)
{
tablespace_map_rewrite = true;
appendPQExpBuffer(&tablespace_map,
"%s %s\n",
cell_t->oid,
tblspc_dir_dest);
}
/*
* Pre-9.5, we have to manipulate the symlinks in pg_tblspc/
* ourselves
*/
else
{
PQExpBufferData tblspc_symlink;
initPQExpBuffer(&tblspc_symlink);
appendPQExpBuffer(&tblspc_symlink, "%s/pg_tblspc/%s",
local_data_directory,
cell_t->oid);
if (unlink(tblspc_symlink.data) < 0 && errno != ENOENT)
{
log_error(_("unable to remove tablespace symlink %s"), tblspc_symlink.data);
log_detail("%s", strerror(errno));
r = ERR_BAD_BASEBACKUP;
goto stop_backup;
}
if (symlink(tblspc_dir_dest, tblspc_symlink.data) < 0)
{
log_error(_("unable to create tablespace symlink from %s to %s"), tblspc_symlink.data, tblspc_dir_dest);
r = ERR_BAD_BASEBACKUP;
goto stop_backup;
}
}
}
}
/*
* For 9.5 and later, if tablespace remapping was requested, we'll need to
* rewrite the tablespace map file ourselves. The tablespace map file is
* read on startup and any links created by the backend; we could do this
* ourselves like for pre-9.5 servers, but it's better to rely on
* functionality the backend provides.
*/
if (source_server_version_num >= 90500 && tablespace_map_rewrite == true)
{
PQExpBufferData tablespace_map_filename;
FILE *tablespace_map_file;
initPQExpBuffer(&tablespace_map_filename);
appendPQExpBuffer(&tablespace_map_filename, "%s/%s",
local_data_directory,
TABLESPACE_MAP);
/*
* Unlink any existing file (it should be there, but we don't care if
* it isn't)
*/
if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT)
{
log_error(_("unable to remove tablespace_map file \"%s\""),
tablespace_map_filename.data);
log_detail("%s", strerror(errno));
r = ERR_BAD_BASEBACKUP;
goto stop_backup;
}
tablespace_map_file = fopen(tablespace_map_filename.data, "w");
if (tablespace_map_file == NULL)
{
log_error(_("unable to create tablespace_map file \"%s\""), tablespace_map_filename.data);
r = ERR_BAD_BASEBACKUP;
goto stop_backup;
}
if (fputs(tablespace_map.data, tablespace_map_file) == EOF)
{
fclose(tablespace_map_file);
log_error(_("unable to write to tablespace_map file \"%s\""), tablespace_map_filename.data);
r = ERR_BAD_BASEBACKUP;
goto stop_backup;
}
fclose(tablespace_map_file);
}
stop_backup:
if (mode == barman)
{
/* In Barman mode, remove local_repmgr_directory */
rmtree(local_repmgr_tmp_directory, true);
}
/*
* if replication slots in use, create replication slot
*/
if (r == SUCCESS)
{
if (config_file_options.use_replication_slots == true)
{
bool slot_warning = false;
if (runtime_options.no_upstream_connection == true)
{
slot_warning = true;
}
else
{
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
RecordStatus record_status = RECORD_NOT_FOUND;
PGconn *upstream_conn = NULL;
/* check connections are still available */
(void)connection_ping_reconnect(primary_conn);
if (source_conn != primary_conn)
(void)connection_ping_reconnect(source_conn);
(void)connection_ping_reconnect(source_conn);
record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);
if (record_status != RECORD_FOUND)
{
log_error(_("unable to retrieve node record for upstream node %i"), upstream_node_id);
slot_warning = true;
}
else
{
upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
if (PQstatus(upstream_conn) != CONNECTION_OK)
{
log_error(_("unable to connect to upstream node %i to create a replication slot"), upstream_node_id);
slot_warning = true;
}
else
{
record_status = get_slot_record(upstream_conn, node_record->slot_name, &slot_info);
if (record_status == RECORD_FOUND)
{
log_verbose(LOG_INFO,
_("replication slot \"%s\" aleady exists on upstream node %i"),
node_record->slot_name,
upstream_node_id);
}
else
{
PQExpBufferData errmsg;
initPQExpBuffer(&errmsg);
if (create_replication_slot(upstream_conn, node_record->slot_name, &errmsg) == false)
{
log_error(_("unable to create replication slot on upstream node %i"), upstream_node_id);
log_detail("%s", errmsg.data);
slot_warning = true;
}
else
{
log_notice(_("replication slot \"%s\" created on upstream node \"%s\" (ID: %i)"),
node_record->slot_name,
upstream_node_record.node_name,
upstream_node_id );
}
termPQExpBuffer(&errmsg);
}
PQfinish(upstream_conn);
}
}
}
if (slot_warning == true)
{
log_warning(_("\"use_replication_slots\" specified but a replication slot could not be created"));
log_hint(_("ensure a replication slot called \"%s\" is created on the upstream node (ID: %i)"),
node_record->slot_name,
upstream_node_id);
}
}
}
return r;
}
/*
 * make_barman_ssh_command()
 *
 * Format the base command used to invoke Barman over SSH into "buf" and
 * return "buf". When "barman_config" is set in the repmgr configuration,
 * a " --config=..." argument is added to the barman invocation.
 *
 * "buf" is written with maxlen_snprintf() -- presumably the caller must
 * supply a buffer of at least MAXLEN bytes (TODO confirm against the macro).
 */
static char *
make_barman_ssh_command(char *buf)
{
    /* static: keeps its contents between calls */
    static char barman_config_arg[MAXLEN] = "";

    if (config_file_options.barman_config[0] != '\0')
    {
        maxlen_snprintf(barman_config_arg,
                        " --config=%s",
                        config_file_options.barman_config);
    }

    maxlen_snprintf(buf,
                    "ssh %s barman%s",
                    config_file_options.barman_host,
                    barman_config_arg);

    return buf;
}
/*
 * Copy the first "len" bytes of "src" into "dst" (capacity "dst_size")
 * and NUL-terminate the result.
 *
 * Returns false, leaving "dst" untouched, if the field plus terminator
 * would not fit.
 */
static bool
copy_barman_field(char *dst, size_t dst_size, const char *src, size_t len)
{
    if (len >= dst_size)
        return false;

    memcpy(dst, src, len);
    dst[len] = '\0';

    return true;
}

/*
 * get_tablespace_data_barman()
 *
 * Parse the tablespace list as reported by Barman and append one entry
 * per tablespace to "tablespace_list" (which is reset to empty first).
 *
 * Example input:
 *   [('main', 24674, '/var/lib/postgresql/tablespaces/9.5/main'),
 *    ('alt', 24678, '/var/lib/postgresql/tablespaces/9.5/alt')]
 *
 * Returns SUCCESS if the input was parsed, or -1 if it does not have the
 * expected shape or a field is too long for the local buffers. On failure
 * any entries parsed before the error remain in the list.
 */
static int
get_tablespace_data_barman(char *tablespace_data_barman,
                           TablespaceDataList *tablespace_list)
{
    char        name[MAXLEN] = "";
    char        oid[MAXLEN] = "";
    char        location[MAXPGPATH] = "";
    char       *p = tablespace_data_barman;
    size_t      len = 0;

    tablespace_list->head = NULL;
    tablespace_list->tail = NULL;

    p = string_skip_prefix("[", p);
    if (p == NULL)
        return -1;

    while (*p == '(')
    {
        /* tablespace name, up to the closing quote */
        p = string_skip_prefix("('", p);
        if (p == NULL)
            return -1;

        len = strcspn(p, "'");
        if (copy_barman_field(name, sizeof(name), p, len) == false)
            return -1;

        /* OID, up to the next comma */
        p = string_skip_prefix("', ", p + len);
        if (p == NULL)
            return -1;

        len = strcspn(p, ",");
        if (copy_barman_field(oid, sizeof(oid), p, len) == false)
            return -1;

        /* location, up to the closing quote */
        p = string_skip_prefix(", '", p + len);
        if (p == NULL)
            return -1;

        len = strcspn(p, "'");
        if (copy_barman_field(location, sizeof(location), p, len) == false)
            return -1;

        p = string_skip_prefix("')", p + len);
        if (p == NULL)
            return -1;

        tablespace_data_append(tablespace_list, name, oid, location);

        /* end of list */
        if (*p == ']')
            break;

        /* otherwise another tuple must follow */
        p = string_skip_prefix(", ", p);
        if (p == NULL)
            return -1;
    }

    return SUCCESS;
}
/*
 * get_barman_property()
 *
 * Look up the property "name" in the "show-server.txt" file located in
 * local_repmgr_tmp_directory (by running grep) and copy its value, with
 * trailing newlines removed, into "dst".
 *
 * "dst" is written as a buffer of MAXLEN bytes; the result is always
 * NUL-terminated and is truncated if the value is longer than that.
 *
 * Exits with ERR_INTERNAL if the grep output does not start with
 * "<TAB>name: ".
 *
 * NOTE(review): the "local_repmgr_directory" parameter is not used; the
 * file-level "local_repmgr_tmp_directory" is read instead. Presumably
 * callers pass that same value -- confirm before switching to the parameter.
 */
void
get_barman_property(char *dst, char *name, char *local_repmgr_directory)
{
    PQExpBufferData command_output;
    char        buf[MAXLEN] = "";
    char        command[MAXLEN] = "";
    char       *p = NULL;

    (void) local_repmgr_directory;

    initPQExpBuffer(&command_output);

    maxlen_snprintf(command,
                    "grep \"^[[:space:]]%s:\" %s/show-server.txt",
                    name, local_repmgr_tmp_directory);
    (void) local_command(command, &command_output);

    maxlen_snprintf(buf, "\t%s: ", name);
    p = string_skip_prefix(buf, command_output.data);

    if (p == NULL)
    {
        log_error("unexpected output from Barman: %s",
                  command_output.data);
        termPQExpBuffer(&command_output);
        exit(ERR_INTERNAL);
    }

    /*
     * strncpy() does not terminate the destination when the source is at
     * least as long as the limit, so terminate explicitly.
     */
    strncpy(dst, p, MAXLEN - 1);
    dst[MAXLEN - 1] = '\0';

    string_remove_trailing_newlines(dst);

    termPQExpBuffer(&command_output);
}
/*
 * copy_configuration_files()
 *
 * Copy every entry in "config_files" which lives outside the data
 * directory from the remote host, either to the same path locally or into
 * the local data directory, depending on
 * runtime_options.copy_external_config_files_destination.
 *
 * If "delete_after_copy" is true each copied file is removed again
 * straight away; this mode only verifies that the files can be copied
 * before the main clone operation runs.
 *
 * Exits with ERR_BAD_RSYNC on a failed copy unless --force was given.
 */
static void
copy_configuration_files(bool delete_after_copy)
{
    int         idx;
    char       *upstream_host = param_get(&recovery_conninfo, "host");

    /* no host in the upstream record: report the one supplied at runtime */
    if (upstream_host == NULL)
        upstream_host = runtime_options.host;

    log_notice(_("copying external configuration files from upstream node \"%s\""), upstream_host);

    for (idx = 0; idx < config_files.entries; idx++)
    {
        t_configfile_info *cfg = config_files.files[idx];
        PQExpBufferData target;
        int         rc;

        /* files inside the data directory are handled by the main backup */
        if (cfg->in_data_directory == true)
            continue;

        initPQExpBuffer(&target);

        if (runtime_options.copy_external_config_files_destination != CONFIG_FILE_SAMEPATH)
        {
            appendPQExpBuffer(&target,
                              "%s/%s",
                              local_data_directory,
                              cfg->filename);
        }
        else
        {
            appendPQExpBufferStr(&target, cfg->filepath);
        }

        rc = copy_remote_files(runtime_options.host, runtime_options.remote_user,
                               cfg->filepath, target.data, false, source_server_version_num);

        /* TODO: collate errors into list */
        if (WEXITSTATUS(rc))
        {
            log_error(_("standby clone: unable to copy config file \"%s\""),
                      cfg->filename);
            log_hint(_("see preceding messages for details"));

            if (runtime_options.force == false)
                exit(ERR_BAD_RSYNC);
        }

        /* a failed removal is very unlikely, but log it if it happens */
        if (delete_after_copy == true
            && unlink(target.data) < 0
            && errno != ENOENT)
        {
            log_warning(_("unable to delete %s"), target.data);
            log_detail("%s", strerror(errno));
        }

        termPQExpBuffer(&target);
    }
}
/*
 * tablespace_data_append()
 *
 * Allocate a new list cell holding private copies of "oid", "name" and
 * "location" and link it to the end of "list".
 */
static void
tablespace_data_append(TablespaceDataList *list, const char *name, const char *oid, const char *location)
{
    TablespaceDataListCell *node = NULL;
    size_t      oid_size;
    size_t      name_size;
    size_t      location_size;

    /* zero-filled allocation, so node->next starts out as NULL */
    node = (TablespaceDataListCell *) pg_malloc0(sizeof(TablespaceDataListCell));

    if (node == NULL)
    {
        log_error(_("unable to allocate memory; terminating"));
        exit(ERR_OUT_OF_MEMORY);
    }

    /* sizes include the terminating NUL */
    oid_size = strlen(oid) + 1;
    name_size = strlen(name) + 1;
    location_size = strlen(location) + 1;

    node->oid = pg_malloc(oid_size);
    node->name = pg_malloc(name_size);
    node->location = pg_malloc(location_size);

    memcpy(node->oid, oid, oid_size);
    memcpy(node->name, name, name_size);
    memcpy(node->location, location, location_size);

    if (list->tail == NULL)
        list->head = node;
    else
        list->tail->next = node;

    list->tail = node;
}
/*
 * check_primary_standby_version_match()
 *
 * Check server versions of supplied connections are compatible for
 * replication purposes.
 *
 * "conn" is the connection to the standby, "primary_conn" the connection
 * to the primary. The two version numbers are compared after integer
 * division by 100.
 *
 * Closes both connections and exits with ERR_BAD_CONFIG if the primary's
 * version cannot be verified or the versions do not match.
 */
static void
check_primary_standby_version_match(PGconn *conn, PGconn *primary_conn)
{
    char        standby_version[MAXVERSIONSTR] = "";
    int         standby_version_num = UNKNOWN_SERVER_VERSION_NUM;

    char        primary_version[MAXVERSIONSTR] = "";
    int         primary_version_num = UNKNOWN_SERVER_VERSION_NUM;

    standby_version_num = check_server_version(conn, "standby", true, standby_version);

    /*
     * Verify that primary is a supported server version. This must query
     * the primary's own connection; querying "conn" again would compare
     * the standby with itself and the mismatch check could never fire.
     */
    primary_version_num = check_server_version(primary_conn, "primary", false, primary_version);

    if (primary_version_num < 0)
    {
        PQfinish(conn);
        PQfinish(primary_conn);
        exit(ERR_BAD_CONFIG);
    }

    /* primary and standby version should match */
    if ((primary_version_num / 100) != (standby_version_num / 100))
    {
        log_error(_("PostgreSQL versions on primary (%s) and standby (%s) must match"),
                  primary_version, standby_version);
        PQfinish(conn);
        PQfinish(primary_conn);
        exit(ERR_BAD_CONFIG);
    }
}
/*
 * check_recovery_type()
 *
 * Verify that the node behind "conn" reports itself as a standby.
 *
 * Returns normally only in that case. Otherwise an error is logged, the
 * connection is closed and the process exits: with ERR_BAD_CONFIG if the
 * node is a primary, with ERR_DB_CONN for any other result.
 */
static void
check_recovery_type(PGconn *conn)
{
    const RecoveryType node_type = get_recovery_type(conn);
    int         exit_code;

    if (node_type == RECTYPE_STANDBY)
        return;

    if (node_type == RECTYPE_PRIMARY)
    {
        log_error(_("this node should be a standby (%s)"),
                  config_file_options.conninfo);
        exit_code = ERR_BAD_CONFIG;
    }
    else
    {
        log_error(_("connection to node (%s) lost"),
                  config_file_options.conninfo);
        exit_code = ERR_DB_CONN;
    }

    PQfinish(conn);
    exit(exit_code);
}
/*
 * create_recovery_file()
 *
 * Assemble the replication configuration for a standby and emit it in one
 * of three ways:
 *
 *  - as_file == false: the "key = 'value'" lines are written into the
 *    buffer "dest" (via maxlen_snprintf(), so presumably at most MAXLEN
 *    bytes -- TODO confirm against the macro);
 *  - as_file == true, server_version_num >= 120000: the settings are
 *    passed to modify_auto_conf() with "dest", and write_standby_signal()
 *    is called;
 *  - as_file == true, earlier versions: RECOVERY_COMMAND_FILE is created
 *    in the directory "dest".
 *
 * node_record        supplies the slot name when replication slots are in use
 * primary_conninfo   connection parameters for "primary_conninfo"
 * server_version_num version number of the server being configured
 *
 * Returns true on success, false if the configuration could not be written.
 *
 * NOTE(review): the "recovery_config" list is not released here; no helper
 * for freeing it is visible in this part of the file.
 */
static bool
create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_conninfo, int server_version_num, char *dest, bool as_file)
{
    PQExpBufferData recovery_file_buf;
    PQExpBufferData primary_conninfo_buf;
    char        recovery_file_path[MAXPGPATH] = "";
    FILE       *recovery_file;
    mode_t      um;
    int         save_errno;

    KeyValueList recovery_config = {NULL, NULL};
    KeyValueListCell *cell = NULL;

    /* standby_mode = 'on' (Pg 11 and earlier) */
    if (server_version_num < 120000)
    {
        key_value_list_set(&recovery_config,
                           "standby_mode", "on");
    }

    /*
     * primary_conninfo = '...'
     *
     * The buffer is released as soon as the value has been stored; the
     * escaped command values below are freed straight after
     * key_value_list_set() in the same way.
     */
    initPQExpBuffer(&primary_conninfo_buf);
    write_primary_conninfo(&primary_conninfo_buf, primary_conninfo);
    key_value_list_set(&recovery_config,
                       "primary_conninfo", primary_conninfo_buf.data);
    termPQExpBuffer(&primary_conninfo_buf);

    /* recovery_target_timeline = 'latest' */
    key_value_list_set(&recovery_config,
                       "recovery_target_timeline", "latest");

    /* recovery_min_apply_delay = ... (optional) */
    if (config_file_options.recovery_min_apply_delay_provided == true)
    {
        key_value_list_set(&recovery_config,
                           "recovery_min_apply_delay", config_file_options.recovery_min_apply_delay);
    }

    /* primary_slot_name = '...' (optional, for 9.4 and later) */
    if (config_file_options.use_replication_slots)
    {
        key_value_list_set(&recovery_config,
                           "primary_slot_name", node_record->slot_name);
    }

    /* restore_command (optional) */
    if (config_file_options.restore_command[0] != '\0')
    {
        char       *escaped = escape_recovery_conf_value(config_file_options.restore_command);

        key_value_list_set(&recovery_config,
                           "restore_command", escaped);
        free(escaped);
    }

    /* archive_cleanup_command (optional) */
    if (config_file_options.archive_cleanup_command[0] != '\0')
    {
        char       *escaped = escape_recovery_conf_value(config_file_options.archive_cleanup_command);

        key_value_list_set(&recovery_config,
                           "archive_cleanup_command", escaped);
        free(escaped);
    }

    if (as_file == false)
    {
        /* create file in buffer */
        initPQExpBuffer(&recovery_file_buf);

        for (cell = recovery_config.head; cell; cell = cell->next)
        {
            appendPQExpBuffer(&recovery_file_buf,
                              "%s = '%s'\n",
                              cell->key, cell->value);
        }

        maxlen_snprintf(dest, "%s", recovery_file_buf.data);

        termPQExpBuffer(&recovery_file_buf);

        return true;
    }

    /*
     * PostgreSQL 12 and later: modify postgresql.auto.conf
     */
    if (server_version_num >= 120000)
    {
        if (modify_auto_conf(dest, &recovery_config) == false)
        {
            return false;
        }

        if (write_standby_signal() == false)
        {
            return false;
        }

        return true;
    }

    /*
     * PostgreSQL 11 and earlier: write recovery.conf
     */
    maxpath_snprintf(recovery_file_path, "%s/%s", dest, RECOVERY_COMMAND_FILE);

    log_debug("create_recovery_file(): creating \"%s\"...",
              recovery_file_path);

    /*
     * Mask out all group/other permission bits (the expression evaluates
     * to 0077) while the file is created, then restore the previous umask.
     */
    um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
    recovery_file = fopen(recovery_file_path, "w");
    /* capture errno before any further library call can change it */
    save_errno = errno;
    umask(um);

    if (recovery_file == NULL)
    {
        log_error(_("unable to create recovery.conf file at \"%s\""),
                  recovery_file_path);
        log_detail("%s", strerror(save_errno));

        return false;
    }

    for (cell = recovery_config.head; cell; cell = cell->next)
    {
        initPQExpBuffer(&recovery_file_buf);
        appendPQExpBuffer(&recovery_file_buf,
                          "%s = '%s'\n",
                          cell->key, cell->value);

        log_debug("recovery.conf line: %s", recovery_file_buf.data);

        if (fputs(recovery_file_buf.data, recovery_file) == EOF)
        {
            log_error(_("unable to write to recovery file at \"%s\""), recovery_file_path);
            fclose(recovery_file);
            termPQExpBuffer(&recovery_file_buf);
            return false;
        }

        termPQExpBuffer(&recovery_file_buf);
    }

    /* buffered data is flushed on close, so a write error may only show up here */
    if (fclose(recovery_file) != 0)
    {
        log_error(_("unable to write to recovery file at \"%s\""), recovery_file_path);
        log_detail("%s", strerror(errno));
        return false;
    }

    return true;
}
/*
 * write_standby_signal()
 *
 * Create STANDBY_SIGNAL_FILE (PostgreSQL 12 and later) in the data
 * directory named in the repmgr configuration, containing a single
 * comment line.
 *
 * Returns true if the file was created and written, false otherwise.
 */
static bool
write_standby_signal(void)
{
    char        signal_path[MAXPGPATH] = "";
    FILE       *fp;
    mode_t      saved_umask;
    bool        written;

    snprintf(signal_path, sizeof(signal_path),
             "%s/%s",
             config_file_options.data_directory,
             STANDBY_SIGNAL_FILE);

    log_debug("dir: %s,signal file: %s", config_file_options.data_directory, signal_path);

    /*
     * Mask out all group/other permission bits while the file is created,
     * then put the previous umask back.
     */
    saved_umask = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
    fp = fopen(signal_path, "w");
    umask(saved_umask);

    if (fp == NULL)
    {
        log_error(_("unable to create %s file at \"%s\""),
                  STANDBY_SIGNAL_FILE,
                  signal_path);
        log_detail("%s", strerror(errno));

        return false;
    }

    written = (fputs("# created by repmgr\n", fp) != EOF);

    if (written == false)
    {
        log_error(_("unable to write to %s file at \"%s\""),
                  STANDBY_SIGNAL_FILE,
                  signal_path);
    }

    fclose(fp);

    return written;
}
/*
 * write_primary_conninfo()
 *
 * Build a connection string from "param_list" and append it, escaped with
 * escape_recovery_conf_value(), to "dest".
 *
 * - "dbname" and "replication" entries, and entries with no value, are
 *   left out;
 * - if no "application_name" was supplied, the configured node name (or
 *   "repmgr" if that is empty) is used;
 * - if no "password" was supplied and "use_primary_conninfo_password" is
 *   set, a password found in the env_conninfo parameter list is added;
 * - if "passfile" is configured and has_passfile() reports support for
 *   it, a "passfile" entry is added.
 */
static void
write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list)
{
    t_conninfo_param_list env_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
    PQExpBufferData conninfo_buf;
    bool        have_application_name = false;
    bool        have_password = false;
    char       *escaped = NULL;
    int         idx;

    initialize_conninfo_params(&env_conninfo, true);
    initPQExpBuffer(&conninfo_buf);

    for (idx = 0; idx < param_list->size && param_list->keywords[idx] != NULL; idx++)
    {
        const char *keyword = param_list->keywords[idx];
        const char *value = param_list->values[idx];

        /* skip empty settings */
        if (value == NULL || value[0] == '\0')
            continue;

        /* skip settings which don't make any sense in recovery.conf */
        if (strcmp(keyword, "dbname") == 0 || strcmp(keyword, "replication") == 0)
            continue;

        /* remember which of the defaultable settings were given explicitly */
        if (strcmp(keyword, "password") == 0)
            have_password = true;
        else if (strcmp(keyword, "application_name") == 0)
            have_application_name = true;

        /* single space between entries */
        if (conninfo_buf.len != 0)
            appendPQExpBufferChar(&conninfo_buf, ' ');

        appendPQExpBuffer(&conninfo_buf, "%s=", keyword);
        appendConnStrVal(&conninfo_buf, value);
    }

    /* "application_name" not provided - default to repmgr node name */
    if (have_application_name == false)
    {
        if (strlen(config_file_options.node_name))
        {
            appendPQExpBufferStr(&conninfo_buf, " application_name=");
            appendConnStrVal(&conninfo_buf, config_file_options.node_name);
        }
        else
        {
            appendPQExpBufferStr(&conninfo_buf, " application_name=repmgr");
        }
    }

    /* no password provided explicitly */
    if (have_password == false
        && config_file_options.use_primary_conninfo_password == true)
    {
        const char *env_password = param_get(&env_conninfo, "password");

        if (env_password != NULL)
        {
            appendPQExpBufferStr(&conninfo_buf, " password=");
            appendConnStrVal(&conninfo_buf, env_password);
        }
    }

    /*
     * passfile provided as configuration option; only usable if the libpq
     * we're using supports "passfile="
     */
    if (config_file_options.passfile[0] != '\0'
        && has_passfile() == true)
    {
        appendPQExpBufferStr(&conninfo_buf, " passfile=");
        appendConnStrVal(&conninfo_buf, config_file_options.passfile);
    }

    escaped = escape_recovery_conf_value(conninfo_buf.data);
    appendPQExpBufferStr(dest, escaped);
    free(escaped);

    free_conninfo_params(&env_conninfo);
    termPQExpBuffer(&conninfo_buf);
}
/*
 * parse_node_status_is_shutdown_cleanly()
 *
 * Tokenise "node_status_output" into an argument vector and extract the
 * "--state" and "--last-checkpoint-lsn" options from it.
 *
 * Returns the node status matching the reported state, or
 * NODE_STATUS_UNKNOWN if no recognised state was found. "*checkPoint" is
 * set to the parsed LSN when the option is present, and to
 * InvalidXLogRecPtr when the output is empty; otherwise it is left alone.
 */
static NodeStatus
parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint)
{
    /* state label -> status */
    const struct
    {
        const char *label;
        NodeStatus  status;
    }           known_states[] =
    {
        {"RUNNING", NODE_STATUS_UP},
        {"SHUTDOWN", NODE_STATUS_DOWN},
        {"UNCLEAN_SHUTDOWN", NODE_STATUS_UNCLEAN_SHUTDOWN},
        {"UNKNOWN", NODE_STATUS_UNKNOWN}
    };

    /* We're only interested in these options */
    struct option long_opts[] =
    {
        {"last-checkpoint-lsn", required_argument, NULL, 'L'},
        {"state", required_argument, NULL, 'S'},
        {NULL, 0, NULL, 0}
    };

    NodeStatus  result = NODE_STATUS_UNKNOWN;
    char      **tokens = NULL;
    int         token_count = 0;
    int         long_index = 0;
    int         opt = 0;
    size_t      i;

    /* Don't attempt to tokenise an empty string */
    if (node_status_output[0] == '\0')
    {
        *checkPoint = InvalidXLogRecPtr;
        return result;
    }

    token_count = parse_output_to_argv(node_status_output, &tokens);

    /* Reset getopt's optind variable and keep getopt quiet */
    optind = 0;
    opterr = 0;

    while ((opt = getopt_long(token_count, tokens, "L:S:", long_opts,
                              &long_index)) != -1)
    {
        if (opt == 'L')
        {
            /* --last-checkpoint-lsn */
            *checkPoint = parse_lsn(optarg);
        }
        else if (opt == 'S')
        {
            /* --state: an unrecognised label leaves the result untouched */
            for (i = 0; i < sizeof(known_states) / sizeof(known_states[0]); i++)
            {
                if (strncmp(optarg, known_states[i].label, MAXLEN) == 0)
                {
                    result = known_states[i].status;
                    break;
                }
            }
        }
    }

    free_parsed_argv(&tokens);

    return result;
}
/*
 * parse_remote_node_replication_connection()
 *
 * Tokenise "node_check_output" into an argument vector and extract the
 * "--connection" option from it.
 *
 * Returns CONN_OK or CONN_BAD according to the reported value, and
 * CONN_UNKNOWN if the output is empty, the option is missing or its value
 * is not recognised.
 */
static ConnectionStatus
parse_remote_node_replication_connection(const char *node_check_output)
{
    ConnectionStatus conn_status = CONN_UNKNOWN;

    int         c = 0,
                argc_item = 0;
    char      **argv_array = NULL;
    int         optindex = 0;

    /* We're only interested in this option */
    struct option node_check_options[] =
    {
        {"connection", required_argument, NULL, 'c'},
        {NULL, 0, NULL, 0}
    };

    /* Don't attempt to tokenise an empty string */
    if (!strlen(node_check_output))
    {
        return CONN_UNKNOWN;
    }

    argc_item = parse_output_to_argv(node_check_output, &argv_array);

    /* Reset getopt's optind variable */
    optind = 0;

    /* Prevent getopt from emitting errors */
    opterr = 0;

    /*
     * The short-option string must name the option handled below ("c"),
     * matching the long option table above.
     */
    while ((c = getopt_long(argc_item, argv_array, "c:", node_check_options,
                            &optindex)) != -1)
    {
        switch (c)
        {
                /* --connection */
            case 'c':
                {
                    if (strncmp(optarg, "OK", MAXLEN) == 0)
                    {
                        conn_status = CONN_OK;
                    }
                    else if (strncmp(optarg, "BAD", MAXLEN) == 0)
                    {
                        conn_status = CONN_BAD;
                    }
                    else if (strncmp(optarg, "UNKNOWN", MAXLEN) == 0)
                    {
                        conn_status = CONN_UNKNOWN;
                    }
                }
                break;

                /* anything else is ignored */
            default:
                break;
        }
    }

    free_parsed_argv(&argv_array);

    return conn_status;
}
/*
 * parse_node_check_archiver()
 *
 * Tokenise "node_check_output" into an argument vector and extract the
 * "--status", "--files" and "--threshold" options from it.
 *
 * "*files" and "*threshold" are reset to 0 and then set from the
 * corresponding options where present (converted with atoi()).
 *
 * Returns the check status matching the reported value;
 * CHECK_STATUS_UNKNOWN if the output is empty, the option is missing or
 * its value is not recognised.
 */
static CheckStatus
parse_node_check_archiver(const char *node_check_output, int *files, int *threshold)
{
    /* status label -> status; anything not listed maps to "unknown" */
    const struct
    {
        const char *label;
        CheckStatus status;
    }           known_statuses[] =
    {
        {"OK", CHECK_STATUS_OK},
        {"WARNING", CHECK_STATUS_WARNING},
        {"CRITICAL", CHECK_STATUS_CRITICAL}
    };

    /* We're only interested in these options */
    struct option long_opts[] =
    {
        {"status", required_argument, NULL, 'S'},
        {"files", required_argument, NULL, 'f'},
        {"threshold", required_argument, NULL, 't'},
        {NULL, 0, NULL, 0}
    };

    CheckStatus result = CHECK_STATUS_UNKNOWN;
    char      **tokens = NULL;
    int         token_count = 0;
    int         long_index = 0;
    int         opt = 0;
    size_t      i;

    *files = 0;
    *threshold = 0;

    /* Don't attempt to tokenise an empty string */
    if (node_check_output[0] == '\0')
        return result;

    token_count = parse_output_to_argv(node_check_output, &tokens);

    /* Reset getopt's optind variable and keep getopt quiet */
    optind = 0;
    opterr = 0;

    while ((opt = getopt_long(token_count, tokens, "f:S:t:", long_opts,
                              &long_index)) != -1)
    {
        if (opt == 'f')
        {
            /* --files */
            *files = atoi(optarg);
        }
        else if (opt == 't')
        {
            /* --threshold */
            *threshold = atoi(optarg);
        }
        else if (opt == 'S')
        {
            /* --status */
            result = CHECK_STATUS_UNKNOWN;

            for (i = 0; i < sizeof(known_statuses) / sizeof(known_statuses[0]); i++)
            {
                if (strncmp(optarg, known_statuses[i].label, MAXLEN) == 0)
                {
                    result = known_statuses[i].status;
                    break;
                }
            }
        }
    }

    free_parsed_argv(&tokens);

    return result;
}
/*
 * parse_data_directory_config()
 *
 * Tokenise "node_check_output" into an argument vector and inspect the
 * "--configured-data-directory" option.
 *
 * Returns false if the output is empty or if the option's value does not
 * begin with "OK"; true otherwise (including when the option is absent).
 */
static bool
parse_data_directory_config(const char *node_check_output)
{
    /* We're only interested in this option */
    struct option long_opts[] =
    {
        {"configured-data-directory", required_argument, NULL, 'C'},
        {NULL, 0, NULL, 0}
    };

    bool        directory_ok = true;
    char      **tokens = NULL;
    int         token_count = 0;
    int         long_index = 0;
    int         opt = 0;

    /* Don't attempt to tokenise an empty string */
    if (node_check_output[0] == '\0')
        return false;

    token_count = parse_output_to_argv(node_check_output, &tokens);

    /* Reset getopt's optind variable and keep getopt quiet */
    optind = 0;
    opterr = 0;

    while ((opt = getopt_long(token_count, tokens, "C:", long_opts,
                              &long_index)) != -1)
    {
        /* --configured-data-directory: we only care whether it's "OK" or not */
        if (opt == 'C' && strncmp(optarg, "OK", 2) != 0)
            directory_ok = false;
    }

    free_parsed_argv(&tokens);

    return directory_ok;
}
/*
 * do_standby_help()
 *
 * Print the usage text for the "standby" family of commands (clone,
 * register, unregister, promote, follow, switchover) to standard output,
 * preceded by the common help header.
 *
 * Each message is passed through _() individually; the texts are kept
 * exactly as they are because they are user-facing strings.
 */
void
do_standby_help(void)
{
print_help_header();
/* synopsis of all "standby" actions */
printf(_("Usage:\n"));
printf(_(" %s [OPTIONS] standby clone\n"), progname());
printf(_(" %s [OPTIONS] standby register\n"), progname());
printf(_(" %s [OPTIONS] standby unregister\n"), progname());
printf(_(" %s [OPTIONS] standby promote\n"), progname());
printf(_(" %s [OPTIONS] standby follow\n"), progname());
printf(_(" %s [OPTIONS] standby switchover\n"), progname());
puts("");
/* options for "standby clone" */
printf(_("STANDBY CLONE\n"));
puts("");
printf(_(" \"standby clone\" clones a standby from the primary or an upstream node.\n"));
puts("");
printf(_(" -d, --dbname=conninfo conninfo of the upstream node to use for cloning.\n"));
printf(_(" -c, --fast-checkpoint force fast checkpoint\n"));
printf(_(" --copy-external-config-files[={samepath|pgdata}]\n" \
" copy configuration files located outside the \n" \
" data directory to the same path on the standby (default) or to the\n" \
" PostgreSQL data directory\n"));
printf(_(" --dry-run perform checks but don't actually clone the standby\n"));
printf(_(" --no-upstream-connection when using Barman, do not connect to upstream node\n"));
/* the default shown for --remote-user is taken from runtime_options.username */
printf(_(" -R, --remote-user=USERNAME database server username for SSH operations (default: \"%s\")\n"), runtime_options.username);
printf(_(" --replication-user user to make replication connections with (optional, not usually required)\n"));
printf(_(" --upstream-conninfo \"primary_conninfo\" value to write in recovery.conf\n" \
" when the intended upstream server does not yet exist\n"));
printf(_(" --upstream-node-id ID of the upstream node to replicate from (optional, defaults to primary node)\n"));
printf(_(" --without-barman do not use Barman even if configured\n"));
printf(_(" --recovery-conf-only create \"recovery.conf\" file for a previously cloned instance\n"));
puts("");
/* options for "standby register" */
printf(_("STANDBY REGISTER\n"));
puts("");
printf(_(" \"standby register\" registers the standby node.\n"));
puts("");
printf(_(" -F, --force overwrite an existing node record, or if primary connection\n" \
" parameters supplied, create record even if standby offline\n"));
printf(_(" --upstream-node-id ID of the upstream node to replicate from (optional)\n"));
printf(_(" --wait-start=VALUE wait for the standby to start (timeout in seconds, default %i)\n"), DEFAULT_WAIT_START);
printf(_(" --wait-sync[=VALUE] wait for the node record to synchronise to the standby\n" \
" (optional timeout in seconds)\n"));
puts("");
/* options for "standby unregister" */
printf(_("STANDBY UNREGISTER\n"));
puts("");
printf(_(" \"standby unregister\" unregisters an inactive standby node.\n"));
puts("");
printf(_(" --node-id ID of node to unregister (optional, used when the node to\n" \
" unregister is offline)\n"));
puts("");
/* "standby promote" takes no command-specific options */
printf(_("STANDBY PROMOTE\n"));
puts("");
printf(_(" \"standby promote\" promotes a standby node to primary.\n"));
puts("");
/* options for "standby follow" */
printf(_("STANDBY FOLLOW\n"));
puts("");
printf(_(" \"standby follow\" instructs a standby node to follow a new primary.\n"));
puts("");
printf(_(" --dry-run perform checks but don't actually follow the new primary\n"));
printf(_(" --upstream-node-id node ID of the new primary\n"));
printf(_(" -W, --wait wait for a primary to appear\n"));
puts("");
/* options for "standby switchover" */
printf(_("STANDBY SWITCHOVER\n"));
puts("");
printf(_(" \"standby switchover\" promotes a standby node to primary, and demotes the previous primary to a standby.\n"));
puts("");
printf(_(" --always-promote promote standby even if behind original primary\n"));
printf(_(" --dry-run perform checks etc. but don't actually execute switchover\n"));
printf(_(" -F, --force ignore warnings and continue anyway\n"));
printf(_(" --force-rewind[=VALUE] use \"pg_rewind\" to reintegrate the old primary if necessary\n"));
printf(_(" (9.3 and 9.4 - provide \"pg_rewind\" path)\n"));
/* the default shown for --remote-user is taken from runtime_options.username */
printf(_(" -R, --remote-user=USERNAME database server username for SSH operations (default: \"%s\")\n"), runtime_options.username);
printf(_(" --repmgrd-no-pause don't pause repmgrd\n"));
printf(_(" --siblings-follow have other standbys follow new primary\n"));
puts("");
}
C
1
https://gitee.com/highgosoftware/hg_repmgr.git
git@gitee.com:highgosoftware/hg_repmgr.git
highgosoftware
hg_repmgr
hg_repmgr
master

搜索帮助