From 4a81c24431006612096ce582f7e8e1ad6d59ed46 Mon Sep 17 00:00:00 2001 From: shirley_zhengx Date: Sat, 6 May 2023 10:14:29 +0800 Subject: [PATCH] add dual cluster for shared_storage 2 --- ...70\273\351\233\206\347\276\244ha_test.png" | Bin 0 -> 6067 bytes ...06\347\276\244\345\210\207\346\215\242.md" | 462 ++++++++++++++ ...\344\271\213dd\346\250\241\346\213\237.md" | 94 +-- ...\344\271\213cm\346\250\241\346\213\237.md" | 599 ++++++++++++++++++ 4 files changed, 1114 insertions(+), 41 deletions(-) create mode 100644 "content/zh/post/zhengxue/images/cm\346\250\241\346\213\237/\344\270\273\351\233\206\347\276\244ha_test.png" create mode 100644 "content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\345\210\207\346\215\242.md" create mode 100644 "content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\272\214)\344\271\213cm\346\250\241\346\213\237.md" diff --git "a/content/zh/post/zhengxue/images/cm\346\250\241\346\213\237/\344\270\273\351\233\206\347\276\244ha_test.png" "b/content/zh/post/zhengxue/images/cm\346\250\241\346\213\237/\344\270\273\351\233\206\347\276\244ha_test.png" new file mode 100644 index 0000000000000000000000000000000000000000..1c76cfedd549da857bffe9684241c08fde0ccd06 GIT binary patch literal 6067 zcmY*dc{mi@+a87)>lo_{i5ZDR42iL3-?AtBHrXYSWGCy4?E5ZDw#pijQPv?E>!>Uh zCX9WFH00Ii{r$c_zVDB-Jl8qb^_+8``?{a!Jc*{px~xomOaK6YRbNlb8~~sJpU*K2 zp!0s--__!Lpb0S7)c`b%@vom7bROzP>Ht7<8uKZJ{@iBt)3XTx03iMUHkzRawXOgF zOiy1+-6Gg=yJWuc;unV@6TjGiX3jrbAHPcddFUhf5$VO3If%Y*&IjISU>(q4%CmeT zkqt8^=U$~Nzukr?Pe5d%*{f>W7Zv?c#^$zKE8z6txb7YkhJDM()5v?H#8R8E zQDR|)`$B3Io8b2K>zi`dH-9^Hx4G8`%7rh06EJXYp7Xx735{Z@j3-26;MXJ|Ed8Ds ze`mN3a}uy0Wy^zr2q=d&1Faf;MIJ59Uhnja11ecbZ)sl`TWff>$y@yzHrBtBa-k{A zG3Y*|cp?6Rd_(`3&FGB;8=(LmFb9Z$;{+}!5@DJA+HuGJ!oTNg79uY$DR6-fX~#SX>QZzkO`zg$ z(VJHQ^5&1nDGfDbL=*}2C1`B5Pne;=UFht`;=-Y^c`oci#tll+S!iMIZfDtG--pF~6VEH2eS4ca z^b}dQE-X}5ZC8?~T>89Zxt>Yyng}@^XN;JD%5SShSv{LNhj%{N4x=xeCz4t1NMeXwRK{Oj&C+yjxHqq z=I1ECazVV78~gQmwKR4NXL020**8JToe`E2_Nu$>-G5J7>@^!DNc4E^K<2HK##7C< z3DoA8`5^y`WZS6vdTB6c>ZPy_Y@)N_ia}2z$jx62>8eIN*2i|f==?rGZ7E_V~!Ve6s*e}*Y z3%o{4NA7Y6fTJh1#bn5VXNBmd1Ev^)P!;9S^n-cGx6bH)%OIDq}Z9>ap#9SbE2H#yJ zr#Jd==88gYCnyDoJGddTeiknfy2e~ev(r|O^S|T8&T;rv7;MF=d_!m4NCa{+g0vfM zz55Y$GLk1GUM_brWimHOQ#+)3O22<(d(oqD@e@eH?cMv=Sw1Y9m>>*xBrB&|zhKl$ zmtUChyP)Red%?L7VR?dLMx^m*L`D292Up?sex9qhZ9bOp>dk2w)l|L=Udg%f&OJ9( zA}@27g$sX0+ciYwZmt03BdL~Hjgy`JBf}0>w%n=OID63`t-ZRd(AvS7tqPOm2J!d35giYPrQM&$KD}6e5u8X+JHO1v- z@GL;Jc-4LdZ(i3CRNZ6V$-SA@FeM52}gc8h6;+xlPtt@V%c2`vDB&wHY^%eKC$nzJKg4|5M09~!ai zeCtzw@@5Ld^RbvvUSCr7r~)~jGuk@+n#JFGIs02+u>kRQp;gYgBv;Fa)y=OA_a2ah zCc^n{B=v9flAB(4JnDwFZJvAyX_VWuo%=JJFrR8a|H&isxog)+0!i%H+@KhtxZjf>?jc+Wk``K| zUrc>CkAkk-ca8%S8u_xSiB%HD{DsPzJlJ2Z`=y$8x88VIrxaLj-M##|sJWzhdYwYZAZIUaqYI+$ENJimNP!Vn=c2JwlI3XZCgu)sXBiU#l zd>?i$LSAWY9AGh)I|srWsJfYxr;=Bx#hu)4I(KwJY^?e7kMdumTiZFOzsE-mHYL5j zRMVgDY}>r@mPn|1sua*3V>)#kPdEI;5!o&Bg?W1U6Lh_5nIsdDpj}NGc6!iGG*ptP zvg<_=H~PsvsAQi1DB_lqKhYy?pWYU%~JrDY}SlrMsVFBQBro z-`|>-uZWpwCMV|5xgf$T)jtOOXL%~|IX^_H#*cQN+C&`XpqIs0tR}1eDO+$g<00{w zfQ*+M*7{H{$7u)t<witD*8CFrs%$wy*XE@s@yW$S#Z3-YYj;8LbS3%@#5bxB9>I+QxuI+qMeD2IIv>=1_qv`nB&G zL%kR1U;mVF7QvdYa#TL0abOCoY_7ksdrlp8%OOD*Vz#@uU9O`hIYBb{V z(8)djJ+phDO&4XN?>x*HDOPj0M0Q2j%FJ+Rio;!6xOn@kvfJPP3i;IjH4#JknI$Wh zHXBjkd?K0u$?abqq@8djs2t~z9Bk>Qms`7D~LM+~!j^5L5 
zD_)^aE7R|o-G9BU8&G$a{jO%g`>M-wp%9h4M_bpes{tBf`oyp`J|!Utb(<9GEBb9Lx=;-k{AKD9E1t zF9!djUUVD=IaVKtTUAmosB$q!4kKKd?@t+A^?l=w7}kq?>~N#CbCdokKacNO!^;F; z+ftpC&n`dDujPIoFGG5{9|kT4HJ_d5Z2{2>gp+et66qshnbR3-4?g7a^LOyhv*$~9eq+~M{2&R>FJ4lcKv=?0MF)I}| zChnMO?Jm{R1Ya0LaeW~K3si5qgwPU2S%h$?ari;?0xEg$I1Vn~Lm_Lle*w=AdL)d0 z8KM~dR`cqlQ5Ba0jn58-!7+u@F0qIx&I7U3abgh}KY_(q799=Hq};-3LN(%YlptUf zi!N2((!Md4dzL+?7$+nP5bo)YkOc|S_iFe1=}9zA>3##ya)F;Ei|K$=JE-^KJ%b_D zB+aH4tR@RKhNG;P>gh~iD&PbDbC;LtZgY3=!RWoVHS%cI|GIlTfMf?DAv1m2?4d0C z3_ANfZyraig|$a>9`y)9_P;`nEBulx+2i)%;V`?^?*Iv(N&L)6{2SoB1DnoFC|DSw z>7&q&BEtKVP3^Y;gV=q=P-MT$i~&Sq?Ib9AC5V~Z9H)-ZR&QF&4aisfilr_9Ktm* zr#=~Jae1zaOt&I83yc~uz;%Q%7D4qYLcQLxTU;s&V`-7%vv4#h3j61?MrEt~(wRe>6aJcAQlpG5rX+3ZVNU|v*`n%`Sw`17$fDSc0* zb~E~)hg;JK7a-2e+E;YUMy&M2Ak*C@glH{sEBF0tHw%De zMO2bZAZNpoh8e?qI+Lkm(zArThm^qmRsA!EwTA02O*UO1)8_d1;-EPR+0{HJykO+7 z$sc}y>lh$PJaKFfdTw?_(K>cbVBvU>EOb4UAFX`Kp#1kYc8MS^*Y(Q)Y+PA8p;odv zx2QOCxtvDjOH9fn$e#L{=X6LMX&?EPxdyy;X96G;3OZ&IQ%*pzgEZK&BG)fdOgTu={wc2&8}= zG-F#SzgYHr0``O9kXwi=fp6mNZ6I}f+^4TQYQ>*{aDz>;nHlEz^@)k9XK&+aE~}NK zKs?D#8tofLyY$2(XmMhRp^x)-pVN*&93^r|Ab8_IG8E3Etjy<(J_%przWN;djYa+% z=jx?F{%Nn?+smCivvYKb-W`4OrdAI{{C{EyC-Jf+onqr5e8Dp2fbVMlYM9`4zODgs zPpg_ZIh_%x0a`D^%*~kFIbK->(?!M!LDkr_f4nz2T&$1U=w~+hO5+Q!@PMj!n`l5? zV3!hfO9K|C#EbSO7=Dy>P~R|{vAxU4a-gfS%vGgCCfZ#ua-c)-=H z?mSpSXr`qCC)lUgt1o3c)EPOJ$R2z%jOo391n~RYtzM__2w*aT3w^HSd1Q#sec{H+tS+ zK2|5*;;}9f2tS+*Tm1DKzqhgbjb8LsG?imglcq_!D^2HYpK`L-|0QhipkJCrQ4{C0 zk!rmcAEgR#I$nXG&0vrrEBEIu*+S36(7*nG7oCDk`rpac&q2yay6WXS zv6Q|aFdu5~W<)UBj}Rxi*{{kUF%thbberLHuaLDd;!BLUl|wM)rqbSz_94IZ3++7~ zkra=`9pHs{CjAP8yHhp!N%qk${W?{hfPP{OXlYK&EGgzZ&kF-xK>mBxbVw682s+d% zhloModKp0zx;`fZ>w!yWle$Y8%#iTEjdF`f7Ny4%RJ`QVSg@w-^3dO&_}>d_{qJ}z zU-YOq_9jfcUPe#=cxXm!!h6PNr(5^^jw|Rr2BinqN+up{ob!bY)$>nxGjZ4ZoFeb} zE>T9rF#&bPv>Q!`wvm?`so^FaXWqL)EA|3C@e8qs7+8R(-TeED=9K>|f|eDJzY(_$ zz6rqZ9`CK(R0i0o$1Mp-UkUkg1mD@444`HM7fxTWrl~BeM{U9neO00{sTJjRcL7;E zIw>bK%KwOBmrAU{_GW@vhQ89Upi08yg-tL*ESzx}xvBEPYn@Qxt2r*5!~O*T2D7Wb zC0p?_yqPf2c?rl1h=gb$IQmP|pK2zmLAgYc+}KBCa~w-Xz35Jyd{ira-okc&PCHv) z)o+vDkIz(@=1c6#HB0I1mHCsGg1R5kHk_@Y>_avY_Ja+5^9#pb)S+8JJvFLYN$opY zY!#O}#rn^n#saemHD`}F8~(Wu8%s6mQR&r(|Ebi7H^g(83p0pu4IW&`l;X0E}Hk_)ry;@m&70fkBXtsO{zFvr@QyWT9H(Z3Z1PZcb!R~H8;r8vTzkp0`n%59lKj1dc z1@0PUXN(v+Bp<3C!RpW!^Z<+wD`?23=`J+@Q36l9~x;N?xh_W zBV3LqYYE}NIXl2Rx+rK506e@tBDrE_gD#43gtQp^x+ccl3~}$ZO3sb^QjpPrviWK2+Q#QV?L|5m#HF2*cUF5 z{ibHhbuDMhab3aHt>1-1E-sRExUNt&Zig7LFyS-7H3zSXz4eC)!LXPY`%8amV9RaI V%CQDM=Y@8FzP7PegNAd={{ZBQT}1!@ literal 0 HcmV?d00001 diff --git "a/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\345\210\207\346\215\242.md" "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\345\210\207\346\215\242.md" new file mode 100644 index 00000000..6c4d491e --- /dev/null +++ "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\345\210\207\346\215\242.md" @@ -0,0 +1,462 @@ ++++ +title = "资源池化支持同城dorado双集群切换" +date = "2023-04-01" +tags = ["资源池化支持同城dorado双集群切换"] +archives = "2023-04-01" +author = "shirley_zhengx" +summary = "资源池化支持同城dorado双集群切换" +img = "/zh/post/zhengxue/title/img1.png" +times = "9:30" ++++ + + + +- [1. 集群间切换](#1.集群间切换) + - [1.1.主备集群状态](#1.1.主备集群状态) + - [1.2.环境配置](#1.2.环境配置) +- [2. 
主集群内切换](#2.主集群内切换) + - [2.1.创建lun](#2.1.创建lun) + - [2.2.下载源码编译](#2.2.下载源码编译) + - [2.3.环境变量](#2.3.环境变量) +- [3. 备集群内切换](#3.备集群内切换) + - [3.1.集群状态查询](#3.1.集群状态查询) + - [3.2.主集群一写多读](#3.2.主集群一写多读) + - [3.2.备集群只读](#3.2.备集群只读) + + + + + +# 资源池化支持同城dorado双集群切换 + +资源池化支持同城dorado双集群部署方式:dd模拟(手动部署+无cm)、cm模拟(手动部署dd模拟+有cm)、磁阵(手动部署)、集群管理工具部署 + + +## 1.集群间切换 + + 基于《资源池化+同城dorado双集群》部署方式,集群间切换设计如下: + +###   1.1.主备集群状态 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+| 集群 | 中心 | 节点类型 | local role | run mode |
+| ---- | ---- | -------- | ---------- | -------- |
+| 生产中心 | 主端 | 主节点0 | primary | primary (资源池化+传统主) |
+| 生产中心 | 主端 | 备节点1 | standby | normal (资源池化+传统单机) |
+| 容灾中心 | 备端 | 首备节点0 | standby | standby(资源池化+传统备) |
+| 容灾中心 | 备端 | 从备节点1 | standby | normal (资源池化+传统单机) |
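+
+上表中各节点的 local role 可以在对应数据目录上核对。下面给出一个轮询四个节点角色状态的小脚本示意(仅为示意,数据目录沿用本文 /home/omm/ss_hatest 与 /home/omm/ss_hatest1 的部署假设,gs_ctl 需已在 PATH 中):
+```
+#!/bin/bash
+# 依次查询主备集群四个节点的 local_role 与 db_state(目录为本文部署假设)
+for dn in /home/omm/ss_hatest/dn0 /home/omm/ss_hatest/dn1 \
+          /home/omm/ss_hatest1/dn0 /home/omm/ss_hatest1/dn1
+do
+    echo "== ${dn} =="
+    gs_ctl query -D "${dn}" | grep -E "local_role|db_state"
+done
+```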
+ +local role从系统函数pg_stat_get_stream_replications中获取的local_role参数: +``` +openGauss=# select * from pg_stat_get_stream_replications(); + local_role | static_connections | db_state | detail_information +------------+--------------------+----------+-------------------- + Primary | 1 | Normal | Normal +(1 row) +``` + +run mode 指数据库内核运行模式是primary还是standby还是normal,是t_thrd.postmaster_cxt.HaShmData->current_mode或t_thrd.xlog_cxt.server_mode参数指代的主备运行模式类型 + +###   1.2.failover + 基于cm模拟部署方式,因此没有管控平台切换同步复制对方向的操作。 + 双集群间failover即主集群故障,备集群升为主集群的过程,操作过程如下: + +(1) kill 主集群 + 将主集群节点全部kill掉 +(2) stop 备集群 +``` +gs_ctl stop -D /home/omm/ss_hatest1/dn0 +gs_ctl stop -D /home/omm/ss_hatest1/dn1 +``` +(3) 备集群设置cluster_run_mode +``` +gs_guc set -Z datanode -D /home/omm/ss_hatest1/dn0 -c "cluster_run_mode=cluster_primary" +``` +(4) 以主集群模式重启备集群的节点 +``` +gs_ctl start -D /home/omm/ss_hatest1/dn0 -M primary +gs_ctl start -D /home/omm/ss_hatest1/dn1 +``` +(5) 查询新主集群 +``` +gs_ctl query -D /home/omm/ss_hatest1/dn0 +``` + +###   1.2.switchover + 基于cm模拟部署方式,因此没有管控平台切换同步复制对方向的操作。 + 双集群间switchover即主集群降为备集群,备集群升为主集群的过程,操作过程如下: + +(1) stop 主集群 +``` +gs_ctl stop -D /home/omm/ss_hatest/dn0 +gs_ctl stop -D /home/omm/ss_hatest/dn1 +``` +(2) stop 备集群 +``` +gs_ctl stop -D /home/omm/ss_hatest1/dn0 +gs_ctl stop -D /home/omm/ss_hatest1/dn1 +``` +(3) 备集群设置cluster_run_mode +``` +gs_guc set -Z datanode -D /home/omm/ss_hatest1/dn0 -c "cluster_run_mode=cluster_primary" +``` +(4) 以主集群模式重启备集群的节点 +``` +gs_ctl start -D /home/omm/ss_hatest1/dn0 -M primary +gs_ctl start -D /home/omm/ss_hatest1/dn1 +``` +(5) 查询新主集群 +``` +gs_ctl query -D /home/omm/ss_hatest1/dn0 +``` +(6) 主集群设置cluster_run_mode=cluster_standby +``` +gs_guc set -Z datanode -D /home/zx/ss_hatest/dn0 -c "cluster_run_mode=cluster_standby" +``` +(7) 以备集群模式重启备集群的节点 +``` +gs_ctl start -D /home/omm/ss_hatest/dn0 -M standby +gs_ctl start -D /home/omm/ss_hatest/dn1 +``` +(8) 查询新备集群 +``` +gs_ctl query -D /home/omm/ss_hatest/dn0 +``` + +## 2. 
主集群内切换 + + +###   2.1.failover + 基于cm模拟部署方式 + 主集群内failover即主集群主节点降为备节点,备节点升为主节点的过程,操作过程如下: + + (1) 检查节点状态 + 查询状态 +``` +主集群主节点0 +gs_ctl query -D /home/omm/ss_hatest/dn0 +HA state: + local_role : Primary + static_connections : 1 + db_state : Normal + detail_information : Normal + + Senders info: + sender_pid : 1456376 + local_role : Primary + peer_role : StandbyCluster_Standby + peer_state : Normal + state : Streaming + sender_sent_location : 2/5C8 + sender_write_location : 2/5C8 + sender_flush_location : 2/5C8 + sender_replay_location : 2/5C8 + receiver_received_location : 2/5C8 + receiver_write_location : 2/5C8 + receiver_flush_location : 2/5C8 + receiver_replay_location : 2/5C8 + sync_percent : 100% + sync_state : Async + sync_priority : 0 + sync_most_available : Off + channel : 127.0.0.1:6600-->127.0.0.1:43350 + + Receiver info: +No information + +主集群备节点1 +gs_ctl query -D /home/omm/ss_hatest/dn1 +HA state: + local_role : Standby + static_connections : 0 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: +No information + +备集群首备节点0 +gs_ctl query -D /home/omm/ss_hatest1/dn0 +HA state: + local_role : Standby + static_connections : 1 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: + receiver_pid : 1901181 + local_role : Standby + peer_role : Primary + peer_state : Normal + state : Normal + sender_sent_location : 2/A458 + sender_write_location : 2/A458 + sender_flush_location : 2/A458 + sender_replay_location : 2/A458 + receiver_received_location : 2/A458 + receiver_write_location : 2/A458 + receiver_flush_location : 2/A458 + receiver_replay_location : 2/A458 + sync_percent : 100% + channel : 127.0.0.1:41952<--127.0.0.1:6600 + +备集群备节点1 +gs_ctl query -D /home/omm/ss_hatest1/dn1 +HA state: + local_role : Standby + static_connections : 0 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: +No information + +``` + + (2) 配置参数 + 主集群节点的postgresql.conf文件 +``` +主集群主节点0 +port = 6600 +xlog_file_path = '/home/zx/ss_hatest/dorado_shared_disk' +xlog_lock_file_path = '/home/zx/ss_hatest/shared_lock_primary' +application_name = 'dn_master_0' +cross_cluster_replconninfo1='localhost=127.0.0.1 localport=6600 remotehost=127.0.0.1 remoteport=9600' +cross_cluster_replconninfo2='localhost=127.0.0.1 localport=6600 remotehost=127.0.0.1 remoteport=9700' +cluster_run_mode = 'cluster_primary' +ha_module_debug = off +ss_log_level = 255 +ss_log_backup_file_count = 100 +ss_log_max_file_size = 1GB + +主集群备节点1 +port = 6700 +xlog_file_path = '/home/zx/ss_hatest/dorado_shared_disk' +xlog_lock_file_path = '/home/zx/ss_hatest/shared_lock_primary' +application_name = 'dn_master_1' +cross_cluster_replconninfo1='localhost=127.0.0.1 localport=6700 remotehost=127.0.0.1 remoteport=9600' +cross_cluster_replconninfo2='localhost=127.0.0.1 localport=6700 remotehost=127.0.0.1 remoteport=9700' +cluster_run_mode = 'cluster_primary' +ha_module_debug = off +ss_log_level = 255 +ss_log_backup_file_count = 100 +ss_log_max_file_size = 1GB +``` + 备集群节点的postgresql.conf文件 +``` +备集群首备节点0 +port = 9600 +xlog_file_path = '/home/zx/ss_hatest/dorado_shared_disk' +xlog_lock_file_path = '/home/zx/ss_hatest/shared_lock_primary' +application_name = 'dn_standby_0' +cross_cluster_replconninfo1='localhost=127.0.0.1 localport=9600 remotehost=127.0.0.1 remoteport=6600' +cross_cluster_replconninfo2='localhost=127.0.0.1 localport=9600 remotehost=127.0.0.1 remoteport=6700' +cluster_run_mode = 'cluster_standby' 
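+# (补充注释)xlog_file_path 与 xlog_lock_file_path 在主备集群所有节点上必须
+# 指向同一份共享盘文件;cluster_run_mode 以 cluster_primary/cluster_standby
+# 区分主备集群,是建立容灾关系的前提参数(详见下文说明)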
+ha_module_debug = off +ss_log_level = 255 +ss_log_backup_file_count = 100 +ss_log_max_file_size = 1GB + +备集群备节点1 +port = 9700 +xlog_file_path = '/home/zx/ss_hatest/dorado_shared_disk' +xlog_lock_file_path = '/home/zx/ss_hatest/shared_lock_primary' +application_name = 'dn_standby_1' +cross_cluster_replconninfo1='localhost=127.0.0.1 localport=9700 remotehost=127.0.0.1 remoteport=6600' +cross_cluster_replconninfo2='localhost=127.0.0.1 localport=9700 remotehost=127.0.0.1 remoteport=6700' +cluster_run_mode = 'cluster_standby' +ha_module_debug = off +ss_log_level = 255 +ss_log_backup_file_count = 100 +ss_log_max_file_size = 1GB +``` + + 双集群所有节点必须提前都配置xlog_file_path、xlog_lock_file_path、cross_cluster_replconninfo1、cluster_run_mode这些容灾关系建立的参数 + + (3) 导入用于切换的环境变量CM_CONFIG_PATH +``` +export CM_CONFIG_PATH=/opt/omm/openGauss-server/src/test/ss/cm_config.ini +``` + + (4) 模拟failover ++ 当前节点0是主节点,kill -9 pid (pid是主节点0的进程号) ++ 修改 cm_config.ini + ``` + REFORMER_ID = 1 + BITMAP_ONLINE = 2 + ``` + +**说明**:模拟主节点0故障,REFORMER_ID模拟reform锁被备节点1抢到,即为将要做failover的节点,BITMAP_ONLINE模拟cm获取的在线节点是节点1(bitmap = 2 = 0b10) + +###   2.1.failover + 基于cm模拟部署方式 + 主集群内failover即主集群主节点降为备节点,备节点升为主节点的过程,操作过程如下: + + (1) 检查节点状态 +同failover检查一致 + + (2) 配置参数 +同failover配置一致 + + (3) 执行switchover命令 + +``` +[omm@nodename dn0]$ gs_ctl switchover -D /home/zx/ss_hatest/dn1 +[2023-04-24 15:49:04.785][3815633][][gs_ctl]: gs_ctl switchover ,datadir is /home/zx/ss_hatest/dn1 +[2023-04-24 15:49:04.786][3815633][][gs_ctl]: switchover term (1) +[2023-04-24 15:49:04.954][3815633][][gs_ctl]: waiting for server to switchover....[2023-04-24 15:49:06.122][3815633][][gs_ctl]: Getting state from gaussdb.state! +.[2023-04-24 15:49:07.123][3815633][][gs_ctl]: Getting state from gaussdb.state! +.[2023-04-24 15:49:08.125][3815633][][gs_ctl]: Getting state from gaussdb.state! +.[2023-04-24 15:49:09.126][3815633][][gs_ctl]: Getting state from gaussdb.state! +.[2023-04-24 15:49:10.198][3815633][][gs_ctl]: Getting state from gaussdb.state! +... 
+[2023-04-24 15:49:13.353][3815633][][gs_ctl]: done +[2023-04-24 15:49:13.353][3815633][][gs_ctl]: switchover completed (/home/zx/ss_hatest/dn1) +``` + +**说明**:/home/zx/ss_hatest/dn1是主集群备节点1的数据库,做switchover将主集群主节点0降备,将主集群备节点1升主 + +查看目录/opt/omm/openGauss-server/src/test/ss/: +``` +[omm@nodename ss]$ ll +总用量 56 +-rwxrwxrwx 1 zx zx 3749 4月 24 14:29 build_ss_database_common.sh +-rwxrwxrwx 1 zx zx 2952 4月 24 14:29 build_ss_database.sh +-rw------- 1 zx zx 34 4月 24 15:49 cm_config.ini +-rw------- 1 zx zx 33 4月 24 15:49 cm_config.ini_bak +``` +cm_config.ini是switchcover后的新生成的集群列表,主节点REFORMER_ID是1 +``` +BITMAP_ONLINE = 3 +REFORMER_ID = 1 +``` + +cm_config.ini_bak是switchcover前的集群列表,主节点REFORMER_ID是0 +``` +REFORMER_ID = 0 +BITMAP_ONLINE = 3 +``` + + (4) 双集群状态查询 +``` +主集群备节点0 +[omm@nodename dn0]$ gs_ctl query -D /home/zx/ss_hatest/dn0 +[2023-04-24 15:52:33.134][3862235][][gs_ctl]: gs_ctl query ,datadir is /home/zx/ss_hatest/dn0 + HA state: + local_role : Standby + static_connections : 2 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: +No information + +主集群主节点1 +[zx@node1host54 dn0]$ gs_ctl query -D /home/zx/ss_hatest/dn1 +[2023-04-24 15:52:35.777][3862851][][gs_ctl]: gs_ctl query ,datadir is /home/zx/ss_hatest/dn1 + HA state: + local_role : Primary + static_connections : 2 + db_state : Normal + detail_information : Normal + + Senders info: + sender_pid : 3817397 + local_role : Primary + peer_role : StandbyCluster_Standby + peer_state : Normal + state : Streaming + sender_sent_location : 2/43EA678 + sender_write_location : 2/43EA678 + sender_flush_location : 2/43EA678 + sender_replay_location : 2/43EA678 + receiver_received_location : 2/43EA678 + receiver_write_location : 2/43EA678 + receiver_flush_location : 2/43EA678 + receiver_replay_location : 2/43EA678 + sync_percent : 100% + sync_state : Async + sync_priority : 0 + sync_most_available : Off + channel : 127.0.0.1:9700-->127.0.0.1:37904 + + Receiver info: +No information + +备集群首备节点0 +[zx@node1host54 pg_log]$ gs_ctl query -D /home/zx/ss_hatest1/dn0 +[2023-04-24 15:53:44.305][3878378][][gs_ctl]: gs_ctl query ,datadir is /home/zx/ss_hatest1/dn0 + HA state: + local_role : Standby + static_connections : 2 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: + receiver_pid : 3816277 + local_role : Standby + peer_role : Primary + peer_state : Normal + state : Normal + sender_sent_location : 2/43EA798 + sender_write_location : 2/43EA798 + sender_flush_location : 2/43EA798 + sender_replay_location : 2/43EA798 + receiver_received_location : 2/43EA798 + receiver_write_location : 2/43EA798 + receiver_flush_location : 2/43EA798 + receiver_replay_location : 2/43EA798 + sync_percent : 100% + channel : 127.0.0.1:37904<--127.0.0.1:9700 + +备集群从备节点1 +[omm@nodename pg_log]$ gs_ctl query -D /home/zx/ss_hatest1/dn1 +[2023-04-24 15:53:46.779][3879076][][gs_ctl]: gs_ctl query ,datadir is /home/zx/ss_hatest1/dn1 + HA state: + local_role : Standby + static_connections : 1 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: +No information +``` + +**说明**:switchover成功后,备集群的首备节点0与主集群新主节点1容灾关系自动连接成功,同步复制功能正常,备集群首备回放正常 + +***Notice:不推荐直接用于生产环境*** diff --git "a/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\270\200)\344\271\213dd\346\250\241\346\213\237.md" 
"b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\270\200)\344\271\213dd\346\250\241\346\213\237.md" index 73134f5f..d540bb32 100644 --- "a/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\270\200)\344\271\213dd\346\250\241\346\213\237.md" +++ "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\270\200)\344\271\213dd\346\250\241\346\213\237.md" @@ -14,20 +14,16 @@ times = "9:30" - [1. 环境描述](#1.环境描述) - [1.1.组网方式](#1.1.组网方式) - [1.2.环境配置](#1.2.环境配置) - - [1.3.系统目录软链接](#1.3.系统目录软链接) -- [2. 编译三方库](#2.编译三方库) - - [2.1.编译步骤综述](#2.1.编译步骤概述) - - [2.2.依赖库](#2.2.依赖库) - - [2.3.源码脚本修改](#2.3.源码脚本修改) - - [2.4.三方库编译及结果](#2.4.三方库编译及结果) -- [3. 编译数据库](#3.编译数据库) - - [3.1.准备openGauss-server源码以及代码修改](#3.1.准备openGauss-server源码以及代码修改) - - [3.2.环境变量](#3.2.环境变量) - - [3.3.数据库编译与打包](#3.3.数据库编译与打包) -- [4. 安装数据库](#4.安装数据库) - - [4.1.编译安装](#4.1.编译安装) - - [4.2.OM安装](#4.2.OM安装) -- [5. 下载链接](#5.下载链接) +- [2. 环境搭建](#2.环境搭建) + - [2.1.创建lun](#2.1.创建lun) + - [2.2.下载源码编译](#2.2.下载源码编译) + - [2.3.环境变量](#2.3.环境变量) + - [2.4.dss配置-dd模拟](#2.4.dss配置-dd模拟) + - [2.5.数据库部署](#2.5.数据库部署) +- [3. 主备集群功能验证](#3.主备集群功能验证) + - [3.1.集群状态查询](#3.1.集群状态查询) + - [3.2.主集群一写多读](#3.2.主集群一写多读) + - [3.2.备集群只读](#3.2.备集群只读) @@ -35,12 +31,12 @@ times = "9:30" # 资源池化支持同城dorado双集群部署(一)----dd模拟 -资源池化支持同城dorado双集群部署方式:dd模拟(手动部署)、磁阵(手动部署)、集群管理工具部署 +资源池化支持同城dorado双集群部署方式:dd模拟(手动部署+无cm)、cm模拟(手动部署dd模拟+有cm)、磁阵(手动部署)、集群管理工具部署 ## 1.环境描述 - 针对dd模拟(手动部署)作出指导,环境描述如下: + 针对dd模拟(手动部署+无cm)作出指导,环境描述如下: ###   1.1.组网方式 @@ -72,7 +68,7 @@ times = "9:30" ## 2. 
环境搭建 -针对资源池化双集群部署之《资源池化dd模拟搭建(手动部署) + dorado同步复制》作出指导,环境搭建如下: +针对资源池化双集群部署之《资源池化dd模拟搭建(手动部署) + dorado同步复制》作出指导,无cm部署,环境搭建如下: ###   2.1.创建lun @@ -236,10 +232,10 @@ function create_one_device() touch ${DSS_HOME_ONE}/cfg/dss_vg_conf.ini echo "data:${DSS_HOME_ONE}/dss-data" > ${DSS_HOME_ONE}/cfg/dss_vg_conf.ini echo "INST_ID = 0" > ${DSS_HOME_ONE}/cfg/dss_inst.ini - echo "_LOG_BACKUP_FILE_COUNT = 128" > ${DSS_HOME_ONE}/cfg/dss_inst.ini - echo "_LOG_MAX_FILE_SIZE = 20M" > ${DSS_HOME_ONE}/cfg/dss_inst.ini - echo "LSNR_PATH = ${DSS_HOME_ONE}" > ${DSS_HOME_ONE}/cfg/dss_inst.ini - echo "_log_LEVEL = 255" > ${DSS_HOME_ONE}/cfg/dss_inst.ini + echo "_LOG_BACKUP_FILE_COUNT = 128" >> ${DSS_HOME_ONE}/cfg/dss_inst.ini + echo "_LOG_MAX_FILE_SIZE = 20M" >> ${DSS_HOME_ONE}/cfg/dss_inst.ini + echo "LSNR_PATH = ${DSS_HOME_ONE}" >> ${DSS_HOME_ONE}/cfg/dss_inst.ini + echo "_log_LEVEL = 255" >> ${DSS_HOME_ONE}/cfg/dss_inst.ini dd if=/dev/zero of=${DSS_HOME_ONE}/dss-data bs=100k count=1048576 >/dev/null 2>&1 } @@ -252,10 +248,10 @@ function create_two_device() touch ${DSS_HOME_TWO}/cfg/dss_vg_conf.ini echo "data:${DSS_HOME_ONE}/dss-data" > ${DSS_HOME_TWO}/cfg/dss_vg_conf.ini echo "INST_ID = 1" > ${DSS_HOME_TWO}/cfg/dss_inst.ini - echo "_LOG_BACKUP_FILE_COUNT = 128" > ${DSS_HOME_TWO}/cfg/dss_inst.ini - echo "_LOG_MAX_FILE_SIZE = 20M" > ${DSS_HOME_TWO}/cfg/dss_inst.ini - echo "LSNR_PATH = ${DSS_HOME_TWO}" > ${DSS_HOME_TWO}/cfg/dss_inst.ini - echo "_log_LEVEL = 255" > ${DSS_HOME_TWO}/cfg/dss_inst.ini + echo "_LOG_BACKUP_FILE_COUNT = 128" >> ${DSS_HOME_TWO}/cfg/dss_inst.ini + echo "_LOG_MAX_FILE_SIZE = 20M" >> ${DSS_HOME_TWO}/cfg/dss_inst.ini + echo "LSNR_PATH = ${DSS_HOME_TWO}" >> ${DSS_HOME_TWO}/cfg/dss_inst.ini + echo "_log_LEVEL = 255" >> ${DSS_HOME_TWO}/cfg/dss_inst.ini } function create_vg() @@ -284,22 +280,22 @@ esle fi ``` - @Notice Thing!@:主备集群都执行dss_autoscript.sh脚本配置dss + @Notice Thing!@:主备集群都执行dss_autoscript.sh脚本配置dss, 用户需要自行修改脚本中的/opt/omm/ss_env0环境变量、DSS_HOME_ONE 和 DSS_HOME_TWO目录,将其配置成自己的目录。 -###   2.4 数据库部署 -####    2.4.1 主集群(生产中心) +###   2.5 数据库部署 +####    2.5.1 主集群(生产中心)  (1) 主集群主节点0初始化  @Precondition!@:节点0对应的dssserver必须提前拉起,即dsserver进程存在 ``` -gs_initdb -D /opt/omm/cluster/dn0 --nodename=node1 -U omm -w opengauss@123 --vgname=+data --enable-dss --dma-url="0:10.10.10.10:4411,1:10.10.10.10:4412" -I 0 --socketpath='UDS:/opt/omm/cluster/ss_data/dss_home0/.dss_unix_d_socket' -d -n -g /dev/sdj +gs_initdb -D /opt/omm/cluster/dn0 --nodename=node1 -U omm -w opengauss@123 --vgname=+data --enable-dss --dms_url="0:10.10.10.10:4411,1:10.10.10.10:4412" -I 0 --socketpath='UDS:/opt/omm/cluster/ss_data/dss_home0/.dss_unix_d_socket' -d -n -g /dev/sdj ``` (2)配置主集群主节点0  postgresql.conf文件 ``` port = 44100 -listen_address = 'localhost, 10.10.10.10' +listen_addresses = 'localhost, 10.10.10.10' ss_enable_reform = off xlog_file_path = '/dev/sdj' xlog_lock_file_path = '/opt/omm/cluster/dn0/redolog.lock' @@ -311,11 +307,11 @@ ss_log_backup_file_count = 100 ss_log_max_file_size = 1GB ```  参数解释: -+ ss_enable_reform ++ ss_enable_reform dms reform功能,没有cm的情况下,设置该参数为off + xlog_file_path -+ xlog_lock_file_path -+ cross_cluster_replconninfo1 -+ cluster_run_mode ++ xlog_lock_file_path 配置dorado xlog lock文件,不需要手动创建,启动时会自动创建 ++ cross_cluster_replconninfo1 配置dorado双集群复制连接通道 ++ cluster_run_mode 运行集群模式,是主集群,还是备集群  pg_hba.conf文件 @@ -329,9 +325,17 @@ host all all 10.10.10.20/32 sha256 (3)主集群备节点1初始化 ``` -gs_initdb -D /opt/omm/cluster/dn1 --nodename=node2 -U omm -w opengauss@123 --vgname=+data --enable-dss 
--dma-url="0:10.10.10.10:4411,1:10.10.10.10:4412" -I 1 --socketpath='UDS:/opt/omm/cluster/ss_data/dss_home1/.dss_unix_d_socket' +gs_initdb -D /opt/omm/cluster/dn1 --nodename=node2 -U omm -w opengauss@123 --vgname=+data --enable-dss --dms_url="0:10.10.10.10:4411,1:10.10.10.10:4412" -I 1 --socketpath='UDS:/opt/omm/cluster/ss_data/dss_home1/.dss_unix_d_socket' ``` +主集群备节点1配置参数 +port = 48100 +listen_addresses = 'localhost, 10.10.10.10' +ss_enable_reform = off +ss_log_level = 255 +ss_log_backup_file_count = 100 +ss_log_max_file_size = 1GB + (4)主集群启动 ``` 主节点0启动 @@ -343,10 +347,10 @@ gs_ctl start -D /opt/omm/cluster/dn0 ``` -####    2.4.2 备集群(容灾中心) +####    2.5.2 备集群(容灾中心)  (1) 备集群首备节点0初始化 ``` -gs_initdb -D /opt/omm/cluster/dn0 --nodename=node1 -U omm -w opengauss@123 --vgname=+data --enable-dss --dma-url="0:10.10.10.20:4411,1:10.10.10.20:4412" -I 0 --socketpath='UDS:/opt/omm/cluster/ss_data/dss_home0/.dss_unix_d_socket' -d -n -g /dev/sdi +gs_initdb -D /opt/omm/cluster/dn0 --nodename=node1 -U omm -w opengauss@123 --vgname=+data --enable-dss --dms_url="0:10.10.10.20:4411,1:10.10.10.20:4412" -I 0 --socketpath='UDS:/opt/omm/cluster/ss_data/dss_home0/.dss_unix_d_socket' -d -n -g /dev/sdi ```  (2) 配置备集群首备节点0 @@ -354,7 +358,7 @@ gs_initdb -D /opt/omm/cluster/dn0 --nodename=node1 -U omm -w opengauss@123 --vgn  postgresql.conf文件 ``` port = 44100 -listen_address = 'localhost, 10.10.10.20' +listen_addresses = 'localhost, 10.10.10.20' ss_enable_reform = off xlog_file_path = '/dev/sdi' xlog_lock_file_path = '/opt/omm/cluster/dn0/redolog.lock' @@ -390,7 +394,7 @@ gs_ctl build -D /opt/omm/cluster/dn0 -b cross_cluster_full -g 0 --vgname=+data - ``` 参数解释: + -b cross_cluster_full -+ -g 0 ++ -g 0 指资源池化的节点0,表明是对节点0进行build + -q  (4)备集群从备节点1初始化 @@ -400,6 +404,14 @@ gs_ctl build -D /opt/omm/cluster/dn0 -b cross_cluster_full -g 0 --vgname=+data - gs_initdb -D /opt/omm/cluster/dn1 --nodename=node2 -U omm -w opengauss@123 --vgname=+data --enable-dss --dma-url="0:10.10.10.20:4411,1:10.10.10.20:4412" -I 1 --socketpath='UDS:/opt/omm/cluster/ss_data/dss_home1/.dss_unix_d_socket' ``` +备集群从备节点1配置参数 +port = 48100 +listen_addresses = 'localhost, 10.10.10.20' +ss_enable_reform = off +ss_log_level = 255 +ss_log_backup_file_count = 100 +ss_log_max_file_size = 1GB +  (5)备集群启动 ``` 首备节点0启动 @@ -508,7 +520,7 @@ No information No information ``` -###   3.1.主集群一写多读 +###   3.2.主集群一写多读 ``` 主集群主节点0执行 gsql -d postgres -p 44100 -r @@ -522,7 +534,7 @@ gsql -d postgres -p 48100 -r select * from test01; ``` -###   3.1.备集群只读 +###   3.3.备集群只读 ``` 备集群首备节点0查询,可查询到主节点0创建的表和数据 gsql -d postgres -p 44100 -r diff --git "a/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\272\214)\344\271\213cm\346\250\241\346\213\237.md" "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\272\214)\344\271\213cm\346\250\241\346\213\237.md" new file mode 100644 index 00000000..a16c0d98 --- /dev/null +++ "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\272\214)\344\271\213cm\346\250\241\346\213\237.md" @@ -0,0 +1,599 @@ ++++ +title = "资源池化支持同城dorado双集群部署(二)----cm模拟" +date = "2023-04-01" +tags = ["资源池化支持同城dorado双集群部署(二)----cm模拟"] +archives = 
"2023-04-01" +author = "shirley_zhengx" +summary = "资源池化支持同城dorado双集群部署(二)----cm模拟" +img = "/zh/post/zhengxue/title/img1.png" +times = "9:30" ++++ + + + +- [1. 环境描述](#1.环境描述) + - [1.1.组网方式](#1.1.组网方式) + - [1.2.环境配置](#1.2.环境配置) +- [2. 环境搭建](#2.环境搭建) + - [2.1.创建lun](#2.1.创建lun) + - [2.2.下载源码编译](#2.2.下载源码编译) + - [2.3.环境变量](#2.3.环境变量) + - [2.4.dss配置-dd模拟](#2.4.dss配置-dd模拟) + - [2.5.数据库部署](#2.5.数据库部署) +- [3. 主备集群功能验证](#3.主备集群功能验证) + - [3.1.集群状态查询](#3.1.集群状态查询) + - [3.2.主集群一写多读](#3.2.主集群一写多读) + - [3.2.备集群只读](#3.2.备集群只读) + + + + + +# 资源池化支持同城dorado双集群部署(二)----cm模拟 + +资源池化支持同城dorado双集群部署方式:dd模拟(手动部署+无cm)、cm模拟(手动部署dd模拟+有cm)、磁阵(手动部署)、集群管理工具部署 + + +## 1.环境描述 + + 针对cm模拟(手动部署dd模拟+有cm)作出指导,环境描述如下: + +###   1.1.组网方式 +
+| 中心 | 业务计算节点 | 存储节点 |
+| ---- | ------------ | -------- |
+| 生产中心(主端) | 业务计算节点0 | 主存储节点Dorado |
+| 生产中心(主端) | 业务计算节点1 | 主存储节点Dorado |
+| 容灾中心(备端) | 业务计算节点0 | 备存储节点Dorado |
+| 容灾中心(备端) | 业务计算节点1 | 备存储节点Dorado |
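+
+组网就绪后,可先在每个业务计算节点上自检存储映射是否可见(命令与下文 2.1 节一致,仅为自检示意,盘符以实际映射为准):
+```
+rescan-scsi-bus.sh      # 重新扫描 SCSI 总线,识别新映射的 LUN
+upadmin show vlun       # 通过多路径软件查看虚拟 LUN
+lsscsi -is              # 列出 SCSI 设备、盘符及容量
+```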
+ + ** 缺个图,后面补充哈!!!** +###   1.2.环境配置 + + 支持存储远程复制 + + +## 2. 环境搭建 + +针对资源池化双集群部署之《资源池化cm模拟搭建(手动部署dd模拟+有cm) + dorado同步复制》作出指导,模拟cm部署,环境搭建如下: + +###   2.1.创建lun + +(1) 主存储创建lun组和lun + 主存储管控平台(DeviceManager)登录:https://主存储ip:8088 + 在管控平台上创建lun组和lun,并映射到主机之后,在业务节点上查看挂好的lun + +(2) 主存储对应的业务计算节点上查看挂好的lun +``` +rescan-scsi-bus.sh upadmin show vlun lsscsi -is +``` + +![](../images/dd模拟/lun查询.png) + +lun说明: LUNGroup-zx01-doradoxlog 指dorado同步复制需要的lun(可以理解为共享盘,盘符/dev/sdj),暂不关注LUNGroup-zx01-dmsdata和LUNGroup-zx01-dmsxlog0,这两个是用于磁阵搭建资源池化集群 + +修改盘符属组 +``` +chown zx:zx /dev/sgj +``` + +(3) 创建同步复制对 +  在主存储管控平台(DeviceManager)登录:https://主存储ip:8088 +  data protection -> luns -> remote replication pairs(远程复制对) -> create ->选择主存储需要同步复制给备存储的lun -> next +  请原谅这里截图工具的搓,标记笔太难用了,画的蓝圈圈很丑但很个性! +![](../images/dd模拟/远程复制对创建.png) + +选择同步 -> Automatic -> 备存储的存储池名称 -> next +![](../images/dd模拟/远程复制对创建1.png) + +(4) 备存储创建lun组和lun + 备存储管控平台(DeviceManager)登录:https://备存储ip:8088 + 在管控平台上创建lun组,并映射到主机 + +由于第(3)步创建了远程复制对,会自动在备存储上创建同步复制对应的lun,名字和主存储一致(即备存储上会有一样的lun名字为LUNGroup-zx01-doradoxlog),在备存储管控平台上查看如下图 +![](../images/dd模拟/备存储doradoxlog.png) + +(5) 备存储上的lun进行映射 +方式1:直接映射到业务计算节点上,不需要提前创建lun组,如果存在多个业务接待你需要映射到每一个业务节点上 +选择more -> Map -> node2host01 -> ok 将dorado同步复制功能需要的lun映射到备集群业务节点上 +![](../images/dd模拟/备存储映射节点.png) + + @温馨提示@:node2host01指为业务节点ip创建的机器名称,名字可自行定义 + + 方式2:在lun组中添加该lun,要提前创建lun组,直接会映射到lun组中的所有业务节点上 + +(6) 备存储对应的业务计算节点上查看挂好的lun +``` +rescan-scsi-bus.sh upadmin show vlun lsscsi -is +``` + +![](../images/dd模拟/备存储业务节点lun查询.png) + + lun说明: LUNGroup-zx01-doradoxlog 指dorado同步复制需要的lun(可以理解为共享盘,盘符/dev/sdi) + +修改盘符属组 +``` +chown zx:zx /dev/sgi +``` +###   2.2.下载源码编译 + 需要重新编译代码,不能跳过该步骤,步骤如下: + +(1) 下载三方库 + 根据平台操作系统下载对应三方库,三方库下载地址:https://gitee.com/opengauss/openGauss-server 主页上README.md中查找需要的三方库binarylibs + + 获取master分支openEuler_x86系统对应的三方库 +``` +wget https://opengauss.obs.cn-south-1.myhuaweicloud.com/latest/binarylibs/openGauss-third_party_binarylibs_openEuler_x86_64.tar.gz +``` + +(2) 下载cbb并编译 +``` +git clone https://gitee.com/opengauss/CBB.git -b master cbb +cd cbb/build/linux/opengauss +sh build.sh -3rd $binarylibsDir -m Debug +``` + 编译成功会自动将二进制放入三方库openGauss-third_party_binarylibs_openEuler_x86_64/kernel/component目录下 + +(3) 下载dss并编译 +``` +git clone https://gitee.com/opengauss/DSS.git -b master dss +cd dss/build/linux/opengaussDSS +sh build.sh -3rd $binarylibsDir -m DebugDsstest +``` + +(4) 下载dms并编译 +``` +git clone https://gitee.com/opengauss/DMS.git -b master dms + +cd dms/build/linux/opengauss + +sh build.sh -3rd $binarylibsDir -m DMSTest +``` + +(5) 下载openGauss-server并编译 + 编译过程需要cbb、dss、dms的二进制,会从openGauss-third_party_binarylibs_openEuler_x86_64/kernel/component中获取 +``` +git clone https://gitee.com/opengauss/openGauss-server.git -b master openGauss-server + +修改dms代码:vim src/gausskernel/storage/dss/fio_dss.cpp +int dss_set_server_status_wrapper() +{ + // return g_dss_device_op.dss_set_main_inst(); + return GS_SUCCESS; +} + +sh build.sh -3rd $binarylibsDir -m Debug +``` + 编译完之后的二进制存放在openGauss-server/mppdb_temp_install/目录下 + + +###   2.3.环境变量 +由于机器资源不足,这里以一个业务计算服务器上部署双集群,主备集群都是一主一备 +(1) 主集群环境变量ss_env0 + +环境变量 +``` +export HOME=/opt/omm +export GAUSSHOME=${HOME}/openGauss-server/mppdb_temp_install/ +export LD_LIBRARY_PATH=$GAUSSHOME/lib:$LD_LIBRARY_PATH +export PATH=$GAUSSHOME/bin:$PATH +export DSS_HOME=/home/omm/ss_hatest/dss_home0 +export CM_CONFIG_PATH=/opt/omm/openGauss-server/src/test/ss/cm_config.ini +``` +`Tips`: 环境变量里面一定要写export,即使`echo $GCC_PATH`存在,也要写export才能真正导入路径 + +参数说明: +HOME 为用户自己创建的工作目录; +GAUSSHOME 
为编译完成的目标文件路径,包含openGauss的bin、lib等; +CM_CONFIG_PATH 用于主集群cm模拟部署下的集群内节点切换 + +(2) 备集群环境变量ss_env1 +``` +export HOME=/opt/omm +export GAUSSHOME=${HOME}/openGauss-server/mppdb_temp_install/ +export LD_LIBRARY_PATH=$GAUSSHOME/lib:$LD_LIBRARY_PATH +export PATH=$GAUSSHOME/bin:$PATH +export DSS_HOME=/home/omm/ss_hatest/dss_home0 +export CM_CONFIG_PATH=/opt/omm/openGauss-server/src/test/ss/cm_config_standby.ini +``` + +###   2.4.资源池化双集群部署 + 通过源码中的ha_test.sh脚本搭建 + (1) 资源池化双集群cm模拟部署 +  注释ha_test.sh倒数2行 +![](./images/cm模拟/主集群ha_test.png) + +``` +sh ha_test.sh dual_cluster +``` + + ha_test.sh脚本适配了双集群模拟, 执行的时候带上dual_cluster就是双集群,不带就是单集群。脚本会自动将数据库拉起,执行完该脚本后,就相当于部署了2套独立的资源池化 + + (2) 集群状态查询 +因为是在一个机器上模拟双集群,所以开两个窗口,一个窗口导入主集群环境变量ss_env0,一个窗口导入备集群环境变量ss_env1 +``` +主集群节点0 +[omm@nodename dn0]$ gs_ctl query -D /home/omm/ss_hatest/dn0 +[2023-04-15 15:34:21.475][568656][][gs_ctl]: gs_ctl query ,datadir is /home/omm/ss_hatest/dn0 + HA state: + local_role : Primary + static_connections : 1 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: +No information + +主集群备节点1 +[omm@nodename dn1]$ gs_ctl query -D /home/omm/ss_hatest/dn1 +[2023-04-15 15:34:21.475][568656][][gs_ctl]: gs_ctl query ,datadir is /home/omm/ss_hatest/dn1 + HA state: + local_role : Standby + static_connections : 1 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: +No information + +备集群节点0 +[omm@nodename dn0]$ gs_ctl query -D /home/omm/ss_hatest1/dn0 +[2023-04-15 15:34:21.475][568656][][gs_ctl]: gs_ctl query ,datadir is /home/omm/ss_hatest1/dn0 + HA state: + local_role : Primary + static_connections : 1 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: +No information + +备集群备节点1 +[omm@nodename dn1]$ gs_ctl query -D /home/omm/ss_hatest1/dn1 +[2023-04-15 15:34:21.475][568656][][gs_ctl]: gs_ctl query ,datadir is /home/omm/ss_hatest1/dn1 + HA state: + local_role : Standby + static_connections : 1 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: +No information + +``` +###   2.5 资源池化双集群容灾搭建 +####   2.5.1 手动容灾搭建 +#####    2.5.1.1 主集群(生产中心) +(1) 配置主集群主节点0的dorado容灾参数 + postgresql.conf文件 +``` +port = 6600 +xlog_file_path = '/home/zx/ss_hatest/dorado_shared_disk' +xlog_lock_file_path = '/home/zx/ss_hatest/shared_lock_primary' +application_name = 'dn_master_0' +cross_cluster_replconninfo1='localhost=127.0.0.1 localport=6600 remotehost=127.0.0.1 remoteport=9600' +cross_cluster_replconninfo1='localhost=127.0.0.1 localport=6600 remotehost=127.0.0.1 remoteport=9700' +cluster_run_mode = 'cluster_primary' +ha_module_debug = off +ss_log_level = 255 +ss_log_backup_file_count = 100 +ss_log_max_file_size = 1GB +``` + +``` +xlog_file_size = 68719476736 +``` + pg_hba.conf文件 +``` +host all omm 10.10.10.10/32 trust +host all omm 10.10.10.20/32 trust + +host all all 10.10.10.10/32 sha256 +host all all 10.10.10.20/32 sha256 +``` + +(2) 以primary模式重启主集群主节点0 +``` +gs_ctl start -D /home/omm/ss_hatest/dn0 -M primary +``` +执行build前一定要给主集群主节点0配置容灾参数并以primary模式重启主集群主节点0 + + +#####    2.5.1.2 备集群(容灾中心) +(1) 配置备集群首备节点0的容灾参数 + postgresql.conf文件 +``` +port = 9600 +xlog_file_path = '/home/zx/ss_hatest/dorado_shared_disk' +xlog_lock_file_path = '/home/zx/ss_hatest/shared_lock_standby' +application_name = 'dn_standby_0' +cross_cluster_replconninfo1='localhost=127.0.0.1 localport=9600 remotehost=127.0.0.1 remoteport=6600' +cross_cluster_replconninfo1='localhost=127.0.0.1 localport=9600 
remotehost=127.0.0.1 remoteport=6700' +cluster_run_mode = 'cluster_standby' +ha_module_debug = off +ss_log_level = 255 +ss_log_backup_file_count = 100 +ss_log_max_file_size = 1GB +``` + + pg_hba.conf文件 +``` +host all omm 10.10.10.10/32 trust +host all omm 10.10.10.20/32 trust + +host all all 10.10.10.10/32 sha256 +host all all 10.10.10.20/32 sha256 +``` + +(2) 执行build +必须先执行build,否则首备节点0会报错是无效值,原因是因为备集群第一次初始化启动部署的时候,是资源池化的单机normal模式,一直为0,当主集群主节点0 + +``` +gs_ctl build -D /home/zx/ss_hatest1/dn0 -b cross_cluster_full -g 0 --vgname=+data --enable-dss --socketpath='UDS:/home/zx/ss_hatest1/dss_home0/.dss_unix_d_socket' -q +``` + +(3) 以standby模式重启备集群首备节点0 +``` +gs_ctl start -D /home/omm/ss_hatest1/dn0 -M standby +``` +####   2.5.2 自动化容灾搭建 +同2.5.1 手动容灾搭建效果一致,只是用shell脚本自动化执行 +``` +cd openGauss_server/src/test/ss/dual_cluster_single_shared_storage + +sh standby_full_build_reconnect.sh +``` + +####    2.5.3 主备集群查询 +(1) 主集群主节点 +``` +[omm@nodename dn0]$ gs_ctl query -D /home/omm/ss_hatest/dn0 +[2023-04-18 09:38:34.397][1498175][][gs_ctl]: gs_ctl query ,datadir is /home/omm/ss_hatest/dn0 + HA state: + local_role : Primary + static_connections : 1 + db_state : Normal + detail_information : Normal + + Senders info: + sender_pid : 1456376 + local_role : Primary + peer_role : StandbyCluster_Standby + peer_state : Normal + state : Streaming + sender_sent_location : 2/5C8 + sender_write_location : 2/5C8 + sender_flush_location : 2/5C8 + sender_replay_location : 2/5C8 + receiver_received_location : 2/5C8 + receiver_write_location : 2/5C8 + receiver_flush_location : 2/5C8 + receiver_replay_location : 2/5C8 + sync_percent : 100% + sync_state : Async + sync_priority : 0 + sync_most_available : Off + channel : 127.0.0.1:6600-->127.0.0.1:43350 + + Receiver info: +No information +``` + +(2) 主集群备节点 +``` +[omm@nodename pg_log]$ gs_ctl query -D /home/omm/ss_hatest/dn1 +[2023-04-18 11:42:09.475][2857388][][gs_ctl]: gs_ctl query ,datadir is /home/omm/ss_hatest/dn1 + HA state: + local_role : Standby + static_connections : 0 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: +No information +``` + +(3) 备集群首备节点 +``` +备集群首备 +[omm@nodename pg_log]$ gs_ctl query -D /home/omm/ss_hatest1/dn0 +[2023-04-18 11:33:09.288][2760315][][gs_ctl]: gs_ctl query ,datadir is /home/omm/ss_hatest1/dn0 + HA state: + local_role : Standby + static_connections : 1 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: + receiver_pid : 1901181 + local_role : Standby + peer_role : Primary + peer_state : Normal + state : Normal + sender_sent_location : 2/A458 + sender_write_location : 2/A458 + sender_flush_location : 2/A458 + sender_replay_location : 2/A458 + receiver_received_location : 2/A458 + receiver_write_location : 2/A458 + receiver_flush_location : 2/A458 + receiver_replay_location : 2/A458 + sync_percent : 100% + channel : 127.0.0.1:41952<--127.0.0.1:6600 +``` + +(4) 备集群从备节点 +``` +[omm@nodename pg_log]$ gs_ctl query -D /home/omm/ss_hatest1/dn1 +[2023-04-18 11:42:09.475][2857388][][gs_ctl]: gs_ctl query ,datadir is /home/omm/ss_hatest1/dn1 + HA state: + local_role : Standby + static_connections : 0 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: +No information +``` + +## 3. 
主备集群功能验证 +###   3.1.集群状态查询 +``` +主集群主节点0查询结果 +gs_ctl query -D /opt/omm/cluster/dn0 +[2023-04-03 19:29:20.472][1324519][][gs_ctl]: gs_ctl query ,datadir is /opt/omm/cluster/dn0 + HA state: + local_role : Primary + static_connections : 1 + db_state : Normal + detail_information : Normal + + Senders info: + sender_pid : 1324039 + local_role : Primary + peer_role : StandbyCluster_Standby + peer_state : Normal + state : Streaming + sender_sent_location : 1/3049568 + sender_write_location : 1/3049568 + sender_flush_location : 1/3049568 + sender_replay_location : 1/3049568 + receiver_received_location : 1/3049568 + receiver_write_location : 1/3049568 + receiver_flush_location : 1/3049568 + receiver_replay_location : 1/3049568 + sync_percent : 100% + sync_state : Async + sync_priority : 0 + sync_most_available : Off + channel : 10.10.10.10:44100-->10.10.10.20:42690 + + Receiver info: +No information +``` + +``` +主集群备节点1查询结果 +gs_ctl query -D /opt/omm/cluster/dn1 +[2023-04-03 19:29:20.472][2125915][][gs_ctl]: gs_ctl query ,datadir is /opt/omm/cluster/dn0 + HA state: + local_role : Standby + static_connections : 0 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: +No information +``` + +``` +备集群首备节点0查询结果 +gs_ctl query -D /opt/omm/cluster/dn0 +[2023-04-03 19:29:20.472][2720317][][gs_ctl]: gs_ctl query ,datadir is /opt/omm/cluster/dn0 + HA state: + local_role : Standby + static_connections : 1 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: + receiver_pid : 2720076 + local_role : Standby + peer_role : Primary + peer_state : Normal + state : Normal + sender_sent_location : 1/3049568 + sender_write_location : 1/3049568 + sender_flush_location : 1/3049568 + sender_replay_location : 1/3049568 + receiver_received_location : 1/3049568 + receiver_write_location : 1/3049568 + receiver_flush_location : 1/3049568 + receiver_replay_location : 1/3049568 + sync_percent : 100% + channel : 10.10.10.20:39864<--10.10.10.10:44100 +``` + +``` +备集群从备节点1查询结果 +gs_ctl query -D /opt/omm/cluster/dn1 +[2023-04-03 19:29:20.472][2125915][][gs_ctl]: gs_ctl query ,datadir is /opt/omm/cluster/dn0 + HA state: + local_role : Standby + static_connections : 0 + db_state : Normal + detail_information : Normal + + Senders info: +No information + Receiver info: +No information +``` + +###   3.2.主集群一写多读 +``` +主集群主节点0执行 +gsql -d postgres -p 44100 -r +create table test01(id int) with(segment = on); +insert into test01 select generate_series(0,100); +``` + +``` +主集群备节点1查询,可查询到主节点0创建的表和数据 +gsql -d postgres -p 48100 -r +select * from test01; +``` + +###   3.3.备集群只读 +``` +备集群首备节点0查询,可查询到主节点0创建的表和数据 +gsql -d postgres -p 44100 -r +select * from test01; +``` + +``` +备集群从备节点1查询,可查询到主节点0创建的表和数据 +gsql -d postgres -p 48100 -r +select * from test01; +``` + + +常用命令 +(1) 查看pg_control文件 +``` +pg_controldata -I 0 --enable-dss --socketpath=UDS:$DSS_HOME/.dss_unix_d_socket +data +``` + +(2) +***Notice:不推荐直接用于生产环境*** -- Gitee