https://www.modb.pro/db/176706
pcp_detach_node - detaches the given node from Pgpool-II. Existing connections through Pgpool-II are forcibly disconnected.
pcp_attach_node - attaches the given node to Pgpool-II.
pcp_stop_pgpool - terminates the Pgpool-II process.
pcp_reload_config - reloads the Pgpool-II configuration file.
pcp_recovery_node - attaches the given backend node via online recovery.
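For orientation, typical invocations look like this (a sketch using the VIP 192.168.254.99 and PCP port 9898 that this article configures later):
pcp_detach_node -h 192.168.254.99 -p 9898 -U pgpool -n 1    # detach backend node 1
pcp_attach_node -h 192.168.254.99 -p 9898 -U pgpool -n 1    # attach it again
pcp_reload_config -h 192.168.254.99 -p 9898 -U pgpool       # reload pgpool.conf
pcp_stop_pgpool -h 192.168.254.99 -p 9898 -U pgpool         # stop the pgpool process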
Failover script overview
failover_command trigger conditions:
1. A failed health check (network problem, or PostgreSQL is not working).
2. If no client connection is attached to pgpool, a PostgreSQL shutdown will not trigger it.
Script logic (see the sketch after the follow_primary notes below):
1. If a standby goes down, drop its replication slot on the primary.
2. If the primary goes down, promote a standby to be the new primary.
follow_primary_command trigger conditions:
1. A user command to run after failover of the primary node.
2. The pcp_promote_node command.
(valid only in streaming replication mode)
Script logic:
Resynchronize the standbys with the new primary via:
1. pg_rewind
2. pg_basebackup (pcp_recovery_node)
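Putting those rules together, the heart of failover.sh reduces to something like the following condensed sketch (illustrative only, wired to the failover_command argument order '%d %h %p %D %m %H %M %P %r %R %N %S' configured later in this article; the shipped failover.sh.sample is more thorough):
failed_node_id="$1"; failed_node_host="$2"
new_main_node_host="$6"; new_main_node_pgdata="${10}"
old_primary_node_id="$8"
old_primary_node_host="${11}"; old_primary_node_port="${12}"
if [ "$failed_node_id" = "$old_primary_node_id" ]; then
    # the primary failed: promote the standby pgpool chose as the new main node
    ssh postgres@"$new_main_node_host" -i ~/.ssh/id_rsa_pgpool \
        "/usr/local/pgsql/bin/pg_ctl -D $new_main_node_pgdata -w promote"
else
    # a standby failed: drop its physical replication slot on the primary
    /usr/local/pgsql/bin/psql -h "$old_primary_node_host" -p "$old_primary_node_port" \
        -c "SELECT pg_drop_replication_slot('${failed_node_host//[-.]/_}');"
fi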
Port planning:
Disable the firewall and SELinux on 100/101/102:
[root@allserver ~]# systemctl disable firewalld
[root@allserver ~]# systemctl stop firewalld
[root@allserver ~]# setenforce 0
[root@allserver ~]# sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
Install and configure Pgpool-II:
[root@allserver src]# tar -zxf pgpool-ii-4.2.4.tar.gz
[root@allserver pgpool-ii-4.2.4]# ./configure --prefix=/usr/local/pgpool
[root@allserver pgpool-ii-4.2.4]# make && make install
[root@allserver pgpool-ii-4.2.4]# cd /usr/local/pgpool/etc
[root@allserver etc]# cp pcp.conf.sample pcp.conf; cp pgpool.conf.sample-stream pgpool.conf;
cp follow_primary.sh.sample follow_primary.sh; chmod +x follow_primary.sh; cp failover.sh.sample failover.sh; chmod +x failover.sh;
cp escalation.sh.sample escalation.sh; chmod +x escalation.sh;
chown postgres:postgres /usr/local/pgpool/etc/{failover.sh,follow_primary.sh,escalation.sh};
cp recovery_1st_stage.sample recovery_1st_stage; chmod +x recovery_1st_stage;
mv recovery_1st_stage /data/pg_data/; cp pgpool_remote_start.sample pgpool_remote_start;
chmod +x pgpool_remote_start; mv pgpool_remote_start /data/pg_data/; chown -R postgres:postgres /data/pg_data;
cp pool_hba.conf.sample pool_hba.conf;
echo "export PATH=\$PATH:/usr/local/pgpool/bin/" >> /etc/profile; . /etc/profile
Set the passwords:
[root@allserver etc]# pg_md5 -p -m -u postgres pool_passwd
password: (enter the password 123qwert)
[root@allserver etc]# pg_md5 -p -m -u pgpool pool_passwd
password: (enter the password 123qwert)
[root@allserver etc]# cat /usr/local/pgpool/etc/pool_passwd
postgres:md569d08236c8e9fc9e0fd97e9d74afe7de
pgpool:md560cd0c52f2f43bb1eb5f18c70ae85a59
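The stored values are ordinary PostgreSQL md5 hashes: md5 of the password concatenated with the username, prefixed with "md5". Assuming the password 123qwert above, an entry can be cross-checked by hand:
[root@allserver etc]# echo -n '123qwertpostgres' | md5sum    # should match the digits after "md5" in the postgres line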
Edit failover.sh and set PGHOME:
[postgres@allserver ~]$ vim /usr/local/pgpool/etc/failover.sh
PGHOME=/usr/local/pgsql
Changes to the /data/pg_data/recovery_1st_stage script:
[postgres@allserver pgpool_logs]$ vim /data/pg_data/recovery_1st_stage
PGHOME=/usr/local/pgsql/
ARCHIVEDIR=/data/pg_archive/
REPLUSER=replic
Around line 53, append the -R flag to the pg_basebackup command,
and around line 56 change the .pgpass path from /var/lib/pgsql/.pgpass to ~/.pgpass.
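After those edits, the pg_basebackup call in recovery_1st_stage should look roughly like this (a sketch; the uppercase variable names come from the sample script and may differ slightly between pgpool versions):
${PGHOME}/bin/pg_basebackup -h ${PRIMARY_NODE_HOST} -U ${REPLUSER} -p ${PRIMARY_NODE_PORT} -D ${DEST_NODE_PGDATA} -X stream -R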
Edit /data/pg_data/pgpool_remote_start and set:
PGHOME=/usr/local/pgsql/
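For context, pgpool_remote_start simply starts PostgreSQL on the target node over SSH, roughly like this (a sketch of what the sample does; variable names are illustrative):
ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@$DEST_NODE_HOST -i ~/.ssh/id_rsa_pgpool "$PGHOME/bin/pg_ctl -l /dev/null -w -D $DEST_NODE_PGDATA start 2>/dev/null 1>/dev/null < /dev/null &"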
Edit /usr/local/pgpool/etc/follow_primary.sh as follows:
#!/bin/bash
# This script is run after failover_command to synchronize the standby with the new primary.
# First try pg_rewind. If pg_rewind fails, use pg_basebackup.
set -o xtrace
# Special values:
# 1) %d = node id
# 2) %h = hostname
# 3) %p = port number
# 4) %D = node database cluster path
# 5) %m = new primary node id
# 6) %H = new primary node hostname
# 7) %M = old main node id
# 8) %P = old primary node id
# 9) %r = new primary port number
# 10) %R = new primary database cluster path
# 11) %N = old primary node hostname
# 12) %S = old primary node port number
# 13) %% = '%' character
node_id="$1"
node_host="$2"
node_port="$3"
node_pgdata="$4"
new_primary_node_id="$5"
new_primary_node_host="$6"
old_main_node_id="$7"
old_primary_node_id="$8"
new_primary_node_port="$9"
new_primary_node_pgdata="${10}"
failed_node_id="$1"
failed_node_host="$2"
failed_node_port="$3"
failed_node_data="$4"
new_master_node_id="$5"
new_master_node_host="$6"
old_master_node_id="$7"
old_primary_node_id="$8"
new_master_node_port="$9"
new_master_node_pgdata="${10}"
old_primary_node_host="${11}"
old_primary_node_port="${12}"
pghome=/usr/local/pgsql/
archivedir=/data/pg_archive
repluser=replic
pcp_user=pgpool
pgpool_path=/usr/local/pgpool/bin/
pcp_port=9898
repl_slot_name=${node_host//[-.]/_}
# superuser
pguser_super=postgres
# superuser password
pguser_super_pwd='123qwert'
# dbname
pguser_super_dbname=postgres
# replication user
pguser_repli=replic
# replication user password
pguser_repli_pwd='123qwert'
echo follow_primary.sh: start: standby node ${failed_node_id}
## get postgresql major version
pgversion=`${pghome}/bin/initdb -V | awk '{print $3}' | sed 's/\..*//' | sed 's/\([0-9]*\)[A-Za-z].*/\1/'`
if [ $pgversion -ge 12 ]; then
recoveryconf=${failed_node_data}/myrecovery.conf
else
recoveryconf=${failed_node_data}/recovery.conf
fi
## test passwordless ssh
ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $pguser_super@${new_primary_node_host} -i ~/.ssh/id_rsa_pgpool ls /tmp > /dev/null
if [ $? -ne 0 ]; then
echo follow_primary.sh: passwordless ssh to $pguser_super@${new_primary_node_host} failed. please setup passwordless ssh.
exit 1
fi
## check the status of standby
ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
$pguser_super@${failed_node_host} -i ~/.ssh/id_rsa_pgpool ${pghome}/bin/pg_ctl -w -D ${failed_node_data} status
## if standby is running, synchronize it with the new primary.
if [ $? -eq 0 ]; then
echo follow_primary.sh: pg_rewind for node ${failed_node_id}
# create replication slot "${repl_slot_name}"
echo 'creating physical replication slot -->'${repl_slot_name}
${pghome}/bin/psql -h ${new_primary_node_host} -p ${new_primary_node_port} \
-c "select pg_create_physical_replication_slot('${repl_slot_name}');" >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo follow_primary.sh: create replication slot \"${repl_slot_name}\" failed. you may need to create replication slot manually.
fi
ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $pguser_super@${failed_node_host} -i ~/.ssh/id_rsa_pgpool "
set -o errexit
${pghome}/bin/pg_ctl -w -m f -D ${failed_node_data} stop
${pghome}/bin/pg_rewind -R -D ${failed_node_data} --source-server=\"user=$pguser_super host=${new_primary_node_host} port=${new_primary_node_port}\"
rm -rf ${failed_node_data}/pg_replslot/*
cat > ${recoveryconf} << EOT
primary_conninfo = 'host=${new_primary_node_host} port=${new_primary_node_port} user=$pguser_repli password=$pguser_repli_pwd application_name=${failed_node_host}'
recovery_target_timeline = 'latest'
restore_command = 'scp ${new_primary_node_host}:${archivedir}/%f %p'
primary_slot_name = '${repl_slot_name}'
EOT
if [ ${pgversion} -ge 12 ]; then
sed -i -e \"\\\$ainclude_if_exists = '$(echo ${recoveryconf} | sed -e 's/\//\\\//g')'\" \
-e \"/^include_if_exists = '$(echo ${recoveryconf} | sed -e 's/\//\\\//g')'/d\" ${failed_node_data}/postgresql.auto.conf
touch ${failed_node_data}/standby.signal
else
echo \"standby_mode = 'on'\" >> ${recoveryconf}
fi
"
sleep 2
ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $pguser_super@${failed_node_host} -i ~/.ssh/id_rsa_pgpool "
${pghome}/bin/pg_ctl -l /dev/null -w -D ${failed_node_data} restart -m f
"
if [ $? -ne 0 ]; then
echo 'pg_rewind failed, will try pcp_recovery_node'
ssh -i ~/.ssh/id_rsa_pgpool -T $failed_node_host "${pgpool_path}/pcp_recovery_node -h $new_master_node_host -v -p $pcp_port -U $pguser_super -n $failed_node_id "
sleep 2
ssh -i ~/.ssh/id_rsa_pgpool -T $failed_node_host "${pgpool_path}/pcp_recovery_node -h $failed_node_host -v -p $pcp_port -U $pguser_super -n $failed_node_id "
if [ $? -ne 0 ]; then
echo 'both pg_rewind and pcp_recovery_node failed for the old primary!'
fi
fi
# if start standby successfully, attach this node
ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $pguser_super@${failed_node_host} -i ~/.ssh/id_rsa_pgpool $pghome/bin/pg_ctl -w -D ${failed_node_data} status
if [ $? -eq 0 ]; then
# run pcp_attach_node to attach standby node to pgpool-ii.
echo "pgpool_follow_master pcp_attach_node 111 id-->"${failed_node_id}
${pgpool_path}/pcp_attach_node -w -h $new_master_node_host -U $pcp_user -p ${pcp_port} -n ${failed_node_id}
if [ $? -ne 0 ]; then
ssh -i ~/.ssh/id_rsa_pgpool -T $new_master_node_host "${pgpool_path}/pcp_attach_node -d -w -h $new_master_node_host -p $pcp_port -U $pguser_super -n $failed_node_id "
fi
if [ $? -ne 0 ]; then
ssh -i ~/.ssh/id_rsa_pgpool -T $new_master_node_host "${pgpool_path}/pcp_attach_node -d -w -h $failed_node_host -p $pcp_port -U $pguser_super -n $failed_node_id "
fi
if [ $? -ne 0 ]; then
echo error: follow_primary.sh: end: pcp_attach_node failed
exit 1
fi
# if start standby failed, drop replication slot "${repl_slot_name}"
else
${pghome}/bin/psql -h ${new_primary_node_host} -p ${new_primary_node_port} \
-c "select pg_drop_replication_slot('${repl_slot_name}');" >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo error: follow_primary.sh: drop replication slot \"${repl_slot_name}\" failed. you may need to drop replication slot manually.
fi
echo error: follow_primary.sh: end: follow primary command failed
exit 1
fi
else
echo "follow_primary.sh: failed_nod_id=${failed_node_id} is not running. try to fix it"
###对失败的数据节点尝试操作####################### start
echo '对失败的数据节点尝试操作--start'
echo "pgpool_follow_master stop pgdb "
ssh -i ~/.ssh/id_rsa_pgpool -t $failed_node_host "${pghome}/bin/pg_ctl stop -m fast -d $failed_node_data"
echo "failover.sh: failed_node_id=$failed_node_id failed_node_host=${failed_node_host} stop the action has been completed!"
# echo "pgpool_follow_master pg_rewind pgdb "
# ssh -i ~/.ssh/id_rsa_pgpool -t $failed_node_host "${pghome}/bin/pg_rewind --target-pgdata=$failed_node_data --source-server='host=$new_master_node_host port=$new_master_node_port user=$pguser_super password=$pguser_super_pwd dbname=$pguser_super_dbname' -p -r"
echo "pgpool_follow_master touch the failed_node_id:${failed_node_id} pgdb conf file ${pgrecoveryconf}"
# create replication slot "${repl_slot_name}"
echo 'this is start create replication'${repl_slot_name}
${pghome}/bin/psql -h ${new_primary_node_host} -p ${new_primary_node_port} \
-c "select pg_create_physical_replication_slot('${repl_slot_name}');" >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo follow_primary.sh: create replication slot \"${repl_slot_name}\" failed. you may need to create replication slot manually.
fi
echo "pgpool_follow_master pg_rewind pgdb "
ssh -t -o stricthostkeychecking=no -o userknownhostsfile=/dev/null $pguser_super@${failed_node_host} -i ~/.ssh/id_rsa_pgpool "
set -o errexit
${pghome}/bin/pg_rewind -r -d ${failed_node_data} --source-server=\"user=$pguser_super host=${new_primary_node_host} port=${new_primary_node_port}\"
rm -rf ${failed_node_data}/pg_replslot/*
cat > ${recoveryconf} << eot
primary_conninfo = 'host=${new_primary_node_host} port=${new_primary_node_port} user=${repluser} application_name=${failed_node_data} user=$pguser_repli password=$pguser_repli_pwd'
recovery_target_timeline = 'latest'
restore_command = 'scp ${new_primary_node_host}:${archivedir}/%f %p'
primary_slot_name = '${repl_slot_name}'
EOT
if [ ${pgversion} -ge 12 ]; then
sed -i -e \"\\\$ainclude_if_exists = '$(echo ${recoveryconf} | sed -e 's/\//\\\//g')'\" \
-e \"/^include_if_exists = '$(echo ${recoveryconf} | sed -e 's/\//\\\//g')'/d\" ${failed_node_data}/postgresql.auto.conf
touch ${failed_node_data}/standby.signal
else
echo \"standby_mode = 'on'\" >> ${recoveryconf}
fi
"
sleep 2
echo "pgpool_follow_master start pgdb "
ssh -i ~/.ssh/id_rsa_pgpool -T $failed_node_host "${pghome}/bin/pg_ctl start -D $failed_node_data 2>/dev/null 1>/dev/null < /dev/null &"
##################################### if pg_rewind failed, fall back to online recovery -- start
if [ $? -ne 0 ]; then
ssh -i ~/.ssh/id_rsa_pgpool -T $failed_node_host "${pgpool_path}/pcp_recovery_node -h $new_master_node_host -v -p $pcp_port -U $pguser_super -n $failed_node_id "
sleep 2
ssh -i ~/.ssh/id_rsa_pgpool -T $failed_node_host "${pgpool_path}/pcp_recovery_node -h $failed_node_host -v -p $pcp_port -U $pguser_super -n $failed_node_id "
if [ $? -ne 0 ]; then
echo 'both pg_rewind and pcp_recovery_node failed for the old primary!'
fi
fi
##################################### if pg_rewind failed, fall back to online recovery -- end
sleep 2
echo "pgpool_follow_master pcp_attach_node 222 id->"${failed_node_id}
${pgpool_path}/pcp_attach_node -w -h $new_master_node_host -u $pcp_user -p ${pcp_port} -n ${failed_node_id}
sleep 2
ssh -i ~/.ssh/id_rsa_pgpool -t $new_master_node_host "${pgpool_path}/pcp_attach_node -d -w -h $new_master_node_host -p $pcp_port -u $pguser_super -n $failed_node_id "
sleep 2
ssh -i ~/.ssh/id_rsa_pgpool -t $failed_node_host "${pgpool_path}/pcp_attach_node -d -w -h $failed_node_host -p $pcp_port -u $pguser_super -n $failed_node_id "
echo "pgpool_follow_master exit 0 "
echo 'attempting to repair the failed data node -- end'
### attempt to repair the failed data node ############### end
exit 0
fi
echo follow_primary.sh: end: follow primary command is completed successfully
exit 0
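Since pcp_promote_node is one of the follow_primary triggers listed at the top of this article, the scripts can be exercised without actually killing the primary. Note that in this pgpool version pcp_promote_node only changes Pgpool-II's internal status and fires the failover/follow_primary scripts; it does not itself promote PostgreSQL. A sketch, assuming node 1 is currently a standby:
pcp_promote_node -h 192.168.254.99 -p 9898 -U pgpool -n 1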
Change $vip/24 to $vip/20 to match our netmask, set the vip, and set device to this machine's own NIC:
[root@allserver etc]# vim escalation.sh
set -o xtrace
pgpools=(server1 server2 server3)
vip=192.168.254.99
device=ens33
for pgpool in "${pgpools[@]}"; do
[ "$hostname" = "$pgpool" ] && continue
ssh -t -o stricthostkeychecking=no -o userknownhostsfile=/dev/null postgres@$pgpool -i ~/.ssh/id_rsa_pgpool "
/usr/bin/sudo /sbin/ip addr del $vip/20 dev $device
"
done
exit 0
If one node's NIC name differs from the others, handle it separately:
[postgres@server1 ~]$ vim /usr/local/pgpool/etc/escalation.sh
#!/bin/bash
# this script is run by wd_escalation_command to bring down the virtual ip on other pgpool nodes
# before bringing up the virtual ip on the new active pgpool node.
set -o xtrace
pgpools=(server1 server2 server3)
vip=192.168.254.99
device=ens33
device2=eth0
for pgpool in "${pgpools[@]}"; do
[ "$hostname" = "$pgpool" ] && continue
if [[ $pgpool != 'server2' ]]
then
ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@$pgpool -i ~/.ssh/id_rsa_pgpool "
/usr/bin/sudo /sbin/ip addr del $vip/20 dev $device
"
else
ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@$pgpool -i ~/.ssh/id_rsa_pgpool "
/usr/bin/sudo /sbin/ip addr del $vip/20 dev $device2
"
fi
done
exit 0
Grant the postgres user sudo privileges:
[root@allserver etc]# cat /etc/sudoers |grep postgres
postgres ALL=(ALL) NOPASSWD:ALL
Create the runtime directories (/runpg/postgresql is also needed, since socket_dir and wd_ipc_socket_dir below point to it):
[root@allserver etc]# mkdir -p /runpg/pgpool_log /runpg/pgpool /runpg/postgres /runpg/postgresql; chown -R postgres:postgres /runpg
#enable_pool_hba = on
[all servers]# vi /usr/local/pgpool/etc/pool_hba.conf
# the official docs use scram-sha-256; changed here to md5
host all pgpool 0.0.0.0/0 md5
host all postgres 0.0.0.0/0 md5
Edit the relevant settings in pgpool.conf:
[root@localhost etc]# vim pgpool.conf
# [connections]
listen_addresses = '*'
port = 9999
socket_dir = '/runpg/postgresql'
pcp_socket_dir = '/runpg/postgres'
## - backend connection settings -
backend_hostname0 = 'server1'
backend_port0 = 5432
backend_weight0 = 1
backend_data_directory0 = '/data/pg_data'
backend_flag0 = 'allow_to_failover'
backend_application_name0 = 'server1'
backend_hostname1 = 'server2'
backend_port1 = 5432
backend_weight1 = 1
backend_data_directory1 = '/data/pg_data'
backend_flag1 = 'allow_to_failover'
backend_application_name1 = 'server2'
backend_hostname2 = 'server3'
backend_port2 = 5432
backend_weight2 = 1
backend_data_directory2 = '/data/pg_data'
backend_flag2 = 'allow_to_failover'
backend_application_name2 = 'server3'
## - authentication -
enable_pool_hba = on
pool_passwd = 'pool_passwd'
# [logs]
logging_collector = on
log_directory = '/data/pgpool_log'
log_filename = 'pgpool-%d.log'
log_file_mode = 0600
log_truncate_on_rotation = on
log_rotation_age = 1d
log_rotation_size = 100MB
# [file locations]
pid_file_name = '/runpg/pgpool/pgpool.pid'
## this directory holds the pgpool_status file, which records cluster node status (if it is not refreshed correctly, the status pgpool reports can be wrong)
logdir = '/tmp'
# [streaming replication mode]
sr_check_user = 'pgpool'
sr_check_password = '123qwert'
follow_primary_command = '/usr/local/pgpool/etc/follow_primary.sh %d %h %p %D %m %H %M %P %r %R'
# [health check global parameters]
health_check_period = 5
health_check_timeout = 20
health_check_user = 'pgpool'
## health check password (if empty, pgpool looks it up in pool_passwd)
health_check_password = '123qwert'
health_check_max_retries = 3
# [failover and failback]
failover_command = '/usr/local/pgpool/etc/failover.sh %d %h %p %D %m %H %M %P %r %R %N %S'
# [online recovery]
recovery_user = 'postgres'
recovery_password = '123qwert'
recovery_1st_stage_command = 'recovery_1st_stage'
# [watchdog]
use_watchdog = on
hostname0 = 'server1'
wd_port0 = 9000
pgpool_port0 = 9999
hostname1 = 'server2'
wd_port1 = 9000
pgpool_port1 = 9999
hostname2 = 'server3'
wd_port2 = 9000
pgpool_port2 = 9999
wd_ipc_socket_dir = '/runpg/postgresql'
## - virtual ip control setting -
delegate_ip = '192.168.254.99'
## - behavior on escalation setting -
wd_escalation_command = ''
or
wd_escalation_command = '/usr/local/pgpool/etc/escalation.sh'
## - lifecheck setting -
wd_lifecheck_method = 'heartbeat'
### -- heartbeat mode --
heartbeat_hostname0 = 'server1'
heartbeat_port0 = 9694
heartbeat_device0 = ''
heartbeat_hostname1 = 'server2'
heartbeat_port1 = 9694
heartbeat_device1 = ''
heartbeat_hostname2 = 'server3'
heartbeat_port2 = 9694
heartbeat_device2 = ''
Install PostgreSQL on 100/101/102:
[root@localhost src]# tar -zxf postgresql-13.4.tar.gz
[root@localhost src]# cd postgresql-13.4/
[root@localhost src]# ./configure --prefix=/usr/local/pgsql
[root@localhost src]# make world && make install-world
[root@localhost src]# groupadd postgres && useradd -g postgres postgres
[root@localhost src]# passwd postgres
[root@localhost src]# mkdir -p /data/pg_data && mkdir -p /data/pg_archive && mkdir -p /data/pg_log && mkdir -p /data/pgpool_log
[root@localhost src]# chown -R postgres:postgres /data
Environment setup on 100/101/102:
Configure /root/.bash_profile and /home/postgres/.bash_profile:
# .bash_profile
# Get the aliases and functions
if [ -f ~/.bashrc ]; then
. ~/.bashrc
fi
# User specific environment and startup programs
PATH=$PATH:$HOME/.local/bin:$HOME/bin
export PATH
export PGPORT=5432
export PGDATA=/data/pg_data
export PGHOME=/usr/local/pgsql
export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH
export PATH=$PGHOME/bin:$PATH:.
export MANPATH=$PGHOME/share/man:$MANPATH
export PGUSER=postgres
export PGDATABASE=postgres
alias rm='rm -i'
alias ll='ls -lh'
export LANG="zh_CN.UTF8"
Install the pgpool-recovery extension:
[root@allserver etc]# cd /usr/local/src/pgpool-ii-4.2.4/src/sql/pgpool-recovery/
[root@allserver pgpool-recovery]# make && make install
Initialize the database on 100:
[postgres@server1 ~]$ initdb -D /data/pg_data -E UTF-8 --locale=C -U postgres -W
postgresql.conf settings on 100:
listen_addresses = '*'
archive_mode = on
archive_command = 'test ! -f /data/pg_archive/%f && cp %p /data/pg_archive/%f '
max_wal_senders = 10
max_replication_slots = 10
wal_level = replica
hot_standby = on
wal_log_hints = on
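Once the instance is restarted with these settings, a quick way to sanity-check that WAL archiving works is to force a segment switch and look in the archive directory (a verification idea, not a required step):
[postgres@server1 ~]$ psql -c "SELECT pg_switch_wal();"
[postgres@server1 ~]$ ls /data/pg_archive/    # the switched segment should appear here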
Enter the database:
postgres=# create user pgpool password '123qwert';
postgres=# create user replic password '123qwert';
postgres=# grant pg_monitor to pgpool;
postgres=# \c template1
template1=# create extension pgpool_recovery;
Add to the authentication file pg_hba.conf:
host all all 192.168.254.0/24 md5
host replication all 192.168.254.0/24 md5
Take a base backup on 101/102 and start it:
[postgres@server2/3 ~]$ pg_basebackup -h 192.168.254.100 -U replic -R -P --verbose -c fast -D /data/pg_data/ -F p -C --slot='slot101'
(on 102 use --slot='slot102')
[postgres@server2/3 ~]$ pg_ctl -D /data/pg_data/ start
Check the replication status on 100:
postgres=# select pid,usesysid,usename,client_addr,backend_start,state,flush_lsn,replay_lsn,write_lsn,write_lag,flush_lag,sync_state from pg_stat_replication ;
Add hosts entries on 100/101/102:
echo "192.168.254.100 server1" >> /etc/hosts
echo "192.168.254.101 server2" >> /etc/hosts
echo "192.168.254.102 server3" >> /etc/hosts
Set each hostname:
[server1]# hostnamectl set-hostname server1
[server2]# hostnamectl set-hostname server2
[server3]# hostnamectl set-hostname server3
Verify the change:
[root@localhost postgresql-13.4]# uname -a
Linux server1 3.10.0-957.el7.x86_64 #1 SMP Thu Nov 8 23:39:32 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
Set up passwordless SSH (configure under both the root and postgres users):
[root@allserver ~]# mkdir ~/.ssh; cd ~/.ssh
[root@allserver .ssh]# ssh-keygen -t rsa -f id_rsa_pgpool
[root@allserver .ssh]# ssh-copy-id -i id_rsa_pgpool.pub postgres@server1
[root@allserver .ssh]# ssh-copy-id -i id_rsa_pgpool.pub postgres@server2
[root@allserver .ssh]# ssh-copy-id -i id_rsa_pgpool.pub postgres@server3
[root@allserver .ssh]# su - postgres
[postgres@allserver ~]$ cd ~/.ssh/
[postgres@allserver .ssh]$ ssh-keygen -t rsa -f id_rsa_pgpool
[postgres@allserver .ssh]$ ssh-copy-id -i id_rsa_pgpool.pub postgres@server1
[postgres@allserver .ssh]$ ssh-copy-id -i id_rsa_pgpool.pub postgres@server2
[postgres@allserver .ssh]$ ssh-copy-id -i id_rsa_pgpool.pub postgres@server3
Test the passwordless login (N = 1, 2, 3):
ssh postgres@serverN -i ~/.ssh/id_rsa_pgpool
Create a .pgpass file in the postgres user's home directory on 100/101/102:
[root@allserver ~]# su - postgres
[postgres@allserver ~]$ vim ~/.pgpass
server1:5432:replication:replic:123qwert
server2:5432:replication:replic:123qwert
server3:5432:replication:replic:123qwert
server1:5432:postgres:postgres:123qwert
server2:5432:postgres:postgres:123qwert
server3:5432:postgres:postgres:123qwert
[postgres@allserver ~]$ chmod 0600 ~/.pgpass
Since Pgpool-II 4.2, all configuration parameters are identical on every host. When the watchdog feature is enabled, a pgpool_node_id file is also required so that each host can tell which one it is: create the file and put the pgpool (watchdog) node number (e.g. 0, 1, 2...) in it.
[root@server1 etc]# echo "0" >> /usr/local/pgpool/etc/pgpool_node_id
[root@server2 etc]# echo "1" >> /usr/local/pgpool/etc/pgpool_node_id
[root@server3 etc]# echo "2" >> /usr/local/pgpool/etc/pgpool_node_id
Configure passwordless PCP access for the pcp_user=pgpool used in follow_primary.sh:
[root@allserver etc]#
echo 'pgpool:'`pg_md5 123qwert` >>/usr/local/pgpool/etc/pcp.conf
echo 'postgres:'`pg_md5 123qwert` >>/usr/local/pgpool/etc/pcp.conf
[root@allserver etc]# su - postgres
[postgres@allserver ~]$
echo 'localhost:9898:pgpool:123qwert' >> ~/.pcppass;
echo '192.168.254.99:9898:pgpool:123qwert' >> ~/.pcppass;
echo 'server1:9898:pgpool:123qwert' >> ~/.pcppass;
echo 'server2:9898:pgpool:123qwert' >> ~/.pcppass;
echo 'server3:9898:pgpool:123qwert' >> ~/.pcppass;
[postgres@allserver ~]$ chmod 600 ~/.pcppass
# start pgpool (on server1, then server2, then server3)
[postgres@allserver ~]$ pgpool -d -D -n &
Building the standbys
This can be done in two ways:
1. manually, with pg_basebackup (as shown earlier);
2. by adding the node with pcp_recovery_node:
2.1,
[server1]# pcp_recovery_node -h 192.168.254.99 -p 9898 -U pgpool -n 1
2.2,
[server1]# pcp_recovery_node -h 192.168.254.99 -p 9898 -U pgpool -n 2
Once a node is added this way, its standby is started automatically; no manual start is needed.
Cluster management
The pgpool cluster
Show the cluster configuration:
pcp_pool_status -h 192.168.254.99 -p 9898 -U pgpool -v
Show cluster node details:
# -h is the cluster VIP, -p the PCP port (default 9898), -U the PCP user, -v for verbose output
pcp_watchdog_info -h 192.168.254.99 -p 9898 -U pgpool -v -w
Show the node count:
pcp_node_count -h 192.168.254.99 -p 9898 -U pgpool -w
Show a specific node:
pcp_node_info -h 192.168.254.99 -p 9898 -U pgpool -n 0 -v -w
Attach a node to the cluster:
# -n is the node index (starting from 0)
pcp_attach_node -h 192.168.254.99 -p 9898 -U pgpool -n 0 -v -w
Detach a node from the cluster:
pcp_detach_node -h 192.168.254.99 -p 9898 -U pgpool -n 0 -v -w
Promote a standby node to the active node:
pcp_promote_node -h 192.168.254.99 -p 9898 -U pgpool -n 0 -v -w
Recover an offline node back into the cluster:
pcp_recovery_node -h 192.168.254.99 -p 9898 -U pgpool -n 0 -v -w
The PostgreSQL cluster
Connect to the cluster:
[all servers]$ psql -h 192.168.254.99 -p 9999
Show the cluster status:
[all servers]$ psql -h 192.168.254.99 -p 9999 -U postgres postgres -c "show pool_nodes"
show pool_status; displays the relevant parameter settings
High availability tests:
Current state:
the VIP is on 100.
1. Stop the PostgreSQL instance on 100.
Result: the standby on server2 is promoted to the new primary, the old primary on server1 is demoted to a standby, and server3's standby is repointed at the new primary.
2. Switch again:
stop the primary on 101.
Result: the standby on server1 is promoted to the new primary, the old primary on server2 is demoted to a standby, and server3's standby is repointed at the new primary.
3. Power off the 100 virtual machine.
Result: the standby on server2 is promoted to the new primary, server3's standby is repointed at server2, and the VIP on server1 floats to server3 (102).
4. Bring 100 back up.
Result: after server1 starts, it still shows as "down" and must be re-attached to the cluster manually.
Manually recover node server1, as sketched below:
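A sketch of that manual recovery, assuming server1 is backend node 0 as configured above:
[postgres@server2 ~]$ pcp_recovery_node -h 192.168.254.99 -p 9898 -U pgpool -n 0
[postgres@server2 ~]$ pcp_node_info -h 192.168.254.99 -p 9898 -U pgpool -n 0 -v -w    # confirm the node is back up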
Testing complete.