m6米乐安卓版下载-米乐app官网下载

postgresql高可用之repmgr两节点搭建及switchover操作 -m6米乐安卓版下载

多米爸比 2020-02-05
3376

环境
虚拟机centos7.6
ha1:192.168.99.200
ha2:192.168.99.202
postgresql 12.1
repmgr 5.0.0

$ su root
--创建系统用户和组
# groupadd postgres -g 1001
# useradd postgres -g 1001 -u 1001
# id postgres
uid=1001(postgres) gid=1001(postgres) groups=1001(postgres)
# cd /opt
--源文件解压目录
# mkdir -p /opt/postgresql-12.1
# chown postgres:postgres postgresql-12.1
--安装文件目录
# mkdir -p /opt/pg12
# chown -r postgres:postgres pg12
# chmod 0700 pg12
# ln -s /opt/pg12 /opt/pgsql
--数据目录
# mkdir -p /opt/data6000
# chown -r postgres:postgres data6000
# chmod 0700 data6000
 
--归档目录
# mkdir -p /opt/archive6000
# chown -r postgres:postgres archive6000
# chmod 0700 archive6000
# su postgres
$ wget https://ftp.postgresql.org/pub/source/v12.1/postgresql-12.1.tar.gz
$ chmod 750 postgresql-12.1.tar.gz
$ tar zxvf postgresql-12.1.tar.gz
$ cd postgresql-12.1/
--编译
$ ./configure --prefix=/opt/pg12 --with-pgport=6000
--安装
$ gmake world && gmake install-world
--配置开机自启动
$ su root
# cp /opt/postgresql-12.1/contrib/start-scripts/linux /etc/init.d/postgres-12
# chmod  x /etc/init.d/postgres-12
# chkconfig postgres-12 on
# vi /etc/init.d/postgres-12
#! /bin/sh
## edit from here
prefix=/opt/pgsql
pgdata="/opt/data6000"
pguser=postgres
## stop editing here
path=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
daemon="$prefix/bin/postmaster"
pgctl="$prefix/bin/pg_ctl"
set -e
# only start if we can find the postmaster.
test -x $daemon ||
{
	echo "$daemon not found"
	if [ "$1" = "stop" ]
	then exit 0
	else exit 5
	fi
}
# parse command line parameters.
case $1 in
  start)
	echo -n "starting postgresql: "
        su - $pguser -c "$pgctl -d '$pgdata' start >>/dev/null &"	
echo "ok"
	;;
  stop)
	echo -n "stopping postgresql: "
	su - $pguser -c "$pgctl stop -d '$pgdata' -s"
	echo "ok"
	;;
  restart)
	echo -n "restarting postgresql: "
    su - $pguser -c "$pgctl stop -d '$pgdata' -s"
    su - $pguser -c "$pgctl -d '$pgdata' start >>/dev/null &"	
       echo "ok"
	;;
  reload)
	echo -n "reload postgresql: "
	su - $pguser -c "$pgctl reload -d '$pgdata' -s"
	echo "ok"
	;;
  status)
	su - $pguser -c "$pgctl status -d '$pgdata'"
	;;
  *)
	# print help
	echo "usage: $0 {start|stop|restart|reload|status}" 1>&2
	exit 1
	;;
esac
exit 0
--删除源文件
# cd /opt
# rm -rf postgresql-12.1/
--初始化数据目录
# su postgres
$ /opt/pgsql/bin/initdb \
-d/opt/data6000 \
-eutf8 \
-upostgres \
-w
--服务启停
# service postgres-12 start | stop
$ /opt/pgsql/bin/pg_ctl -d /opt/data6000 start |stop
$ vi postgresql.conf
listen_addresses = '*'
port=6000
wal_level=replica
archive_mode=on
archive_command='cp %p /opt/archive6000/%f'
max_wal_senders=10
wal_keep_segments=512
hot_standby=on
primary_conninfo='host=192.168.99.202 port=6000 user=repuser'
logging_collector=on
log_destination=csvlog
log_directory='/opt/data6000/pg_log'
log_filename='pg_log_%y-%m-%d_%h%m%s.log'
log_file_mode=0600
log_truncate_on_rotation=on
log_rotation_age=1d
log_rotation_size=10mb
log_min_messages=warning
log_min_duration_statement=30s
log_checkpoints=on
log_connections=on
log_disconnections=on
log_duration=on
log_lock_waits=on
log_statement=ddl
--机器ha1(192.168.99.200)配置pg_hba.conf
$ vi pg_hba.conf
host all  all 192.168.99.1/24  md5
host replication  all  192.168.99.1/24 trust
$ /opt/pgsql/bin/psql -p6000 -h127.0.0.1 -upostgres
create user repuser replication login connection limit 5 encrypted password'123456';
create table t(id int,info text);
insert into t values(1,now()),(2,now()),(3,now());
--重启ha1(192.168.99.200)
# service postgres-12 restart
--机器ha2(192.168.99.202)使用pg_basebackup做基础备份
$ /opt/pgsql/bin/pg_basebackup -d /opt/data6000 -fp -xs -v -p -h192.168.99.200 -p6000 -urepuser
--如果报错请检查防火墙等配置,例如下面开放6000端口
# firewall-cmd --zone=public --add-port=6000/tcp --permanent
# firewall-cmd --reload
--机器ha2(192.168.99.202)修改postgresql.conf
--修改primary_conninfo信息
$ vi postgresql.conf
listen_addresses = '*'
port=6000
wal_level=replica
archive_mode=on
archive_command='cp %p /opt/archive6000/%f'
max_wal_senders=10
wal_keep_segments=512
hot_standby=on
primary_conninfo='host=192.168.99.200 port=6000 user=repuser'
logging_collector=on
log_destination=csvlog
log_directory='/opt/data6000/pg_log'
log_filename='pg_log_%y-%m-%d_%h%m%s.log'
log_file_mode=0600
log_truncate_on_rotation=on
log_rotation_age=1d
log_rotation_size=10mb
log_min_messages=warning
log_min_duration_statement=30s
log_checkpoints=on
log_connections=on
log_disconnections=on
log_duration=on
log_lock_waits=on
log_statement=ddl
--机器ha2(192.168.99.202)配置standby.signal
$ touch /opt/data6000/standby.signal
--启动ha2(192.168.99.202)
$ /opt/pgsql/bin/pg_ctl -d/opt/data6000 start &
--异步流复制搭建完成
主库:ha1(192.168.99.200)
备库:ha2(192.168.99.202)
--ntp时间同步
# yum install ntp ntpdate
# ntpdate cn.pool.ntp.org
# hwclock --systohc
$ /opt/pgsql/bin/psql -p6000 -upostgres postgres
create user repmgr superuser connection limit 10 password '123456' ;
create database repmgr owner repmgr;
local   repmgr        repmgr                              trust
host    repmgr        repmgr      127.0.0.1/32            trust
host    repmgr        repmgr      192.168.99.0/24          trust
local   replication   repmgr                              trust
host    replication   repmgr      127.0.0.1/32            trust
host    replication   repmgr      192.168.99.0/24          trust
# wget https://github.com/2ndquadrant/repmgr/archive/v5.0.0.tar.gz
# chmod 755 repmgr-5.0.0.tar.gz
# chown postgres:postgres repmgr-5.0.0.tar.gz
# tar zxvf repmgr-5.0.0.tar.gz
# cd /opt/repmgr-5.0.0
# ./configure pg_config=/opt/pgsql/bin/pg_config
# make && make install
--停备库:ha2(192.168.99.202),并删除数据文件
# service postgres-12 stop
# rm -rf archive6000/*
# rm -rf data6000/*
--数据库访问免密
# mkdir /home/postgres
# chown -r postgres:postgres /home/postgres/
# chmod -r 0700 /home/postgres/
# usermod -d /home/postgres postgres
# su - postgres
$ chmod 0600 .pgpass
$ cat .pgpass 
192.168.99.200:6000:repmgr:repmgr:123456
192.168.99.202:6000:repmgr:repmgr:123456
--ssh免密步骤(操作系统用户postgres)
--1.每个节点都执行
$ ssh-keygen -t rsa
--2.每个节点分别执行
$ ssh-copy-id -i .ssh/id_rsa.pub postgres@192.168.99.202
$ ssh-copy-id -i .ssh/id_rsa.pub postgres@192.168.99.200
--测试ssh互通
--ha1(192.168.99.200)
# vi /opt/repmgr.conf
node_id=1
node_name='node1'
conninfo='host=192.168.99.200 port=6000 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/opt/data6000'
replication_user='repmgr'
replication_type='physical'
repmgr_bindir='/opt/pgsql/bin'
pg_bindir='/opt/pgsql/bin'
monitoring_history=yes
monitor_interval_secs=5
log_level='debug'
log_file='/opt/data6000/repmgr.log'
# chown -r postgres:postgres repmgr.conf
# chmod 0600 repmgr.conf
--ha2(192.168.99.202)测试repmgr是否能免密访问主库
$ /opt/pgsql/bin/psql -p6000 -h192.168.99.200 -urepmgr
--ha1(192.168.99.200)
$ /opt/pgsql/bin/repmgr -f /opt/repmgr.conf primary register --force
info: connecting to primary database...
notice: attempting to install extension "repmgr"
notice: "repmgr" extension successfully installed
notice: primary node record (id: 1) registered
--ha2(192.168.99.202)
# vi /opt/repmgr.conf
node_id=2
node_name='node2'
conninfo='host=192.168.99.202 port=6000 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/opt/data6000'
replication_user='repmgr'
replication_type='physical'
repmgr_bindir='/opt/pgsql/bin'
pg_bindir='/opt/pgsql/bin'
monitoring_history=yes
monitor_interval_secs=5
log_level='debug'
log_file='/opt/data6000/repmgr.log'
# chown -r postgres:postgres repmgr.conf
# chmod 0600 repmgr.conf
--check
$ /opt/pgsql/bin/repmgr -h 192.168.99.200 -p6000 -u repmgr -d repmgr -f /opt/repmgr.conf standby clone --dry-run
--clone
$ /opt/pgsql/bin/repmgr -h 192.168.99.200 -p6000 -u repmgr -d repmgr -f /opt/repmgr.conf standby clone
--ha2(192.168.99.202)修改配置文件,primary_conninfo里host修改ha2指向ha1
# vi /opt/data6000/postgresql.conf
primary_conninfo='host=192.168.99.200 port=6000 user=repuser'
--启动备库
# service postgres-12 start
--主库查看postgres进程
[postgres@ha1 ~]$  ps f -u postgres
  pid tty      stat   time command
 6155 pts/1    s      0:00 -bash
 6408 pts/1    r      0:00  \_ ps f -u postgres
 6337 ?        ss     0:00 /opt/pg12/bin/postgres -d /opt/data6000
 6338 ?        ss     0:00  \_ postgres: logger   
 6340 ?        ss     0:00  \_ postgres: checkpointer   
 6341 ?        ss     0:00  \_ postgres: background writer   
 6342 ?        ss     0:00  \_ postgres: walwriter   
 6343 ?        ss     0:00  \_ postgres: autovacuum launcher   
 6344 ?        ss     0:00  \_ postgres: archiver   last was 000000030000000000000012.00000028.backup
 6345 ?        ss     0:00  \_ postgres: stats collector   
 6346 ?        ss     0:00  \_ postgres: logical replication launcher  
 6404 ?        ss     0:00  \_ postgres: walsender repmgr 192.168.99.202(55371) streaming 0/13001a28
--备库查看postgres进程
[postgres@ha2 ~]$  ps f -u postgres
  pid tty      stat   time command
 6297 pts/1    s      0:00 -bash
 7040 pts/1    r      0:00  \_ ps f -u postgres
 3880 pts/0    s      0:00 -bash
 7030 ?        ss     0:00 /opt/pg12/bin/postgres -d /opt/data6000
 7031 ?        ss     0:00  \_ postgres: logger   
 7032 ?        ss     0:00  \_ postgres: startup   recovering 000000030000000000000013
 7033 ?        ss     0:00  \_ postgres: checkpointer   
 7034 ?        ss     0:00  \_ postgres: background writer   
 7035 ?        ss     0:00  \_ postgres: stats collector   
 7036 ?        ss     0:00  \_ postgres: walreceiver 
--可以看到主库walsender进程,备库walreceiver进程   
--主库查看
select * from pg_stat_replication;
--ha2(192.168.99.202)
[postgres@ha2 ~]$ /opt/pgsql/bin/repmgr -f /opt/repmgr.conf standby register --force
info: connecting to local node "node2" (id: 2)
info: connecting to primary database
info: standby registration complete
notice: standby node "node2" (id: 2) successfully registered
--ha1和ha2查看状态一致
[postgres@ha2 ~]$ /opt/pgsql/bin/repmgr -f /opt/repmgr.conf cluster show
 id | name  | role    | status    | upstream | location | priority | timeline | connection string                                                        
---- ------- --------- ----------- ---------- ---------- ---------- ---------- ---------------------------------------------------------------------------
 1  | node1 | primary | * running |          | default  | 100      | 1        | host=192.168.99.200 port=6000 user=repmgr dbname=repmgr connect_timeout=2
 2  | node2 | standby |   running | node1    | default  | 100      | 1        | host=192.168.99.202 port=6000 user=repmgr dbname=repmgr connect_timeout=2
[postgres@ha2 ~]$ /opt/pgsql/bin/repmgr -f /opt/repmgr.conf standby switchover -u repmgr --verbose
notice: using provided configuration file "/opt/repmgr.conf"
warning: following problems with command line parameters detected:
  database connection parameters not required when executing unknown action
notice: executing switchover on node "node2" (id: 2)
info: searching for primary node
info: checking if node 1 is primary
info: current primary node is 1
info: ssh connection to host "192.168.99.200" succeeded
info: 0 pending archive files
info: replication lag on this standby is 0 seconds
notice: local node "node2" (id: 2) will be promoted to primary; current primary "node1" (id: 1) will be demoted to standby
notice: stopping current primary node "node1" (id: 1)
notice: issuing checkpoint
detail: executing server command "/opt/pgsql/bin/pg_ctl  -d '/opt/data6000' -w -m fast stop"
info: checking for primary shutdown; 1 of 60 attempts ("shutdown_check_timeout")
info: checking for primary shutdown; 2 of 60 attempts ("shutdown_check_timeout")
notice: current primary has been cleanly shut down at location 0/14000028
notice: promoting standby to primary
detail: promoting server "node2" (id: 2) using pg_promote()
notice: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
info: standby promoted to primary after 1 second(s)
notice: standby promote successful
detail: server "node2" (id: 2) was successfully promoted to primary
info: local node 1 can attach to rejoin target node 2
detail: local node's recovery point: 0/14000028; rejoin target node's fork point: 0/140000a0
notice: setting node 1's upstream to node 2
warning: unable to ping "host=192.168.99.200 port=6000 user=repmgr dbname=repmgr connect_timeout=2"
detail: pqping() returned "pqping_no_response"
notice: starting server using "/opt/pgsql/bin/pg_ctl  -w -d '/opt/data6000' start"
notice: node rejoin successful
detail: node 1 is now attached to node 2
notice: switchover was successful
detail: node "node2" is now primary and node "node1" is attached as standby
notice: standby switchover has completed successfully
--从日志可以看到
1.check主库为node1
2.ssh主库node1
3.主库node1检查归档文件和wal
4.主库node1触发checkpoint
5.fast关闭主库node1
6.check主库node1是否关闭
7.主库node1关闭clean后开始promte备库node2
8.节点node2调用pg_promote()函数提升备库为主库
9.等待60秒(promote_check_timeout) 以便promotion完成
10.节点node2完成promotion
11.node1满足rejoin到node2
12.node1执行pg_ctl启动服务,rejoin,attached node2
13.switchover完成
--查看集群状态
[postgres@ha2 ~]$ /opt/pgsql/bin/repmgr -f /opt/repmgr.conf cluster show
 id | name  | role    | status    | upstream | location | priority | timeline | connection string                                                        
---- ------- --------- ----------- ---------- ---------- ---------- ---------- ---------------------------------------------------------------------------
 1  | node1 | standby |   running | node2    | default  | 100      | 4        | host=192.168.99.200 port=6000 user=repmgr dbname=repmgr connect_timeout=2
 2  | node2 | primary | * running |          | default  | 100      | 4        | host=192.168.99.202 port=6000 user=repmgr dbname=repmgr connect_timeout=2
[postgres@ha1 ~]$ /opt/pgsql/bin/repmgr -f /opt/repmgr.conf standby switchover -u repmgr --verbose
notice: using provided configuration file "/opt/repmgr.conf"
warning: following problems with command line parameters detected:
  database connection parameters not required when executing unknown action
notice: executing switchover on node "node1" (id: 1)
info: searching for primary node
info: checking if node 2 is primary
info: current primary node is 2
info: ssh connection to host "192.168.99.202" succeeded
info: 0 pending archive files
info: replication lag on this standby is 0 seconds
notice: local node "node1" (id: 1) will be promoted to primary; current primary "node2" (id: 2) will be demoted to standby
notice: stopping current primary node "node2" (id: 2)
notice: issuing checkpoint
detail: executing server command "/opt/pgsql/bin/pg_ctl  -d '/opt/data6000' -w -m fast stop"
info: checking for primary shutdown; 1 of 60 attempts ("shutdown_check_timeout")
info: checking for primary shutdown; 2 of 60 attempts ("shutdown_check_timeout")
notice: current primary has been cleanly shut down at location 0/15000028
notice: promoting standby to primary
detail: promoting server "node1" (id: 1) using pg_promote()
notice: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
info: standby promoted to primary after 1 second(s)
notice: standby promote successful
detail: server "node1" (id: 1) was successfully promoted to primary
info: local node 2 can attach to rejoin target node 1
detail: local node's recovery point: 0/15000028; rejoin target node's fork point: 0/150000a0
notice: setting node 2's upstream to node 1
warning: unable to ping "host=192.168.99.202 port=6000 user=repmgr dbname=repmgr connect_timeout=2"
detail: pqping() returned "pqping_no_response"
notice: starting server using "/opt/pgsql/bin/pg_ctl  -w -d '/opt/data6000' start"
notice: node rejoin successful
detail: node 2 is now attached to node 1
notice: switchover was successful
detail: node "node1" is now primary and node "node2" is attached as standby
notice: standby switchover has completed successfully
--从日志可以看到
1.check主库为node2
2.ssh主库node2
3.主库node2检查归档文件和wal
4.主库node2触发checkpoint
5.fast关闭主库node2
6.check主库node2是否关闭
7.主库node2关闭clean后开始promte备库node1
8.节点node1调用pg_promote()函数提升备库为主库
9.等待60秒(promote_check_timeout) 以便promotion完成
10.节点node1完成promotion
11.node2满足rejoin到node1
12.node2执行pg_ctl启动服务,rejoin,attached node1
13.switchover完成
--查看集群状态
[postgres@ha1 ~]$ /opt/pgsql/bin/repmgr -f /opt/repmgr.conf cluster show
 id | name  | role    | status    | upstream | location | priority | timeline | connection string                                                        
---- ------- --------- ----------- ---------- ---------- ---------- ---------- ---------------------------------------------------------------------------
 1  | node1 | primary | * running |          | default  | 100      | 5        | host=192.168.99.200 port=6000 user=repmgr dbname=repmgr connect_timeout=2
 2  | node2 | standby |   running | node1    | default  | 100      | 4        | host=192.168.99.202 port=6000 user=repmgr dbname=repmgr connect_timeout=2
经过两次switchover已经恢复为ha1为主库,ha2为备库。
error: unable to connect via ssh to host "192.168.99.200", user ""
--排查觉得可能是主机名重复引起,因为是虚拟机复制,只改了ip没改主机名
--改完主机名后重新配置ssh互信还是有问题,别的普通用户正常
--删除postgres用户和组
# userdel postgres
# groupdel postgres
# rm -rf /home/postgres/
--重新创建用户和组
# groupadd postgres -g 1001
# useradd postgres -g 1001 -u 1001
# passwd postgres
# su - postgres
--重新配置ssh免密
--1.每个节点都执行
$ ssh-keygen -t rsa
--2.每个节点分别执行
$ ssh-copy-id -i .ssh/id_rsa.pub 192.168.99.202
$ ssh-copy-id -i .ssh/id_rsa.pub 192.168.99.200
--再次测试ssh免密访问成功
[postgres@ha2 ~]$ /opt/pgsql/bin/repmgr -f /opt/repmgr.conf standby register --force
info: connecting to local node "node2" (id: 2)
error: unable to connect to local node "node2" (id: 2)
hint: to register a standby which is not running, additionally provide the primary connection parameters
--备库ha2数据库服务未启动,注册repmgr会报错,启动服务即可
「喜欢文章,快来给作者赞赏墨值吧」
【米乐app官网下载的版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。

评论

网站地图