运维-08-消息队列-rabbitmq¶
常用命令¶
RabbitMQ 服务常用命令¶
查看消息队列服务状态¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl status
Status of node 'rabbit@bj02-control-10e129e168e11' ...
[{pid,256181},
{running_applications,
[{rabbitmq_management,"RabbitMQ Management Console","3.6.6"},
{rabbitmq_web_dispatch,"RabbitMQ Web Dispatcher","3.6.6"},
{webmachine,"webmachine","1.10.3"},
{mochiweb,"MochiMedia Web Server","2.13.1"},
{ssl,"Erlang/OTP SSL application","8.1.3.1"},
{public_key,"Public key infrastructure","1.4"},
{inets,"INETS CXC 138 49","6.3.9"},
{amqp_client,"RabbitMQ AMQP Client","3.6.6"},
{rabbitmq_management_agent,"RabbitMQ Management Agent","3.6.6"},
{rabbit,"RabbitMQ","3.6.6"},
{mnesia,"MNESIA CXC 138 12","4.14.3"},
{os_mon,"CPO CXC 138 46","2.4.2"},
{syntax_tools,"Syntax tools","2.1.1"},
{compiler,"ERTS CXC 138 10","7.0.4.1"},
{rabbit_common,[],"3.6.6"},
{xmerl,"XML parser","1.3.14"},
{asn1,"The Erlang ASN1 compiler version 4.0.4","4.0.4"},
{ranch,"Socket acceptor pool for TCP protocols.","1.2.1"},
{crypto,"CRYPTO","3.7.4"},
{sasl,"SASL CXC 138 11","3.0.3"},
{stdlib,"ERTS CXC 138 10","3.3"},
{kernel,"ERTS CXC 138 10","5.2"}]},
{os,{unix,linux}},
{erlang_version,
"Erlang/OTP 19 [erts-8.3.5.3] [source] [64-bit] [smp:40:40] [async-threads:640] [hipe] [kernel-poll:true]\n"},
{memory,
[{total,1413937720},
{connection_readers,28043056},
{connection_writers,2264688},
{connection_channels,32172032},
{connection_other,26160008},
{queue_procs,62936224},
{queue_slave_procs,46849384},
{plugins,4296160},
{other_proc,27734840},
{mnesia,21477712},
{mgmt_db,444646656},
{msg_index,4284440},
{other_ets,54203520},
{binary,589647464},
{code,24842283},
{atom,1033401},
{other_system,43345852}]},
{alarms,[]},
{listeners,[{clustering,18892,"::"},{amqp,18889,"10.129.168.11"}]},
{vm_memory_high_watermark,0.4},
{vm_memory_limit,216036717363},
{disk_free_limit,50000000},
{disk_free,415873417216},
{file_descriptors,
[{total_limit,10140},
{total_used,1131},
{sockets_limit,9124},
{sockets_used,1036}]},
{processes,[{limit,1048576},{used,26599}]},
{run_queue,0},
{uptime,1319971},
{kernel,{net_ticktime,60}}]
查看消息队列集群状态¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl cluster_status
Cluster status of node 'rabbit@bj02-control-10e129e168e11' ...
[{nodes,[{disc,['rabbit@bj02-control-10e129e168e11',
'rabbit@bj02-control-10e129e168e14',
'rabbit@bj02-control-10e129e168e23']}]},
{running_nodes,['rabbit@bj02-control-10e129e168e23',
'rabbit@bj02-control-10e129e168e14',
'rabbit@bj02-control-10e129e168e11']},
{cluster_name,<<"rabbit@bj02-control-10e129e168e14">>},
{partitions,[]},
{alarms,[{'rabbit@bj02-control-10e129e168e23',[]},
{'rabbit@bj02-control-10e129e168e14',[]},
{'rabbit@bj02-control-10e129e168e11',[]}]}]
停止 Erlang node 上的 RabbitMQ 的应用¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl stop_app
启动 Erlang node 上的 RabbitMQ 的应用¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl start_app
初始化 Node 状态¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl reset
注解
初始化 Node 状态会将该节点从集群中移除,并清空其管理数据库中的所有数据(例如 Vhosts 等)。执行初始化之前,必须先停止该节点上的 RabbitMQ 应用。
无条件初始化 Node 状态¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl force_reset
加入 RabbitMQ 集群¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl join_cluster rabbit@'bj02-control-10e129e168e11'
注解
在 join_cluster 后可以加 --ram 参数,表示以内存节点加入集群;默认以磁盘节点加入集群。join_cluster 的参数应为集群中另一个正在运行的节点名称。在一个节点加入集群之前,必须先停止该节点的 RabbitMQ 应用。
转换节点的运行模式¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl change_cluster_node_type ram
注解
改变模式之前要先停止 RabbitMQ 应用。节点可以转换成 ram 节点,也可以转换成 disc 节点,但集群中至少要保留一个磁盘节点。
远程移除集群中某个节点¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl forget_cluster_node bj02-control-10e129e168e12
注解
被移除的节点必须处于离线状态;执行该命令的节点通常需要处于在线状态,如果执行命令的节点自身也处于离线状态,则需要加 --offline 参数。
添加 test 用户 密码 openstack¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl add_user test openstack
Creating user "test" ...
修改用户密码¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl change_password test test
Changing password for user "test" ...
清除用户密码禁止登录¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl clear_password test
Clearing password for user "test" ...
删除用户¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl delete_user test
Deleting user "test" ...
查询用户列表¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl list_users
Listing users ...
openstack []
guest [administrator]
设置集群 HA 策略¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl set_policy ha-all '^(?!amq\.).*' '{"ha-mode": "all"}'
查询队列信息¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl list_queues
reply_e82156135e6a45a199ebd90b1c332121 0
neutron-vo-QosPolicy-1.8_fanout_f327e6fa4ab94b879a27ce58e9a58371 0
q-agent-notifier-security_group-update.bj02-compute-10e129e168e244 0
neutron-vo-SecurityGroup-1.0.bj02-network-10e129e168e12 0
conductor.bj02-control-10e129e168e14 0
q-agent-notifier-network-update.bj02-compute-10e129e170e84 0
q-agent-notifier-network-update.bj02-compute-10e129e168e221 0
reply_63de2fac8818479cb03f825c1d9a0c9d 0
q-agent-notifier-tunnel-delete_fanout_e0643cb17437424d91af1d9fd44d6e52 0
l3_agent_fanout_bef556ffc9fa4d348d75beadfd6933e7 0
neutron-vo-Trunk-1.1.bj02-compute-10e129e170e76 0
l3_agent.bj02-compute-10e129e170e76 0
q-agent-notifier-dvr-update.bj02-compute-10e129e170e80 0
q-agent-notifier-network-update.bj02-compute-10e129e168e224 0
reply_43f2513950f24a86a779d6b9a4c204fd 0
reply_6d637e644f6b4b3fb059e280cd1592af 0
q-agent-notifier-dvr-update.bj02-compute-10e129e168e226 0
查询队列交换机信息¶
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl list_exchanges
Listing exchanges ...
reply_51c86a2dce494a00a0ffb19b41806e42 direct
reply_beb7cb964a184651b4f77a19b84eb8f3 direct
reply_b4f6d8b9710e481b9d417c031bfc59c2 direct
reply_f5e65901a7a04d92a62a7cf720ac0719 direct
reply_f158fb540a204d808330f626f74549a9 direct
reply_14609fcd964e4c199dc2a656f39d7d41 direct
reply_5370e30c7a71476f81da9503b07335e4 direct
reply_cdf495a8a96b4bbebd29e231625ae4f9 direct
reply_28569b05da4047babe63e0e92f2e5452 direct
reply_8db0fc7471784f14a7cae7368bd2515e direct
reply_5da2cd3e9d6e411f8ff24997c08c9cd9 direct
reply_f90ebdf3a76040be8c32e81414427a55 direct
reply_c4401bdadd484d5e867e7e23d357f5f5 direct
reply_b8418195211a480ebd57fef4d763dced direct
cinder-volume_fanout fanout
reply_682c7fd2e9264524aee4c8f8bb329fa5 direct
reply_f2934f2aa10a45e898c20ab81930db7b direct
故障处理¶
问题1:RabbitMQ 3 节点集群 down 2 节点后,第 3 个节点自动 down 机¶
问题现象¶
RabbitMQ 3 节点集群, down 2 节点后,第 3 个节点出现自动 down 机。
处理过程¶
- 查看第三个节点的日志,只能看到节点自动 down 机的记录,没有找到其他有用的信息。
- 查看 RabbitMQ 官方文档,关于网络分区的配置,发现 cluster_partition_handling 配置项配置为 pause_minority 的时候,RabbitMQ 会自动停止集群少数派,与这个问题类似。
- 在测试集群将 cluster_partition_handling 配置项配置为 pause_minority,down 掉两个节点,第三个节点会自动 down 机,配置为 autoheal,down 掉两个节点,第三个节点不会 down 机。
- 在线上集群将 cluster_partition_handling 配置项配置为 autoheal,问题解决。
问题原因¶
线上集群的 cluster_partition_handling 配置项配置为 pause_minority。此模式下,节点一旦发现自己处于少数派(能连通的节点数不超过集群节点总数的一半),就会自动暂停自身的 RabbitMQ 应用。3 节点集群 down 掉两个节点以后,剩下的第三个节点即成为少数派,因此被自动暂停,表现为自动 down 机。