
Ops-08-Message Queue-RabbitMQ

Common Commands

Common RabbitMQ service commands

Check the message queue service status

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl status
Status of node 'rabbit@bj02-control-10e129e168e11' ...
[{pid,256181},
 {running_applications,
     [{rabbitmq_management,"RabbitMQ Management Console","3.6.6"},
      {rabbitmq_web_dispatch,"RabbitMQ Web Dispatcher","3.6.6"},
      {webmachine,"webmachine","1.10.3"},
      {mochiweb,"MochiMedia Web Server","2.13.1"},
      {ssl,"Erlang/OTP SSL application","8.1.3.1"},
      {public_key,"Public key infrastructure","1.4"},
      {inets,"INETS  CXC 138 49","6.3.9"},
      {amqp_client,"RabbitMQ AMQP Client","3.6.6"},
      {rabbitmq_management_agent,"RabbitMQ Management Agent","3.6.6"},
      {rabbit,"RabbitMQ","3.6.6"},
      {mnesia,"MNESIA  CXC 138 12","4.14.3"},
      {os_mon,"CPO  CXC 138 46","2.4.2"},
      {syntax_tools,"Syntax tools","2.1.1"},
      {compiler,"ERTS  CXC 138 10","7.0.4.1"},
      {rabbit_common,[],"3.6.6"},
      {xmerl,"XML parser","1.3.14"},
      {asn1,"The Erlang ASN1 compiler version 4.0.4","4.0.4"},
      {ranch,"Socket acceptor pool for TCP protocols.","1.2.1"},
      {crypto,"CRYPTO","3.7.4"},
      {sasl,"SASL  CXC 138 11","3.0.3"},
      {stdlib,"ERTS  CXC 138 10","3.3"},
      {kernel,"ERTS  CXC 138 10","5.2"}]},
 {os,{unix,linux}},
 {erlang_version,
     "Erlang/OTP 19 [erts-8.3.5.3] [source] [64-bit] [smp:40:40] [async-threads:640] [hipe] [kernel-poll:true]\n"},
 {memory,
     [{total,1413937720},
      {connection_readers,28043056},
      {connection_writers,2264688},
      {connection_channels,32172032},
      {connection_other,26160008},
      {queue_procs,62936224},
      {queue_slave_procs,46849384},
      {plugins,4296160},
      {other_proc,27734840},
      {mnesia,21477712},
      {mgmt_db,444646656},
      {msg_index,4284440},
      {other_ets,54203520},
      {binary,589647464},
      {code,24842283},
      {atom,1033401},
      {other_system,43345852}]},
 {alarms,[]},
 {listeners,[{clustering,18892,"::"},{amqp,18889,"10.129.168.11"}]},
 {vm_memory_high_watermark,0.4},
 {vm_memory_limit,216036717363},
 {disk_free_limit,50000000},
 {disk_free,415873417216},
 {file_descriptors,
     [{total_limit,10140},
      {total_used,1131},
      {sockets_limit,9124},
      {sockets_used,1036}]},
 {processes,[{limit,1048576},{used,26599}]},
 {run_queue,0},
 {uptime,1319971},
 {kernel,{net_ticktime,60}}]
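When scanning this output during routine checks, the fields that usually matter most are alarms, file_descriptors, disk_free and the memory limits. A quick way to pull just those lines out is a simple grep (a sketch; the patterns are the field names shown in the output above):

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl status | grep -E 'alarms|total_limit|total_used|disk_free|vm_memory'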

Check the message queue cluster status

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl cluster_status
Cluster status of node 'rabbit@bj02-control-10e129e168e11' ...
[{nodes,[{disc,['rabbit@bj02-control-10e129e168e11',
                'rabbit@bj02-control-10e129e168e14',
                'rabbit@bj02-control-10e129e168e23']}]},
 {running_nodes,['rabbit@bj02-control-10e129e168e23',
                 'rabbit@bj02-control-10e129e168e14',
                 'rabbit@bj02-control-10e129e168e11']},
 {cluster_name,<<"rabbit@bj02-control-10e129e168e14">>},
 {partitions,[]},
 {alarms,[{'rabbit@bj02-control-10e129e168e23',[]},
          {'rabbit@bj02-control-10e129e168e14',[]},
          {'rabbit@bj02-control-10e129e168e11',[]}]}]

Stop the RabbitMQ application on an Erlang node

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl stop_app

Start the RabbitMQ application on an Erlang node

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl start_app

Reset the node state

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl reset

Note

Resetting a node removes it from the cluster it belongs to and deletes all data from its management database, such as vhosts. The RabbitMQ application must be stopped before the reset.
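A typical reset therefore looks like the following sequence (a sketch, run on the node being reset):

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl stop_app
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl reset
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl start_app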

Force-reset the node state unconditionally

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl force_reset

Join a RabbitMQ cluster

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl join_cluster rabbit@bj02-control-10e129e168e14

Note

Appending the --ram option to join_cluster makes the node join the cluster as a RAM node; by default it joins as a disc node. Before a node joins a cluster, its RabbitMQ application must be stopped first.
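For example, joining the cluster as a RAM node looks like this (a sketch; rabbit@bj02-control-10e129e168e14 is used as the target because it already appears in the cluster status above):

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl stop_app
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl join_cluster --ram rabbit@bj02-control-10e129e168e14
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl start_app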

Change a node's cluster type

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl change_cluster_node_type ram

Note

The RabbitMQ application must be stopped before changing the node type. A node can be converted to either a ram node or a disc node, but the cluster must always keep at least one disc node.
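For example, converting the node back to a disc node (a sketch):

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl stop_app
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl change_cluster_node_type disc
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl start_app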

Remove a node from the cluster remotely

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl forget_cluster_node rabbit@bj02-control-10e129e168e12

Note

The node being removed must be stopped before it can be forgotten. If the node running this command is itself unable to start, add the --offline flag.
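For example, removing the node while the local RabbitMQ application cannot be started (a sketch using the --offline flag and the same node name as above):

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl forget_cluster_node --offline rabbit@bj02-control-10e129e168e12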

Add a user test with the password openstack

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl add_user test openstack
Creating user "test" ...
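A newly created user has no tags and no permissions. To make the account usable it is common to grant tags and vhost permissions right after creation; a sketch (the administrator tag and the "/" vhost are assumptions, adjust to your environment):

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl set_user_tags test administrator
[root@bj02-control-10e129e168e11 ~]# rabbitmqctl set_permissions -p / test ".*" ".*" ".*"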

Change a user's password

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl change_password test test
Changing password for user "test" ...

Clear a user's password to prevent login

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl clear_password test
Clearing password for user "test" ...

Delete a user

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl delete_user test
Deleting user "test" ...

List users

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl list_users
Listing users ...
openstack       []
guest   [administrator]

Set the cluster HA policy

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl set_policy ha-all '^(?!amq\.).*' '{"ha-mode": "all"}'
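This policy mirrors every queue whose name does not start with amq. to all nodes in the cluster. To confirm the policy is in place, list the policies (a sketch):

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl list_policies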

List queues

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl list_queues
reply_e82156135e6a45a199ebd90b1c332121  0
neutron-vo-QosPolicy-1.8_fanout_f327e6fa4ab94b879a27ce58e9a58371        0
q-agent-notifier-security_group-update.bj02-compute-10e129e168e244      0
neutron-vo-SecurityGroup-1.0.bj02-network-10e129e168e12 0
conductor.bj02-control-10e129e168e14    0
q-agent-notifier-network-update.bj02-compute-10e129e170e84      0
q-agent-notifier-network-update.bj02-compute-10e129e168e221     0
reply_63de2fac8818479cb03f825c1d9a0c9d  0
q-agent-notifier-tunnel-delete_fanout_e0643cb17437424d91af1d9fd44d6e52  0
l3_agent_fanout_bef556ffc9fa4d348d75beadfd6933e7        0
neutron-vo-Trunk-1.1.bj02-compute-10e129e170e76 0
l3_agent.bj02-compute-10e129e170e76     0
q-agent-notifier-dvr-update.bj02-compute-10e129e170e80  0
q-agent-notifier-network-update.bj02-compute-10e129e168e224     0
reply_43f2513950f24a86a779d6b9a4c204fd  0
reply_6d637e644f6b4b3fb059e280cd1592af  0
q-agent-notifier-dvr-update.bj02-compute-10e129e168e226 0
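By default list_queues prints only the queue name and the message count. Extra columns can be requested explicitly, for example message and consumer counts together (a sketch; name, messages and consumers are standard queue info items):

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl list_queues name messages consumers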

List exchanges

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl list_exchanges
Listing exchanges ...
reply_51c86a2dce494a00a0ffb19b41806e42  direct
reply_beb7cb964a184651b4f77a19b84eb8f3  direct
reply_b4f6d8b9710e481b9d417c031bfc59c2  direct
reply_f5e65901a7a04d92a62a7cf720ac0719  direct
reply_f158fb540a204d808330f626f74549a9  direct
reply_14609fcd964e4c199dc2a656f39d7d41  direct
reply_5370e30c7a71476f81da9503b07335e4  direct
reply_cdf495a8a96b4bbebd29e231625ae4f9  direct
reply_28569b05da4047babe63e0e92f2e5452  direct
reply_8db0fc7471784f14a7cae7368bd2515e  direct
reply_5da2cd3e9d6e411f8ff24997c08c9cd9  direct
reply_f90ebdf3a76040be8c32e81414427a55  direct
reply_c4401bdadd484d5e867e7e23d357f5f5  direct
reply_b8418195211a480ebd57fef4d763dced  direct
cinder-volume_fanout    fanout
reply_682c7fd2e9264524aee4c8f8bb329fa5  direct
reply_f2934f2aa10a45e898c20ab81930db7b  direct
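list_exchanges also accepts explicit columns; for example, showing durability along with name and type (a sketch):

[root@bj02-control-10e129e168e11 ~]# rabbitmqctl list_exchanges name type durable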

Troubleshooting

Problem 1: After 2 nodes of a 3-node RabbitMQ cluster go down, the third node goes down automatically

Symptom

In a 3-node RabbitMQ cluster, after 2 nodes go down, the third node also shuts down automatically.

Resolution process

  1. Checked the third node's shutdown logs; it had stopped on its own and the logs contained no useful information.
  2. Checked the RabbitMQ official documentation on network partition handling and found that when cluster_partition_handling is set to pause_minority, RabbitMQ automatically stops the minority side of the cluster, which matches this problem.
  3. On a test cluster, with cluster_partition_handling set to pause_minority, taking down two nodes caused the third node to shut down automatically; with it set to autoheal, taking down two nodes did not shut down the third node.
  4. Set cluster_partition_handling to autoheal on the production cluster (see the configuration sketch at the end of this section); the problem was resolved.

Root cause

The production cluster had cluster_partition_handling set to pause_minority. In this mode, once two of the three nodes are down, the remaining node considers itself the minority and pauses itself.
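The fix in step 4 corresponds to the following setting in the RabbitMQ configuration file (a sketch assuming the classic Erlang-term file at /etc/rabbitmq/rabbitmq.config used by this 3.6.x deployment; only the relevant stanza is shown, and the brokers must be restarted for the change to take effect):

[
  {rabbit, [
    {cluster_partition_handling, autoheal}
  ]}
].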