From 87856f05a85e2d20b7265b78373657e97dbf18e4 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 29 Nov 2017 17:21:29 -0600 Subject: [PATCH 001/812] Fix: attrd: ensure node name is broadcast at start-up (CLBZ#5330) This fixes a regression introduced in 1.1.18. Since c9d1c3cd, the crmd no longer explicitly clears the terminate and shutdown node attributes at first join. An unwanted side effect of this was that the attrd writer no longer reliably learned a joining node's name. If a node is known only by its ID, the writer can not write its attributes to the CIB. The worst outcome is that the joining node would be unable to shut down, since the shutdown attribute would never trigger the policy engine. The window was limited because the writer learns the node's name if a new attrd election was required, or a node attribute was set locally on the joining node. The fix is to set a new private attribute, #attrd-protocol, at attrd start-up, with the supported attrd protocol version. This has the additional benefit of allowing any node to determine the minimum supported protocol version across all active cluster nodes. --- attrd/commands.c | 30 ++++++++++++++++++++++++------ attrd/internal.h | 1 + attrd/main.c | 7 +++++++ include/crm/crm.h | 1 + 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/attrd/commands.c b/attrd/commands.c index 967703f0602..0a20b2654ea 100644 --- a/attrd/commands.c +++ b/attrd/commands.c @@ -35,8 +35,9 @@ * heartbeat, CMAN, or corosync-plugin stacks) is unversioned. * * With atomic attrd, each attrd will send ATTRD_PROTOCOL_VERSION with every - * peer request and reply. Currently, there is no way to know the minimum - * version supported by all peers, which limits its usefulness. + * peer request and reply. As of Pacemaker 2.0.0, at start-up each attrd will + * also set a private attribute for itself with its version, so any attrd can + * determine the minimum version supported by all peers. 
* * Protocol Pacemaker Significant changes * -------- --------- ------------------- @@ -289,11 +290,10 @@ void attrd_client_clear_failure(xmlNode *xml) { #if 0 - /* @TODO This would be most efficient, but there is currently no way to - * verify that all peers support the op. If that ever changes, we could - * enable this code. + /* @TODO Track the minimum supported protocol version across all nodes, + * then enable this more-efficient code. */ - if (all_peers_support_clear_failure) { + if (compare_version("2", minimum_protocol_version) <= 0) { /* Propagate to all peers (including ourselves). * This ends up at attrd_peer_message(). */ @@ -523,6 +523,24 @@ attrd_peer_clear_failure(crm_node_t *peer, xmlNode *xml) regfree(®ex); } +/*! + \internal + \brief Broadcast private attribute for local node with protocol version +*/ +void +attrd_broadcast_protocol() +{ + xmlNode *attrd_op = create_xml_node(NULL, __FUNCTION__); + + crm_xml_add(attrd_op, F_TYPE, T_ATTRD); + crm_xml_add(attrd_op, F_ORIG, crm_system_name); + crm_xml_add(attrd_op, F_ATTRD_TASK, ATTRD_OP_UPDATE); + crm_xml_add(attrd_op, F_ATTRD_ATTRIBUTE, CRM_ATTR_PROTOCOL); + crm_xml_add(attrd_op, F_ATTRD_VALUE, ATTRD_PROTOCOL_VERSION); + crm_xml_add_int(attrd_op, F_ATTRD_IS_PRIVATE, 1); + attrd_client_update(attrd_op); +} + void attrd_peer_message(crm_node_t *peer, xmlNode *xml) { diff --git a/attrd/internal.h b/attrd/internal.h index 99fc3fd0f4c..23bcbda751f 100644 --- a/attrd/internal.h +++ b/attrd/internal.h @@ -53,6 +53,7 @@ election_t *writer; crm_ipcs_send_ack((client), (id), (flags), "ack", __FUNCTION__, __LINE__) void write_attributes(bool all); +void attrd_broadcast_protocol(void); void attrd_peer_message(crm_node_t *client, xmlNode *msg); void attrd_client_peer_remove(const char *client_name, xmlNode *xml); void attrd_client_clear_failure(xmlNode *xml); diff --git a/attrd/main.c b/attrd/main.c index 2670dc52be6..7721439136b 100644 --- a/attrd/main.c +++ b/attrd/main.c @@ -220,6 +220,13 @@ 
attrd_cib_connect(int max_retry) // Always read the CIB at start-up mainloop_set_trigger(attrd_config_read); + /* Set a private attribute for ourselves with the protocol version we + * support. This lets all nodes determine the minimum supported version + * across all nodes. It also ensures that the writer learns our node name, + * so it can send our attributes to the CIB. + */ + attrd_broadcast_protocol(); + return pcmk_ok; cleanup: diff --git a/include/crm/crm.h b/include/crm/crm.h index 05ec555f493..6e2bcfa0f24 100644 --- a/include/crm/crm.h +++ b/include/crm/crm.h @@ -106,6 +106,7 @@ extern char *crm_system_name; # define CRM_ATTR_DIGESTS_ALL "#digests-all" # define CRM_ATTR_DIGESTS_SECURE "#digests-secure" # define CRM_ATTR_RA_VERSION "#ra-version" +# define CRM_ATTR_PROTOCOL "#attrd-protocol" /* Valid operations */ # define CRM_OP_NOOP "noop" From a87421042f5030e6dd7823cd80d7632b91296519 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 1 Dec 2017 11:02:54 -0600 Subject: [PATCH 002/812] Refactor: pengine: functionize checking whether node was unfenced reduces code duplication and enhances readability --- pengine/native.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pengine/native.c b/pengine/native.c index e72dec49f59..c998e4b30da 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -429,6 +429,14 @@ rsc_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const c return work; } +static inline bool +node_has_been_unfenced(node_t *node) +{ + const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED); + + return unfenced && strcmp("0", unfenced); +} + node_t * native_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { @@ -2524,10 +2532,9 @@ StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * d if(is_set(rsc->flags, pe_rsc_needs_unfencing)) { action_t *unfence = pe_fence_op(current, "on", TRUE, NULL, data_set); - const char *unfenced = 
pe_node_attribute_raw(current, CRM_ATTR_UNFENCED); order_actions(stop, unfence, pe_order_implies_first); - if (unfenced == NULL || safe_str_eq("0", unfenced)) { + if (!node_has_been_unfenced(current)) { pe_proc_err("Stopping %s until %s can be unfenced", rsc->id, current->details->uname); } } @@ -2547,11 +2554,9 @@ StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * if(is_set(rsc->flags, pe_rsc_needs_unfencing)) { action_t *unfence = pe_fence_op(next, "on", TRUE, NULL, data_set); - const char *unfenced = pe_node_attribute_raw(next, CRM_ATTR_UNFENCED); order_actions(unfence, start, pe_order_implies_then); - - if (unfenced == NULL || safe_str_eq("0", unfenced)) { + if (!node_has_been_unfenced(next)) { char *reason = crm_strdup_printf("Required by %s", rsc->id); trigger_unfencing(NULL, next, reason, NULL, data_set); free(reason); From b6b3fb9e8c6c6b34fb39c9d7f0b89ef41e9486fa Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 1 Dec 2017 11:45:31 -0600 Subject: [PATCH 003/812] Refactor: pengine: functionize checking for unfence device Reduces code duplication and enhances readability. This also comments out some dead code from when probe_complete was still used. 
--- pengine/native.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/pengine/native.c b/pengine/native.c index c998e4b30da..e57fbc75cf6 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -437,6 +437,13 @@ node_has_been_unfenced(node_t *node) return unfenced && strcmp("0", unfenced); } +static inline bool +is_unfence_device(resource_t *rsc, pe_working_set_t *data_set) +{ + return is_set(rsc->flags, pe_rsc_fence_device) + && is_set(data_set->flags, pe_flag_enable_unfencing); +} + node_t * native_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { @@ -3015,12 +3022,8 @@ native_create_probe(resource_t * rsc, node_t * node, action_t * complete, crm_debug("Probing %s on %s (%s) %d %p", rsc->id, node->details->uname, role2text(rsc->role), is_set(probe->flags, pe_action_runnable), rsc->running_on); - if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) { + if (is_unfence_device(rsc, data_set) || !pe_rsc_is_clone(top)) { top = rsc; - - } else if (pe_rsc_is_clone(top) == FALSE) { - top = rsc; - } else { crm_trace("Probing %s on %s (%s) as %s", rsc->id, node->details->uname, role2text(rsc->role), top->id); } @@ -3041,17 +3044,18 @@ native_create_probe(resource_t * rsc, node_t * node, action_t * complete, top, reload_key(rsc), NULL, pe_order_optional, data_set); - if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) { +#if 0 + // complete is always null currently + if (!is_unfence_device(rsc, data_set)) { /* Normally rsc.start depends on probe complete which depends - * on rsc.probe. But this can't be the case in this scenario as - * it would create graph loops. + * on rsc.probe. But this can't be the case for fence devices + * with unfencing, as it would create graph loops. 
* * So instead we explicitly order 'rsc.probe then rsc.start' */ - - } else { order_actions(probe, complete, pe_order_implies_then); } +#endif return TRUE; } From 63431baae2e544dc3b21d51b035942dfeeca5561 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 1 Dec 2017 12:06:16 -0600 Subject: [PATCH 004/812] Fix: pengine: unfence before probing or starting fence devices Regression since 7f8ba307 --- pengine/native.c | 62 +++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/pengine/native.c b/pengine/native.c index e57fbc75cf6..0013e333828 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -2550,6 +2550,39 @@ StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * d return TRUE; } +static void +order_after_unfencing(resource_t *rsc, pe_node_t *node, action_t *action, + enum pe_ordering order, pe_working_set_t *data_set) +{ + /* When unfencing is in use, we order unfence actions before any probe or + * start of resources that require unfencing, and also of fence devices. + * + * This might seem to violate the principle that fence devices require + * only quorum. However, fence agents that unfence often don't have enough + * information to even probe or start unless the node is first unfenced. + */ + if (is_unfence_device(rsc, data_set) + || is_set(rsc->flags, pe_rsc_needs_unfencing)) { + + /* Start with an optional ordering. Requiring unfencing would result in + * the node being unfenced, and all its resources being stopped, + * whenever a new resource is added -- which would be highly suboptimal. 
+ */ + action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, data_set); + + order_actions(unfence, action, order); + + if (!node_has_been_unfenced(node)) { + // But unfencing is required if it has never been done + char *reason = crm_strdup_printf("required by %s %s", + rsc->id, action->task); + + trigger_unfencing(NULL, node, reason, NULL, data_set); + free(reason); + } + } +} + gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { @@ -2559,16 +2592,7 @@ StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * pe_rsc_trace(rsc, "%s on %s %d %d", rsc->id, next ? next->details->uname : "N/A", optional, next ? next->weight : 0); start = start_action(rsc, next, TRUE); - if(is_set(rsc->flags, pe_rsc_needs_unfencing)) { - action_t *unfence = pe_fence_op(next, "on", TRUE, NULL, data_set); - - order_actions(unfence, start, pe_order_implies_then); - if (!node_has_been_unfenced(next)) { - char *reason = crm_strdup_printf("Required by %s", rsc->id); - trigger_unfencing(NULL, next, reason, NULL, data_set); - free(reason); - } - } + order_after_unfencing(rsc, next, start, pe_order_implies_then, data_set); if (is_set(start->flags, pe_action_runnable) && optional == FALSE) { update_action_flags(start, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__); @@ -2989,23 +3013,7 @@ native_create_probe(resource_t * rsc, node_t * node, action_t * complete, probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set); update_action_flags(probe, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__); - /* If enabled, require unfencing before probing any fence devices - * but ensure it happens after any resources that require - * unfencing have been probed. - * - * Doing it the other way (requiring unfencing after probing - * resources that need it) would result in the node being - * unfenced, and all its resources being stopped, whenever a new - * resource is added. 
Which would be highly suboptimal. - * - * So essentially, at the point the fencing device(s) have been - * probed, we know the state of all resources that require - * unfencing and that unfencing occurred. - */ - if(is_set(rsc->flags, pe_rsc_needs_unfencing)) { - action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, data_set); - order_actions(unfence, probe, pe_order_optional); - } + order_after_unfencing(rsc, node, probe, pe_order_optional, data_set); /* * We need to know if it's running_on (not just known_on) this node From 9d3840f374122f6258ddfe44bf85ff43d394d209 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 1 Dec 2017 12:24:55 -0600 Subject: [PATCH 005/812] Test: PE: update regression tests for unfencing change --- pengine/test10/start-then-stop-with-unfence.dot | 3 +++ pengine/test10/start-then-stop-with-unfence.exp | 15 +++++++++++++-- .../test10/start-then-stop-with-unfence.summary | 10 +++++----- pengine/test10/unfence-definition.dot | 2 ++ pengine/test10/unfence-definition.exp | 9 ++++++++- pengine/test10/unfence-definition.summary | 4 ++-- pengine/test10/unfence-parameters.dot | 2 ++ pengine/test10/unfence-parameters.exp | 9 ++++++++- pengine/test10/unfence-parameters.summary | 4 ++-- pengine/test10/unfence-startup.dot | 1 + pengine/test10/unfence-startup.exp | 6 +++++- pengine/test10/unfence-startup.summary | 4 ++-- 12 files changed, 53 insertions(+), 16 deletions(-) diff --git a/pengine/test10/start-then-stop-with-unfence.dot b/pengine/test10/start-then-stop-with-unfence.dot index 6e9569ba36c..b3243392d02 100644 --- a/pengine/test10/start-then-stop-with-unfence.dot +++ b/pengine/test10/start-then-stop-with-unfence.dot @@ -23,5 +23,8 @@ digraph "g" { "mpath-node2_monitor_0 rhel7-node1.example.com" [ style=bold color="green" fontcolor="black"] "stonith 'on' rhel7-node1.example.com" -> "ip1_start_0 rhel7-node1.example.com" [ style = bold] "stonith 'on' rhel7-node1.example.com" -> "jrummy_start_0 rhel7-node1.example.com" [ style = bold] +"stonith 
'on' rhel7-node1.example.com" -> "mpath-node1_monitor_0 rhel7-node1.example.com" [ style = bold] +"stonith 'on' rhel7-node1.example.com" -> "mpath-node1_start_0 rhel7-node1.example.com" [ style = bold] +"stonith 'on' rhel7-node1.example.com" -> "mpath-node2_monitor_0 rhel7-node1.example.com" [ style = bold] "stonith 'on' rhel7-node1.example.com" [ style=bold color="green" fontcolor="black"] } diff --git a/pengine/test10/start-then-stop-with-unfence.exp b/pengine/test10/start-then-stop-with-unfence.exp index 75cb356eba0..715ba4091d2 100644 --- a/pengine/test10/start-then-stop-with-unfence.exp +++ b/pengine/test10/start-then-stop-with-unfence.exp @@ -6,7 +6,11 @@ - + + + + + @@ -29,6 +33,9 @@ + + + @@ -41,7 +48,11 @@ - + + + + + diff --git a/pengine/test10/start-then-stop-with-unfence.summary b/pengine/test10/start-then-stop-with-unfence.summary index 2e02a21e747..b2114d7cf43 100644 --- a/pengine/test10/start-then-stop-with-unfence.summary +++ b/pengine/test10/start-then-stop-with-unfence.summary @@ -11,23 +11,23 @@ Online: [ rhel7-node1.example.com rhel7-node2.example.com ] Stopped: [ rhel7-node1.example.com ] Transition Summary: - * Fence (on) rhel7-node1.example.com 'Required by ip1' + * Fence (on) rhel7-node1.example.com 'required by mpath-node2 monitor' * Start mpath-node1 (rhel7-node1.example.com) * Move ip1 ( rhel7-node2.example.com -> rhel7-node1.example.com ) * Start jrummy:1 (rhel7-node1.example.com) Executing cluster transition: - * Resource action: mpath-node2 monitor on rhel7-node1.example.com - * Resource action: mpath-node1 monitor on rhel7-node1.example.com * Pseudo action: jrummy-clone_start_0 * Fencing rhel7-node1.example.com (on) - * Resource action: mpath-node1 start on rhel7-node1.example.com + * Resource action: mpath-node2 monitor on rhel7-node1.example.com + * Resource action: mpath-node1 monitor on rhel7-node1.example.com * Resource action: jrummy start on rhel7-node1.example.com * Pseudo action: jrummy-clone_running_0 - * Resource action: 
mpath-node1 monitor=60000 on rhel7-node1.example.com + * Resource action: mpath-node1 start on rhel7-node1.example.com * Resource action: ip1 stop on rhel7-node2.example.com * Resource action: jrummy monitor=10000 on rhel7-node1.example.com * Pseudo action: all_stopped + * Resource action: mpath-node1 monitor=60000 on rhel7-node1.example.com * Resource action: ip1 start on rhel7-node1.example.com * Resource action: ip1 monitor=10000 on rhel7-node1.example.com diff --git a/pengine/test10/unfence-definition.dot b/pengine/test10/unfence-definition.dot index 3bc29d396f2..c42391ad3b6 100644 --- a/pengine/test10/unfence-definition.dot +++ b/pengine/test10/unfence-definition.dot @@ -66,11 +66,13 @@ digraph "g" { "fencing_stop_0 virt-1" [ style=bold color="green" fontcolor="black"] "stonith 'on' virt-1" -> "clvmd_start_0 virt-1" [ style = bold] "stonith 'on' virt-1" -> "dlm_start_0 virt-1" [ style = bold] +"stonith 'on' virt-1" -> "fencing_start_0 virt-1" [ style = bold] "stonith 'on' virt-1" [ style=bold color="green" fontcolor="black"] "stonith 'on' virt-3" -> "clvmd:2_monitor_0 virt-3" [ style = bold] "stonith 'on' virt-3" -> "clvmd:2_start_0 virt-3" [ style = bold] "stonith 'on' virt-3" -> "dlm:2_monitor_0 virt-3" [ style = bold] "stonith 'on' virt-3" -> "dlm:2_start_0 virt-3" [ style = bold] +"stonith 'on' virt-3" -> "fencing_monitor_0 virt-3" [ style = bold] "stonith 'on' virt-3" [ style=bold color="green" fontcolor="black"] "stonith 'reboot' virt-4" -> "stonith_complete" [ style = bold] "stonith 'reboot' virt-4" [ style=bold color="green" fontcolor="black"] diff --git a/pengine/test10/unfence-definition.exp b/pengine/test10/unfence-definition.exp index b1e241ae4b0..25c5674024b 100644 --- a/pengine/test10/unfence-definition.exp +++ b/pengine/test10/unfence-definition.exp @@ -10,6 +10,9 @@ + + + @@ -28,7 +31,11 @@ - + + + + + diff --git a/pengine/test10/unfence-definition.summary b/pengine/test10/unfence-definition.summary index 4ca9344e721..2051c511133 100644 --- 
a/pengine/test10/unfence-definition.summary +++ b/pengine/test10/unfence-definition.summary @@ -13,7 +13,7 @@ Online: [ virt-1 virt-2 virt-3 ] Transition Summary: * Fence (reboot) virt-4 'node is unclean' - * Fence (on) virt-3 'Required by dlm:2' + * Fence (on) virt-3 'required by fencing monitor' * Fence (on) virt-1 'Device definition changed' * Restart fencing ( virt-1 ) * Restart dlm:0 ( virt-1 ) due to required stonith @@ -23,13 +23,13 @@ Transition Summary: * Start clvmd:2 (virt-3) Executing cluster transition: - * Resource action: fencing monitor on virt-3 * Resource action: fencing stop on virt-1 * Resource action: clvmd monitor on virt-2 * Pseudo action: clvmd-clone_stop_0 * Fencing virt-4 (reboot) * Pseudo action: stonith_complete * Fencing virt-3 (on) + * Resource action: fencing monitor on virt-3 * Resource action: fencing delete on virt-1 * Resource action: dlm monitor on virt-3 * Resource action: clvmd stop on virt-1 diff --git a/pengine/test10/unfence-parameters.dot b/pengine/test10/unfence-parameters.dot index ce006c42e25..3c27b22d1da 100644 --- a/pengine/test10/unfence-parameters.dot +++ b/pengine/test10/unfence-parameters.dot @@ -63,11 +63,13 @@ digraph "g" { "fencing_stop_0 virt-1" [ style=bold color="green" fontcolor="black"] "stonith 'on' virt-1" -> "clvmd_start_0 virt-1" [ style = bold] "stonith 'on' virt-1" -> "dlm_start_0 virt-1" [ style = bold] +"stonith 'on' virt-1" -> "fencing_start_0 virt-1" [ style = bold] "stonith 'on' virt-1" [ style=bold color="green" fontcolor="black"] "stonith 'on' virt-3" -> "clvmd:2_monitor_0 virt-3" [ style = bold] "stonith 'on' virt-3" -> "clvmd:2_start_0 virt-3" [ style = bold] "stonith 'on' virt-3" -> "dlm:2_monitor_0 virt-3" [ style = bold] "stonith 'on' virt-3" -> "dlm:2_start_0 virt-3" [ style = bold] +"stonith 'on' virt-3" -> "fencing_monitor_0 virt-3" [ style = bold] "stonith 'on' virt-3" [ style=bold color="green" fontcolor="black"] "stonith 'reboot' virt-4" -> "stonith_complete" [ style = bold] "stonith 
'reboot' virt-4" [ style=bold color="green" fontcolor="black"] diff --git a/pengine/test10/unfence-parameters.exp b/pengine/test10/unfence-parameters.exp index b8053c75262..3b73fc7daec 100644 --- a/pengine/test10/unfence-parameters.exp +++ b/pengine/test10/unfence-parameters.exp @@ -15,7 +15,11 @@ - + + + + + @@ -28,6 +32,9 @@ + + + diff --git a/pengine/test10/unfence-parameters.summary b/pengine/test10/unfence-parameters.summary index 5b582d9f680..2cc9e2767dd 100644 --- a/pengine/test10/unfence-parameters.summary +++ b/pengine/test10/unfence-parameters.summary @@ -13,7 +13,7 @@ Online: [ virt-1 virt-2 virt-3 ] Transition Summary: * Fence (reboot) virt-4 'node is unclean' - * Fence (on) virt-3 'Required by dlm:2' + * Fence (on) virt-3 'required by fencing monitor' * Fence (on) virt-1 'Device parameters changed (reload)' * Restart fencing ( virt-1 ) due to resource definition change * Restart dlm:0 ( virt-1 ) due to required stonith @@ -24,12 +24,12 @@ Transition Summary: Executing cluster transition: * Resource action: fencing stop on virt-1 - * Resource action: fencing monitor on virt-3 * Resource action: clvmd monitor on virt-2 * Pseudo action: clvmd-clone_stop_0 * Fencing virt-4 (reboot) * Pseudo action: stonith_complete * Fencing virt-3 (on) + * Resource action: fencing monitor on virt-3 * Resource action: dlm monitor on virt-3 * Resource action: clvmd stop on virt-1 * Resource action: clvmd monitor on virt-3 diff --git a/pengine/test10/unfence-startup.dot b/pengine/test10/unfence-startup.dot index d496956a133..642f795f8cb 100644 --- a/pengine/test10/unfence-startup.dot +++ b/pengine/test10/unfence-startup.dot @@ -29,6 +29,7 @@ digraph "g" { "stonith 'on' virt-3" -> "clvmd:2_start_0 virt-3" [ style = bold] "stonith 'on' virt-3" -> "dlm:2_monitor_0 virt-3" [ style = bold] "stonith 'on' virt-3" -> "dlm:2_start_0 virt-3" [ style = bold] +"stonith 'on' virt-3" -> "fencing_monitor_0 virt-3" [ style = bold] "stonith 'on' virt-3" [ style=bold color="green" 
fontcolor="black"] "stonith 'reboot' virt-4" -> "stonith_complete" [ style = bold] "stonith 'reboot' virt-4" [ style=bold color="green" fontcolor="black"] diff --git a/pengine/test10/unfence-startup.exp b/pengine/test10/unfence-startup.exp index 70c1686b137..bfd24c878f9 100644 --- a/pengine/test10/unfence-startup.exp +++ b/pengine/test10/unfence-startup.exp @@ -6,7 +6,11 @@ - + + + + + diff --git a/pengine/test10/unfence-startup.summary b/pengine/test10/unfence-startup.summary index 276358ca582..4601f31754d 100644 --- a/pengine/test10/unfence-startup.summary +++ b/pengine/test10/unfence-startup.summary @@ -13,18 +13,18 @@ Online: [ virt-1 virt-2 virt-3 ] Transition Summary: * Fence (reboot) virt-4 'node is unclean' - * Fence (on) virt-3 'Required by dlm:2' + * Fence (on) virt-3 'required by fencing monitor' * Start dlm:2 (virt-3) * Start clvmd:1 (virt-2) * Start clvmd:2 (virt-3) Executing cluster transition: - * Resource action: fencing monitor on virt-3 * Resource action: clvmd monitor on virt-2 * Fencing virt-4 (reboot) * Pseudo action: stonith_complete * Fencing virt-3 (on) * Pseudo action: all_stopped + * Resource action: fencing monitor on virt-3 * Resource action: dlm monitor on virt-3 * Pseudo action: dlm-clone_start_0 * Resource action: clvmd monitor on virt-3 From c11d10ef4f04bbdb2e6b7e6251b88e50faccaaca Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 1 Dec 2017 14:36:03 -0600 Subject: [PATCH 006/812] Test: PE: add regression test for unfencing with only fence devices --- pengine/regression.sh | 1 + pengine/test10/unfence-device.dot | 18 +++++ pengine/test10/unfence-device.exp | 100 ++++++++++++++++++++++++++ pengine/test10/unfence-device.scores | 5 ++ pengine/test10/unfence-device.summary | 29 ++++++++ pengine/test10/unfence-device.xml | 66 +++++++++++++++++ 6 files changed, 219 insertions(+) create mode 100644 pengine/test10/unfence-device.dot create mode 100644 pengine/test10/unfence-device.exp create mode 100644 
pengine/test10/unfence-device.scores create mode 100644 pengine/test10/unfence-device.summary create mode 100644 pengine/test10/unfence-device.xml diff --git a/pengine/regression.sh b/pengine/regression.sh index db101e72a29..47cf0ba030c 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -393,6 +393,7 @@ echo "" do_test unfence-startup "Clean unfencing" do_test unfence-definition "Unfencing when the agent changes" do_test unfence-parameters "Unfencing when the agent parameters changes" +do_test unfence-device "Unfencing when a cluster has only fence devices" echo "" do_test master-0 "Stopped -> Slave" diff --git a/pengine/test10/unfence-device.dot b/pengine/test10/unfence-device.dot new file mode 100644 index 00000000000..e383fd2be89 --- /dev/null +++ b/pengine/test10/unfence-device.dot @@ -0,0 +1,18 @@ +digraph "g" { +"fence_scsi_monitor_0 virt-008" -> "fence_scsi_start_0 virt-008" [ style = bold] +"fence_scsi_monitor_0 virt-008" [ style=bold color="green" fontcolor="black"] +"fence_scsi_monitor_0 virt-009" -> "fence_scsi_start_0 virt-008" [ style = bold] +"fence_scsi_monitor_0 virt-009" [ style=bold color="green" fontcolor="black"] +"fence_scsi_monitor_0 virt-013" -> "fence_scsi_start_0 virt-008" [ style = bold] +"fence_scsi_monitor_0 virt-013" [ style=bold color="green" fontcolor="black"] +"fence_scsi_monitor_60000 virt-008" [ style=bold color="green" fontcolor="black"] +"fence_scsi_start_0 virt-008" -> "fence_scsi_monitor_60000 virt-008" [ style = bold] +"fence_scsi_start_0 virt-008" [ style=bold color="green" fontcolor="black"] +"stonith 'on' virt-008" -> "fence_scsi_monitor_0 virt-008" [ style = bold] +"stonith 'on' virt-008" -> "fence_scsi_start_0 virt-008" [ style = bold] +"stonith 'on' virt-008" [ style=bold color="green" fontcolor="black"] +"stonith 'on' virt-009" -> "fence_scsi_monitor_0 virt-009" [ style = bold] +"stonith 'on' virt-009" [ style=bold color="green" fontcolor="black"] +"stonith 'on' virt-013" -> "fence_scsi_monitor_0 
virt-013" [ style = bold] +"stonith 'on' virt-013" [ style=bold color="green" fontcolor="black"] +} diff --git a/pengine/test10/unfence-device.exp b/pengine/test10/unfence-device.exp new file mode 100644 index 00000000000..98cb5487e17 --- /dev/null +++ b/pengine/test10/unfence-device.exp @@ -0,0 +1,100 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/unfence-device.scores b/pengine/test10/unfence-device.scores new file mode 100644 index 00000000000..8ea5036621a --- /dev/null +++ b/pengine/test10/unfence-device.scores @@ -0,0 +1,5 @@ +Allocation scores: +Using the original execution date of: 2017-11-30 10:44:29Z +native_color: fence_scsi allocation score on virt-008: 0 +native_color: fence_scsi allocation score on virt-009: 0 +native_color: fence_scsi allocation score on virt-013: 0 diff --git a/pengine/test10/unfence-device.summary b/pengine/test10/unfence-device.summary new file mode 100644 index 00000000000..181724b6f5e --- /dev/null +++ b/pengine/test10/unfence-device.summary @@ -0,0 +1,29 @@ +Using the original execution date of: 2017-11-30 10:44:29Z + +Current cluster status: +Online: [ virt-008 virt-009 virt-013 ] + + fence_scsi (stonith:fence_scsi): Stopped + +Transition Summary: + * Fence (on) virt-013 'required by fence_scsi monitor' + * Fence (on) virt-009 'required by fence_scsi monitor' + * Fence (on) virt-008 'required by fence_scsi monitor' + * Start fence_scsi ( virt-008 ) + +Executing cluster transition: + * Fencing virt-013 (on) + * Fencing virt-009 (on) + * Fencing virt-008 (on) + * Resource action: fence_scsi monitor on virt-013 + * Resource action: fence_scsi monitor on virt-009 + * Resource action: fence_scsi monitor on virt-008 + * Resource action: fence_scsi start on virt-008 + * Resource action: fence_scsi monitor=60000 on virt-008 +Using the original execution 
date of: 2017-11-30 10:44:29Z + +Revised cluster status: +Online: [ virt-008 virt-009 virt-013 ] + + fence_scsi (stonith:fence_scsi): Started virt-008 + diff --git a/pengine/test10/unfence-device.xml b/pengine/test10/unfence-device.xml new file mode 100644 index 00000000000..e977d9baddf --- /dev/null +++ b/pengine/test10/unfence-device.xml @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 2948a8e329cda42e5e7e106c0374d49d93b65481 Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Wed, 6 Dec 2017 14:05:05 +1100 Subject: [PATCH 007/812] Fix: PE: Passing boolean instead of a pointer --- lib/pengine/container.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pengine/container.c b/lib/pengine/container.c index 52b60a4cd1e..4d2d876a5b2 100644 --- a/lib/pengine/container.c +++ b/lib/pengine/container.c @@ -557,7 +557,7 @@ create_remote_resource( * remote should be ordered relative to docker. */ xml_remote = pe_create_remote_xml(NULL, id, tuple->docker->id, - XML_BOOLEAN_FALSE, NULL, "60s", NULL, + NULL, NULL, "60s", NULL, NULL, connect_name, (data->control_port? 
data->control_port : port_s)); From f3593e410643dcafa81e28da27c3a623e306fa61 Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Wed, 6 Dec 2017 14:48:57 +1100 Subject: [PATCH 008/812] Fix: PE: Ordering bundle child stops/demotes after container fencing causes graph loops --- include/crm/pengine/status.h | 6 ++++++ lib/pengine/utils.c | 2 +- pengine/allocate.c | 4 +++- pengine/native.c | 9 +++++++-- pengine/test10/bundle-order-fencing.dot | 5 ----- pengine/test10/bundle-order-fencing.exp | 15 --------------- 6 files changed, 17 insertions(+), 24 deletions(-) diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index f2a89105b3e..fca7f127c6e 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -517,4 +517,10 @@ pe_rsc_is_anon_clone(resource_t *rsc) return pe_rsc_is_clone(rsc) && is_not_set(rsc->flags, pe_rsc_unique); } +static inline bool +pe_rsc_is_bundled(resource_t *rsc) +{ + return uber_parent(rsc)->parent != NULL; +} + #endif diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 0ce0e30c91c..a875226e000 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1015,7 +1015,7 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, value = "nothing (resource)"; } - pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->task, value); + pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->uuid, value); value = unpack_operation_on_fail(action); diff --git a/pengine/allocate.c b/pengine/allocate.c index 98464a9b5af..2ae491c9b93 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1470,7 +1470,9 @@ fence_guest(pe_node_t *node, pe_action_t *done, pe_working_set_t *data_set) /* Order/imply other actions relative to pseudo-fence as with real fence */ stonith_constraints(node, stonith_op, data_set); - order_actions(stonith_op, done, pe_order_implies_then); + if(done) { + order_actions(stonith_op, done, pe_order_implies_then); + } } /* diff --git a/pengine/native.c 
b/pengine/native.c index 0013e333828..96c9a26381c 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -3164,7 +3164,9 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_ */ flags |= pe_order_preserve; } - order_actions(stonith_op, action, flags); + if (pe_rsc_is_bundled(rsc) == FALSE) { + order_actions(stonith_op, action, flags); + } order_actions(stonith_op, parent_stop, flags); } @@ -3252,7 +3254,10 @@ native_stop_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_ update_action_flags(action, pe_action_pseudo, __FUNCTION__, __LINE__); update_action_flags(action, pe_action_runnable, __FUNCTION__, __LINE__); - if (start == NULL || start->needs > rsc_req_quorum) { + if (pe_rsc_is_bundled(rsc)) { + /* Do nothing, let the recovery be ordered after the parent's implied stop */ + + } else if (start == NULL || start->needs > rsc_req_quorum) { order_actions(stonith_op, action, pe_order_preserve|pe_order_optional); } } diff --git a/pengine/test10/bundle-order-fencing.dot b/pengine/test10/bundle-order-fencing.dot index a7e58054e98..64b63265806 100644 --- a/pengine/test10/bundle-order-fencing.dot +++ b/pengine/test10/bundle-order-fencing.dot @@ -403,19 +403,14 @@ digraph "g" { "redis_stop_0 redis-bundle-0" -> "redis_start_0 redis-bundle-0" [ style = dashed] "redis_stop_0 redis-bundle-0" [ style=bold color="green" fontcolor="orange"] "stonith 'off' galera-bundle-0" -> "galera-bundle-master_stop_0" [ style = bold] -"stonith 'off' galera-bundle-0" -> "galera_demote_0 galera-bundle-0" [ style = bold] -"stonith 'off' galera-bundle-0" -> "galera_stop_0 galera-bundle-0" [ style = bold] "stonith 'off' galera-bundle-0" -> "stonith_complete" [ style = bold] "stonith 'off' galera-bundle-0" [ style=bold color="green" fontcolor="orange"] "stonith 'off' rabbitmq-bundle-0" -> "rabbitmq-bundle-clone_stop_0" [ style = bold] "stonith 'off' rabbitmq-bundle-0" -> "rabbitmq_post_notify_stonith_0" [ style = bold] -"stonith 'off' 
rabbitmq-bundle-0" -> "rabbitmq_stop_0 rabbitmq-bundle-0" [ style = bold] "stonith 'off' rabbitmq-bundle-0" -> "stonith_complete" [ style = bold] "stonith 'off' rabbitmq-bundle-0" [ style=bold color="green" fontcolor="orange"] "stonith 'off' redis-bundle-0" -> "redis-bundle-master_stop_0" [ style = bold] -"stonith 'off' redis-bundle-0" -> "redis_demote_0 redis-bundle-0" [ style = bold] "stonith 'off' redis-bundle-0" -> "redis_post_notify_stonith_0" [ style = bold] -"stonith 'off' redis-bundle-0" -> "redis_stop_0 redis-bundle-0" [ style = bold] "stonith 'off' redis-bundle-0" -> "stonith_complete" [ style = bold] "stonith 'off' redis-bundle-0" [ style=bold color="green" fontcolor="orange"] "stonith 'reboot' controller-0" -> "galera-bundle-0_stop_0 controller-0" [ style = bold] diff --git a/pengine/test10/bundle-order-fencing.exp b/pengine/test10/bundle-order-fencing.exp index 8e35f3210b0..78ce6754377 100644 --- a/pengine/test10/bundle-order-fencing.exp +++ b/pengine/test10/bundle-order-fencing.exp @@ -55,9 +55,6 @@ - - - @@ -440,9 +437,6 @@ - - - @@ -455,9 +449,6 @@ - - - @@ -701,9 +692,6 @@ - - - @@ -716,9 +704,6 @@ - - - From 906cd4a9e6b871eefb6d113354f9045c1826711a Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Wed, 6 Dec 2017 15:04:21 +1100 Subject: [PATCH 009/812] Fix: PE: Only allowed nodes need to be considered when ordering resource startup after _all_ recovery --- pengine/native.c | 1 + pengine/test10/bundle-order-fencing.dot | 2 -- pengine/test10/bundle-order-fencing.exp | 6 ------ pengine/test10/bundle-order-fencing.summary | 8 ++++---- 4 files changed, 5 insertions(+), 12 deletions(-) diff --git a/pengine/native.c b/pengine/native.c index 96c9a26381c..d4f1ff7bd1b 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -3088,6 +3088,7 @@ native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set order_actions(stonith_done, action, pe_order_optional); } else if (safe_str_eq(action->task, RSC_START) + && NULL != 
pe_hash_table_lookup(rsc->allowed_nodes, target->details->id) && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) { /* if known == NULL, then we don't know if * the resource is active on the node diff --git a/pengine/test10/bundle-order-fencing.dot b/pengine/test10/bundle-order-fencing.dot index 64b63265806..d6532507983 100644 --- a/pengine/test10/bundle-order-fencing.dot +++ b/pengine/test10/bundle-order-fencing.dot @@ -3,8 +3,6 @@ digraph "g" { "Cancel redis_monitor_45000 redis-bundle-1" [ style=bold color="green" fontcolor="black"] "Cancel redis_monitor_60000 redis-bundle-1" -> "redis_promote_0 redis-bundle-1" [ style = bold] "Cancel redis_monitor_60000 redis-bundle-1" [ style=bold color="green" fontcolor="black"] -"all_stopped" -> "stonith-fence_ipmilan-5254000dcb3f_start_0 controller-2" [ style = bold] -"all_stopped" -> "stonith-fence_ipmilan-5254003e8e97_start_0 controller-1" [ style = bold] "all_stopped" [ style=bold color="green" fontcolor="orange"] "galera-bundle-0_monitor_0 controller-1" -> "galera-bundle-0_start_0 controller-2" [ style = dashed] "galera-bundle-0_monitor_0 controller-1" [ style=bold color="green" fontcolor="black"] diff --git a/pengine/test10/bundle-order-fencing.exp b/pengine/test10/bundle-order-fencing.exp index 78ce6754377..708815fef1f 100644 --- a/pengine/test10/bundle-order-fencing.exp +++ b/pengine/test10/bundle-order-fencing.exp @@ -1623,9 +1623,6 @@ - - - @@ -1660,9 +1657,6 @@ - - - diff --git a/pengine/test10/bundle-order-fencing.summary b/pengine/test10/bundle-order-fencing.summary index e78c5313b51..ee2c361240f 100644 --- a/pengine/test10/bundle-order-fencing.summary +++ b/pengine/test10/bundle-order-fencing.summary @@ -91,6 +91,8 @@ Executing cluster transition: * Pseudo action: redis-bundle-master_demote_0 * Pseudo action: redis-bundle-0_stop_0 * Pseudo action: haproxy-bundle-docker-0_stop_0 + * Resource action: stonith-fence_ipmilan-5254003e8e97 start on controller-1 + * Resource action: 
stonith-fence_ipmilan-5254000dcb3f start on controller-2 * Pseudo action: stonith-redis-bundle-0-off on redis-bundle-0 * Pseudo action: stonith-rabbitmq-bundle-0-off on rabbitmq-bundle-0 * Pseudo action: stonith-galera-bundle-0-off on galera-bundle-0 @@ -107,6 +109,8 @@ Executing cluster transition: * Pseudo action: ip-192.168.24.7_stop_0 * Pseudo action: ip-10.0.0.109_stop_0 * Pseudo action: ip-172.17.4.11_stop_0 + * Resource action: stonith-fence_ipmilan-5254003e8e97 monitor=60000 on controller-1 + * Resource action: stonith-fence_ipmilan-5254000dcb3f monitor=60000 on controller-2 * Pseudo action: galera-bundle_demoted_0 * Pseudo action: galera-bundle_stop_0 * Pseudo action: rabbitmq_stop_0 @@ -172,11 +176,7 @@ Executing cluster transition: * Pseudo action: rabbitmq-bundle_running_0 * Pseudo action: all_stopped * Pseudo action: redis-bundle-master_running_0 - * Resource action: stonith-fence_ipmilan-5254003e8e97 start on controller-1 - * Resource action: stonith-fence_ipmilan-5254000dcb3f start on controller-2 * Pseudo action: redis-bundle-master_post_notify_running_0 - * Resource action: stonith-fence_ipmilan-5254003e8e97 monitor=60000 on controller-1 - * Resource action: stonith-fence_ipmilan-5254000dcb3f monitor=60000 on controller-2 * Resource action: redis notify on redis-bundle-0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 From c6d208dfbda95d8610519de50075087e56a4f8c0 Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Wed, 6 Dec 2017 23:50:12 +1100 Subject: [PATCH 010/812] Fix: PE: Remote connection resources are safe to to require only quorum --- lib/pengine/complex.c | 6 +++ pengine/test10/bug-rh-1097457.dot | 2 +- pengine/test10/bug-rh-1097457.exp | 6 +-- pengine/test10/bug-rh-1097457.summary | 14 +++---- pengine/test10/bundle-order-fencing.dot | 6 --- pengine/test10/bundle-order-fencing.exp | 18 ++------- pengine/test10/bundle-order-fencing.summary | 8 ++-- pengine/test10/guest-node-host-dies.dot 
| 6 +-- pengine/test10/guest-node-host-dies.exp | 24 ++++-------- pengine/test10/guest-node-host-dies.summary | 12 +++--- pengine/test10/remote-fence-unclean.dot | 2 +- pengine/test10/remote-fence-unclean.exp | 2 +- pengine/test10/remote-partial-migrate2.dot | 6 +-- pengine/test10/remote-partial-migrate2.exp | 27 +++---------- .../test10/remote-partial-migrate2.summary | 38 +++++++++---------- pengine/test10/remote-recover-all.dot | 3 +- pengine/test10/remote-recover-all.exp | 10 ++--- pengine/test10/remote-recover-all.summary | 8 ++-- pengine/test10/remote-recover-connection.dot | 6 --- pengine/test10/remote-recover-connection.exp | 27 ++----------- .../test10/remote-recover-connection.summary | 24 ++++++------ pengine/test10/remote-recover-fail.dot | 2 +- pengine/test10/remote-recover-fail.exp | 2 +- .../test10/remote-recover-no-resources.dot | 3 +- .../test10/remote-recover-no-resources.exp | 10 ++--- .../remote-recover-no-resources.summary | 8 ++-- pengine/test10/remote-recover-unknown.dot | 3 +- pengine/test10/remote-recover-unknown.exp | 10 ++--- pengine/test10/remote-recover-unknown.summary | 8 ++-- pengine/test10/remote-recovery.dot | 6 --- pengine/test10/remote-recovery.exp | 27 ++----------- pengine/test10/remote-recovery.summary | 24 ++++++------ pengine/test10/remote-unclean2.dot | 2 +- pengine/test10/remote-unclean2.exp | 2 +- pengine/test10/whitebox-fail1.dot | 2 +- pengine/test10/whitebox-fail1.exp | 6 +-- pengine/test10/whitebox-fail1.summary | 8 ++-- pengine/test10/whitebox-fail2.dot | 2 +- pengine/test10/whitebox-fail2.exp | 6 +-- pengine/test10/whitebox-fail2.summary | 8 ++-- .../test10/whitebox-imply-stop-on-fence.dot | 6 +-- .../test10/whitebox-imply-stop-on-fence.exp | 24 ++++-------- .../whitebox-imply-stop-on-fence.summary | 20 +++++----- pengine/test10/whitebox-ms-ordering.dot | 4 +- pengine/test10/whitebox-ms-ordering.exp | 12 +++--- pengine/test10/whitebox-ms-ordering.summary | 8 ++-- .../test10/whitebox-unexpectedly-running.dot | 2 + 
.../test10/whitebox-unexpectedly-running.exp | 6 +++ 48 files changed, 182 insertions(+), 294 deletions(-) diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c index 3e0abedad48..d58d6beb8fb 100644 --- a/lib/pengine/complex.c +++ b/lib/pengine/complex.c @@ -784,6 +784,12 @@ common_unpack(xmlNode * xml_obj, resource_t ** rsc, if(is_set((*rsc)->flags, pe_rsc_fence_device)) { value = "quorum"; + } else if (safe_str_eq(crm_element_value((*rsc)->xml, XML_AGENT_ATTR_CLASS), "ocf") + && safe_str_eq(crm_element_value((*rsc)->xml, XML_AGENT_ATTR_PROVIDER), "pacemaker") + && safe_str_eq(crm_element_value((*rsc)->xml, XML_ATTR_TYPE), "remote") + ) { + value = "quorum"; + } else if (is_set(data_set->flags, pe_flag_enable_unfencing)) { value = "unfencing"; diff --git a/pengine/test10/bug-rh-1097457.dot b/pengine/test10/bug-rh-1097457.dot index 59848117667..94ffe136a56 100644 --- a/pengine/test10/bug-rh-1097457.dot +++ b/pengine/test10/bug-rh-1097457.dot @@ -80,6 +80,7 @@ digraph "g" { "VM2_stop_0 lama3" -> "all_stopped" [ style = bold] "VM2_stop_0 lama3" -> "stonith 'reboot' lamaVM2" [ style = bold] "VM2_stop_0 lama3" [ style=bold color="green" fontcolor="black"] +"all_stopped" -> "lamaVM2_start_0 lama3" [ style = bold] "all_stopped" [ style=bold color="green" fontcolor="orange"] "lamaVM2-G4_running_0" [ style=bold color="green" fontcolor="orange"] "lamaVM2-G4_start_0" -> "FAKE4-IP_start_0 lamaVM2" [ style = bold] @@ -121,6 +122,5 @@ digraph "g" { "stonith_complete" -> "FSlun3_start_0 lama2" [ style = bold] "stonith_complete" -> "VM2_start_0 lama3" [ style = bold] "stonith_complete" -> "all_stopped" [ style = bold] -"stonith_complete" -> "lamaVM2_start_0 lama3" [ style = bold] "stonith_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/bug-rh-1097457.exp b/pengine/test10/bug-rh-1097457.exp index 4eedd9123e6..f1451b5a2b7 100644 --- a/pengine/test10/bug-rh-1097457.exp +++ b/pengine/test10/bug-rh-1097457.exp @@ -599,13 +599,13 @@ - + - + - 
+ diff --git a/pengine/test10/bug-rh-1097457.summary b/pengine/test10/bug-rh-1097457.summary index e23c6adc259..0e7d2e0b9ab 100644 --- a/pengine/test10/bug-rh-1097457.summary +++ b/pengine/test10/bug-rh-1097457.summary @@ -70,26 +70,26 @@ Executing cluster transition: * Pseudo action: lamaVM2-G4_stop_0 * Pseudo action: FAKE4-IP_stop_0 * Pseudo action: FAKE6-clone_stop_0 - * Resource action: lamaVM2 start on lama3 - * Resource action: lamaVM2 monitor=30000 on lama3 - * Resource action: FSlun3 monitor=10000 on lamaVM2 * Pseudo action: FAKE4_stop_0 * Pseudo action: FAKE6_stop_0 * Pseudo action: FAKE6-clone_stopped_0 * Pseudo action: FAKE6-clone_start_0 * Pseudo action: lamaVM2-G4_stopped_0 - * Resource action: FAKE6 start on lamaVM2 - * Resource action: FAKE6 monitor=30000 on lamaVM2 - * Pseudo action: FAKE6-clone_running_0 * Pseudo action: FSlun3_stop_0 * Pseudo action: all_stopped * Resource action: FSlun3 start on lama2 * Pseudo action: lamaVM2-G4_start_0 + * Resource action: lamaVM2 start on lama3 + * Resource action: lamaVM2 monitor=30000 on lama3 + * Resource action: FSlun3 monitor=10000 on lama2 + * Resource action: FSlun3 monitor=10000 on lamaVM2 * Resource action: FAKE4 start on lamaVM2 * Resource action: FAKE4 monitor=30000 on lamaVM2 * Resource action: FAKE4-IP start on lamaVM2 * Resource action: FAKE4-IP monitor=30000 on lamaVM2 - * Resource action: FSlun3 monitor=10000 on lama2 + * Resource action: FAKE6 start on lamaVM2 + * Resource action: FAKE6 monitor=30000 on lamaVM2 + * Pseudo action: FAKE6-clone_running_0 * Pseudo action: lamaVM2-G4_running_0 Revised cluster status: diff --git a/pengine/test10/bundle-order-fencing.dot b/pengine/test10/bundle-order-fencing.dot index d6532507983..980bab4b231 100644 --- a/pengine/test10/bundle-order-fencing.dot +++ b/pengine/test10/bundle-order-fencing.dot @@ -411,15 +411,12 @@ digraph "g" { "stonith 'off' redis-bundle-0" -> "redis_post_notify_stonith_0" [ style = bold] "stonith 'off' redis-bundle-0" -> 
"stonith_complete" [ style = bold] "stonith 'off' redis-bundle-0" [ style=bold color="green" fontcolor="orange"] -"stonith 'reboot' controller-0" -> "galera-bundle-0_stop_0 controller-0" [ style = bold] "stonith 'reboot' controller-0" -> "galera-bundle-docker-0_stop_0 controller-0" [ style = bold] "stonith 'reboot' controller-0" -> "haproxy-bundle-docker-0_stop_0 controller-0" [ style = bold] "stonith 'reboot' controller-0" -> "ip-10.0.0.109_stop_0 controller-0" [ style = bold] "stonith 'reboot' controller-0" -> "ip-172.17.4.11_stop_0 controller-0" [ style = bold] "stonith 'reboot' controller-0" -> "ip-192.168.24.7_stop_0 controller-0" [ style = bold] -"stonith 'reboot' controller-0" -> "rabbitmq-bundle-0_stop_0 controller-0" [ style = bold] "stonith 'reboot' controller-0" -> "rabbitmq-bundle-docker-0_stop_0 controller-0" [ style = bold] -"stonith 'reboot' controller-0" -> "redis-bundle-0_stop_0 controller-0" [ style = bold] "stonith 'reboot' controller-0" -> "redis-bundle-docker-0_stop_0 controller-0" [ style = bold] "stonith 'reboot' controller-0" -> "stonith 'off' galera-bundle-0" [ style = bold] "stonith 'reboot' controller-0" -> "stonith 'off' rabbitmq-bundle-0" [ style = bold] @@ -439,14 +436,11 @@ digraph "g" { "stonith-fence_ipmilan-5254003e8e97_stop_0 controller-0" -> "stonith-fence_ipmilan-5254003e8e97_start_0 controller-1" [ style = bold] "stonith-fence_ipmilan-5254003e8e97_stop_0 controller-0" [ style=bold color="green" fontcolor="orange"] "stonith_complete" -> "all_stopped" [ style = bold] -"stonith_complete" -> "galera-bundle-0_start_0 controller-2" [ style = dashed] "stonith_complete" -> "galera_start_0 galera-bundle-0" [ style = dashed] "stonith_complete" -> "ip-10.0.0.109_start_0 controller-1" [ style = bold] "stonith_complete" -> "ip-172.17.4.11_start_0 controller-1" [ style = bold] "stonith_complete" -> "ip-192.168.24.7_start_0 controller-2" [ style = bold] -"stonith_complete" -> "rabbitmq-bundle-0_start_0 controller-1" [ style = dashed] 
"stonith_complete" -> "rabbitmq_start_0 rabbitmq-bundle-0" [ style = dashed] -"stonith_complete" -> "redis-bundle-0_start_0 controller-1" [ style = dashed] "stonith_complete" -> "redis_promote_0 redis-bundle-1" [ style = bold] "stonith_complete" -> "redis_start_0 redis-bundle-0" [ style = dashed] "stonith_complete" [ style=bold color="green" fontcolor="orange"] diff --git a/pengine/test10/bundle-order-fencing.exp b/pengine/test10/bundle-order-fencing.exp index 708815fef1f..dc4c5c99108 100644 --- a/pengine/test10/bundle-order-fencing.exp +++ b/pengine/test10/bundle-order-fencing.exp @@ -379,11 +379,7 @@ - - - - - + @@ -565,11 +561,7 @@ - - - - - + @@ -1413,11 +1405,7 @@ - - - - - + diff --git a/pengine/test10/bundle-order-fencing.summary b/pengine/test10/bundle-order-fencing.summary index ee2c361240f..0457f833ba5 100644 --- a/pengine/test10/bundle-order-fencing.summary +++ b/pengine/test10/bundle-order-fencing.summary @@ -56,10 +56,12 @@ Transition Summary: Executing cluster transition: * Pseudo action: rabbitmq-bundle-clone_pre_notify_stop_0 + * Pseudo action: rabbitmq-bundle-0_stop_0 * Resource action: rabbitmq-bundle-0 monitor on controller-2 * Resource action: rabbitmq-bundle-0 monitor on controller-1 * Resource action: rabbitmq-bundle-1 monitor on controller-2 * Resource action: rabbitmq-bundle-2 monitor on controller-1 + * Pseudo action: galera-bundle-0_stop_0 * Resource action: galera-bundle-0 monitor on controller-2 * Resource action: galera-bundle-0 monitor on controller-1 * Resource action: galera-bundle-1 monitor on controller-2 @@ -67,6 +69,7 @@ Executing cluster transition: * Resource action: redis cancel=45000 on redis-bundle-1 * Resource action: redis cancel=60000 on redis-bundle-1 * Pseudo action: redis-bundle-master_pre_notify_demote_0 + * Pseudo action: redis-bundle-0_stop_0 * Resource action: redis-bundle-0 monitor on controller-2 * Resource action: redis-bundle-0 monitor on controller-1 * Resource action: redis-bundle-1 monitor on controller-2 @@ 
-82,14 +85,12 @@ Executing cluster transition: * Resource action: rabbitmq notify on rabbitmq-bundle-1 * Resource action: rabbitmq notify on rabbitmq-bundle-2 * Pseudo action: rabbitmq-bundle-clone_confirmed-pre_notify_stop_0 - * Pseudo action: rabbitmq-bundle-0_stop_0 + * Pseudo action: rabbitmq-bundle-docker-0_stop_0 * Pseudo action: galera-bundle-master_demote_0 - * Pseudo action: galera-bundle-0_stop_0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-pre_notify_demote_0 * Pseudo action: redis-bundle-master_demote_0 - * Pseudo action: redis-bundle-0_stop_0 * Pseudo action: haproxy-bundle-docker-0_stop_0 * Resource action: stonith-fence_ipmilan-5254003e8e97 start on controller-1 * Resource action: stonith-fence_ipmilan-5254000dcb3f start on controller-2 @@ -100,7 +101,6 @@ Executing cluster transition: * Pseudo action: haproxy-bundle_stopped_0 * Pseudo action: rabbitmq_post_notify_stop_0 * Pseudo action: rabbitmq-bundle-clone_stop_0 - * Pseudo action: rabbitmq-bundle-docker-0_stop_0 * Pseudo action: galera_demote_0 * Pseudo action: galera-bundle-master_demoted_0 * Pseudo action: redis_post_notify_stop_0 diff --git a/pengine/test10/guest-node-host-dies.dot b/pengine/test10/guest-node-host-dies.dot index a85250df749..c50e07127c1 100644 --- a/pengine/test10/guest-node-host-dies.dot +++ b/pengine/test10/guest-node-host-dies.dot @@ -6,6 +6,8 @@ digraph "g" { "Fencing_stop_0 rhel7-4" -> "all_stopped" [ style = bold] "Fencing_stop_0 rhel7-4" [ style=bold color="green" fontcolor="black"] "all_stopped" -> "Fencing_start_0 rhel7-4" [ style = bold] +"all_stopped" -> "lxc1_start_0 rhel7-2" [ style = bold] +"all_stopped" -> "lxc2_start_0 rhel7-3" [ style = bold] "all_stopped" [ style=bold color="green" fontcolor="orange"] "container1_start_0 rhel7-2" -> "lxc-ms_promote_0 lxc1" [ style = bold] "container1_start_0 rhel7-2" -> "lxc-ms_start_0 lxc1" [ style = bold] @@ -115,8 +117,6 
@@ digraph "g" { "stonith 'reboot' lxc2" [ style=bold color="green" fontcolor="orange"] "stonith 'reboot' rhel7-1" -> "container1_stop_0 rhel7-1" [ style = bold] "stonith 'reboot' rhel7-1" -> "container2_stop_0 rhel7-1" [ style = bold] -"stonith 'reboot' rhel7-1" -> "lxc1_stop_0 rhel7-1" [ style = bold] -"stonith 'reboot' rhel7-1" -> "lxc2_stop_0 rhel7-1" [ style = bold] "stonith 'reboot' rhel7-1" -> "rsc_rhel7-1_stop_0 rhel7-1" [ style = bold] "stonith 'reboot' rhel7-1" -> "stonith_complete" [ style = bold] "stonith 'reboot' rhel7-1" [ style=bold color="green" fontcolor="black"] @@ -126,8 +126,6 @@ digraph "g" { "stonith_complete" -> "lxc-ms_promote_0 lxc1" [ style = bold] "stonith_complete" -> "lxc-ms_start_0 lxc1" [ style = bold] "stonith_complete" -> "lxc-ms_start_0 lxc2" [ style = bold] -"stonith_complete" -> "lxc1_start_0 rhel7-2" [ style = bold] -"stonith_complete" -> "lxc2_start_0 rhel7-3" [ style = bold] "stonith_complete" -> "rsc_rhel7-1_start_0 rhel7-5" [ style = bold] "stonith_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/guest-node-host-dies.exp b/pengine/test10/guest-node-host-dies.exp index 8dbadde32b9..b5a34ea1716 100644 --- a/pengine/test10/guest-node-host-dies.exp +++ b/pengine/test10/guest-node-host-dies.exp @@ -431,6 +431,9 @@ + + + @@ -446,9 +449,6 @@ - - - @@ -457,11 +457,7 @@ - - - - - + @@ -511,6 +507,9 @@ + + + @@ -526,9 +525,6 @@ - - - @@ -537,11 +533,7 @@ - - - - - + diff --git a/pengine/test10/guest-node-host-dies.summary b/pengine/test10/guest-node-host-dies.summary index 4feee8892f9..9813d2b97d4 100644 --- a/pengine/test10/guest-node-host-dies.summary +++ b/pengine/test10/guest-node-host-dies.summary @@ -26,16 +26,16 @@ Transition Summary: Executing cluster transition: * Resource action: Fencing stop on rhel7-4 * Pseudo action: lxc-ms-master_demote_0 + * Pseudo action: lxc1_stop_0 * Resource action: lxc1 monitor on rhel7-5 * Resource action: lxc1 monitor on rhel7-4 * Resource action: lxc1 
monitor on rhel7-3 + * Pseudo action: lxc2_stop_0 * Resource action: lxc2 monitor on rhel7-5 * Resource action: lxc2 monitor on rhel7-4 * Resource action: lxc2 monitor on rhel7-2 * Fencing rhel7-1 (reboot) * Pseudo action: rsc_rhel7-1_stop_0 - * Pseudo action: lxc1_stop_0 - * Pseudo action: lxc2_stop_0 * Pseudo action: container1_stop_0 * Pseudo action: container2_stop_0 * Pseudo action: stonith-lxc2-reboot on lxc2 @@ -47,21 +47,21 @@ Executing cluster transition: * Pseudo action: lxc-ms_demote_0 * Pseudo action: lxc-ms-master_demoted_0 * Pseudo action: lxc-ms-master_stop_0 - * Resource action: lxc1 start on rhel7-2 - * Resource action: lxc2 start on rhel7-3 * Resource action: rsc_rhel7-1 monitor=5000 on rhel7-5 * Pseudo action: lxc-ms_stop_0 * Pseudo action: lxc-ms_stop_0 * Pseudo action: lxc-ms-master_stopped_0 * Pseudo action: lxc-ms-master_start_0 - * Resource action: lxc1 monitor=30000 on rhel7-2 - * Resource action: lxc2 monitor=30000 on rhel7-3 * Pseudo action: all_stopped * Resource action: Fencing start on rhel7-4 * Resource action: Fencing monitor=120000 on rhel7-4 + * Resource action: lxc1 start on rhel7-2 + * Resource action: lxc2 start on rhel7-3 * Resource action: lxc-ms start on lxc1 * Resource action: lxc-ms start on lxc2 * Pseudo action: lxc-ms-master_running_0 + * Resource action: lxc1 monitor=30000 on rhel7-2 + * Resource action: lxc2 monitor=30000 on rhel7-3 * Resource action: lxc-ms monitor=10000 on lxc2 * Pseudo action: lxc-ms-master_promote_0 * Resource action: lxc-ms promote on lxc1 diff --git a/pengine/test10/remote-fence-unclean.dot b/pengine/test10/remote-fence-unclean.dot index b2829a7c85c..76a676d3221 100644 --- a/pengine/test10/remote-fence-unclean.dot +++ b/pengine/test10/remote-fence-unclean.dot @@ -18,6 +18,7 @@ "FAKE4_stop_0 18node1" -> "FAKE4_start_0 18node2" [ style = bold] "FAKE4_stop_0 18node1" -> "all_stopped" [ style = bold] "FAKE4_stop_0 18node1" [ style=bold color="green" fontcolor="black"] +"all_stopped" -> 
"remote1_start_0 18node1" [ style = bold] "all_stopped" [ style=bold color="green" fontcolor="orange"] "remote1_monitor_60000 18node1" [ style=bold color="green" fontcolor="black"] "remote1_start_0 18node1" -> "remote1_monitor_60000 18node1" [ style = bold] @@ -32,6 +33,5 @@ "stonith_complete" -> "FAKE3_start_0 18node1" [ style = bold] "stonith_complete" -> "FAKE4_start_0 18node2" [ style = bold] "stonith_complete" -> "all_stopped" [ style = bold] -"stonith_complete" -> "remote1_start_0 18node1" [ style = bold] "stonith_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/remote-fence-unclean.exp b/pengine/test10/remote-fence-unclean.exp index 3a07384982c..f77d7f611c2 100644 --- a/pengine/test10/remote-fence-unclean.exp +++ b/pengine/test10/remote-fence-unclean.exp @@ -11,7 +11,7 @@ - + diff --git a/pengine/test10/remote-partial-migrate2.dot b/pengine/test10/remote-partial-migrate2.dot index a8bf29bf9cd..17c8bf3f51d 100644 --- a/pengine/test10/remote-partial-migrate2.dot +++ b/pengine/test10/remote-partial-migrate2.dot @@ -89,6 +89,7 @@ "FAKE9_stop_0 pcmk2" -> "FAKE9_start_0 pcmk_remote4" [ style = bold] "FAKE9_stop_0 pcmk2" -> "all_stopped" [ style = bold] "FAKE9_stop_0 pcmk2" [ style=bold color="green" fontcolor="black"] +"all_stopped" -> "pcmk_remote5_start_0 pcmk2" [ style = bold] "all_stopped" [ style=bold color="green" fontcolor="orange"] "pcmk_remote2_migrate_from_0 pcmk1" -> "pcmk_remote2_start_0 pcmk1" [ style = bold] "pcmk_remote2_migrate_from_0 pcmk1" -> "pcmk_remote2_stop_0 pcmk3" [ style = bold] @@ -150,10 +151,5 @@ "stonith_complete" -> "FAKE5_start_0 pcmk_remote4" [ style = bold] "stonith_complete" -> "FAKE9_start_0 pcmk_remote4" [ style = bold] "stonith_complete" -> "all_stopped" [ style = bold] -"stonith_complete" -> "pcmk_remote2_migrate_from_0 pcmk1" [ style = bold] -"stonith_complete" -> "pcmk_remote2_start_0 pcmk1" [ style = bold] -"stonith_complete" -> "pcmk_remote4_start_0 pcmk2" [ style = bold] 
-"stonith_complete" -> "pcmk_remote5_migrate_to_0 pcmk1" [ style = bold] -"stonith_complete" -> "pcmk_remote5_start_0 pcmk2" [ style = bold] "stonith_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/remote-partial-migrate2.exp b/pengine/test10/remote-partial-migrate2.exp index abf281f6128..bae190cf703 100644 --- a/pengine/test10/remote-partial-migrate2.exp +++ b/pengine/test10/remote-partial-migrate2.exp @@ -6,11 +6,7 @@ - - - - - + @@ -38,9 +34,6 @@ - - - @@ -76,11 +69,7 @@ - - - - - + @@ -102,11 +91,7 @@ - - - - - + @@ -129,13 +114,13 @@ - + - + - + diff --git a/pengine/test10/remote-partial-migrate2.summary b/pengine/test10/remote-partial-migrate2.summary index 2a242bdfe9a..6b6428de750 100644 --- a/pengine/test10/remote-partial-migrate2.summary +++ b/pengine/test10/remote-partial-migrate2.summary @@ -84,6 +84,10 @@ Transition Summary: * Move FAKE49 ( pcmk_remote3 -> pcmk_remote4 ) Executing cluster transition: + * Resource action: pcmk_remote2 migrate_from on pcmk1 + * Resource action: pcmk_remote2 stop on pcmk3 + * Resource action: pcmk_remote4 start on pcmk2 + * Resource action: pcmk_remote5 migrate_to on pcmk1 * Resource action: FAKE5 stop on pcmk1 * Resource action: FAKE9 stop on pcmk2 * Resource action: FAKE12 stop on pcmk1 @@ -99,11 +103,15 @@ Executing cluster transition: * Resource action: FAKE48 stop on pcmk1 * Resource action: FAKE49 stop on pcmk_remote3 * Fencing pcmk4 (reboot) + * Pseudo action: pcmk_remote2_start_0 + * Resource action: pcmk_remote4 monitor=60000 on pcmk2 + * Resource action: pcmk_remote5 migrate_from on pcmk2 + * Resource action: pcmk_remote5 stop on pcmk1 + * Resource action: FAKE41 stop on pcmk_remote2 * Pseudo action: stonith_complete - * Resource action: pcmk_remote2 migrate_from on pcmk1 - * Resource action: pcmk_remote2 stop on pcmk3 - * Resource action: pcmk_remote4 start on pcmk2 - * Resource action: pcmk_remote5 migrate_to on pcmk1 + * Pseudo action: all_stopped + * Resource action: 
pcmk_remote2 monitor=60000 on pcmk1 + * Pseudo action: pcmk_remote5_start_0 * Resource action: FAKE5 start on pcmk_remote4 * Resource action: FAKE9 start on pcmk_remote4 * Resource action: FAKE12 start on pcmk2 @@ -114,12 +122,12 @@ Executing cluster transition: * Resource action: FAKE30 start on pcmk_remote1 * Resource action: FAKE33 start on pcmk_remote4 * Resource action: FAKE38 start on pcmk_remote1 + * Resource action: FAKE39 start on pcmk_remote2 + * Resource action: FAKE41 start on pcmk_remote4 + * Resource action: FAKE47 start on pcmk_remote2 * Resource action: FAKE48 start on pcmk_remote3 * Resource action: FAKE49 start on pcmk_remote4 - * Pseudo action: pcmk_remote2_start_0 - * Resource action: pcmk_remote4 monitor=60000 on pcmk2 - * Resource action: pcmk_remote5 migrate_from on pcmk2 - * Resource action: pcmk_remote5 stop on pcmk1 + * Resource action: pcmk_remote5 monitor=60000 on pcmk2 * Resource action: FAKE5 monitor=10000 on pcmk_remote4 * Resource action: FAKE9 monitor=10000 on pcmk_remote4 * Resource action: FAKE12 monitor=10000 on pcmk2 @@ -130,19 +138,11 @@ Executing cluster transition: * Resource action: FAKE30 monitor=10000 on pcmk_remote1 * Resource action: FAKE33 monitor=10000 on pcmk_remote4 * Resource action: FAKE38 monitor=10000 on pcmk_remote1 - * Resource action: FAKE39 start on pcmk_remote2 - * Resource action: FAKE41 stop on pcmk_remote2 - * Resource action: FAKE47 start on pcmk_remote2 - * Resource action: FAKE48 monitor=10000 on pcmk_remote3 - * Resource action: FAKE49 monitor=10000 on pcmk_remote4 - * Pseudo action: all_stopped - * Resource action: pcmk_remote2 monitor=60000 on pcmk1 - * Pseudo action: pcmk_remote5_start_0 * Resource action: FAKE39 monitor=10000 on pcmk_remote2 - * Resource action: FAKE41 start on pcmk_remote4 - * Resource action: FAKE47 monitor=10000 on pcmk_remote2 - * Resource action: pcmk_remote5 monitor=60000 on pcmk2 * Resource action: FAKE41 monitor=10000 on pcmk_remote4 + * Resource action: FAKE47 
monitor=10000 on pcmk_remote2 + * Resource action: FAKE48 monitor=10000 on pcmk_remote3 + * Resource action: FAKE49 monitor=10000 on pcmk_remote4 Revised cluster status: Online: [ pcmk1 pcmk2 pcmk3 ] diff --git a/pengine/test10/remote-recover-all.dot b/pengine/test10/remote-recover-all.dot index ad421e6cbc2..5b79602f03a 100644 --- a/pengine/test10/remote-recover-all.dot +++ b/pengine/test10/remote-recover-all.dot @@ -1,4 +1,5 @@ digraph "g" { +"all_stopped" -> "galera-0_start_0 controller-2" [ style = bold] "all_stopped" -> "stonith-fence_ipmilan-5254005bdbb5_start_0 controller-2" [ style = bold] "all_stopped" -> "stonith-fence_ipmilan-525400b4f6bd_start_0 controller-0" [ style = bold] "all_stopped" -> "stonith-fence_ipmilan-525400bbf613_start_0 controller-0" [ style = bold] @@ -114,7 +115,6 @@ digraph "g" { "redis_stop_0 controller-1" -> "all_stopped" [ style = bold] "redis_stop_0 controller-1" -> "redis-master_stopped_0" [ style = bold] "redis_stop_0 controller-1" [ style=bold color="green" fontcolor="orange"] -"stonith 'reboot' controller-1" -> "galera-0_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "galera-2_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "haproxy-clone_stop_0" [ style = bold] "stonith 'reboot' controller-1" -> "haproxy_stop_0 controller-1" [ style = bold] @@ -156,7 +156,6 @@ digraph "g" { "stonith-fence_ipmilan-525400bbf613_stop_0 controller-0" -> "stonith-fence_ipmilan-525400bbf613_start_0 controller-0" [ style = bold] "stonith-fence_ipmilan-525400bbf613_stop_0 controller-0" [ style=bold color="green" fontcolor="black"] "stonith_complete" -> "all_stopped" [ style = bold] -"stonith_complete" -> "galera-0_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.1.14_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.1.17_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.4.11_start_0 controller-2" [ style = bold] diff --git 
a/pengine/test10/remote-recover-all.exp b/pengine/test10/remote-recover-all.exp index b0af5c43780..556ccfd7d67 100644 --- a/pengine/test10/remote-recover-all.exp +++ b/pengine/test10/remote-recover-all.exp @@ -36,10 +36,10 @@ - + - + @@ -49,11 +49,7 @@ - - - - - + diff --git a/pengine/test10/remote-recover-all.summary b/pengine/test10/remote-recover-all.summary index 6c9f0586b23..ba074e5082f 100644 --- a/pengine/test10/remote-recover-all.summary +++ b/pengine/test10/remote-recover-all.summary @@ -56,13 +56,13 @@ Transition Summary: * Move stonith-fence_ipmilan-5254005bdbb5 ( controller-1 -> controller-2 ) Executing cluster transition: + * Pseudo action: galera-0_stop_0 * Pseudo action: galera-master_demote_0 * Pseudo action: redis-master_pre_notify_stop_0 * Resource action: stonith-fence_ipmilan-525400bbf613 stop on controller-0 * Resource action: stonith-fence_ipmilan-525400b4f6bd stop on controller-0 * Pseudo action: stonith-fence_ipmilan-5254005bdbb5_stop_0 * Fencing controller-1 (reboot) - * Pseudo action: galera-0_stop_0 * Pseudo action: redis_post_notify_stop_0 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 @@ -79,17 +79,14 @@ Executing cluster transition: * Pseudo action: haproxy-clone_stopped_0 * Fencing messaging-1 (reboot) * Pseudo action: stonith_complete - * Resource action: galera-0 start on controller-2 * Pseudo action: rabbitmq_post_notify_stop_0 * Pseudo action: rabbitmq-clone_stop_0 * Pseudo action: galera_stop_0 - * Resource action: galera monitor=10000 on galera-0 * Pseudo action: galera-master_stopped_0 * Pseudo action: redis-master_post_notify_stopped_0 * Pseudo action: ip-172.17.1.14_stop_0 * Pseudo action: ip-172.17.1.17_stop_0 * Pseudo action: ip-172.17.4.11_stop_0 - * Resource action: galera-0 monitor=20000 on controller-2 * Pseudo action: galera-2_stop_0 * Resource action: rabbitmq notify on messaging-2 * Resource action: rabbitmq notify on messaging-0 @@ -108,11 +105,14 @@ Executing cluster 
transition: * Resource action: ip-172.17.1.17 monitor=10000 on controller-2 * Resource action: ip-172.17.4.11 monitor=10000 on controller-2 * Pseudo action: all_stopped + * Resource action: galera-0 start on controller-2 + * Resource action: galera monitor=10000 on galera-0 * Resource action: stonith-fence_ipmilan-525400bbf613 start on controller-0 * Resource action: stonith-fence_ipmilan-525400bbf613 monitor=60000 on controller-0 * Resource action: stonith-fence_ipmilan-525400b4f6bd start on controller-0 * Resource action: stonith-fence_ipmilan-525400b4f6bd monitor=60000 on controller-0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 start on controller-2 + * Resource action: galera-0 monitor=20000 on controller-2 * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2 Using the original execution date of: 2017-05-03 13:33:24Z diff --git a/pengine/test10/remote-recover-connection.dot b/pengine/test10/remote-recover-connection.dot index d6fdefede2a..6cd342fa27a 100644 --- a/pengine/test10/remote-recover-connection.dot +++ b/pengine/test10/remote-recover-connection.dot @@ -89,14 +89,11 @@ digraph "g" { "redis_stop_0 controller-1" -> "all_stopped" [ style = bold] "redis_stop_0 controller-1" -> "redis-master_stopped_0" [ style = bold] "redis_stop_0 controller-1" [ style=bold color="green" fontcolor="orange"] -"stonith 'reboot' controller-1" -> "galera-0_stop_0 controller-1" [ style = bold] -"stonith 'reboot' controller-1" -> "galera-2_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "haproxy-clone_stop_0" [ style = bold] "stonith 'reboot' controller-1" -> "haproxy_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "ip-172.17.1.14_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "ip-172.17.1.17_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "ip-172.17.4.11_stop_0 controller-1" [ style = bold] -"stonith 'reboot' controller-1" -> 
"messaging-1_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "redis-master_stop_0" [ style = bold] "stonith 'reboot' controller-1" -> "redis_post_notify_stonith_0" [ style = bold] "stonith 'reboot' controller-1" -> "redis_stop_0 controller-1" [ style = bold] @@ -121,11 +118,8 @@ digraph "g" { "stonith-fence_ipmilan-525400bbf613_stop_0 controller-0" -> "stonith-fence_ipmilan-525400bbf613_start_0 controller-0" [ style = bold] "stonith-fence_ipmilan-525400bbf613_stop_0 controller-0" [ style=bold color="green" fontcolor="black"] "stonith_complete" -> "all_stopped" [ style = bold] -"stonith_complete" -> "galera-0_start_0 controller-2" [ style = bold] -"stonith_complete" -> "galera-2_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.1.14_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.1.17_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.4.11_start_0 controller-2" [ style = bold] -"stonith_complete" -> "messaging-1_start_0 controller-2" [ style = bold] "stonith_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/remote-recover-connection.exp b/pengine/test10/remote-recover-connection.exp index cf74efb0f96..40338b406b1 100644 --- a/pengine/test10/remote-recover-connection.exp +++ b/pengine/test10/remote-recover-connection.exp @@ -23,9 +23,6 @@ - - - @@ -34,11 +31,7 @@ - - - - - + @@ -64,9 +57,6 @@ - - - @@ -75,11 +65,7 @@ - - - - - + @@ -105,9 +91,6 @@ - - - @@ -116,11 +99,7 @@ - - - - - + diff --git a/pengine/test10/remote-recover-connection.summary b/pengine/test10/remote-recover-connection.summary index b0433fed017..8246cd958d1 100644 --- a/pengine/test10/remote-recover-connection.summary +++ b/pengine/test10/remote-recover-connection.summary @@ -52,6 +52,9 @@ Transition Summary: * Move stonith-fence_ipmilan-5254005bdbb5 ( controller-1 -> controller-2 ) Executing cluster transition: + * Pseudo action: messaging-1_stop_0 + * Pseudo action: 
galera-0_stop_0 + * Pseudo action: galera-2_stop_0 * Pseudo action: redis-master_pre_notify_stop_0 * Resource action: stonith-fence_ipmilan-525400bbf613 stop on controller-0 * Resource action: stonith-fence_ipmilan-525400bbf613 start on controller-0 @@ -61,9 +64,12 @@ Executing cluster transition: * Resource action: stonith-fence_ipmilan-525400b4f6bd monitor=60000 on controller-0 * Pseudo action: stonith-fence_ipmilan-5254005bdbb5_stop_0 * Fencing controller-1 (reboot) - * Pseudo action: messaging-1_stop_0 - * Pseudo action: galera-0_stop_0 - * Pseudo action: galera-2_stop_0 + * Resource action: messaging-1 start on controller-2 + * Resource action: galera-0 start on controller-2 + * Resource action: galera-2 start on controller-2 + * Resource action: rabbitmq monitor=10000 on messaging-1 + * Resource action: galera monitor=10000 on galera-2 + * Resource action: galera monitor=10000 on galera-0 * Pseudo action: redis_post_notify_stop_0 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 @@ -72,20 +78,14 @@ Executing cluster transition: * Pseudo action: haproxy-clone_stop_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 start on controller-2 * Pseudo action: stonith_complete - * Resource action: messaging-1 start on controller-2 - * Resource action: galera-0 start on controller-2 - * Resource action: galera-2 start on controller-2 - * Resource action: rabbitmq monitor=10000 on messaging-1 - * Resource action: galera monitor=10000 on galera-2 - * Resource action: galera monitor=10000 on galera-0 + * Resource action: messaging-1 monitor=20000 on controller-2 + * Resource action: galera-0 monitor=20000 on controller-2 + * Resource action: galera-2 monitor=20000 on controller-2 * Pseudo action: redis_stop_0 * Pseudo action: redis-master_stopped_0 * Pseudo action: haproxy_stop_0 * Pseudo action: haproxy-clone_stopped_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2 - * Resource 
action: messaging-1 monitor=20000 on controller-2 - * Resource action: galera-0 monitor=20000 on controller-2 - * Resource action: galera-2 monitor=20000 on controller-2 * Pseudo action: redis-master_post_notify_stopped_0 * Pseudo action: ip-172.17.1.14_stop_0 * Pseudo action: ip-172.17.1.17_stop_0 diff --git a/pengine/test10/remote-recover-fail.dot b/pengine/test10/remote-recover-fail.dot index 7b6edaa4137..3375687f91a 100644 --- a/pengine/test10/remote-recover-fail.dot +++ b/pengine/test10/remote-recover-fail.dot @@ -18,6 +18,7 @@ "FAKE6_stop_0 rhel7-auto4" -> "all_stopped" [ style = bold] "FAKE6_stop_0 rhel7-auto4" -> "rhel7-auto4_stop_0 rhel7-auto2" [ style = bold] "FAKE6_stop_0 rhel7-auto4" [ style=bold color="green" fontcolor="orange"] +"all_stopped" -> "rhel7-auto4_start_0 rhel7-auto2" [ style = bold] "all_stopped" [ style=bold color="green" fontcolor="orange"] "rhel7-auto4_monitor_60000 rhel7-auto2" [ style=bold color="green" fontcolor="black"] "rhel7-auto4_start_0 rhel7-auto2" -> "rhel7-auto4_monitor_60000 rhel7-auto2" [ style = bold] @@ -33,6 +34,5 @@ "stonith_complete" -> "FAKE2_start_0 rhel7-auto3" [ style = bold] "stonith_complete" -> "FAKE6_start_0 rhel7-auto2" [ style = bold] "stonith_complete" -> "all_stopped" [ style = bold] -"stonith_complete" -> "rhel7-auto4_start_0 rhel7-auto2" [ style = bold] "stonith_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/remote-recover-fail.exp b/pengine/test10/remote-recover-fail.exp index bd014ae66c3..f9085666ca3 100644 --- a/pengine/test10/remote-recover-fail.exp +++ b/pengine/test10/remote-recover-fail.exp @@ -24,7 +24,7 @@ - + diff --git a/pengine/test10/remote-recover-no-resources.dot b/pengine/test10/remote-recover-no-resources.dot index 1e162210652..8c2f7831547 100644 --- a/pengine/test10/remote-recover-no-resources.dot +++ b/pengine/test10/remote-recover-no-resources.dot @@ -1,4 +1,5 @@ digraph "g" { +"all_stopped" -> "galera-0_start_0 controller-2" [ style = bold] 
"all_stopped" -> "stonith-fence_ipmilan-5254005bdbb5_start_0 controller-2" [ style = bold] "all_stopped" -> "stonith-fence_ipmilan-525400b4f6bd_start_0 controller-0" [ style = bold] "all_stopped" -> "stonith-fence_ipmilan-525400bbf613_start_0 controller-0" [ style = bold] @@ -98,7 +99,6 @@ digraph "g" { "redis_stop_0 controller-1" -> "all_stopped" [ style = bold] "redis_stop_0 controller-1" -> "redis-master_stopped_0" [ style = bold] "redis_stop_0 controller-1" [ style=bold color="green" fontcolor="orange"] -"stonith 'reboot' controller-1" -> "galera-0_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "galera-2_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "haproxy-clone_stop_0" [ style = bold] "stonith 'reboot' controller-1" -> "haproxy_stop_0 controller-1" [ style = bold] @@ -135,7 +135,6 @@ digraph "g" { "stonith-fence_ipmilan-525400bbf613_stop_0 controller-0" -> "stonith-fence_ipmilan-525400bbf613_start_0 controller-0" [ style = bold] "stonith-fence_ipmilan-525400bbf613_stop_0 controller-0" [ style=bold color="green" fontcolor="black"] "stonith_complete" -> "all_stopped" [ style = bold] -"stonith_complete" -> "galera-0_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.1.14_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.1.17_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.4.11_start_0 controller-2" [ style = bold] diff --git a/pengine/test10/remote-recover-no-resources.exp b/pengine/test10/remote-recover-no-resources.exp index 987acfdd9af..0a57e2737a1 100644 --- a/pengine/test10/remote-recover-no-resources.exp +++ b/pengine/test10/remote-recover-no-resources.exp @@ -36,10 +36,10 @@ - + - + @@ -49,11 +49,7 @@ - - - - - + diff --git a/pengine/test10/remote-recover-no-resources.summary b/pengine/test10/remote-recover-no-resources.summary index b682e5fdc53..bed02d0452d 100644 --- a/pengine/test10/remote-recover-no-resources.summary +++ 
b/pengine/test10/remote-recover-no-resources.summary @@ -54,12 +54,12 @@ Transition Summary: * Move stonith-fence_ipmilan-5254005bdbb5 ( controller-1 -> controller-2 ) Executing cluster transition: + * Pseudo action: galera-0_stop_0 * Pseudo action: redis-master_pre_notify_stop_0 * Resource action: stonith-fence_ipmilan-525400bbf613 stop on controller-0 * Resource action: stonith-fence_ipmilan-525400b4f6bd stop on controller-0 * Pseudo action: stonith-fence_ipmilan-5254005bdbb5_stop_0 * Fencing controller-1 (reboot) - * Pseudo action: galera-0_stop_0 * Pseudo action: galera-2_stop_0 * Pseudo action: redis_post_notify_stop_0 * Resource action: redis notify on controller-0 @@ -69,15 +69,12 @@ Executing cluster transition: * Pseudo action: haproxy-clone_stop_0 * Fencing messaging-1 (reboot) * Pseudo action: stonith_complete - * Resource action: galera-0 start on controller-2 * Pseudo action: rabbitmq_post_notify_stop_0 * Pseudo action: rabbitmq-clone_stop_0 - * Resource action: galera monitor=10000 on galera-0 * Pseudo action: redis_stop_0 * Pseudo action: redis-master_stopped_0 * Pseudo action: haproxy_stop_0 * Pseudo action: haproxy-clone_stopped_0 - * Resource action: galera-0 monitor=20000 on controller-2 * Resource action: rabbitmq notify on messaging-2 * Resource action: rabbitmq notify on messaging-0 * Pseudo action: rabbitmq_notified_0 @@ -99,11 +96,14 @@ Executing cluster transition: * Resource action: ip-172.17.1.17 monitor=10000 on controller-2 * Resource action: ip-172.17.4.11 monitor=10000 on controller-2 * Pseudo action: all_stopped + * Resource action: galera-0 start on controller-2 + * Resource action: galera monitor=10000 on galera-0 * Resource action: stonith-fence_ipmilan-525400bbf613 start on controller-0 * Resource action: stonith-fence_ipmilan-525400bbf613 monitor=60000 on controller-0 * Resource action: stonith-fence_ipmilan-525400b4f6bd start on controller-0 * Resource action: stonith-fence_ipmilan-525400b4f6bd monitor=60000 on controller-0 * 
Resource action: stonith-fence_ipmilan-5254005bdbb5 start on controller-2 + * Resource action: galera-0 monitor=20000 on controller-2 * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2 Using the original execution date of: 2017-05-03 13:33:24Z diff --git a/pengine/test10/remote-recover-unknown.dot b/pengine/test10/remote-recover-unknown.dot index a8b4e18ae82..8ce59b44434 100644 --- a/pengine/test10/remote-recover-unknown.dot +++ b/pengine/test10/remote-recover-unknown.dot @@ -1,4 +1,5 @@ digraph "g" { +"all_stopped" -> "galera-0_start_0 controller-2" [ style = bold] "all_stopped" -> "stonith-fence_ipmilan-5254005bdbb5_start_0 controller-2" [ style = bold] "all_stopped" -> "stonith-fence_ipmilan-525400b4f6bd_start_0 controller-0" [ style = bold] "all_stopped" -> "stonith-fence_ipmilan-525400bbf613_start_0 controller-0" [ style = bold] @@ -99,7 +100,6 @@ digraph "g" { "redis_stop_0 controller-1" -> "all_stopped" [ style = bold] "redis_stop_0 controller-1" -> "redis-master_stopped_0" [ style = bold] "redis_stop_0 controller-1" [ style=bold color="green" fontcolor="orange"] -"stonith 'reboot' controller-1" -> "galera-0_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "galera-2_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "haproxy-clone_stop_0" [ style = bold] "stonith 'reboot' controller-1" -> "haproxy_stop_0 controller-1" [ style = bold] @@ -138,7 +138,6 @@ digraph "g" { "stonith-fence_ipmilan-525400bbf613_stop_0 controller-0" -> "stonith-fence_ipmilan-525400bbf613_start_0 controller-0" [ style = bold] "stonith-fence_ipmilan-525400bbf613_stop_0 controller-0" [ style=bold color="green" fontcolor="black"] "stonith_complete" -> "all_stopped" [ style = bold] -"stonith_complete" -> "galera-0_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.1.14_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.1.17_start_0 controller-2" [ style = bold] 
"stonith_complete" -> "ip-172.17.4.11_start_0 controller-2" [ style = bold] diff --git a/pengine/test10/remote-recover-unknown.exp b/pengine/test10/remote-recover-unknown.exp index b8d51be76de..0d7b318d098 100644 --- a/pengine/test10/remote-recover-unknown.exp +++ b/pengine/test10/remote-recover-unknown.exp @@ -36,10 +36,10 @@ - + - + @@ -49,11 +49,7 @@ - - - - - + diff --git a/pengine/test10/remote-recover-unknown.summary b/pengine/test10/remote-recover-unknown.summary index 09f10d84ae7..d47f174fb17 100644 --- a/pengine/test10/remote-recover-unknown.summary +++ b/pengine/test10/remote-recover-unknown.summary @@ -55,12 +55,12 @@ Transition Summary: * Move stonith-fence_ipmilan-5254005bdbb5 ( controller-1 -> controller-2 ) Executing cluster transition: + * Pseudo action: galera-0_stop_0 * Pseudo action: redis-master_pre_notify_stop_0 * Resource action: stonith-fence_ipmilan-525400bbf613 stop on controller-0 * Resource action: stonith-fence_ipmilan-525400b4f6bd stop on controller-0 * Pseudo action: stonith-fence_ipmilan-5254005bdbb5_stop_0 * Fencing controller-1 (reboot) - * Pseudo action: galera-0_stop_0 * Pseudo action: galera-2_stop_0 * Pseudo action: redis_post_notify_stop_0 * Resource action: redis notify on controller-0 @@ -71,15 +71,12 @@ Executing cluster transition: * Fencing galera-2 (reboot) * Fencing messaging-1 (reboot) * Pseudo action: stonith_complete - * Resource action: galera-0 start on controller-2 * Pseudo action: rabbitmq_post_notify_stop_0 * Pseudo action: rabbitmq-clone_stop_0 - * Resource action: galera monitor=10000 on galera-0 * Pseudo action: redis_stop_0 * Pseudo action: redis-master_stopped_0 * Pseudo action: haproxy_stop_0 * Pseudo action: haproxy-clone_stopped_0 - * Resource action: galera-0 monitor=20000 on controller-2 * Resource action: rabbitmq notify on messaging-2 * Resource action: rabbitmq notify on messaging-0 * Pseudo action: rabbitmq_notified_0 @@ -101,11 +98,14 @@ Executing cluster transition: * Resource action: 
ip-172.17.1.17 monitor=10000 on controller-2 * Resource action: ip-172.17.4.11 monitor=10000 on controller-2 * Pseudo action: all_stopped + * Resource action: galera-0 start on controller-2 + * Resource action: galera monitor=10000 on galera-0 * Resource action: stonith-fence_ipmilan-525400bbf613 start on controller-0 * Resource action: stonith-fence_ipmilan-525400bbf613 monitor=60000 on controller-0 * Resource action: stonith-fence_ipmilan-525400b4f6bd start on controller-0 * Resource action: stonith-fence_ipmilan-525400b4f6bd monitor=60000 on controller-0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 start on controller-2 + * Resource action: galera-0 monitor=20000 on controller-2 * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2 Using the original execution date of: 2017-05-03 13:33:24Z diff --git a/pengine/test10/remote-recovery.dot b/pengine/test10/remote-recovery.dot index d6fdefede2a..6cd342fa27a 100644 --- a/pengine/test10/remote-recovery.dot +++ b/pengine/test10/remote-recovery.dot @@ -89,14 +89,11 @@ digraph "g" { "redis_stop_0 controller-1" -> "all_stopped" [ style = bold] "redis_stop_0 controller-1" -> "redis-master_stopped_0" [ style = bold] "redis_stop_0 controller-1" [ style=bold color="green" fontcolor="orange"] -"stonith 'reboot' controller-1" -> "galera-0_stop_0 controller-1" [ style = bold] -"stonith 'reboot' controller-1" -> "galera-2_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "haproxy-clone_stop_0" [ style = bold] "stonith 'reboot' controller-1" -> "haproxy_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "ip-172.17.1.14_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "ip-172.17.1.17_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> "ip-172.17.4.11_stop_0 controller-1" [ style = bold] -"stonith 'reboot' controller-1" -> "messaging-1_stop_0 controller-1" [ style = bold] "stonith 'reboot' controller-1" -> 
"redis-master_stop_0" [ style = bold] "stonith 'reboot' controller-1" -> "redis_post_notify_stonith_0" [ style = bold] "stonith 'reboot' controller-1" -> "redis_stop_0 controller-1" [ style = bold] @@ -121,11 +118,8 @@ digraph "g" { "stonith-fence_ipmilan-525400bbf613_stop_0 controller-0" -> "stonith-fence_ipmilan-525400bbf613_start_0 controller-0" [ style = bold] "stonith-fence_ipmilan-525400bbf613_stop_0 controller-0" [ style=bold color="green" fontcolor="black"] "stonith_complete" -> "all_stopped" [ style = bold] -"stonith_complete" -> "galera-0_start_0 controller-2" [ style = bold] -"stonith_complete" -> "galera-2_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.1.14_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.1.17_start_0 controller-2" [ style = bold] "stonith_complete" -> "ip-172.17.4.11_start_0 controller-2" [ style = bold] -"stonith_complete" -> "messaging-1_start_0 controller-2" [ style = bold] "stonith_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/remote-recovery.exp b/pengine/test10/remote-recovery.exp index cf74efb0f96..40338b406b1 100644 --- a/pengine/test10/remote-recovery.exp +++ b/pengine/test10/remote-recovery.exp @@ -23,9 +23,6 @@ - - - @@ -34,11 +31,7 @@ - - - - - + @@ -64,9 +57,6 @@ - - - @@ -75,11 +65,7 @@ - - - - - + @@ -105,9 +91,6 @@ - - - @@ -116,11 +99,7 @@ - - - - - + diff --git a/pengine/test10/remote-recovery.summary b/pengine/test10/remote-recovery.summary index b0433fed017..8246cd958d1 100644 --- a/pengine/test10/remote-recovery.summary +++ b/pengine/test10/remote-recovery.summary @@ -52,6 +52,9 @@ Transition Summary: * Move stonith-fence_ipmilan-5254005bdbb5 ( controller-1 -> controller-2 ) Executing cluster transition: + * Pseudo action: messaging-1_stop_0 + * Pseudo action: galera-0_stop_0 + * Pseudo action: galera-2_stop_0 * Pseudo action: redis-master_pre_notify_stop_0 * Resource action: stonith-fence_ipmilan-525400bbf613 stop on 
controller-0 * Resource action: stonith-fence_ipmilan-525400bbf613 start on controller-0 @@ -61,9 +64,12 @@ Executing cluster transition: * Resource action: stonith-fence_ipmilan-525400b4f6bd monitor=60000 on controller-0 * Pseudo action: stonith-fence_ipmilan-5254005bdbb5_stop_0 * Fencing controller-1 (reboot) - * Pseudo action: messaging-1_stop_0 - * Pseudo action: galera-0_stop_0 - * Pseudo action: galera-2_stop_0 + * Resource action: messaging-1 start on controller-2 + * Resource action: galera-0 start on controller-2 + * Resource action: galera-2 start on controller-2 + * Resource action: rabbitmq monitor=10000 on messaging-1 + * Resource action: galera monitor=10000 on galera-2 + * Resource action: galera monitor=10000 on galera-0 * Pseudo action: redis_post_notify_stop_0 * Resource action: redis notify on controller-0 * Resource action: redis notify on controller-2 @@ -72,20 +78,14 @@ Executing cluster transition: * Pseudo action: haproxy-clone_stop_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 start on controller-2 * Pseudo action: stonith_complete - * Resource action: messaging-1 start on controller-2 - * Resource action: galera-0 start on controller-2 - * Resource action: galera-2 start on controller-2 - * Resource action: rabbitmq monitor=10000 on messaging-1 - * Resource action: galera monitor=10000 on galera-2 - * Resource action: galera monitor=10000 on galera-0 + * Resource action: messaging-1 monitor=20000 on controller-2 + * Resource action: galera-0 monitor=20000 on controller-2 + * Resource action: galera-2 monitor=20000 on controller-2 * Pseudo action: redis_stop_0 * Pseudo action: redis-master_stopped_0 * Pseudo action: haproxy_stop_0 * Pseudo action: haproxy-clone_stopped_0 * Resource action: stonith-fence_ipmilan-5254005bdbb5 monitor=60000 on controller-2 - * Resource action: messaging-1 monitor=20000 on controller-2 - * Resource action: galera-0 monitor=20000 on controller-2 - * Resource action: galera-2 monitor=20000 on 
controller-2 * Pseudo action: redis-master_post_notify_stopped_0 * Pseudo action: ip-172.17.1.14_stop_0 * Pseudo action: ip-172.17.1.17_stop_0 diff --git a/pengine/test10/remote-unclean2.dot b/pengine/test10/remote-unclean2.dot index 3f8981b88bc..2311a72964e 100644 --- a/pengine/test10/remote-unclean2.dot +++ b/pengine/test10/remote-unclean2.dot @@ -1,4 +1,5 @@ digraph "g" { +"all_stopped" -> "rhel7-auto4_start_0 rhel7-auto1" [ style = bold] "all_stopped" [ style=bold color="green" fontcolor="orange"] "rhel7-auto4_monitor_60000 rhel7-auto1" [ style=bold color="green" fontcolor="black"] "rhel7-auto4_start_0 rhel7-auto1" -> "rhel7-auto4_monitor_60000 rhel7-auto1" [ style = bold] @@ -9,6 +10,5 @@ "stonith 'reboot' rhel7-auto4" -> "stonith_complete" [ style = bold] "stonith 'reboot' rhel7-auto4" [ style=bold color="green" fontcolor="black"] "stonith_complete" -> "all_stopped" [ style = bold] -"stonith_complete" -> "rhel7-auto4_start_0 rhel7-auto1" [ style = bold] "stonith_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/remote-unclean2.exp b/pengine/test10/remote-unclean2.exp index ca0b3ba6927..2c73d82aa84 100644 --- a/pengine/test10/remote-unclean2.exp +++ b/pengine/test10/remote-unclean2.exp @@ -11,7 +11,7 @@ - + diff --git a/pengine/test10/whitebox-fail1.dot b/pengine/test10/whitebox-fail1.dot index bfff4bfb998..9b755f981cc 100644 --- a/pengine/test10/whitebox-fail1.dot +++ b/pengine/test10/whitebox-fail1.dot @@ -26,6 +26,7 @@ digraph "g" { "M_stop_0 lxc1" -> "M_start_0 lxc1" [ style = bold] "M_stop_0 lxc1" -> "all_stopped" [ style = bold] "M_stop_0 lxc1" [ style=bold color="green" fontcolor="orange"] +"all_stopped" -> "lxc1_start_0 18node2" [ style = bold] "all_stopped" [ style=bold color="green" fontcolor="orange"] "container1_start_0 18node2" -> "B_start_0 lxc1" [ style = bold] "container1_start_0 18node2" -> "M_start_0 lxc1" [ style = bold] @@ -55,6 +56,5 @@ digraph "g" { "stonith_complete" -> "M_start_0 lxc1" [ style = 
bold] "stonith_complete" -> "all_stopped" [ style = bold] "stonith_complete" -> "container1_start_0 18node2" [ style = bold] -"stonith_complete" -> "lxc1_start_0 18node2" [ style = bold] "stonith_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/whitebox-fail1.exp b/pengine/test10/whitebox-fail1.exp index 901a1e30231..1532c6eaa43 100644 --- a/pengine/test10/whitebox-fail1.exp +++ b/pengine/test10/whitebox-fail1.exp @@ -227,13 +227,13 @@ - + - + - + diff --git a/pengine/test10/whitebox-fail1.summary b/pengine/test10/whitebox-fail1.summary index d1f34804e4c..a5b85dd1acb 100644 --- a/pengine/test10/whitebox-fail1.summary +++ b/pengine/test10/whitebox-fail1.summary @@ -31,17 +31,17 @@ Executing cluster transition: * Resource action: container1 start on 18node2 * Pseudo action: M-clone_stop_0 * Pseudo action: B_stop_0 - * Resource action: lxc1 start on 18node2 - * Resource action: lxc1 monitor=30000 on 18node2 * Pseudo action: M_stop_0 * Pseudo action: M-clone_stopped_0 * Pseudo action: M-clone_start_0 - * Resource action: B start on lxc1 * Pseudo action: all_stopped + * Resource action: lxc1 start on 18node2 + * Resource action: lxc1 monitor=30000 on 18node2 * Resource action: M start on lxc1 * Pseudo action: M-clone_running_0 - * Resource action: B monitor=10000 on lxc1 + * Resource action: B start on lxc1 * Resource action: M monitor=10000 on lxc1 + * Resource action: B monitor=10000 on lxc1 Revised cluster status: Online: [ 18node1 18node2 18node3 ] diff --git a/pengine/test10/whitebox-fail2.dot b/pengine/test10/whitebox-fail2.dot index bfff4bfb998..9b755f981cc 100644 --- a/pengine/test10/whitebox-fail2.dot +++ b/pengine/test10/whitebox-fail2.dot @@ -26,6 +26,7 @@ digraph "g" { "M_stop_0 lxc1" -> "M_start_0 lxc1" [ style = bold] "M_stop_0 lxc1" -> "all_stopped" [ style = bold] "M_stop_0 lxc1" [ style=bold color="green" fontcolor="orange"] +"all_stopped" -> "lxc1_start_0 18node2" [ style = bold] "all_stopped" [ style=bold 
color="green" fontcolor="orange"] "container1_start_0 18node2" -> "B_start_0 lxc1" [ style = bold] "container1_start_0 18node2" -> "M_start_0 lxc1" [ style = bold] @@ -55,6 +56,5 @@ digraph "g" { "stonith_complete" -> "M_start_0 lxc1" [ style = bold] "stonith_complete" -> "all_stopped" [ style = bold] "stonith_complete" -> "container1_start_0 18node2" [ style = bold] -"stonith_complete" -> "lxc1_start_0 18node2" [ style = bold] "stonith_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/whitebox-fail2.exp b/pengine/test10/whitebox-fail2.exp index 901a1e30231..1532c6eaa43 100644 --- a/pengine/test10/whitebox-fail2.exp +++ b/pengine/test10/whitebox-fail2.exp @@ -227,13 +227,13 @@ - + - + - + diff --git a/pengine/test10/whitebox-fail2.summary b/pengine/test10/whitebox-fail2.summary index ebf6c515e33..afee2618baf 100644 --- a/pengine/test10/whitebox-fail2.summary +++ b/pengine/test10/whitebox-fail2.summary @@ -31,17 +31,17 @@ Executing cluster transition: * Resource action: container1 start on 18node2 * Pseudo action: M-clone_stop_0 * Pseudo action: B_stop_0 - * Resource action: lxc1 start on 18node2 - * Resource action: lxc1 monitor=30000 on 18node2 * Pseudo action: M_stop_0 * Pseudo action: M-clone_stopped_0 * Pseudo action: M-clone_start_0 - * Resource action: B start on lxc1 * Pseudo action: all_stopped + * Resource action: lxc1 start on 18node2 + * Resource action: lxc1 monitor=30000 on 18node2 * Resource action: M start on lxc1 * Pseudo action: M-clone_running_0 - * Resource action: B monitor=10000 on lxc1 + * Resource action: B start on lxc1 * Resource action: M monitor=10000 on lxc1 + * Resource action: B monitor=10000 on lxc1 Revised cluster status: Online: [ 18node1 18node2 18node3 ] diff --git a/pengine/test10/whitebox-imply-stop-on-fence.dot b/pengine/test10/whitebox-imply-stop-on-fence.dot index 7b536ea55a3..1ef3cba3d11 100644 --- a/pengine/test10/whitebox-imply-stop-on-fence.dot +++ 
b/pengine/test10/whitebox-imply-stop-on-fence.dot @@ -18,6 +18,8 @@ "R-lxc-02_kiff-01_stop_0 kiff-01" -> "vm-fs_start_0 lxc-01_kiff-01" [ style = bold] "R-lxc-02_kiff-01_stop_0 kiff-01" [ style=bold color="green" fontcolor="orange"] "all_stopped" -> "fence-kiff-02_start_0 kiff-02" [ style = bold] +"all_stopped" -> "lxc-01_kiff-01_start_0 kiff-02" [ style = bold] +"all_stopped" -> "lxc-02_kiff-01_start_0 kiff-02" [ style = bold] "all_stopped" [ style=bold color="green" fontcolor="orange"] "clvmd-clone_stop_0" -> "clvmd-clone_stopped_0" [ style = bold] "clvmd-clone_stop_0" -> "clvmd_stop_0 kiff-01" [ style = bold] @@ -78,8 +80,6 @@ "stonith 'reboot' kiff-01" -> "clvmd_stop_0 kiff-01" [ style = bold] "stonith 'reboot' kiff-01" -> "dlm-clone_stop_0" [ style = bold] "stonith 'reboot' kiff-01" -> "dlm_stop_0 kiff-01" [ style = bold] -"stonith 'reboot' kiff-01" -> "lxc-01_kiff-01_stop_0 kiff-01" [ style = bold] -"stonith 'reboot' kiff-01" -> "lxc-02_kiff-01_stop_0 kiff-01" [ style = bold] "stonith 'reboot' kiff-01" -> "shared0-clone_stop_0" [ style = bold] "stonith 'reboot' kiff-01" -> "shared0_stop_0 kiff-01" [ style = bold] "stonith 'reboot' kiff-01" -> "stonith 'reboot' lxc-01_kiff-01" [ style = bold] @@ -94,8 +94,6 @@ "stonith_complete" -> "R-lxc-01_kiff-01_start_0 kiff-02" [ style = bold] "stonith_complete" -> "R-lxc-02_kiff-01_start_0 kiff-02" [ style = bold] "stonith_complete" -> "all_stopped" [ style = bold] -"stonith_complete" -> "lxc-01_kiff-01_start_0 kiff-02" [ style = bold] -"stonith_complete" -> "lxc-02_kiff-01_start_0 kiff-02" [ style = bold] "stonith_complete" -> "vm-fs_start_0 lxc-01_kiff-01" [ style = bold] "stonith_complete" [ style=bold color="green" fontcolor="orange"] "vm-fs_monitor_0 lxc-01_kiff-02" -> "vm-fs_start_0 lxc-01_kiff-01" [ style = bold] diff --git a/pengine/test10/whitebox-imply-stop-on-fence.exp b/pengine/test10/whitebox-imply-stop-on-fence.exp index f80dde1ba40..c73d1d291a7 100644 --- a/pengine/test10/whitebox-imply-stop-on-fence.exp 
+++ b/pengine/test10/whitebox-imply-stop-on-fence.exp @@ -421,13 +421,13 @@ - + - + - + @@ -437,11 +437,7 @@ - - - - - + @@ -465,13 +461,13 @@ - + - + - + @@ -481,11 +477,7 @@ - - - - - + diff --git a/pengine/test10/whitebox-imply-stop-on-fence.summary b/pengine/test10/whitebox-imply-stop-on-fence.summary index d272b25d21f..5ce580e4a2e 100644 --- a/pengine/test10/whitebox-imply-stop-on-fence.summary +++ b/pengine/test10/whitebox-imply-stop-on-fence.summary @@ -48,29 +48,23 @@ Executing cluster transition: * Resource action: shared0 monitor on lxc-01_kiff-02 * Resource action: vm-fs monitor on lxc-02_kiff-02 * Resource action: vm-fs monitor on lxc-01_kiff-02 - * Fencing kiff-01 (reboot) * Pseudo action: lxc-01_kiff-01_stop_0 * Pseudo action: lxc-02_kiff-01_stop_0 + * Fencing kiff-01 (reboot) + * Pseudo action: R-lxc-01_kiff-01_stop_0 + * Pseudo action: R-lxc-02_kiff-01_stop_0 * Pseudo action: stonith-lxc-02_kiff-01-reboot on lxc-02_kiff-01 * Pseudo action: stonith-lxc-01_kiff-01-reboot on lxc-01_kiff-01 * Pseudo action: stonith_complete - * Pseudo action: R-lxc-01_kiff-01_stop_0 - * Pseudo action: R-lxc-02_kiff-01_stop_0 - * Pseudo action: vm-fs_stop_0 * Pseudo action: shared0-clone_stop_0 * Resource action: R-lxc-01_kiff-01 start on kiff-02 * Resource action: R-lxc-02_kiff-01 start on kiff-02 - * Resource action: lxc-01_kiff-01 start on kiff-02 - * Resource action: lxc-02_kiff-01 start on kiff-02 + * Pseudo action: vm-fs_stop_0 * Pseudo action: shared0_stop_0 * Pseudo action: shared0-clone_stopped_0 * Resource action: R-lxc-01_kiff-01 monitor=10000 on kiff-02 * Resource action: R-lxc-02_kiff-01 monitor=10000 on kiff-02 - * Resource action: vm-fs start on lxc-01_kiff-01 - * Resource action: lxc-01_kiff-01 monitor=30000 on kiff-02 - * Resource action: lxc-02_kiff-01 monitor=30000 on kiff-02 * Pseudo action: clvmd-clone_stop_0 - * Resource action: vm-fs monitor=20000 on lxc-01_kiff-01 * Pseudo action: clvmd_stop_0 * Pseudo action: clvmd-clone_stopped_0 * Pseudo 
action: dlm-clone_stop_0 @@ -78,7 +72,13 @@ Executing cluster transition: * Pseudo action: dlm-clone_stopped_0 * Pseudo action: all_stopped * Resource action: fence-kiff-02 start on kiff-02 + * Resource action: lxc-01_kiff-01 start on kiff-02 + * Resource action: lxc-02_kiff-01 start on kiff-02 * Resource action: fence-kiff-02 monitor=60000 on kiff-02 + * Resource action: vm-fs start on lxc-01_kiff-01 + * Resource action: lxc-01_kiff-01 monitor=30000 on kiff-02 + * Resource action: lxc-02_kiff-01 monitor=30000 on kiff-02 + * Resource action: vm-fs monitor=20000 on lxc-01_kiff-01 Revised cluster status: Online: [ kiff-02 ] diff --git a/pengine/test10/whitebox-ms-ordering.dot b/pengine/test10/whitebox-ms-ordering.dot index 1f4d95b1ad1..d5112b92ded 100644 --- a/pengine/test10/whitebox-ms-ordering.dot +++ b/pengine/test10/whitebox-ms-ordering.dot @@ -1,4 +1,6 @@ digraph "g" { +"all_stopped" -> "lxc1_start_0 18node1" [ style = bold] +"all_stopped" -> "lxc2_start_0 18node1" [ style = bold] "all_stopped" [ style=bold color="green" fontcolor="orange"] "container1_monitor_0 18node1" -> "container1_start_0 18node1" [ style = bold] "container1_monitor_0 18node1" [ style=bold color="green" fontcolor="black"] @@ -106,7 +108,5 @@ "stonith_complete" -> "lxc-ms_promote_0 lxc1" [ style = bold] "stonith_complete" -> "lxc-ms_start_0 lxc1" [ style = bold] "stonith_complete" -> "lxc-ms_start_0 lxc2" [ style = bold] -"stonith_complete" -> "lxc1_start_0 18node1" [ style = bold] -"stonith_complete" -> "lxc2_start_0 18node1" [ style = bold] "stonith_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/whitebox-ms-ordering.exp b/pengine/test10/whitebox-ms-ordering.exp index c8fee5e6c02..d5608e4456d 100644 --- a/pengine/test10/whitebox-ms-ordering.exp +++ b/pengine/test10/whitebox-ms-ordering.exp @@ -418,6 +418,9 @@ + + + @@ -430,9 +433,6 @@ - - - @@ -483,6 +483,9 @@ + + + @@ -495,9 +498,6 @@ - - - diff --git a/pengine/test10/whitebox-ms-ordering.summary 
b/pengine/test10/whitebox-ms-ordering.summary index d8ff62c505e..46fe9d1bb22 100644 --- a/pengine/test10/whitebox-ms-ordering.summary +++ b/pengine/test10/whitebox-ms-ordering.summary @@ -43,18 +43,18 @@ Executing cluster transition: * Pseudo action: lxc-ms_demote_0 * Pseudo action: lxc-ms-master_demoted_0 * Pseudo action: lxc-ms-master_stop_0 - * Resource action: lxc1 start on 18node1 - * Resource action: lxc2 start on 18node1 * Pseudo action: lxc-ms_stop_0 * Pseudo action: lxc-ms_stop_0 * Pseudo action: lxc-ms-master_stopped_0 * Pseudo action: lxc-ms-master_start_0 - * Resource action: lxc1 monitor=30000 on 18node1 - * Resource action: lxc2 monitor=30000 on 18node1 * Pseudo action: all_stopped + * Resource action: lxc1 start on 18node1 + * Resource action: lxc2 start on 18node1 * Resource action: lxc-ms start on lxc1 * Resource action: lxc-ms start on lxc2 * Pseudo action: lxc-ms-master_running_0 + * Resource action: lxc1 monitor=30000 on 18node1 + * Resource action: lxc2 monitor=30000 on 18node1 * Resource action: lxc-ms monitor=10000 on lxc2 * Pseudo action: lxc-ms-master_promote_0 * Resource action: lxc-ms promote on lxc1 diff --git a/pengine/test10/whitebox-unexpectedly-running.dot b/pengine/test10/whitebox-unexpectedly-running.dot index fa1171ec830..f16e705eb2c 100644 --- a/pengine/test10/whitebox-unexpectedly-running.dot +++ b/pengine/test10/whitebox-unexpectedly-running.dot @@ -8,6 +8,8 @@ "FAKE-crashed_stop_0 18builder" -> "stonith 'reboot' remote2" [ style = bold] "FAKE-crashed_stop_0 18builder" [ style=bold color="green" fontcolor="black"] "FAKE_monitor_60000 18builder" [ style=bold color="green" fontcolor="black"] +"all_stopped" -> "remote1_start_0 18builder" [ style = bold] +"all_stopped" -> "remote2_start_0 18builder" [ style = bold] "all_stopped" [ style=bold color="green" fontcolor="orange"] "remote1_monitor_0 18builder" -> "remote1_start_0 18builder" [ style = bold] "remote1_monitor_0 18builder" [ style=bold color="green" fontcolor="black"] diff 
--git a/pengine/test10/whitebox-unexpectedly-running.exp b/pengine/test10/whitebox-unexpectedly-running.exp index c4e13b93c0c..46376a5b9e2 100644 --- a/pengine/test10/whitebox-unexpectedly-running.exp +++ b/pengine/test10/whitebox-unexpectedly-running.exp @@ -64,6 +64,9 @@ + + + @@ -99,6 +102,9 @@ + + + From b04b392925daa70af17f2abdef9a6198127c5608 Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Fri, 8 Dec 2017 13:53:36 +1100 Subject: [PATCH 011/812] Fix: PE: Ensure stop operations occur after stopped remote connections have been brought up --- pengine/allocate.c | 55 +++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/pengine/allocate.c b/pengine/allocate.c index 2ae491c9b93..1c95e97d8eb 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -48,6 +48,25 @@ enum remote_connection_state { remote_state_stopped = 4 }; +static const char * +state2text(enum remote_connection_state state) +{ + switch (state) { + case remote_state_unknown: + return "unknown"; + case remote_state_alive: + return "alive"; + case remote_state_resting: + return "resting"; + case remote_state_failed: + return "failed"; + case remote_state_stopped: + return "stopped"; + } + + return "impossible"; +} + resource_alloc_functions_t resource_class_alloc_functions[] = { { native_merge_weights, @@ -2011,10 +2030,10 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set) cluster_node = remote_rsc->running_on->data; } - crm_trace("Order %s action %s relative to %s%s (state %d)", + crm_trace("Order %s action %s relative to %s%s (state: %s)", action->task, action->uuid, is_set(remote_rsc->flags, pe_rsc_failed)? 
"failed " : "", - remote_rsc->id, state); + remote_rsc->id, state2text(state)); if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE) || safe_str_eq(action->task, CRMD_ACTION_MIGRATE)) { @@ -2042,23 +2061,29 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set) /* Handle special case with remote node where stop actions need to be * ordered after the connection resource starts somewhere else. */ - if(state == remote_state_resting) { - /* Wait for the connection resource to be up and assume everything is as we left it */ - order_start_then_action(remote_rsc, action, pe_order_none, - data_set); + if(state == remote_state_alive) { + order_action_then_stop(action, remote_rsc, + pe_order_implies_first, data_set); - } else { - if(state == remote_state_failed) { - /* We would only be here if the resource is - * running on the remote node. Since we have no - * way to stop it, it is necessary to fence the - * node. - */ - pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable"); - } + } else if(state == remote_state_failed) { + /* We would only be here if the resource is + * running on the remote node. Since we have no + * way to stop it, it is necessary to fence the + * node. 
+ */ + pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable"); + order_action_then_stop(action, remote_rsc, + pe_order_implies_first, data_set); + } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) { + /* If its not coming back up, better do what we need first */ order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); + + } else { + /* Wait for the connection resource to be up and assume everything is as we left it */ + order_start_then_action(remote_rsc, action, pe_order_none, data_set); + } break; From 502770c763807bc7ac19bf2b2fd50d4a0e195df0 Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Fri, 8 Dec 2017 13:56:03 +1100 Subject: [PATCH 012/812] Test: PE: Ensure stop operations occur after stopped remote connections have been brought up --- pengine/regression.sh | 1 + .../test10/bundle-order-stop-on-remote.dot | 307 ++++ .../test10/bundle-order-stop-on-remote.exp | 1607 +++++++++++++++++ .../test10/bundle-order-stop-on-remote.scores | 934 ++++++++++ .../bundle-order-stop-on-remote.summary | 224 +++ .../test10/bundle-order-stop-on-remote.xml | 1165 ++++++++++++ 6 files changed, 4238 insertions(+) create mode 100644 pengine/test10/bundle-order-stop-on-remote.dot create mode 100644 pengine/test10/bundle-order-stop-on-remote.exp create mode 100644 pengine/test10/bundle-order-stop-on-remote.scores create mode 100644 pengine/test10/bundle-order-stop-on-remote.summary create mode 100644 pengine/test10/bundle-order-stop-on-remote.xml diff --git a/pengine/regression.sh b/pengine/regression.sh index 47cf0ba030c..cf1824abdcd 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -819,6 +819,7 @@ do_test bundle-order-partial-start "Bundle startup ordering when some dependanci do_test bundle-order-partial-start-2 "Bundle startup ordering when some dependancies and the container are already running" do_test bundle-order-stop "Bundle stop ordering" do_test bundle-order-partial-stop "Bundle startup 
ordering when some dependancies are already stopped" +do_test bundle-order-stop-on-remote "Stop nested resource after bringing up the connection" do_test bundle-order-startup-clone "Prevent startup because bundle isn't promoted" do_test bundle-order-startup-clone-2 "Bundle startup with clones" diff --git a/pengine/test10/bundle-order-stop-on-remote.dot b/pengine/test10/bundle-order-stop-on-remote.dot new file mode 100644 index 00000000000..f0b6336298c --- /dev/null +++ b/pengine/test10/bundle-order-stop-on-remote.dot @@ -0,0 +1,307 @@ +digraph "g" { +"all_stopped" [ style=bold color="green" fontcolor="orange"] +"database-0_monitor_20000 controller-0" [ style=bold color="green" fontcolor="black"] +"database-0_start_0 controller-0" -> "database-0_monitor_20000 controller-0" [ style = bold] +"database-0_start_0 controller-0" -> "galera-bundle-docker-0_monitor_60000 database-0" [ style = bold] +"database-0_start_0 controller-0" -> "galera-bundle-docker-0_start_0 database-0" [ style = bold] +"database-0_start_0 controller-0" -> "galera-bundle-docker-0_stop_0 database-0" [ style = bold] +"database-0_start_0 controller-0" [ style=bold color="green" fontcolor="black"] +"database-2_monitor_20000 controller-1" [ style=bold color="green" fontcolor="black"] +"database-2_start_0 controller-1" -> "database-2_monitor_20000 controller-1" [ style = bold] +"database-2_start_0 controller-1" -> "galera-bundle-docker-2_monitor_60000 database-2" [ style = bold] +"database-2_start_0 controller-1" -> "galera-bundle-docker-2_start_0 database-2" [ style = bold] +"database-2_start_0 controller-1" -> "galera-bundle-docker-2_stop_0 database-2" [ style = bold] +"database-2_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"galera-bundle-0_monitor_0 controller-1" -> "galera-bundle-0_start_0 controller-0" [ style = bold] +"galera-bundle-0_monitor_0 controller-1" [ style=bold color="green" fontcolor="black"] +"galera-bundle-0_monitor_60000 controller-0" [ style=bold 
color="green" fontcolor="black"] +"galera-bundle-0_start_0 controller-0" -> "galera-bundle-0_monitor_60000 controller-0" [ style = bold] +"galera-bundle-0_start_0 controller-0" -> "galera_monitor_10000 galera-bundle-0" [ style = bold] +"galera-bundle-0_start_0 controller-0" -> "galera_promote_0 galera-bundle-0" [ style = bold] +"galera-bundle-0_start_0 controller-0" -> "galera_start_0 galera-bundle-0" [ style = bold] +"galera-bundle-0_start_0 controller-0" [ style=bold color="green" fontcolor="black"] +"galera-bundle-2_monitor_0 controller-1" -> "galera-bundle-2_start_0 controller-1" [ style = bold] +"galera-bundle-2_monitor_0 controller-1" [ style=bold color="green" fontcolor="black"] +"galera-bundle-2_monitor_60000 controller-1" [ style=bold color="green" fontcolor="black"] +"galera-bundle-2_start_0 controller-1" -> "galera-bundle-2_monitor_60000 controller-1" [ style = bold] +"galera-bundle-2_start_0 controller-1" -> "galera_monitor_10000 galera-bundle-2" [ style = bold] +"galera-bundle-2_start_0 controller-1" -> "galera_promote_0 galera-bundle-2" [ style = bold] +"galera-bundle-2_start_0 controller-1" -> "galera_start_0 galera-bundle-2" [ style = bold] +"galera-bundle-2_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"galera-bundle-docker-0_monitor_60000 database-0" [ style=bold color="green" fontcolor="black"] +"galera-bundle-docker-0_start_0 database-0" -> "galera-bundle-0_monitor_0 controller-1" [ style = bold] +"galera-bundle-docker-0_start_0 database-0" -> "galera-bundle-0_start_0 controller-0" [ style = bold] +"galera-bundle-docker-0_start_0 database-0" -> "galera-bundle-docker-0_monitor_60000 database-0" [ style = bold] +"galera-bundle-docker-0_start_0 database-0" -> "galera-bundle_running_0" [ style = bold] +"galera-bundle-docker-0_start_0 database-0" -> "galera_promote_0 galera-bundle-0" [ style = bold] +"galera-bundle-docker-0_start_0 database-0" -> "galera_start_0 galera-bundle-0" [ style = bold] +"galera-bundle-docker-0_start_0 
database-0" [ style=bold color="green" fontcolor="black"] +"galera-bundle-docker-0_stop_0 database-0" -> "all_stopped" [ style = bold] +"galera-bundle-docker-0_stop_0 database-0" -> "galera-bundle-docker-0_start_0 database-0" [ style = bold] +"galera-bundle-docker-0_stop_0 database-0" -> "galera-bundle_stopped_0" [ style = bold] +"galera-bundle-docker-0_stop_0 database-0" -> "stonith 'reboot' galera-bundle-0" [ style = bold] +"galera-bundle-docker-0_stop_0 database-0" [ style=bold color="green" fontcolor="black"] +"galera-bundle-docker-2_monitor_60000 database-2" [ style=bold color="green" fontcolor="black"] +"galera-bundle-docker-2_start_0 database-2" -> "galera-bundle-2_monitor_0 controller-1" [ style = bold] +"galera-bundle-docker-2_start_0 database-2" -> "galera-bundle-2_start_0 controller-1" [ style = bold] +"galera-bundle-docker-2_start_0 database-2" -> "galera-bundle-docker-2_monitor_60000 database-2" [ style = bold] +"galera-bundle-docker-2_start_0 database-2" -> "galera-bundle_running_0" [ style = bold] +"galera-bundle-docker-2_start_0 database-2" -> "galera_promote_0 galera-bundle-2" [ style = bold] +"galera-bundle-docker-2_start_0 database-2" -> "galera_start_0 galera-bundle-2" [ style = bold] +"galera-bundle-docker-2_start_0 database-2" [ style=bold color="green" fontcolor="black"] +"galera-bundle-docker-2_stop_0 database-2" -> "all_stopped" [ style = bold] +"galera-bundle-docker-2_stop_0 database-2" -> "galera-bundle-docker-2_start_0 database-2" [ style = bold] +"galera-bundle-docker-2_stop_0 database-2" -> "galera-bundle_stopped_0" [ style = bold] +"galera-bundle-docker-2_stop_0 database-2" -> "stonith 'reboot' galera-bundle-2" [ style = bold] +"galera-bundle-docker-2_stop_0 database-2" [ style=bold color="green" fontcolor="black"] +"galera-bundle-master_demote_0" -> "galera-bundle-master_demoted_0" [ style = bold] +"galera-bundle-master_demote_0" -> "galera_demote_0 galera-bundle-0" [ style = bold] +"galera-bundle-master_demote_0" -> "galera_demote_0 
galera-bundle-2" [ style = bold] +"galera-bundle-master_demote_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_demoted_0" -> "galera-bundle-master_promote_0" [ style = bold] +"galera-bundle-master_demoted_0" -> "galera-bundle-master_start_0" [ style = bold] +"galera-bundle-master_demoted_0" -> "galera-bundle-master_stop_0" [ style = bold] +"galera-bundle-master_demoted_0" -> "galera-bundle_demoted_0" [ style = bold] +"galera-bundle-master_demoted_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_promote_0" -> "galera_promote_0 galera-bundle-0" [ style = bold] +"galera-bundle-master_promote_0" -> "galera_promote_0 galera-bundle-2" [ style = bold] +"galera-bundle-master_promote_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_promoted_0" -> "galera-bundle_promoted_0" [ style = bold] +"galera-bundle-master_promoted_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_running_0" -> "galera-bundle-master_promote_0" [ style = bold] +"galera-bundle-master_running_0" -> "galera-bundle_running_0" [ style = bold] +"galera-bundle-master_running_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_start_0" -> "galera-bundle-master_running_0" [ style = bold] +"galera-bundle-master_start_0" -> "galera_start_0 galera-bundle-0" [ style = bold] +"galera-bundle-master_start_0" -> "galera_start_0 galera-bundle-2" [ style = bold] +"galera-bundle-master_start_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_stop_0" -> "galera-bundle-master_stopped_0" [ style = bold] +"galera-bundle-master_stop_0" -> "galera_stop_0 galera-bundle-0" [ style = bold] +"galera-bundle-master_stop_0" -> "galera_stop_0 galera-bundle-2" [ style = bold] +"galera-bundle-master_stop_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle-master_stopped_0" -> "galera-bundle-master_promote_0" [ style = bold] +"galera-bundle-master_stopped_0" -> 
"galera-bundle-master_start_0" [ style = bold] +"galera-bundle-master_stopped_0" -> "galera-bundle_stopped_0" [ style = bold] +"galera-bundle-master_stopped_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_demote_0" -> "galera-bundle-master_demote_0" [ style = bold] +"galera-bundle_demote_0" -> "galera-bundle_demoted_0" [ style = bold] +"galera-bundle_demote_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_demoted_0" -> "galera-bundle_promote_0" [ style = bold] +"galera-bundle_demoted_0" -> "galera-bundle_start_0" [ style = bold] +"galera-bundle_demoted_0" -> "galera-bundle_stop_0" [ style = bold] +"galera-bundle_demoted_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_promote_0" -> "galera-bundle-master_promote_0" [ style = bold] +"galera-bundle_promote_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_promoted_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_running_0" -> "galera-bundle_promote_0" [ style = bold] +"galera-bundle_running_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_start_0" -> "galera-bundle-docker-0_start_0 database-0" [ style = bold] +"galera-bundle_start_0" -> "galera-bundle-docker-2_start_0 database-2" [ style = bold] +"galera-bundle_start_0" -> "galera-bundle-master_start_0" [ style = bold] +"galera-bundle_start_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_stop_0" -> "galera-bundle-docker-0_stop_0 database-0" [ style = bold] +"galera-bundle_stop_0" -> "galera-bundle-docker-2_stop_0 database-2" [ style = bold] +"galera-bundle_stop_0" -> "galera-bundle-master_stop_0" [ style = bold] +"galera-bundle_stop_0" -> "galera_stop_0 galera-bundle-0" [ style = bold] +"galera-bundle_stop_0" -> "galera_stop_0 galera-bundle-2" [ style = bold] +"galera-bundle_stop_0" [ style=bold color="green" fontcolor="orange"] +"galera-bundle_stopped_0" -> "galera-bundle_promote_0" [ style = bold] +"galera-bundle_stopped_0" -> 
"galera-bundle_start_0" [ style = bold] +"galera-bundle_stopped_0" [ style=bold color="green" fontcolor="orange"] +"galera_demote_0 galera-bundle-0" -> "galera-bundle-master_demoted_0" [ style = bold] +"galera_demote_0 galera-bundle-0" -> "galera_promote_0 galera-bundle-0" [ style = bold] +"galera_demote_0 galera-bundle-0" -> "galera_stop_0 galera-bundle-0" [ style = bold] +"galera_demote_0 galera-bundle-0" [ style=bold color="green" fontcolor="orange"] +"galera_demote_0 galera-bundle-2" -> "galera-bundle-master_demoted_0" [ style = bold] +"galera_demote_0 galera-bundle-2" -> "galera_promote_0 galera-bundle-2" [ style = bold] +"galera_demote_0 galera-bundle-2" -> "galera_stop_0 galera-bundle-2" [ style = bold] +"galera_demote_0 galera-bundle-2" [ style=bold color="green" fontcolor="orange"] +"galera_monitor_10000 galera-bundle-0" [ style=bold color="green" fontcolor="black"] +"galera_monitor_10000 galera-bundle-2" [ style=bold color="green" fontcolor="black"] +"galera_promote_0 galera-bundle-0" -> "galera-bundle-master_promoted_0" [ style = bold] +"galera_promote_0 galera-bundle-0" -> "galera_monitor_10000 galera-bundle-0" [ style = bold] +"galera_promote_0 galera-bundle-0" [ style=bold color="green" fontcolor="black"] +"galera_promote_0 galera-bundle-2" -> "galera-bundle-master_promoted_0" [ style = bold] +"galera_promote_0 galera-bundle-2" -> "galera_monitor_10000 galera-bundle-2" [ style = bold] +"galera_promote_0 galera-bundle-2" [ style=bold color="green" fontcolor="black"] +"galera_start_0 galera-bundle-0" -> "galera-bundle-master_running_0" [ style = bold] +"galera_start_0 galera-bundle-0" -> "galera_monitor_10000 galera-bundle-0" [ style = bold] +"galera_start_0 galera-bundle-0" -> "galera_promote_0 galera-bundle-0" [ style = bold] +"galera_start_0 galera-bundle-0" -> "galera_start_0 galera-bundle-2" [ style = bold] +"galera_start_0 galera-bundle-0" [ style=bold color="green" fontcolor="black"] +"galera_start_0 galera-bundle-2" -> 
"galera-bundle-master_running_0" [ style = bold] +"galera_start_0 galera-bundle-2" -> "galera_monitor_10000 galera-bundle-2" [ style = bold] +"galera_start_0 galera-bundle-2" -> "galera_promote_0 galera-bundle-2" [ style = bold] +"galera_start_0 galera-bundle-2" [ style=bold color="green" fontcolor="black"] +"galera_stop_0 galera-bundle-0" -> "all_stopped" [ style = bold] +"galera_stop_0 galera-bundle-0" -> "galera-bundle-master_stopped_0" [ style = bold] +"galera_stop_0 galera-bundle-0" -> "galera_start_0 galera-bundle-0" [ style = bold] +"galera_stop_0 galera-bundle-0" [ style=bold color="green" fontcolor="orange"] +"galera_stop_0 galera-bundle-2" -> "all_stopped" [ style = bold] +"galera_stop_0 galera-bundle-2" -> "galera-bundle-master_stopped_0" [ style = bold] +"galera_stop_0 galera-bundle-2" -> "galera_start_0 galera-bundle-2" [ style = bold] +"galera_stop_0 galera-bundle-2" -> "galera_stop_0 galera-bundle-0" [ style = bold] +"galera_stop_0 galera-bundle-2" [ style=bold color="green" fontcolor="orange"] +"haproxy-bundle-docker-1_monitor_60000 controller-1" [ style=bold color="green" fontcolor="black"] +"haproxy-bundle-docker-1_start_0 controller-1" -> "haproxy-bundle-docker-1_monitor_60000 controller-1" [ style = bold] +"haproxy-bundle-docker-1_start_0 controller-1" -> "haproxy-bundle_running_0" [ style = bold] +"haproxy-bundle-docker-1_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"haproxy-bundle_running_0" [ style=bold color="green" fontcolor="orange"] +"haproxy-bundle_start_0" -> "haproxy-bundle-docker-1_start_0 controller-1" [ style = bold] +"haproxy-bundle_start_0" [ style=bold color="green" fontcolor="orange"] +"ip-10.0.0.104_monitor_10000 controller-1" [ style=bold color="green" fontcolor="black"] +"ip-10.0.0.104_start_0 controller-1" -> "haproxy-bundle_start_0" [ style = bold] +"ip-10.0.0.104_start_0 controller-1" -> "ip-10.0.0.104_monitor_10000 controller-1" [ style = bold] +"ip-10.0.0.104_start_0 controller-1" [ style=bold 
color="green" fontcolor="black"] +"ip-172.17.1.11_monitor_10000 controller-0" [ style=bold color="green" fontcolor="black"] +"ip-172.17.1.11_start_0 controller-0" -> "haproxy-bundle_start_0" [ style = bold] +"ip-172.17.1.11_start_0 controller-0" -> "ip-172.17.1.11_monitor_10000 controller-0" [ style = bold] +"ip-172.17.1.11_start_0 controller-0" [ style=bold color="green" fontcolor="black"] +"ip-172.17.3.13_monitor_10000 controller-1" [ style=bold color="green" fontcolor="black"] +"ip-172.17.3.13_start_0 controller-1" -> "haproxy-bundle_start_0" [ style = bold] +"ip-172.17.3.13_start_0 controller-1" -> "ip-172.17.3.13_monitor_10000 controller-1" [ style = bold] +"ip-172.17.3.13_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"ip-192.168.24.11_monitor_10000 controller-0" [ style=bold color="green" fontcolor="black"] +"ip-192.168.24.11_start_0 controller-0" -> "haproxy-bundle_start_0" [ style = bold] +"ip-192.168.24.11_start_0 controller-0" -> "ip-192.168.24.11_monitor_10000 controller-0" [ style = bold] +"ip-192.168.24.11_start_0 controller-0" [ style=bold color="green" fontcolor="black"] +"openstack-cinder-volume_monitor_60000 controller-0" [ style=bold color="green" fontcolor="black"] +"openstack-cinder-volume_start_0 controller-0" -> "openstack-cinder-volume_monitor_60000 controller-0" [ style = bold] +"openstack-cinder-volume_start_0 controller-0" [ style=bold color="green" fontcolor="black"] +"redis-bundle-1_monitor_0 controller-1" -> "redis-bundle-1_start_0 controller-1" [ style = bold] +"redis-bundle-1_monitor_0 controller-1" [ style=bold color="green" fontcolor="black"] +"redis-bundle-1_monitor_60000 controller-1" [ style=bold color="green" fontcolor="black"] +"redis-bundle-1_start_0 controller-1" -> "redis-bundle-1_monitor_60000 controller-1" [ style = bold] +"redis-bundle-1_start_0 controller-1" -> "redis:1_monitor_45000 redis-bundle-1" [ style = bold] +"redis-bundle-1_start_0 controller-1" -> "redis:1_monitor_60000 redis-bundle-1" [ 
style = bold] +"redis-bundle-1_start_0 controller-1" -> "redis:1_start_0 redis-bundle-1" [ style = bold] +"redis-bundle-1_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"redis-bundle-docker-1_monitor_60000 controller-1" [ style=bold color="green" fontcolor="black"] +"redis-bundle-docker-1_start_0 controller-1" -> "redis-bundle-1_monitor_0 controller-1" [ style = bold] +"redis-bundle-docker-1_start_0 controller-1" -> "redis-bundle-1_start_0 controller-1" [ style = bold] +"redis-bundle-docker-1_start_0 controller-1" -> "redis-bundle-docker-1_monitor_60000 controller-1" [ style = bold] +"redis-bundle-docker-1_start_0 controller-1" -> "redis-bundle_running_0" [ style = bold] +"redis-bundle-docker-1_start_0 controller-1" -> "redis:1_start_0 redis-bundle-1" [ style = bold] +"redis-bundle-docker-1_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis-bundle_promoted_0" [ style = bold] +"redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis:1_monitor_45000 redis-bundle-1" [ style = bold] +"redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis:1_monitor_60000 redis-bundle-1" [ style = bold] +"redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis_monitor_20000 redis-bundle-0" [ style = bold] +"redis-bundle-master_confirmed-post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_confirmed-post_notify_running_0" -> "redis-bundle-master_pre_notify_promote_0" [ style = bold] +"redis-bundle-master_confirmed-post_notify_running_0" -> "redis-bundle_running_0" [ style = bold] +"redis-bundle-master_confirmed-post_notify_running_0" -> "redis:1_monitor_45000 redis-bundle-1" [ style = bold] +"redis-bundle-master_confirmed-post_notify_running_0" -> "redis:1_monitor_60000 redis-bundle-1" [ style = bold] +"redis-bundle-master_confirmed-post_notify_running_0" -> "redis_monitor_20000 redis-bundle-0" [ style = bold] 
+"redis-bundle-master_confirmed-post_notify_running_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_confirmed-pre_notify_promote_0" -> "redis-bundle-master_post_notify_promoted_0" [ style = bold] +"redis-bundle-master_confirmed-pre_notify_promote_0" -> "redis-bundle-master_promote_0" [ style = bold] +"redis-bundle-master_confirmed-pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_confirmed-pre_notify_start_0" -> "redis-bundle-master_post_notify_running_0" [ style = bold] +"redis-bundle-master_confirmed-pre_notify_start_0" -> "redis-bundle-master_start_0" [ style = bold] +"redis-bundle-master_confirmed-pre_notify_start_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_post_notify_promoted_0" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] +"redis-bundle-master_post_notify_promoted_0" -> "redis:1_post_notify_promote_0 redis-bundle-1" [ style = bold] +"redis-bundle-master_post_notify_promoted_0" -> "redis_post_notify_promoted_0 redis-bundle-0" [ style = bold] +"redis-bundle-master_post_notify_promoted_0" -> "redis_post_notify_promoted_0 redis-bundle-2" [ style = bold] +"redis-bundle-master_post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_post_notify_running_0" -> "redis-bundle-master_confirmed-post_notify_running_0" [ style = bold] +"redis-bundle-master_post_notify_running_0" -> "redis:1_post_notify_start_0 redis-bundle-1" [ style = bold] +"redis-bundle-master_post_notify_running_0" -> "redis_post_notify_running_0 redis-bundle-0" [ style = bold] +"redis-bundle-master_post_notify_running_0" -> "redis_post_notify_running_0 redis-bundle-2" [ style = bold] +"redis-bundle-master_post_notify_running_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_pre_notify_promote_0" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] +"redis-bundle-master_pre_notify_promote_0" -> 
"redis:1_pre_notify_promote_0 redis-bundle-1" [ style = bold] +"redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-0" [ style = bold] +"redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-2" [ style = bold] +"redis-bundle-master_pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_pre_notify_start_0" -> "redis-bundle-master_confirmed-pre_notify_start_0" [ style = bold] +"redis-bundle-master_pre_notify_start_0" -> "redis_pre_notify_start_0 redis-bundle-0" [ style = bold] +"redis-bundle-master_pre_notify_start_0" -> "redis_pre_notify_start_0 redis-bundle-2" [ style = bold] +"redis-bundle-master_pre_notify_start_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_promote_0" -> "redis_promote_0 redis-bundle-0" [ style = bold] +"redis-bundle-master_promote_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_promoted_0" -> "redis-bundle-master_post_notify_promoted_0" [ style = bold] +"redis-bundle-master_promoted_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_running_0" -> "redis-bundle-master_post_notify_running_0" [ style = bold] +"redis-bundle-master_running_0" -> "redis-bundle-master_promote_0" [ style = bold] +"redis-bundle-master_running_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_start_0" -> "redis-bundle-master_running_0" [ style = bold] +"redis-bundle-master_start_0" -> "redis:1_start_0 redis-bundle-1" [ style = bold] +"redis-bundle-master_start_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle_promote_0" -> "redis-bundle-master_promote_0" [ style = bold] +"redis-bundle_promote_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle_promoted_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle_running_0" -> "redis-bundle_promote_0" [ style = bold] +"redis-bundle_running_0" [ style=bold color="green" fontcolor="orange"] 
+"redis-bundle_start_0" -> "redis-bundle-docker-1_start_0 controller-1" [ style = bold] +"redis-bundle_start_0" -> "redis-bundle-master_start_0" [ style = bold] +"redis-bundle_start_0" [ style=bold color="green" fontcolor="orange"] +"redis:1_monitor_45000 redis-bundle-1" [ style=bold color="green" fontcolor="black"] +"redis:1_monitor_60000 redis-bundle-1" [ style=bold color="green" fontcolor="black"] +"redis:1_post_notify_promote_0 redis-bundle-1" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] +"redis:1_post_notify_promote_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] +"redis:1_post_notify_start_0 redis-bundle-1" -> "redis-bundle-master_confirmed-post_notify_running_0" [ style = bold] +"redis:1_post_notify_start_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] +"redis:1_pre_notify_promote_0 redis-bundle-1" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] +"redis:1_pre_notify_promote_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] +"redis:1_start_0 redis-bundle-1" -> "redis-bundle-master_running_0" [ style = bold] +"redis:1_start_0 redis-bundle-1" -> "redis:1_monitor_45000 redis-bundle-1" [ style = bold] +"redis:1_start_0 redis-bundle-1" -> "redis:1_monitor_60000 redis-bundle-1" [ style = bold] +"redis:1_start_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] +"redis_monitor_20000 redis-bundle-0" [ style=bold color="green" fontcolor="black"] +"redis_post_notify_promoted_0 redis-bundle-0" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] +"redis_post_notify_promoted_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] +"redis_post_notify_promoted_0 redis-bundle-2" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] +"redis_post_notify_promoted_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] +"redis_post_notify_running_0 redis-bundle-0" -> 
"redis-bundle-master_confirmed-post_notify_running_0" [ style = bold] +"redis_post_notify_running_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] +"redis_post_notify_running_0 redis-bundle-2" -> "redis-bundle-master_confirmed-post_notify_running_0" [ style = bold] +"redis_post_notify_running_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] +"redis_pre_notify_promote_0 redis-bundle-0" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] +"redis_pre_notify_promote_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] +"redis_pre_notify_promote_0 redis-bundle-2" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] +"redis_pre_notify_promote_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] +"redis_pre_notify_start_0 redis-bundle-0" -> "redis-bundle-master_confirmed-pre_notify_start_0" [ style = bold] +"redis_pre_notify_start_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] +"redis_pre_notify_start_0 redis-bundle-2" -> "redis-bundle-master_confirmed-pre_notify_start_0" [ style = bold] +"redis_pre_notify_start_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] +"redis_promote_0 redis-bundle-0" -> "redis-bundle-master_promoted_0" [ style = bold] +"redis_promote_0 redis-bundle-0" -> "redis_monitor_20000 redis-bundle-0" [ style = bold] +"redis_promote_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] +"stonith 'reboot' galera-bundle-0" -> "galera-bundle-master_stop_0" [ style = bold] +"stonith 'reboot' galera-bundle-0" -> "stonith_complete" [ style = bold] +"stonith 'reboot' galera-bundle-0" [ style=bold color="green" fontcolor="orange"] +"stonith 'reboot' galera-bundle-2" -> "galera-bundle-master_stop_0" [ style = bold] +"stonith 'reboot' galera-bundle-2" -> "stonith_complete" [ style = bold] +"stonith 'reboot' galera-bundle-2" [ style=bold color="green" fontcolor="orange"] +"stonith-fence_ipmilan-525400498d34_monitor_60000 controller-1" [ 
style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-525400498d34_start_0 controller-1" -> "stonith-fence_ipmilan-525400498d34_monitor_60000 controller-1" [ style = bold] +"stonith-fence_ipmilan-525400498d34_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-525400542c06_monitor_60000 controller-0" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-525400542c06_start_0 controller-0" -> "stonith-fence_ipmilan-525400542c06_monitor_60000 controller-0" [ style = bold] +"stonith-fence_ipmilan-525400542c06_start_0 controller-0" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254005ea387_monitor_60000 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-5254005ea387_start_0 controller-1" -> "stonith-fence_ipmilan-5254005ea387_monitor_60000 controller-1" [ style = bold] +"stonith-fence_ipmilan-5254005ea387_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-525400c709f7_monitor_60000 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith-fence_ipmilan-525400c709f7_start_0 controller-1" -> "stonith-fence_ipmilan-525400c709f7_monitor_60000 controller-1" [ style = bold] +"stonith-fence_ipmilan-525400c709f7_start_0 controller-1" [ style=bold color="green" fontcolor="black"] +"stonith_complete" -> "all_stopped" [ style = bold] +"stonith_complete" -> "galera-bundle-docker-0_start_0 database-0" [ style = bold] +"stonith_complete" -> "galera-bundle-docker-2_start_0 database-2" [ style = bold] +"stonith_complete" -> "galera_promote_0 galera-bundle-0" [ style = bold] +"stonith_complete" -> "galera_promote_0 galera-bundle-2" [ style = bold] +"stonith_complete" -> "galera_start_0 galera-bundle-0" [ style = bold] +"stonith_complete" -> "galera_start_0 galera-bundle-2" [ style = bold] +"stonith_complete" -> "haproxy-bundle-docker-1_start_0 controller-1" [ style = bold] +"stonith_complete" -> 
"ip-10.0.0.104_start_0 controller-1" [ style = bold] +"stonith_complete" -> "ip-172.17.1.11_start_0 controller-0" [ style = bold] +"stonith_complete" -> "ip-172.17.3.13_start_0 controller-1" [ style = bold] +"stonith_complete" -> "ip-192.168.24.11_start_0 controller-0" [ style = bold] +"stonith_complete" -> "openstack-cinder-volume_start_0 controller-0" [ style = bold] +"stonith_complete" -> "redis-bundle-docker-1_start_0 controller-1" [ style = bold] +"stonith_complete" -> "redis:1_start_0 redis-bundle-1" [ style = bold] +"stonith_complete" -> "redis_promote_0 redis-bundle-0" [ style = bold] +"stonith_complete" [ style=bold color="green" fontcolor="orange"] +} diff --git a/pengine/test10/bundle-order-stop-on-remote.exp b/pengine/test10/bundle-order-stop-on-remote.exp new file mode 100644 index 00000000000..db5386b1f39 --- /dev/null +++ b/pengine/test10/bundle-order-stop-on-remote.exp @@ -0,0 +1,1607 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/bundle-order-stop-on-remote.scores b/pengine/test10/bundle-order-stop-on-remote.scores new file mode 100644 index 00000000000..e26f51114a6 --- /dev/null +++ b/pengine/test10/bundle-order-stop-on-remote.scores @@ -0,0 +1,934 @@ +Allocation scores: +clone_color: galera-bundle-master allocation score on controller-0: -INFINITY +clone_color: galera-bundle-master allocation score on controller-1: -INFINITY +clone_color: galera-bundle-master allocation score on controller-2: -INFINITY +clone_color: galera-bundle-master allocation score on database-0: -INFINITY +clone_color: galera-bundle-master allocation score on database-1: -INFINITY +clone_color: galera-bundle-master allocation score on database-2: -INFINITY +clone_color: galera-bundle-master allocation score on galera-bundle-0: 0 +clone_color: galera-bundle-master allocation score on galera-bundle-1: 0 +clone_color: galera-bundle-master allocation score on galera-bundle-2: 0 +clone_color: galera-bundle-master allocation score on messaging-0: -INFINITY +clone_color: galera-bundle-master allocation score on messaging-1: -INFINITY +clone_color: galera-bundle-master allocation score on messaging-2: -INFINITY +clone_color: galera:0 allocation score on galera-bundle-0: INFINITY +clone_color: galera:1 allocation score on galera-bundle-1: INFINITY +clone_color: galera:2 allocation score on galera-bundle-2: INFINITY +clone_color: rabbitmq-bundle-clone allocation score on controller-0: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on controller-1: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on controller-2: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on database-0: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on database-1: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on 
database-2: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on messaging-0: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on messaging-1: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on messaging-2: -INFINITY +clone_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-0: 0 +clone_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-1: 0 +clone_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-2: 0 +clone_color: rabbitmq:0 allocation score on rabbitmq-bundle-0: INFINITY +clone_color: rabbitmq:1 allocation score on rabbitmq-bundle-1: INFINITY +clone_color: rabbitmq:2 allocation score on rabbitmq-bundle-2: INFINITY +clone_color: redis-bundle-master allocation score on controller-0: -INFINITY +clone_color: redis-bundle-master allocation score on controller-1: -INFINITY +clone_color: redis-bundle-master allocation score on controller-2: -INFINITY +clone_color: redis-bundle-master allocation score on database-0: -INFINITY +clone_color: redis-bundle-master allocation score on database-1: -INFINITY +clone_color: redis-bundle-master allocation score on database-2: -INFINITY +clone_color: redis-bundle-master allocation score on messaging-0: -INFINITY +clone_color: redis-bundle-master allocation score on messaging-1: -INFINITY +clone_color: redis-bundle-master allocation score on messaging-2: -INFINITY +clone_color: redis-bundle-master allocation score on redis-bundle-0: 0 +clone_color: redis-bundle-master allocation score on redis-bundle-1: 0 +clone_color: redis-bundle-master allocation score on redis-bundle-2: 0 +clone_color: redis:0 allocation score on redis-bundle-0: INFINITY +clone_color: redis:1 allocation score on redis-bundle-1: INFINITY +clone_color: redis:2 allocation score on redis-bundle-2: INFINITY +container_color: galera-bundle allocation score on controller-0: -INFINITY +container_color: galera-bundle allocation score on controller-1: -INFINITY +container_color: 
galera-bundle allocation score on controller-2: -INFINITY +container_color: galera-bundle allocation score on database-0: 0 +container_color: galera-bundle allocation score on database-1: 0 +container_color: galera-bundle allocation score on database-2: 0 +container_color: galera-bundle allocation score on messaging-0: -INFINITY +container_color: galera-bundle allocation score on messaging-1: -INFINITY +container_color: galera-bundle allocation score on messaging-2: -INFINITY +container_color: galera-bundle-0 allocation score on controller-0: 0 +container_color: galera-bundle-0 allocation score on controller-1: 0 +container_color: galera-bundle-0 allocation score on controller-2: 0 +container_color: galera-bundle-0 allocation score on database-0: -INFINITY +container_color: galera-bundle-0 allocation score on database-1: -INFINITY +container_color: galera-bundle-0 allocation score on database-2: -INFINITY +container_color: galera-bundle-0 allocation score on messaging-0: -INFINITY +container_color: galera-bundle-0 allocation score on messaging-1: -INFINITY +container_color: galera-bundle-0 allocation score on messaging-2: -INFINITY +container_color: galera-bundle-1 allocation score on controller-0: 0 +container_color: galera-bundle-1 allocation score on controller-1: 0 +container_color: galera-bundle-1 allocation score on controller-2: INFINITY +container_color: galera-bundle-1 allocation score on database-0: -INFINITY +container_color: galera-bundle-1 allocation score on database-1: -INFINITY +container_color: galera-bundle-1 allocation score on database-2: -INFINITY +container_color: galera-bundle-1 allocation score on messaging-0: -INFINITY +container_color: galera-bundle-1 allocation score on messaging-1: -INFINITY +container_color: galera-bundle-1 allocation score on messaging-2: -INFINITY +container_color: galera-bundle-2 allocation score on controller-0: 0 +container_color: galera-bundle-2 allocation score on controller-1: 0 +container_color: galera-bundle-2 
allocation score on controller-2: 0 +container_color: galera-bundle-2 allocation score on database-0: -INFINITY +container_color: galera-bundle-2 allocation score on database-1: -INFINITY +container_color: galera-bundle-2 allocation score on database-2: -INFINITY +container_color: galera-bundle-2 allocation score on messaging-0: -INFINITY +container_color: galera-bundle-2 allocation score on messaging-1: -INFINITY +container_color: galera-bundle-2 allocation score on messaging-2: -INFINITY +container_color: galera-bundle-docker-0 allocation score on controller-0: -INFINITY +container_color: galera-bundle-docker-0 allocation score on controller-1: -INFINITY +container_color: galera-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: galera-bundle-docker-0 allocation score on database-0: INFINITY +container_color: galera-bundle-docker-0 allocation score on database-1: 0 +container_color: galera-bundle-docker-0 allocation score on database-2: 0 +container_color: galera-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: galera-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: galera-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: galera-bundle-docker-1 allocation score on controller-0: -INFINITY +container_color: galera-bundle-docker-1 allocation score on controller-1: -INFINITY +container_color: galera-bundle-docker-1 allocation score on controller-2: -INFINITY +container_color: galera-bundle-docker-1 allocation score on database-0: 0 +container_color: galera-bundle-docker-1 allocation score on database-1: INFINITY +container_color: galera-bundle-docker-1 allocation score on database-2: 0 +container_color: galera-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: galera-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: galera-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: 
galera-bundle-docker-2 allocation score on controller-0: -INFINITY +container_color: galera-bundle-docker-2 allocation score on controller-1: -INFINITY +container_color: galera-bundle-docker-2 allocation score on controller-2: -INFINITY +container_color: galera-bundle-docker-2 allocation score on database-0: 0 +container_color: galera-bundle-docker-2 allocation score on database-1: 0 +container_color: galera-bundle-docker-2 allocation score on database-2: INFINITY +container_color: galera-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: galera-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: galera-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: galera-bundle-master allocation score on controller-0: 0 +container_color: galera-bundle-master allocation score on controller-1: 0 +container_color: galera-bundle-master allocation score on controller-2: 0 +container_color: galera-bundle-master allocation score on database-0: 0 +container_color: galera-bundle-master allocation score on database-1: 0 +container_color: galera-bundle-master allocation score on database-2: 0 +container_color: galera-bundle-master allocation score on galera-bundle-0: -INFINITY +container_color: galera-bundle-master allocation score on galera-bundle-1: -INFINITY +container_color: galera-bundle-master allocation score on galera-bundle-2: -INFINITY +container_color: galera-bundle-master allocation score on messaging-0: 0 +container_color: galera-bundle-master allocation score on messaging-1: 0 +container_color: galera-bundle-master allocation score on messaging-2: 0 +container_color: galera:0 allocation score on galera-bundle-0: INFINITY +container_color: galera:1 allocation score on galera-bundle-1: INFINITY +container_color: galera:2 allocation score on galera-bundle-2: INFINITY +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-0: 
0 +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-0: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-1: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on controller-2: 0 +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-0: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY 
+container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-1: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on database-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle 
allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-0 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-0 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-0 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-0 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-0 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-0 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY 
+container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on controller-2: 0 +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on 
database-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY +container_color: 
haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-1 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on controller-2: 0 +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: 
haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: 
-INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-1: 0 +container_color: 
haproxy-bundle-docker-2 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-1: 0 +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY 
+container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 
allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: rabbitmq-bundle allocation score on controller-0: -INFINITY +container_color: rabbitmq-bundle allocation score on controller-1: -INFINITY +container_color: rabbitmq-bundle allocation score on controller-2: -INFINITY +container_color: rabbitmq-bundle allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle allocation score on messaging-0: 0 +container_color: rabbitmq-bundle allocation score on messaging-1: 0 +container_color: rabbitmq-bundle allocation score on messaging-2: 0 +container_color: rabbitmq-bundle-0 allocation score on controller-0: 0 +container_color: rabbitmq-bundle-0 allocation score on controller-1: 0 +container_color: rabbitmq-bundle-0 allocation score on controller-2: INFINITY +container_color: rabbitmq-bundle-0 allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle-0 allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle-0 allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle-0 allocation score on messaging-0: -INFINITY +container_color: rabbitmq-bundle-0 allocation score on messaging-1: -INFINITY +container_color: rabbitmq-bundle-0 allocation score on messaging-2: -INFINITY +container_color: rabbitmq-bundle-1 allocation score on controller-0: 0 +container_color: rabbitmq-bundle-1 allocation score on controller-1: 0 +container_color: rabbitmq-bundle-1 allocation score on controller-2: INFINITY +container_color: 
rabbitmq-bundle-1 allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle-1 allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle-1 allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle-1 allocation score on messaging-0: -INFINITY +container_color: rabbitmq-bundle-1 allocation score on messaging-1: -INFINITY +container_color: rabbitmq-bundle-1 allocation score on messaging-2: -INFINITY +container_color: rabbitmq-bundle-2 allocation score on controller-0: 0 +container_color: rabbitmq-bundle-2 allocation score on controller-1: 0 +container_color: rabbitmq-bundle-2 allocation score on controller-2: INFINITY +container_color: rabbitmq-bundle-2 allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle-2 allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle-2 allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle-2 allocation score on messaging-0: -INFINITY +container_color: rabbitmq-bundle-2 allocation score on messaging-1: -INFINITY +container_color: rabbitmq-bundle-2 allocation score on messaging-2: -INFINITY +container_color: rabbitmq-bundle-clone allocation score on controller-0: 0 +container_color: rabbitmq-bundle-clone allocation score on controller-1: 0 +container_color: rabbitmq-bundle-clone allocation score on controller-2: 0 +container_color: rabbitmq-bundle-clone allocation score on database-0: 0 +container_color: rabbitmq-bundle-clone allocation score on database-1: 0 +container_color: rabbitmq-bundle-clone allocation score on database-2: 0 +container_color: rabbitmq-bundle-clone allocation score on messaging-0: 0 +container_color: rabbitmq-bundle-clone allocation score on messaging-1: 0 +container_color: rabbitmq-bundle-clone allocation score on messaging-2: 0 +container_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-0: -INFINITY +container_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-1: -INFINITY 
+container_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-2: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on controller-0: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on controller-1: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on controller-2: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on messaging-0: INFINITY +container_color: rabbitmq-bundle-docker-0 allocation score on messaging-1: 0 +container_color: rabbitmq-bundle-docker-0 allocation score on messaging-2: 0 +container_color: rabbitmq-bundle-docker-1 allocation score on controller-0: -INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on controller-1: -INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on controller-2: -INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on messaging-0: 0 +container_color: rabbitmq-bundle-docker-1 allocation score on messaging-1: INFINITY +container_color: rabbitmq-bundle-docker-1 allocation score on messaging-2: 0 +container_color: rabbitmq-bundle-docker-2 allocation score on controller-0: -INFINITY +container_color: rabbitmq-bundle-docker-2 allocation score on controller-1: -INFINITY +container_color: rabbitmq-bundle-docker-2 allocation score on controller-2: -INFINITY +container_color: rabbitmq-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: rabbitmq-bundle-docker-2 
allocation score on database-1: -INFINITY +container_color: rabbitmq-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: rabbitmq-bundle-docker-2 allocation score on messaging-0: 0 +container_color: rabbitmq-bundle-docker-2 allocation score on messaging-1: 0 +container_color: rabbitmq-bundle-docker-2 allocation score on messaging-2: INFINITY +container_color: rabbitmq:0 allocation score on rabbitmq-bundle-0: INFINITY +container_color: rabbitmq:1 allocation score on rabbitmq-bundle-1: INFINITY +container_color: rabbitmq:2 allocation score on rabbitmq-bundle-2: INFINITY +container_color: redis-bundle allocation score on controller-0: 0 +container_color: redis-bundle allocation score on controller-1: 0 +container_color: redis-bundle allocation score on controller-2: 0 +container_color: redis-bundle allocation score on database-0: -INFINITY +container_color: redis-bundle allocation score on database-1: -INFINITY +container_color: redis-bundle allocation score on database-2: -INFINITY +container_color: redis-bundle allocation score on messaging-0: -INFINITY +container_color: redis-bundle allocation score on messaging-1: -INFINITY +container_color: redis-bundle allocation score on messaging-2: -INFINITY +container_color: redis-bundle-0 allocation score on controller-0: INFINITY +container_color: redis-bundle-0 allocation score on controller-1: 0 +container_color: redis-bundle-0 allocation score on controller-2: 0 +container_color: redis-bundle-0 allocation score on database-0: -INFINITY +container_color: redis-bundle-0 allocation score on database-1: -INFINITY +container_color: redis-bundle-0 allocation score on database-2: -INFINITY +container_color: redis-bundle-0 allocation score on messaging-0: -INFINITY +container_color: redis-bundle-0 allocation score on messaging-1: -INFINITY +container_color: redis-bundle-0 allocation score on messaging-2: -INFINITY +container_color: redis-bundle-1 allocation score on controller-0: 0 +container_color: 
redis-bundle-1 allocation score on controller-1: 0 +container_color: redis-bundle-1 allocation score on controller-2: 0 +container_color: redis-bundle-1 allocation score on database-0: -INFINITY +container_color: redis-bundle-1 allocation score on database-1: -INFINITY +container_color: redis-bundle-1 allocation score on database-2: -INFINITY +container_color: redis-bundle-1 allocation score on messaging-0: -INFINITY +container_color: redis-bundle-1 allocation score on messaging-1: -INFINITY +container_color: redis-bundle-1 allocation score on messaging-2: -INFINITY +container_color: redis-bundle-2 allocation score on controller-0: 0 +container_color: redis-bundle-2 allocation score on controller-1: 0 +container_color: redis-bundle-2 allocation score on controller-2: INFINITY +container_color: redis-bundle-2 allocation score on database-0: -INFINITY +container_color: redis-bundle-2 allocation score on database-1: -INFINITY +container_color: redis-bundle-2 allocation score on database-2: -INFINITY +container_color: redis-bundle-2 allocation score on messaging-0: -INFINITY +container_color: redis-bundle-2 allocation score on messaging-1: -INFINITY +container_color: redis-bundle-2 allocation score on messaging-2: -INFINITY +container_color: redis-bundle-docker-0 allocation score on controller-0: INFINITY +container_color: redis-bundle-docker-0 allocation score on controller-1: 0 +container_color: redis-bundle-docker-0 allocation score on controller-2: 0 +container_color: redis-bundle-docker-0 allocation score on database-0: -INFINITY +container_color: redis-bundle-docker-0 allocation score on database-1: -INFINITY +container_color: redis-bundle-docker-0 allocation score on database-2: -INFINITY +container_color: redis-bundle-docker-0 allocation score on messaging-0: -INFINITY +container_color: redis-bundle-docker-0 allocation score on messaging-1: -INFINITY +container_color: redis-bundle-docker-0 allocation score on messaging-2: -INFINITY +container_color: 
redis-bundle-docker-1 allocation score on controller-0: 0 +container_color: redis-bundle-docker-1 allocation score on controller-1: 0 +container_color: redis-bundle-docker-1 allocation score on controller-2: 0 +container_color: redis-bundle-docker-1 allocation score on database-0: -INFINITY +container_color: redis-bundle-docker-1 allocation score on database-1: -INFINITY +container_color: redis-bundle-docker-1 allocation score on database-2: -INFINITY +container_color: redis-bundle-docker-1 allocation score on messaging-0: -INFINITY +container_color: redis-bundle-docker-1 allocation score on messaging-1: -INFINITY +container_color: redis-bundle-docker-1 allocation score on messaging-2: -INFINITY +container_color: redis-bundle-docker-2 allocation score on controller-0: 0 +container_color: redis-bundle-docker-2 allocation score on controller-1: 0 +container_color: redis-bundle-docker-2 allocation score on controller-2: INFINITY +container_color: redis-bundle-docker-2 allocation score on database-0: -INFINITY +container_color: redis-bundle-docker-2 allocation score on database-1: -INFINITY +container_color: redis-bundle-docker-2 allocation score on database-2: -INFINITY +container_color: redis-bundle-docker-2 allocation score on messaging-0: -INFINITY +container_color: redis-bundle-docker-2 allocation score on messaging-1: -INFINITY +container_color: redis-bundle-docker-2 allocation score on messaging-2: -INFINITY +container_color: redis-bundle-master allocation score on controller-0: 0 +container_color: redis-bundle-master allocation score on controller-1: 0 +container_color: redis-bundle-master allocation score on controller-2: 0 +container_color: redis-bundle-master allocation score on database-0: 0 +container_color: redis-bundle-master allocation score on database-1: 0 +container_color: redis-bundle-master allocation score on database-2: 0 +container_color: redis-bundle-master allocation score on messaging-0: 0 +container_color: redis-bundle-master allocation 
score on messaging-1: 0 +container_color: redis-bundle-master allocation score on messaging-2: 0 +container_color: redis-bundle-master allocation score on redis-bundle-0: -INFINITY +container_color: redis-bundle-master allocation score on redis-bundle-1: -INFINITY +container_color: redis-bundle-master allocation score on redis-bundle-2: -INFINITY +container_color: redis:0 allocation score on redis-bundle-0: INFINITY +container_color: redis:1 allocation score on redis-bundle-1: 500 +container_color: redis:2 allocation score on redis-bundle-2: INFINITY +galera:0 promotion score on galera-bundle-0: 100 +galera:1 promotion score on galera-bundle-1: 100 +galera:2 promotion score on galera-bundle-2: 100 +native_color: database-0 allocation score on controller-0: 0 +native_color: database-0 allocation score on controller-1: 0 +native_color: database-0 allocation score on controller-2: 0 +native_color: database-0 allocation score on database-0: -INFINITY +native_color: database-0 allocation score on database-1: -INFINITY +native_color: database-0 allocation score on database-2: -INFINITY +native_color: database-0 allocation score on messaging-0: -INFINITY +native_color: database-0 allocation score on messaging-1: -INFINITY +native_color: database-0 allocation score on messaging-2: -INFINITY +native_color: database-1 allocation score on controller-0: 0 +native_color: database-1 allocation score on controller-1: 0 +native_color: database-1 allocation score on controller-2: INFINITY +native_color: database-1 allocation score on database-0: -INFINITY +native_color: database-1 allocation score on database-1: -INFINITY +native_color: database-1 allocation score on database-2: -INFINITY +native_color: database-1 allocation score on messaging-0: -INFINITY +native_color: database-1 allocation score on messaging-1: -INFINITY +native_color: database-1 allocation score on messaging-2: -INFINITY +native_color: database-2 allocation score on controller-0: 0 +native_color: database-2 
allocation score on controller-1: 0 +native_color: database-2 allocation score on controller-2: 0 +native_color: database-2 allocation score on database-0: -INFINITY +native_color: database-2 allocation score on database-1: -INFINITY +native_color: database-2 allocation score on database-2: -INFINITY +native_color: database-2 allocation score on messaging-0: -INFINITY +native_color: database-2 allocation score on messaging-1: -INFINITY +native_color: database-2 allocation score on messaging-2: -INFINITY +native_color: galera-bundle-0 allocation score on controller-0: 0 +native_color: galera-bundle-0 allocation score on controller-1: -INFINITY +native_color: galera-bundle-0 allocation score on controller-2: -INFINITY +native_color: galera-bundle-0 allocation score on database-0: -INFINITY +native_color: galera-bundle-0 allocation score on database-1: -INFINITY +native_color: galera-bundle-0 allocation score on database-2: -INFINITY +native_color: galera-bundle-0 allocation score on messaging-0: -INFINITY +native_color: galera-bundle-0 allocation score on messaging-1: -INFINITY +native_color: galera-bundle-0 allocation score on messaging-2: -INFINITY +native_color: galera-bundle-1 allocation score on controller-0: -INFINITY +native_color: galera-bundle-1 allocation score on controller-1: -INFINITY +native_color: galera-bundle-1 allocation score on controller-2: INFINITY +native_color: galera-bundle-1 allocation score on database-0: -INFINITY +native_color: galera-bundle-1 allocation score on database-1: -INFINITY +native_color: galera-bundle-1 allocation score on database-2: -INFINITY +native_color: galera-bundle-1 allocation score on messaging-0: -INFINITY +native_color: galera-bundle-1 allocation score on messaging-1: -INFINITY +native_color: galera-bundle-1 allocation score on messaging-2: -INFINITY +native_color: galera-bundle-2 allocation score on controller-0: -INFINITY +native_color: galera-bundle-2 allocation score on controller-1: 0 +native_color: 
galera-bundle-2 allocation score on controller-2: -INFINITY +native_color: galera-bundle-2 allocation score on database-0: -INFINITY +native_color: galera-bundle-2 allocation score on database-1: -INFINITY +native_color: galera-bundle-2 allocation score on database-2: -INFINITY +native_color: galera-bundle-2 allocation score on messaging-0: -INFINITY +native_color: galera-bundle-2 allocation score on messaging-1: -INFINITY +native_color: galera-bundle-2 allocation score on messaging-2: -INFINITY +native_color: galera-bundle-docker-0 allocation score on controller-0: -INFINITY +native_color: galera-bundle-docker-0 allocation score on controller-1: -INFINITY +native_color: galera-bundle-docker-0 allocation score on controller-2: -INFINITY +native_color: galera-bundle-docker-0 allocation score on database-0: INFINITY +native_color: galera-bundle-docker-0 allocation score on database-1: -INFINITY +native_color: galera-bundle-docker-0 allocation score on database-2: -10000 +native_color: galera-bundle-docker-0 allocation score on messaging-0: -INFINITY +native_color: galera-bundle-docker-0 allocation score on messaging-1: -INFINITY +native_color: galera-bundle-docker-0 allocation score on messaging-2: -INFINITY +native_color: galera-bundle-docker-1 allocation score on controller-0: -INFINITY +native_color: galera-bundle-docker-1 allocation score on controller-1: -INFINITY +native_color: galera-bundle-docker-1 allocation score on controller-2: -INFINITY +native_color: galera-bundle-docker-1 allocation score on database-0: -10000 +native_color: galera-bundle-docker-1 allocation score on database-1: INFINITY +native_color: galera-bundle-docker-1 allocation score on database-2: -10000 +native_color: galera-bundle-docker-1 allocation score on messaging-0: -INFINITY +native_color: galera-bundle-docker-1 allocation score on messaging-1: -INFINITY +native_color: galera-bundle-docker-1 allocation score on messaging-2: -INFINITY +native_color: galera-bundle-docker-2 allocation 
score on controller-0: -INFINITY +native_color: galera-bundle-docker-2 allocation score on controller-1: -INFINITY +native_color: galera-bundle-docker-2 allocation score on controller-2: -INFINITY +native_color: galera-bundle-docker-2 allocation score on database-0: -INFINITY +native_color: galera-bundle-docker-2 allocation score on database-1: -INFINITY +native_color: galera-bundle-docker-2 allocation score on database-2: INFINITY +native_color: galera-bundle-docker-2 allocation score on messaging-0: -INFINITY +native_color: galera-bundle-docker-2 allocation score on messaging-1: -INFINITY +native_color: galera-bundle-docker-2 allocation score on messaging-2: -INFINITY +native_color: galera:0 allocation score on galera-bundle-0: INFINITY +native_color: galera:1 allocation score on galera-bundle-1: INFINITY +native_color: galera:2 allocation score on galera-bundle-2: INFINITY +native_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY +native_color: haproxy-bundle-docker-0 allocation score on controller-1: 0 +native_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on database-0: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on database-1: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on database-2: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on messaging-0: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on messaging-1: -INFINITY +native_color: haproxy-bundle-docker-0 allocation score on messaging-2: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on controller-1: 0 +native_color: haproxy-bundle-docker-1 allocation score on controller-2: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on database-0: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on 
database-1: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on database-2: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on messaging-0: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on messaging-1: -INFINITY +native_color: haproxy-bundle-docker-1 allocation score on messaging-2: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 +native_color: haproxy-bundle-docker-2 allocation score on controller-1: 0 +native_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY +native_color: haproxy-bundle-docker-2 allocation score on database-0: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on database-1: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on database-2: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on messaging-0: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on messaging-1: -INFINITY +native_color: haproxy-bundle-docker-2 allocation score on messaging-2: -INFINITY +native_color: ip-10.0.0.104 allocation score on controller-0: 0 +native_color: ip-10.0.0.104 allocation score on controller-1: 0 +native_color: ip-10.0.0.104 allocation score on controller-2: 0 +native_color: ip-10.0.0.104 allocation score on database-0: -INFINITY +native_color: ip-10.0.0.104 allocation score on database-1: -INFINITY +native_color: ip-10.0.0.104 allocation score on database-2: -INFINITY +native_color: ip-10.0.0.104 allocation score on messaging-0: -INFINITY +native_color: ip-10.0.0.104 allocation score on messaging-1: -INFINITY +native_color: ip-10.0.0.104 allocation score on messaging-2: -INFINITY +native_color: ip-172.17.1.11 allocation score on controller-0: 0 +native_color: ip-172.17.1.11 allocation score on controller-1: 0 +native_color: ip-172.17.1.11 allocation score on controller-2: 0 +native_color: ip-172.17.1.11 allocation score on database-0: -INFINITY +native_color: ip-172.17.1.11 
allocation score on database-1: -INFINITY +native_color: ip-172.17.1.11 allocation score on database-2: -INFINITY +native_color: ip-172.17.1.11 allocation score on messaging-0: -INFINITY +native_color: ip-172.17.1.11 allocation score on messaging-1: -INFINITY +native_color: ip-172.17.1.11 allocation score on messaging-2: -INFINITY +native_color: ip-172.17.1.19 allocation score on controller-0: 0 +native_color: ip-172.17.1.19 allocation score on controller-1: 0 +native_color: ip-172.17.1.19 allocation score on controller-2: INFINITY +native_color: ip-172.17.1.19 allocation score on database-0: -INFINITY +native_color: ip-172.17.1.19 allocation score on database-1: -INFINITY +native_color: ip-172.17.1.19 allocation score on database-2: -INFINITY +native_color: ip-172.17.1.19 allocation score on messaging-0: -INFINITY +native_color: ip-172.17.1.19 allocation score on messaging-1: -INFINITY +native_color: ip-172.17.1.19 allocation score on messaging-2: -INFINITY +native_color: ip-172.17.3.13 allocation score on controller-0: 0 +native_color: ip-172.17.3.13 allocation score on controller-1: 0 +native_color: ip-172.17.3.13 allocation score on controller-2: 0 +native_color: ip-172.17.3.13 allocation score on database-0: -INFINITY +native_color: ip-172.17.3.13 allocation score on database-1: -INFINITY +native_color: ip-172.17.3.13 allocation score on database-2: -INFINITY +native_color: ip-172.17.3.13 allocation score on messaging-0: -INFINITY +native_color: ip-172.17.3.13 allocation score on messaging-1: -INFINITY +native_color: ip-172.17.3.13 allocation score on messaging-2: -INFINITY +native_color: ip-172.17.4.19 allocation score on controller-0: 0 +native_color: ip-172.17.4.19 allocation score on controller-1: 0 +native_color: ip-172.17.4.19 allocation score on controller-2: INFINITY +native_color: ip-172.17.4.19 allocation score on database-0: -INFINITY +native_color: ip-172.17.4.19 allocation score on database-1: -INFINITY +native_color: ip-172.17.4.19 allocation 
score on database-2: -INFINITY +native_color: ip-172.17.4.19 allocation score on messaging-0: -INFINITY +native_color: ip-172.17.4.19 allocation score on messaging-1: -INFINITY +native_color: ip-172.17.4.19 allocation score on messaging-2: -INFINITY +native_color: ip-192.168.24.11 allocation score on controller-0: 0 +native_color: ip-192.168.24.11 allocation score on controller-1: 0 +native_color: ip-192.168.24.11 allocation score on controller-2: 0 +native_color: ip-192.168.24.11 allocation score on database-0: -INFINITY +native_color: ip-192.168.24.11 allocation score on database-1: -INFINITY +native_color: ip-192.168.24.11 allocation score on database-2: -INFINITY +native_color: ip-192.168.24.11 allocation score on messaging-0: -INFINITY +native_color: ip-192.168.24.11 allocation score on messaging-1: -INFINITY +native_color: ip-192.168.24.11 allocation score on messaging-2: -INFINITY +native_color: messaging-0 allocation score on controller-0: 0 +native_color: messaging-0 allocation score on controller-1: 0 +native_color: messaging-0 allocation score on controller-2: INFINITY +native_color: messaging-0 allocation score on database-0: -INFINITY +native_color: messaging-0 allocation score on database-1: -INFINITY +native_color: messaging-0 allocation score on database-2: -INFINITY +native_color: messaging-0 allocation score on messaging-0: -INFINITY +native_color: messaging-0 allocation score on messaging-1: -INFINITY +native_color: messaging-0 allocation score on messaging-2: -INFINITY +native_color: messaging-1 allocation score on controller-0: 0 +native_color: messaging-1 allocation score on controller-1: 0 +native_color: messaging-1 allocation score on controller-2: INFINITY +native_color: messaging-1 allocation score on database-0: -INFINITY +native_color: messaging-1 allocation score on database-1: -INFINITY +native_color: messaging-1 allocation score on database-2: -INFINITY +native_color: messaging-1 allocation score on messaging-0: -INFINITY 
+native_color: messaging-1 allocation score on messaging-1: -INFINITY +native_color: messaging-1 allocation score on messaging-2: -INFINITY +native_color: messaging-2 allocation score on controller-0: 0 +native_color: messaging-2 allocation score on controller-1: 0 +native_color: messaging-2 allocation score on controller-2: INFINITY +native_color: messaging-2 allocation score on database-0: -INFINITY +native_color: messaging-2 allocation score on database-1: -INFINITY +native_color: messaging-2 allocation score on database-2: -INFINITY +native_color: messaging-2 allocation score on messaging-0: -INFINITY +native_color: messaging-2 allocation score on messaging-1: -INFINITY +native_color: messaging-2 allocation score on messaging-2: -INFINITY +native_color: openstack-cinder-volume allocation score on controller-0: 0 +native_color: openstack-cinder-volume allocation score on controller-1: 0 +native_color: openstack-cinder-volume allocation score on controller-2: 0 +native_color: openstack-cinder-volume allocation score on database-0: -INFINITY +native_color: openstack-cinder-volume allocation score on database-1: -INFINITY +native_color: openstack-cinder-volume allocation score on database-2: -INFINITY +native_color: openstack-cinder-volume allocation score on messaging-0: -INFINITY +native_color: openstack-cinder-volume allocation score on messaging-1: -INFINITY +native_color: openstack-cinder-volume allocation score on messaging-2: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on controller-0: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on controller-1: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on controller-2: INFINITY +native_color: rabbitmq-bundle-0 allocation score on database-0: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on database-1: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on database-2: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on messaging-0: -INFINITY 
+native_color: rabbitmq-bundle-0 allocation score on messaging-1: -INFINITY +native_color: rabbitmq-bundle-0 allocation score on messaging-2: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on controller-0: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on controller-1: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on controller-2: INFINITY +native_color: rabbitmq-bundle-1 allocation score on database-0: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on database-1: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on database-2: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on messaging-0: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on messaging-1: -INFINITY +native_color: rabbitmq-bundle-1 allocation score on messaging-2: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on controller-0: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on controller-1: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on controller-2: INFINITY +native_color: rabbitmq-bundle-2 allocation score on database-0: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on database-1: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on database-2: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on messaging-0: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on messaging-1: -INFINITY +native_color: rabbitmq-bundle-2 allocation score on messaging-2: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on controller-0: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on controller-1: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on controller-2: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on database-0: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on database-1: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on 
database-2: -INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on messaging-0: INFINITY +native_color: rabbitmq-bundle-docker-0 allocation score on messaging-1: -10000 +native_color: rabbitmq-bundle-docker-0 allocation score on messaging-2: -10000 +native_color: rabbitmq-bundle-docker-1 allocation score on controller-0: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on controller-1: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on controller-2: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on database-0: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on database-1: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on database-2: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on messaging-0: -INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on messaging-1: INFINITY +native_color: rabbitmq-bundle-docker-1 allocation score on messaging-2: -10000 +native_color: rabbitmq-bundle-docker-2 allocation score on controller-0: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on controller-1: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on controller-2: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on database-0: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on database-1: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on database-2: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on messaging-0: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on messaging-1: -INFINITY +native_color: rabbitmq-bundle-docker-2 allocation score on messaging-2: INFINITY +native_color: rabbitmq:0 allocation score on rabbitmq-bundle-0: INFINITY +native_color: rabbitmq:1 allocation score on rabbitmq-bundle-1: INFINITY +native_color: rabbitmq:2 allocation score on rabbitmq-bundle-2: INFINITY +native_color: 
redis-bundle-0 allocation score on controller-0: INFINITY +native_color: redis-bundle-0 allocation score on controller-1: 0 +native_color: redis-bundle-0 allocation score on controller-2: 0 +native_color: redis-bundle-0 allocation score on database-0: -INFINITY +native_color: redis-bundle-0 allocation score on database-1: -INFINITY +native_color: redis-bundle-0 allocation score on database-2: -INFINITY +native_color: redis-bundle-0 allocation score on messaging-0: -INFINITY +native_color: redis-bundle-0 allocation score on messaging-1: -INFINITY +native_color: redis-bundle-0 allocation score on messaging-2: -INFINITY +native_color: redis-bundle-1 allocation score on controller-0: 0 +native_color: redis-bundle-1 allocation score on controller-1: 10000 +native_color: redis-bundle-1 allocation score on controller-2: 0 +native_color: redis-bundle-1 allocation score on database-0: -INFINITY +native_color: redis-bundle-1 allocation score on database-1: -INFINITY +native_color: redis-bundle-1 allocation score on database-2: -INFINITY +native_color: redis-bundle-1 allocation score on messaging-0: -INFINITY +native_color: redis-bundle-1 allocation score on messaging-1: -INFINITY +native_color: redis-bundle-1 allocation score on messaging-2: -INFINITY +native_color: redis-bundle-2 allocation score on controller-0: 0 +native_color: redis-bundle-2 allocation score on controller-1: 0 +native_color: redis-bundle-2 allocation score on controller-2: INFINITY +native_color: redis-bundle-2 allocation score on database-0: -INFINITY +native_color: redis-bundle-2 allocation score on database-1: -INFINITY +native_color: redis-bundle-2 allocation score on database-2: -INFINITY +native_color: redis-bundle-2 allocation score on messaging-0: -INFINITY +native_color: redis-bundle-2 allocation score on messaging-1: -INFINITY +native_color: redis-bundle-2 allocation score on messaging-2: -INFINITY +native_color: redis-bundle-docker-0 allocation score on controller-0: INFINITY +native_color: 
redis-bundle-docker-0 allocation score on controller-1: 0 +native_color: redis-bundle-docker-0 allocation score on controller-2: 0 +native_color: redis-bundle-docker-0 allocation score on database-0: -INFINITY +native_color: redis-bundle-docker-0 allocation score on database-1: -INFINITY +native_color: redis-bundle-docker-0 allocation score on database-2: -INFINITY +native_color: redis-bundle-docker-0 allocation score on messaging-0: -INFINITY +native_color: redis-bundle-docker-0 allocation score on messaging-1: -INFINITY +native_color: redis-bundle-docker-0 allocation score on messaging-2: -INFINITY +native_color: redis-bundle-docker-1 allocation score on controller-0: -INFINITY +native_color: redis-bundle-docker-1 allocation score on controller-1: 0 +native_color: redis-bundle-docker-1 allocation score on controller-2: -INFINITY +native_color: redis-bundle-docker-1 allocation score on database-0: -INFINITY +native_color: redis-bundle-docker-1 allocation score on database-1: -INFINITY +native_color: redis-bundle-docker-1 allocation score on database-2: -INFINITY +native_color: redis-bundle-docker-1 allocation score on messaging-0: -INFINITY +native_color: redis-bundle-docker-1 allocation score on messaging-1: -INFINITY +native_color: redis-bundle-docker-1 allocation score on messaging-2: -INFINITY +native_color: redis-bundle-docker-2 allocation score on controller-0: -INFINITY +native_color: redis-bundle-docker-2 allocation score on controller-1: 0 +native_color: redis-bundle-docker-2 allocation score on controller-2: INFINITY +native_color: redis-bundle-docker-2 allocation score on database-0: -INFINITY +native_color: redis-bundle-docker-2 allocation score on database-1: -INFINITY +native_color: redis-bundle-docker-2 allocation score on database-2: -INFINITY +native_color: redis-bundle-docker-2 allocation score on messaging-0: -INFINITY +native_color: redis-bundle-docker-2 allocation score on messaging-1: -INFINITY +native_color: redis-bundle-docker-2 allocation 
score on messaging-2: -INFINITY +native_color: redis:0 allocation score on redis-bundle-0: INFINITY +native_color: redis:1 allocation score on redis-bundle-1: INFINITY +native_color: redis:2 allocation score on redis-bundle-2: INFINITY +native_color: stonith-fence_ipmilan-525400244e09 allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-525400244e09 allocation score on controller-1: 0 +native_color: stonith-fence_ipmilan-525400244e09 allocation score on controller-2: INFINITY +native_color: stonith-fence_ipmilan-525400244e09 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-525400244e09 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-525400244e09 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-525400244e09 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-525400244e09 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-525400244e09 allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-525400498d34 allocation score on controller-0: -INFINITY +native_color: stonith-fence_ipmilan-525400498d34 allocation score on controller-1: 0 +native_color: stonith-fence_ipmilan-525400498d34 allocation score on controller-2: 0 +native_color: stonith-fence_ipmilan-525400498d34 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-525400498d34 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-525400498d34 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-525400498d34 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-525400498d34 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-525400498d34 allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-525400542c06 allocation score on controller-0: 0 +native_color: 
stonith-fence_ipmilan-525400542c06 allocation score on controller-1: 0 +native_color: stonith-fence_ipmilan-525400542c06 allocation score on controller-2: -INFINITY +native_color: stonith-fence_ipmilan-525400542c06 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-525400542c06 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-525400542c06 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-525400542c06 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-525400542c06 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-525400542c06 allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-5254005ea387 allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-5254005ea387 allocation score on controller-1: 0 +native_color: stonith-fence_ipmilan-5254005ea387 allocation score on controller-2: 0 +native_color: stonith-fence_ipmilan-5254005ea387 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-5254005ea387 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-5254005ea387 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-5254005ea387 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-5254005ea387 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-5254005ea387 allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-525400a25787 allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-525400a25787 allocation score on controller-1: 0 +native_color: stonith-fence_ipmilan-525400a25787 allocation score on controller-2: INFINITY +native_color: stonith-fence_ipmilan-525400a25787 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-525400a25787 allocation score on database-1: -INFINITY +native_color: 
stonith-fence_ipmilan-525400a25787 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-525400a25787 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-525400a25787 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-525400a25787 allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-525400a7f9e0 allocation score on controller-0: INFINITY +native_color: stonith-fence_ipmilan-525400a7f9e0 allocation score on controller-1: 0 +native_color: stonith-fence_ipmilan-525400a7f9e0 allocation score on controller-2: 0 +native_color: stonith-fence_ipmilan-525400a7f9e0 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-525400a7f9e0 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-525400a7f9e0 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-525400a7f9e0 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-525400a7f9e0 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-525400a7f9e0 allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-525400aac413 allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-525400aac413 allocation score on controller-1: -INFINITY +native_color: stonith-fence_ipmilan-525400aac413 allocation score on controller-2: INFINITY +native_color: stonith-fence_ipmilan-525400aac413 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-525400aac413 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-525400aac413 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-525400aac413 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-525400aac413 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-525400aac413 allocation score on messaging-2: 
-INFINITY +native_color: stonith-fence_ipmilan-525400c709f7 allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-525400c709f7 allocation score on controller-1: 0 +native_color: stonith-fence_ipmilan-525400c709f7 allocation score on controller-2: 0 +native_color: stonith-fence_ipmilan-525400c709f7 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-525400c709f7 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-525400c709f7 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-525400c709f7 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-525400c709f7 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-525400c709f7 allocation score on messaging-2: -INFINITY +native_color: stonith-fence_ipmilan-525400cdec10 allocation score on controller-0: 0 +native_color: stonith-fence_ipmilan-525400cdec10 allocation score on controller-1: 0 +native_color: stonith-fence_ipmilan-525400cdec10 allocation score on controller-2: INFINITY +native_color: stonith-fence_ipmilan-525400cdec10 allocation score on database-0: -INFINITY +native_color: stonith-fence_ipmilan-525400cdec10 allocation score on database-1: -INFINITY +native_color: stonith-fence_ipmilan-525400cdec10 allocation score on database-2: -INFINITY +native_color: stonith-fence_ipmilan-525400cdec10 allocation score on messaging-0: -INFINITY +native_color: stonith-fence_ipmilan-525400cdec10 allocation score on messaging-1: -INFINITY +native_color: stonith-fence_ipmilan-525400cdec10 allocation score on messaging-2: -INFINITY +redis:0 promotion score on redis-bundle-0: 1 +redis:1 promotion score on redis-bundle-1: -1 +redis:2 promotion score on redis-bundle-2: 1 diff --git a/pengine/test10/bundle-order-stop-on-remote.summary b/pengine/test10/bundle-order-stop-on-remote.summary new file mode 100644 index 00000000000..8a045990a5e --- /dev/null +++ 
b/pengine/test10/bundle-order-stop-on-remote.summary @@ -0,0 +1,224 @@ + +Current cluster status: +RemoteNode database-0: UNCLEAN (offline) +RemoteNode database-2: UNCLEAN (offline) +Online: [ controller-0 controller-1 controller-2 ] +RemoteOnline: [ database-1 messaging-0 messaging-1 messaging-2 ] +Containers: [ galera-bundle-1:galera-bundle-docker-1 rabbitmq-bundle-0:rabbitmq-bundle-docker-0 rabbitmq-bundle-1:rabbitmq-bundle-docker-1 rabbitmq-bundle-2:rabbitmq-bundle-docker-2 redis-bundle-0:redis-bundle-docker-0 redis-bundle-2:redis-bundle-docker-2 ] + + database-0 (ocf::pacemaker:remote): Stopped + database-1 (ocf::pacemaker:remote): Started controller-2 + database-2 (ocf::pacemaker:remote): Stopped + messaging-0 (ocf::pacemaker:remote): Started controller-2 + messaging-1 (ocf::pacemaker:remote): Started controller-2 + messaging-2 (ocf::pacemaker:remote): Started controller-2 + Docker container set: rabbitmq-bundle [192.168.24.1:8787/rhosp12/openstack-rabbitmq-docker:pcmklatest] + rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started messaging-0 + rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started messaging-1 + rabbitmq-bundle-2 (ocf::heartbeat:rabbitmq-cluster): Started messaging-2 + Docker container set: galera-bundle [192.168.24.1:8787/rhosp12/openstack-mariadb-docker:pcmklatest] + galera-bundle-0 (ocf::heartbeat:galera): FAILED Master database-0 (UNCLEAN) + galera-bundle-1 (ocf::heartbeat:galera): Master database-1 + galera-bundle-2 (ocf::heartbeat:galera): FAILED Master database-2 (UNCLEAN) + Docker container set: redis-bundle [192.168.24.1:8787/rhosp12/openstack-redis-docker:pcmklatest] + redis-bundle-0 (ocf::heartbeat:redis): Slave controller-0 + redis-bundle-1 (ocf::heartbeat:redis): Stopped + redis-bundle-2 (ocf::heartbeat:redis): Slave controller-2 + ip-192.168.24.11 (ocf::heartbeat:IPaddr2): Stopped + ip-10.0.0.104 (ocf::heartbeat:IPaddr2): Stopped + ip-172.17.1.19 (ocf::heartbeat:IPaddr2): Started controller-2 + ip-172.17.1.11 
(ocf::heartbeat:IPaddr2): Stopped + ip-172.17.3.13 (ocf::heartbeat:IPaddr2): Stopped + ip-172.17.4.19 (ocf::heartbeat:IPaddr2): Started controller-2 + Docker container set: haproxy-bundle [192.168.24.1:8787/rhosp12/openstack-haproxy-docker:pcmklatest] + haproxy-bundle-docker-0 (ocf::heartbeat:docker): Started controller-0 + haproxy-bundle-docker-1 (ocf::heartbeat:docker): Stopped + haproxy-bundle-docker-2 (ocf::heartbeat:docker): Started controller-2 + openstack-cinder-volume (systemd:openstack-cinder-volume): Stopped + stonith-fence_ipmilan-525400244e09 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-525400cdec10 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-525400c709f7 (stonith:fence_ipmilan): Stopped + stonith-fence_ipmilan-525400a7f9e0 (stonith:fence_ipmilan): Started controller-0 + stonith-fence_ipmilan-525400a25787 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-5254005ea387 (stonith:fence_ipmilan): Stopped + stonith-fence_ipmilan-525400542c06 (stonith:fence_ipmilan): Stopped + stonith-fence_ipmilan-525400aac413 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-525400498d34 (stonith:fence_ipmilan): Stopped + +Transition Summary: + * Fence (reboot) galera-bundle-2 (resource: galera-bundle-docker-2) 'guest is unclean' + * Fence (reboot) galera-bundle-0 (resource: galera-bundle-docker-0) 'guest is unclean' + * Start database-0 ( controller-0 ) + * Start database-2 ( controller-1 ) + * Recover galera-bundle-docker-0 ( database-0 ) + * Start galera-bundle-0 ( controller-0 ) + * Recover galera:0 ( Master galera-bundle-0 ) + * Recover galera-bundle-docker-2 ( database-2 ) + * Start galera-bundle-2 ( controller-1 ) + * Recover galera:2 ( Master galera-bundle-2 ) + * Promote redis:0 ( Slave -> Master redis-bundle-0 ) + * Start redis-bundle-docker-1 ( controller-1 ) + * Start redis-bundle-1 ( controller-1 ) + * Start redis:1 ( redis-bundle-1 ) + * Start ip-192.168.24.11 ( 
controller-0 ) + * Start ip-10.0.0.104 ( controller-1 ) + * Start ip-172.17.1.11 ( controller-0 ) + * Start ip-172.17.3.13 ( controller-1 ) + * Start haproxy-bundle-docker-1 ( controller-1 ) + * Start openstack-cinder-volume ( controller-0 ) + * Start stonith-fence_ipmilan-525400c709f7 ( controller-1 ) + * Start stonith-fence_ipmilan-5254005ea387 ( controller-1 ) + * Start stonith-fence_ipmilan-525400542c06 ( controller-0 ) + * Start stonith-fence_ipmilan-525400498d34 ( controller-1 ) + +Executing cluster transition: + * Resource action: database-0 start on controller-0 + * Resource action: database-2 start on controller-1 + * Pseudo action: redis-bundle-master_pre_notify_start_0 + * Resource action: stonith-fence_ipmilan-525400c709f7 start on controller-1 + * Resource action: stonith-fence_ipmilan-5254005ea387 start on controller-1 + * Resource action: stonith-fence_ipmilan-525400542c06 start on controller-0 + * Resource action: stonith-fence_ipmilan-525400498d34 start on controller-1 + * Pseudo action: redis-bundle_start_0 + * Pseudo action: galera-bundle_demote_0 + * Resource action: database-0 monitor=20000 on controller-0 + * Resource action: database-2 monitor=20000 on controller-1 + * Pseudo action: galera-bundle-master_demote_0 + * Resource action: redis notify on redis-bundle-0 + * Resource action: redis notify on redis-bundle-2 + * Pseudo action: redis-bundle-master_confirmed-pre_notify_start_0 + * Pseudo action: redis-bundle-master_start_0 + * Resource action: stonith-fence_ipmilan-525400c709f7 monitor=60000 on controller-1 + * Resource action: stonith-fence_ipmilan-5254005ea387 monitor=60000 on controller-1 + * Resource action: stonith-fence_ipmilan-525400542c06 monitor=60000 on controller-0 + * Resource action: stonith-fence_ipmilan-525400498d34 monitor=60000 on controller-1 + * Pseudo action: galera_demote_0 + * Pseudo action: galera_demote_0 + * Pseudo action: galera-bundle-master_demoted_0 + * Pseudo action: galera-bundle_demoted_0 + * Pseudo 
action: galera-bundle_stop_0 + * Resource action: galera-bundle-docker-0 stop on database-0 + * Resource action: galera-bundle-docker-2 stop on database-2 + * Pseudo action: stonith-galera-bundle-2-reboot on galera-bundle-2 + * Pseudo action: stonith-galera-bundle-0-reboot on galera-bundle-0 + * Pseudo action: stonith_complete + * Pseudo action: galera-bundle-master_stop_0 + * Resource action: redis-bundle-docker-1 start on controller-1 + * Resource action: redis-bundle-1 monitor on controller-1 + * Resource action: ip-192.168.24.11 start on controller-0 + * Resource action: ip-10.0.0.104 start on controller-1 + * Resource action: ip-172.17.1.11 start on controller-0 + * Resource action: ip-172.17.3.13 start on controller-1 + * Resource action: openstack-cinder-volume start on controller-0 + * Pseudo action: haproxy-bundle_start_0 + * Pseudo action: galera_stop_0 + * Resource action: redis-bundle-docker-1 monitor=60000 on controller-1 + * Resource action: redis-bundle-1 start on controller-1 + * Resource action: ip-192.168.24.11 monitor=10000 on controller-0 + * Resource action: ip-10.0.0.104 monitor=10000 on controller-1 + * Resource action: ip-172.17.1.11 monitor=10000 on controller-0 + * Resource action: ip-172.17.3.13 monitor=10000 on controller-1 + * Resource action: haproxy-bundle-docker-1 start on controller-1 + * Resource action: openstack-cinder-volume monitor=60000 on controller-0 + * Pseudo action: haproxy-bundle_running_0 + * Pseudo action: galera_stop_0 + * Pseudo action: galera-bundle-master_stopped_0 + * Resource action: redis start on redis-bundle-1 + * Pseudo action: redis-bundle-master_running_0 + * Resource action: redis-bundle-1 monitor=60000 on controller-1 + * Resource action: haproxy-bundle-docker-1 monitor=60000 on controller-1 + * Pseudo action: galera-bundle_stopped_0 + * Pseudo action: galera-bundle_start_0 + * Pseudo action: all_stopped + * Pseudo action: galera-bundle-master_start_0 + * Resource action: galera-bundle-docker-0 start on 
database-0 + * Resource action: galera-bundle-0 monitor on controller-1 + * Resource action: galera-bundle-docker-2 start on database-2 + * Resource action: galera-bundle-2 monitor on controller-1 + * Pseudo action: redis-bundle-master_post_notify_running_0 + * Resource action: galera-bundle-docker-0 monitor=60000 on database-0 + * Resource action: galera-bundle-0 start on controller-0 + * Resource action: galera-bundle-docker-2 monitor=60000 on database-2 + * Resource action: galera-bundle-2 start on controller-1 + * Resource action: redis notify on redis-bundle-0 + * Resource action: redis notify on redis-bundle-1 + * Resource action: redis notify on redis-bundle-2 + * Pseudo action: redis-bundle-master_confirmed-post_notify_running_0 + * Pseudo action: redis-bundle_running_0 + * Resource action: galera start on galera-bundle-0 + * Resource action: galera start on galera-bundle-2 + * Pseudo action: galera-bundle-master_running_0 + * Resource action: galera-bundle-0 monitor=60000 on controller-0 + * Resource action: galera-bundle-2 monitor=60000 on controller-1 + * Pseudo action: redis-bundle-master_pre_notify_promote_0 + * Pseudo action: redis-bundle_promote_0 + * Pseudo action: galera-bundle_running_0 + * Resource action: redis notify on redis-bundle-0 + * Resource action: redis notify on redis-bundle-1 + * Resource action: redis notify on redis-bundle-2 + * Pseudo action: redis-bundle-master_confirmed-pre_notify_promote_0 + * Pseudo action: redis-bundle-master_promote_0 + * Pseudo action: galera-bundle_promote_0 + * Pseudo action: galera-bundle-master_promote_0 + * Resource action: redis promote on redis-bundle-0 + * Pseudo action: redis-bundle-master_promoted_0 + * Resource action: galera promote on galera-bundle-0 + * Resource action: galera promote on galera-bundle-2 + * Pseudo action: galera-bundle-master_promoted_0 + * Pseudo action: redis-bundle-master_post_notify_promoted_0 + * Pseudo action: galera-bundle_promoted_0 + * Resource action: galera 
monitor=10000 on galera-bundle-0 + * Resource action: galera monitor=10000 on galera-bundle-2 + * Resource action: redis notify on redis-bundle-0 + * Resource action: redis notify on redis-bundle-1 + * Resource action: redis notify on redis-bundle-2 + * Pseudo action: redis-bundle-master_confirmed-post_notify_promoted_0 + * Pseudo action: redis-bundle_promoted_0 + * Resource action: redis monitor=20000 on redis-bundle-0 + * Resource action: redis monitor=60000 on redis-bundle-1 + * Resource action: redis monitor=45000 on redis-bundle-1 + +Revised cluster status: +Online: [ controller-0 controller-1 controller-2 ] +RemoteOnline: [ database-0 database-1 database-2 messaging-0 messaging-1 messaging-2 ] +Containers: [ galera-bundle-0:galera-bundle-docker-0 galera-bundle-1:galera-bundle-docker-1 galera-bundle-2:galera-bundle-docker-2 rabbitmq-bundle-0:rabbitmq-bundle-docker-0 rabbitmq-bundle-1:rabbitmq-bundle-docker-1 rabbitmq-bundle-2:rabbitmq-bundle-docker-2 redis-bundle-0:redis-bundle-docker-0 redis-bundle-1:redis-bundle-docker-1 redis-bundle-2:redis-bundle-docker-2 ] + + database-0 (ocf::pacemaker:remote): Started controller-0 + database-1 (ocf::pacemaker:remote): Started controller-2 + database-2 (ocf::pacemaker:remote): Started controller-1 + messaging-0 (ocf::pacemaker:remote): Started controller-2 + messaging-1 (ocf::pacemaker:remote): Started controller-2 + messaging-2 (ocf::pacemaker:remote): Started controller-2 + Docker container set: rabbitmq-bundle [192.168.24.1:8787/rhosp12/openstack-rabbitmq-docker:pcmklatest] + rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started messaging-0 + rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started messaging-1 + rabbitmq-bundle-2 (ocf::heartbeat:rabbitmq-cluster): Started messaging-2 + Docker container set: galera-bundle [192.168.24.1:8787/rhosp12/openstack-mariadb-docker:pcmklatest] + galera-bundle-0 (ocf::heartbeat:galera): Master database-0 + galera-bundle-1 (ocf::heartbeat:galera): Master database-1 + 
galera-bundle-2 (ocf::heartbeat:galera): Master database-2 + Docker container set: redis-bundle [192.168.24.1:8787/rhosp12/openstack-redis-docker:pcmklatest] + redis-bundle-0 (ocf::heartbeat:redis): Master controller-0 + redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1 + redis-bundle-2 (ocf::heartbeat:redis): Slave controller-2 + ip-192.168.24.11 (ocf::heartbeat:IPaddr2): Started controller-0 + ip-10.0.0.104 (ocf::heartbeat:IPaddr2): Started controller-1 + ip-172.17.1.19 (ocf::heartbeat:IPaddr2): Started controller-2 + ip-172.17.1.11 (ocf::heartbeat:IPaddr2): Started controller-0 + ip-172.17.3.13 (ocf::heartbeat:IPaddr2): Started controller-1 + ip-172.17.4.19 (ocf::heartbeat:IPaddr2): Started controller-2 + Docker container set: haproxy-bundle [192.168.24.1:8787/rhosp12/openstack-haproxy-docker:pcmklatest] + haproxy-bundle-docker-0 (ocf::heartbeat:docker): Started controller-0 + haproxy-bundle-docker-1 (ocf::heartbeat:docker): Started controller-1 + haproxy-bundle-docker-2 (ocf::heartbeat:docker): Started controller-2 + openstack-cinder-volume (systemd:openstack-cinder-volume): Started controller-0 + stonith-fence_ipmilan-525400244e09 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-525400cdec10 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-525400c709f7 (stonith:fence_ipmilan): Started controller-1 + stonith-fence_ipmilan-525400a7f9e0 (stonith:fence_ipmilan): Started controller-0 + stonith-fence_ipmilan-525400a25787 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-5254005ea387 (stonith:fence_ipmilan): Started controller-1 + stonith-fence_ipmilan-525400542c06 (stonith:fence_ipmilan): Started controller-0 + stonith-fence_ipmilan-525400aac413 (stonith:fence_ipmilan): Started controller-2 + stonith-fence_ipmilan-525400498d34 (stonith:fence_ipmilan): Started controller-1 + diff --git a/pengine/test10/bundle-order-stop-on-remote.xml b/pengine/test10/bundle-order-stop-on-remote.xml new file mode 
100644 index 00000000000..d3b87c82387 --- /dev/null +++ b/pengine/test10/bundle-order-stop-on-remote.xml @@ -0,0 +1,1165 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 47a5f6f5cd1fba2c6bac140329e563abd34b2ef4 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 8 Dec 2017 16:54:07 -0600 Subject: [PATCH 013/812] Low: PE: correct mispelled constant original intention of 2b1aae07 --- pengine/allocate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pengine/allocate.c b/pengine/allocate.c index 1c95e97d8eb..481a0ec093d 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1870,7 +1870,7 @@ apply_container_ordering(action_t *action, pe_working_set_t *data_set) container->id); if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE) - || safe_str_eq(action->task, CRMD_ACTION_MIGRATE)) { + || safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). */ @@ -2036,7 +2036,7 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set) remote_rsc->id, state2text(state)); if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE) - || safe_str_eq(action->task, CRMD_ACTION_MIGRATE)) { + || safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) { /* Migration ops map to "no_action", but we need to apply the same * ordering as for stop or demote (see get_router_node()). 
*/ From 55c9b5ef9c6f531ea808926abaaea5c7c8890dad Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 8 Dec 2017 17:31:23 -0600 Subject: [PATCH 014/812] Doc: PE: update remote stop ordering comments for recent changes --- pengine/allocate.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pengine/allocate.c b/pengine/allocate.c index 481a0ec093d..7ae4e025e61 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -2058,9 +2058,6 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set) break; case stop_rsc: - /* Handle special case with remote node where stop actions need to be - * ordered after the connection resource starts somewhere else. - */ if(state == remote_state_alive) { order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); @@ -2076,14 +2073,18 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set) pe_order_implies_first, data_set); } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) { - /* If its not coming back up, better do what we need first */ + /* State must be remote_state_unknown or remote_state_stopped. + * Since the connection is not coming back up in this + * transition, stop this resource first. + */ order_action_then_stop(action, remote_rsc, pe_order_implies_first, data_set); } else { - /* Wait for the connection resource to be up and assume everything is as we left it */ + /* The connection is going to be started somewhere else, so + * stop this resource after that completes. 
+ */ order_start_then_action(remote_rsc, action, pe_order_none, data_set); - } break; From 39441fa1dfe625cf00af463269052d4c2dafaa16 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 8 Dec 2017 17:16:55 -0600 Subject: [PATCH 015/812] Low: libpe_status: limit resource type check to primitives --- lib/pengine/complex.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c index d58d6beb8fb..86f290c342c 100644 --- a/lib/pengine/complex.c +++ b/lib/pengine/complex.c @@ -784,7 +784,9 @@ common_unpack(xmlNode * xml_obj, resource_t ** rsc, if(is_set((*rsc)->flags, pe_rsc_fence_device)) { value = "quorum"; - } else if (safe_str_eq(crm_element_value((*rsc)->xml, XML_AGENT_ATTR_CLASS), "ocf") + } else if (((*rsc)->variant == pe_native) + && safe_str_eq(crm_element_value((*rsc)->xml, XML_AGENT_ATTR_CLASS), + PCMK_RESOURCE_CLASS_OCF) && safe_str_eq(crm_element_value((*rsc)->xml, XML_AGENT_ATTR_PROVIDER), "pacemaker") && safe_str_eq(crm_element_value((*rsc)->xml, XML_ATTR_TYPE), "remote") ) { From 68438917c3b1ed305af6da2acd23454cd777e1d1 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 8 Dec 2017 18:00:12 -0600 Subject: [PATCH 016/812] Fix: lrmd: always use most recent remote proxy Any working proxy is sufficient, but the newest connection is the most likely to be working. We want to avoid using an old proxy that has failed but whose TCP connection has not yet timed out. 
--- lrmd/ipc_proxy.c | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/lrmd/ipc_proxy.c b/lrmd/ipc_proxy.c index 5d6ab344fd0..4d1ee01b3c7 100644 --- a/lrmd/ipc_proxy.c +++ b/lrmd/ipc_proxy.c @@ -42,7 +42,7 @@ static qb_ipcs_service_t *crmd_ipcs = NULL; static qb_ipcs_service_t *stonith_ipcs = NULL; /* ipc providers == crmd clients connecting from cluster nodes */ -static GHashTable *ipc_providers = NULL; +static GList *ipc_providers = NULL; /* ipc clients == things like cibadmin, crm_resource, connecting locally */ static GHashTable *ipc_clients = NULL; @@ -52,24 +52,14 @@ static GHashTable *ipc_clients = NULL; * * \return Pointer to a provider if one exists, NULL otherwise * - * \note Grab the first provider available; any provider will work, and usually - * there will be only one. These are client connections originating from a - * cluster node's crmd. + * \note Grab the first provider, which is the most recent connection. That way, + * if we haven't yet timed out an old, failed connection, we don't try to + * use it. */ crm_client_t * ipc_proxy_get_provider() { - if (ipc_providers) { - GHashTableIter iter; - gpointer key = NULL; - gpointer value = NULL; - - g_hash_table_iter_init(&iter, ipc_providers); - if (g_hash_table_iter_next(&iter, &key, &value)) { - return (crm_client_t*)value; - } - } - return NULL; + return ipc_providers? 
(crm_client_t*) (ipc_providers->data) : NULL; } static int32_t @@ -378,10 +368,8 @@ static struct qb_ipcs_service_handlers cib_proxy_callbacks_rw = { void ipc_proxy_add_provider(crm_client_t *ipc_proxy) { - if (ipc_providers == NULL) { - return; - } - g_hash_table_insert(ipc_providers, ipc_proxy->id, ipc_proxy); + // Prepending ensures the most recent connection is always first + ipc_providers = g_list_prepend(ipc_providers, ipc_proxy); } void @@ -393,11 +381,7 @@ ipc_proxy_remove_provider(crm_client_t *ipc_proxy) GList *remove_these = NULL; GListPtr gIter = NULL; - if (ipc_providers == NULL) { - return; - } - - g_hash_table_remove(ipc_providers, ipc_proxy->id); + ipc_providers = g_list_remove(ipc_providers, ipc_proxy); g_hash_table_iter_init(&iter, ipc_clients); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & ipc_client)) { @@ -413,6 +397,8 @@ ipc_proxy_remove_provider(crm_client_t *ipc_proxy) for (gIter = remove_these; gIter != NULL; gIter = gIter->next) { ipc_client = gIter->data; + + // Disconnection callback will free the client here qb_ipcs_disconnect(ipc_client->ipcs); } @@ -424,7 +410,6 @@ void ipc_proxy_init(void) { ipc_clients = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); - ipc_providers = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); cib_ipc_servers_init(&cib_ro, &cib_rw, @@ -446,10 +431,12 @@ void ipc_proxy_cleanup(void) { if (ipc_providers) { - g_hash_table_destroy(ipc_providers); + g_list_free(ipc_providers); + ipc_providers = NULL; } if (ipc_clients) { g_hash_table_destroy(ipc_clients); + ipc_clients = NULL; } cib_ipc_servers_destroy(cib_ro, cib_rw, cib_shm); qb_ipcs_destroy(attrd_ipcs); @@ -458,6 +445,4 @@ ipc_proxy_cleanup(void) cib_ro = NULL; cib_rw = NULL; cib_shm = NULL; - ipc_providers = NULL; - ipc_clients = NULL; } From 5042a3b19a2f2bfa3d09b4d1029f53e6b674918b Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 14 Dec 2017 09:16:47 -0600 Subject: [PATCH 017/812] Test: CTS: remove 
dead code makes static analysis happy --- cts/CTSaudits.py | 1 - cts/environment.py | 1 - cts/remote.py | 5 +---- cts/watcher.py | 6 +++--- 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/cts/CTSaudits.py b/cts/CTSaudits.py index aff897f6bf4..d9fbeb9510f 100755 --- a/cts/CTSaudits.py +++ b/cts/CTSaudits.py @@ -190,7 +190,6 @@ def __call__(self): if answer and answer == "n": raise ValueError("Disk full on %s" % (node)) - ret = 0 elif remaining_mb < 100 or used_percent > 90: self.CM.log("WARN: Low on log disk space (%dMB) on %s" % (remaining_mb, node)) diff --git a/cts/environment.py b/cts/environment.py index 75a18c80993..6c4831c3896 100644 --- a/cts/environment.py +++ b/cts/environment.py @@ -182,7 +182,6 @@ def set_stack(self, name): else: raise ValueError("Unknown stack: "+name) - sys.exit(1) def get_stack_short(self): # Create the Cluster Manager object diff --git a/cts/remote.py b/cts/remote.py index 8c3691826e1..7cef40e3bd7 100644 --- a/cts/remote.py +++ b/cts/remote.py @@ -220,10 +220,7 @@ def __call__(self, node, command, stdout=0, synchronous=1, silent=False, blockin if not silent: for err in errors: - if stdout == 3: - result.append("error: "+err) - else: - self.debug("cmd: stderr: %s" % err) + self.debug("cmd: stderr: %s" % err) if stdout == 0: if not silent and result: diff --git a/cts/watcher.py b/cts/watcher.py index de032f7b439..42685adfada 100644 --- a/cts/watcher.py +++ b/cts/watcher.py @@ -337,19 +337,19 @@ def __init__(self, log, regexes, name="Anon", timeout=10, debug_level=None, sile self.kind = kind else: raise - self.kind = self.Env["LogWatcher"] + #self.kind = self.Env["LogWatcher"] if log: self.filename = log else: raise - self.filename = self.Env["LogFileName"] + #self.filename = self.Env["LogFileName"] if hosts: self.hosts = hosts else: raise - self.hosts = self.Env["nodes"] + #self.hosts = self.Env["nodes"] if trace_lw: self.debug_level = 3 From 570929eba229558b1a6900ffc54e4d5ee4150f74 Mon Sep 17 00:00:00 2001 From: Ken 
Gaillot Date: Thu, 14 Dec 2017 09:23:03 -0600 Subject: [PATCH 018/812] Refactor: pengine: validate more function arguments not an issue with current code, but makes static analysis happy --- pengine/clone.c | 3 ++- pengine/utilization.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pengine/clone.c b/pengine/clone.c index 99bac7e1396..e81dbc85d37 100644 --- a/pengine/clone.c +++ b/pengine/clone.c @@ -955,6 +955,7 @@ is_child_compatible(resource_t *child_rsc, node_t * local_node, enum rsc_role_e node_t *node = NULL; enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); + CRM_CHECK(child_rsc && local_node, return FALSE); if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { /* We only want instances that haven't failed */ node = child_rsc->fns->location(child_rsc, NULL, current); @@ -965,7 +966,7 @@ is_child_compatible(resource_t *child_rsc, node_t * local_node, enum rsc_role_e return FALSE; } - if (node && local_node && node->details == local_node->details) { + if (node && (node->details == local_node->details)) { return TRUE; } else if (node) { diff --git a/pengine/utilization.c b/pengine/utilization.c index f42c85d9921..05f8d78fe2e 100644 --- a/pengine/utilization.c +++ b/pengine/utilization.c @@ -341,6 +341,7 @@ process_utilization(resource_t * rsc, node_t ** prefer, pe_working_set_t * data_ { int alloc_details = scores_log_level + 1; + CRM_CHECK(rsc && prefer && data_set, return); if (safe_str_neq(data_set->placement_strategy, "default")) { GHashTableIter iter; GListPtr colocated_rscs = NULL; From db2fdc9a452fef11d397e25202fde8ba1bad4cd3 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 14 Dec 2017 10:36:20 -0600 Subject: [PATCH 019/812] Low: libcrmservice: avoid memory leak on DBus error --- lib/services/dbus.c | 47 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/lib/services/dbus.c b/lib/services/dbus.c index fb3e867f9fd..58df9270df4 100644 --- 
a/lib/services/dbus.c +++ b/lib/services/dbus.c @@ -23,6 +23,15 @@ struct db_getall_data { void (*callback)(const char *name, const char *value, void *userdata); }; +static void +free_db_getall_data(struct db_getall_data *data) +{ + free(data->target); + free(data->object); + free(data->name); + free(data); +} + DBusConnection * pcmk_dbus_connect(void) { @@ -196,6 +205,20 @@ pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, return reply; } +/*! + * \internal + * \brief Send a DBus message with a callback for the reply + * + * \param[in] msg DBus message to send + * \param[in,out] connection DBus connection to send on + * \param[in] done Function to call when pending call completes + * \param[in] user_data Data to pass to done callback + * + * \return Handle for reply on success, NULL on error + * \note The caller can assume that the done callback is called always and + * only when the return value is non-NULL. (This allows the caller to + * know where it should free dynamically allocated user_data.) 
+ */ DBusPendingCall * pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection, void(*done)(DBusPendingCall *pending, void *user_data), @@ -359,11 +382,7 @@ pcmk_dbus_lookup_result(DBusMessage *reply, struct db_getall_data *data) } cleanup: - free(data->target); - free(data->object); - free(data->name); - free(data); - + free_db_getall_data(data); return output; } @@ -424,11 +443,19 @@ pcmk_dbus_get_property(DBusConnection *connection, const char *target, query_data->name = strdup(name); } - if(query_data->callback) { - DBusPendingCall* _pending; - _pending = pcmk_dbus_send(msg, connection, pcmk_dbus_lookup_cb, query_data, timeout); - if (pending != NULL) { - *pending = _pending; + if (query_data->callback) { + DBusPendingCall *local_pending; + + local_pending = pcmk_dbus_send(msg, connection, pcmk_dbus_lookup_cb, + query_data, timeout); + if (local_pending == NULL) { + // pcmk_dbus_lookup_cb() was not called in this case + free_db_getall_data(query_data); + query_data = NULL; + } + + if (pending) { + *pending = local_pending; } } else { From 4a774710ec7269ec3a1427ae09fc6ca435c66e92 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 14 Dec 2017 12:44:04 -0600 Subject: [PATCH 020/812] Build: systemd unit files: restore DBus dependency 06e2e26 removed the unit files' DBus dependency on the advice of a systemd developer, but it is necessary --- lrmd/pacemaker_remote.service.in | 3 +++ mcp/pacemaker.service.in | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/lrmd/pacemaker_remote.service.in b/lrmd/pacemaker_remote.service.in index d5717f6d009..1c596e1005b 100644 --- a/lrmd/pacemaker_remote.service.in +++ b/lrmd/pacemaker_remote.service.in @@ -2,8 +2,11 @@ Description=Pacemaker Remote Service Documentation=man:pacemaker_remoted http://clusterlabs.org/doc/en-US/Pacemaker/1.1-pcs/html/Pacemaker_Remote/index.html +# See main pacemaker unit file for descriptions of why these are needed After=network.target After=time-sync.target +After=dbus.service 
+Wants=dbus.service After=resource-agents-deps.target Wants=resource-agents-deps.target After=syslog.service diff --git a/mcp/pacemaker.service.in b/mcp/pacemaker.service.in index 516de0f8299..e532ea29c99 100644 --- a/mcp/pacemaker.service.in +++ b/mcp/pacemaker.service.in @@ -14,6 +14,10 @@ After=network.target # and failure timestamps, so wait until it's done. After=time-sync.target +# Managing systemd resources requires DBus. +After=dbus.service +Wants=dbus.service + # Some OCF resources may have dependencies that aren't managed by the cluster; # these must be started before Pacemaker and stopped after it. The # resource-agents package provides this target, which lets system adminstrators From 69de188a7263ba66afa0e8a3a46a64f07a7facca Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 14 Dec 2017 16:05:12 -0600 Subject: [PATCH 021/812] Low: attrd: avoid small memory leak at start-up introduced by 3518544 --- attrd/commands.c | 1 + 1 file changed, 1 insertion(+) diff --git a/attrd/commands.c b/attrd/commands.c index 0a20b2654ea..20bd82f46e9 100644 --- a/attrd/commands.c +++ b/attrd/commands.c @@ -539,6 +539,7 @@ attrd_broadcast_protocol() crm_xml_add(attrd_op, F_ATTRD_VALUE, ATTRD_PROTOCOL_VERSION); crm_xml_add_int(attrd_op, F_ATTRD_IS_PRIVATE, 1); attrd_client_update(attrd_op); + free_xml(attrd_op); } void From 2ce5fc46463ff7b9a5a2c68602d8c5b35a7c37d7 Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Tue, 16 Jan 2018 19:05:31 +1100 Subject: [PATCH 022/812] Bug rhbz#1519812 - Prevent notify actions from causing --wait to hang --- tools/crm_resource_runtime.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 22bdebf8e32..189d1b3e070 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1343,10 +1343,19 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * return rc; } -#define 
action_is_pending(action) \ - ((is_set((action)->flags, pe_action_optional) == FALSE) \ - && (is_set((action)->flags, pe_action_runnable) == TRUE) \ - && (is_set((action)->flags, pe_action_pseudo) == FALSE)) +static inline int action_is_pending(action_t *action) +{ + if(is_set(action->flags, pe_action_optional)) { + return FALSE; + } else if(is_set(action->flags, pe_action_runnable) == FALSE) { + return FALSE; + } else if(is_set(action->flags, pe_action_pseudo)) { + return FALSE; + } else if(safe_str_eq("notify", action->task)) { + return FALSE; + } + return TRUE; +} /*! * \internal @@ -1362,7 +1371,9 @@ actions_are_pending(GListPtr actions) GListPtr action; for (action = actions; action != NULL; action = action->next) { - if (action_is_pending((action_t *) action->data)) { + action_t *a = (action_t *)action->data; + if (action_is_pending(a)) { + crm_notice("Waiting for %s (flags=0x%.8x)", a->uuid, a->flags); return TRUE; } } From ef15ea4f687e7f9ba1f8a99548ee1e0bf9d4b50a Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Mon, 22 Jan 2018 21:18:46 +1100 Subject: [PATCH 023/812] Fix: rhbz#1527072 - Correctly observe colocation constraints with bundles in the Master role --- pengine/container.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pengine/container.c b/pengine/container.c index f5d916c805c..15d094db604 100644 --- a/pengine/container.c +++ b/pengine/container.c @@ -486,10 +486,18 @@ container_rsc_colocation_rh(resource_t * rsc_lh, resource_t * rsc, rsc_colocatio } else { node_t *chosen = tuple->docker->fns->location(tuple->docker, NULL, FALSE); - if (chosen != NULL && is_set_recursive(tuple->docker, pe_rsc_block, TRUE) == FALSE) { - pe_rsc_trace(rsc, "Allowing %s: %s %d", constraint->id, chosen->details->uname, chosen->weight); - allocated_rhs = g_list_prepend(allocated_rhs, chosen); + if (chosen == NULL || is_set_recursive(tuple->docker, pe_rsc_block, TRUE)) { + continue; + } + if(constraint->role_rh >= RSC_ROLE_MASTER && 
tuple->child == NULL) { + continue; } + if(constraint->role_rh >= RSC_ROLE_MASTER && tuple->child->next_role < RSC_ROLE_MASTER) { + continue; + } + + pe_rsc_trace(rsc, "Allowing %s: %s %d", constraint->id, chosen->details->uname, chosen->weight); + allocated_rhs = g_list_prepend(allocated_rhs, chosen); } } From 7c322f4b9a7f36eba1d3ca74d7dd8fe1093ca7bd Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 22 Jan 2018 11:38:22 -0600 Subject: [PATCH 024/812] Low: crmd: quorum gain should always cause new transition 0b689055 aborted the transition on quorum loss, but quorum can also be acquired without triggering a new transition, if corosync gives quorum without a node joining (e.g. forced via corosync-cmapctl, or perhaps via heuristics). This aborts the transition when quorum is gained, but only after a 5-second delay, if the transition has not been aborted in that time. This avoids an unnecessary abort in the vast majority of cases where an abort is already done, and it allows some time for all nodes to connect when quorum is gained, rather than immediately fencing remaining unseen nodes. --- crmd/membership.c | 22 +++++++++++++++++----- crmd/te_utils.c | 48 +++++++++++++++++++++++++++++++++++++++++++++-- crmd/tengine.h | 2 ++ 3 files changed, 65 insertions(+), 7 deletions(-) diff --git a/crmd/membership.c b/crmd/membership.c index c36dbedcf14..4f2fa8a81fc 100644 --- a/crmd/membership.c +++ b/crmd/membership.c @@ -438,12 +438,24 @@ crm_update_quorum(gboolean quorum, gboolean force_update) fsa_register_cib_callback(call_id, FALSE, NULL, cib_quorum_update_complete); free_xml(update); - /* If a node not running any resources is cleanly shut down and drops us - * below quorum, we won't necessarily abort the transition, so abort it - * here to be safe. 
+ /* Quorum changes usually cause a new transition via other activity: + * quorum gained via a node joining will abort via the node join, + * and quorum lost via a node leaving will usually abort via resource + * activity and/or fencing. + * + * However, it is possible that nothing else causes a transition (e.g. + * someone forces quorum via corosync-cmapctl, or quorum is lost due to + * a node in standby shutting down cleanly), so here ensure a new + * transition is triggered. */ - if (quorum == FALSE) { - abort_transition(INFINITY, tg_restart, "Quorum loss", NULL); + if (quorum) { + /* If quorum was gained, abort after a short delay, in case multiple + * nodes are joining around the same time, so the one that brings us + * to quorum doesn't cause all the remaining ones to be fenced. + */ + abort_after_delay(INFINITY, tg_restart, "Quorum gained", 5000); + } else { + abort_transition(INFINITY, tg_restart, "Quorum lost", NULL); + } } fsa_has_quorum = quorum; diff --git a/crmd/te_utils.c b/crmd/te_utils.c index dab02d36088..8d105dc31b5 100644 --- a/crmd/te_utils.c +++ b/crmd/te_utils.c @@ -530,6 +530,46 @@ trigger_graph_processing(const char *fn, int line) mainloop_set_trigger(transition_trigger); } +static struct abort_timer_s { + bool aborted; + guint id; + int priority; + enum transition_action action; + const char *text; +} abort_timer = { 0, }; + +static gboolean +abort_timer_popped(gpointer data) +{ + if (abort_timer.aborted == FALSE) { + abort_transition(abort_timer.priority, abort_timer.action, + abort_timer.text, NULL); + } + abort_timer.id = 0; + return FALSE; // do not immediately reschedule timer +} + +/*! 
+ * \internal + * \brief Abort transition after delay, if not already aborted in that time + * + * \param[in] abort_text Must be literal string + */ +void +abort_after_delay(int abort_priority, enum transition_action abort_action, + const char *abort_text, guint delay_ms) +{ + if (abort_timer.id) { + // Timer already in progress, stop and reschedule + g_source_remove(abort_timer.id); + } + abort_timer.aborted = FALSE; + abort_timer.priority = abort_priority; + abort_timer.action = abort_action; + abort_timer.text = abort_text; + abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, NULL); +} + void abort_transition_graph(int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode * reason, const char *fn, int line) @@ -557,6 +597,8 @@ abort_transition_graph(int abort_priority, enum transition_action abort_action, break; } + abort_timer.aborted = TRUE; + /* Make sure any queued calculations are discarded ASAP */ free(fsa_pe_ref); fsa_pe_ref = NULL; @@ -660,10 +702,12 @@ abort_transition_graph(int abort_priority, enum transition_action abort_action, (transition_graph->complete? "true" : "false")); } else { + const char *id = ID(reason); + do_crm_log(level, "Transition aborted by %s.%s '%s': %s " CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", - TYPE(reason), ID(reason), (op? op : "change"), abort_text, - add[0], add[1], add[2], fn, line, path, + TYPE(reason), (id? id : ""), (op? op : "change"), + abort_text, add[0], add[1], add[2], fn, line, path, (transition_graph->complete? 
"true" : "false")); } } diff --git a/crmd/tengine.h b/crmd/tengine.h index 7205c16cc44..6a75a08c5cf 100644 --- a/crmd/tengine.h +++ b/crmd/tengine.h @@ -59,6 +59,8 @@ extern void notify_crmd(crm_graph_t * graph); # include extern void trigger_graph_processing(const char *fn, int line); +void abort_after_delay(int abort_priority, enum transition_action abort_action, + const char *abort_text, guint delay_ms); extern void abort_transition_graph(int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode * reason, const char *fn, int line); From 18572d4e1e84c9d1f293b9a3082190133367154e Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 26 Jan 2018 12:31:09 -0600 Subject: [PATCH 025/812] Fix: tools: crm_master should always work on node attribute Before ccbdb2a, crm_master would always set --node, thus ensuring crm_attribute would treat the value as a node attribute. That commit removed that so that crm_attribute could determine the local node name properly, but that introduced an issue where the master value would be set as a cluster property instead of a node attribute if --lifetime (or --node) was not set explicitly. This fixes it by setting the default value of --lifetime explicitly. --- tools/crm_master | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/crm_master b/tools/crm_master index 7e31cea2dbc..f4a0772fad2 100755 --- a/tools/crm_master +++ b/tools/crm_master @@ -8,6 +8,10 @@ if [ $? != 0 ] ; then echo "crm_master - A convenience wrapper for crm_attribute # Note the quotes around `$TEMP': they are essential! eval set -- "$TEMP" +# Explicitly set the (usual default) lifetime, so the attribute gets set as a +# node attribute and not a cluster property. 
+options="--lifetime forever" + while true ; do case "$1" in -N|--node|-U|--uname) options="$options $1 $2"; shift; shift;; From 5257156aaaf46ebc4c2fff673d21740ec6ebc33a Mon Sep 17 00:00:00 2001 From: Hideo Yamauchi Date: Fri, 23 Feb 2018 09:28:47 +0900 Subject: [PATCH 026/812] Mid: attrd: Synchronize attributes held only on own node. --- attrd/commands.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++ attrd/internal.h | 1 + 2 files changed, 70 insertions(+) diff --git a/attrd/commands.c b/attrd/commands.c index 20bd82f46e9..f5a93b8aee0 100644 --- a/attrd/commands.c +++ b/attrd/commands.c @@ -56,6 +56,7 @@ GHashTable *attributes = NULL; void write_attribute(attribute_t *a); void write_or_elect_attribute(attribute_t *a); +void attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml); void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter); void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); void attrd_peer_remove(const char *host, gboolean uncache, const char *source); @@ -128,6 +129,24 @@ build_attribute_xml( return xml; } +static void +clear_attribute_value_seen(void) +{ + GHashTableIter aIter; + GHashTableIter vIter; + attribute_t *a; + attribute_value_t *v = NULL; + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { + v->seen = FALSE; + crm_trace("Clear seen flag %s[%s] = %s.", a->id, v->nodename, v->current); + } + } +} + static attribute_t * create_attribute(xmlNode *xml) { @@ -640,10 +659,21 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) xmlNode *child = NULL; crm_info("Processing %s from %s", op, peer->uname); + + /* Clear the seen flag for attribute processing held only in the own node. 
*/ + if (peer_state == election_won) { + clear_attribute_value_seen(); + } + for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) { host = crm_element_value(child, F_ATTRD_HOST); attrd_peer_update(peer, child, host, TRUE); } + + if (peer_state == election_won) { + /* Synchronize if there is an attribute held only by own node that Writer does not have. */ + attrd_current_only_attribute_update(peer, xml); + } } } @@ -748,6 +778,42 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml) return(v); } +void +attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml) +{ + GHashTableIter aIter; + GHashTableIter vIter; + attribute_t *a; + attribute_value_t *v = NULL; + xmlNode *sync = create_xml_node(NULL, __FUNCTION__); + gboolean build = FALSE; + + crm_xml_add(sync, F_ATTRD_TASK, ATTRD_OP_SYNC_RESPONSE); + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { + if (safe_str_eq(v->nodename, attrd_cluster->uname) && v->seen == FALSE) { + crm_trace("Syncing %s[%s] = %s to everyone.(from local only attributes)", a->id, v->nodename, v->current); + + build = TRUE; + build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private, + v->nodename, v->nodeid, v->current); + } else { + crm_trace("Local attribute(%s[%s] = %s) was ignore.(another host) : [%s]", a->id, v->nodename, v->current, attrd_cluster->uname); + continue; + } + } + } + + if (build) { + crm_debug("Syncing values to everyone.(from local only attributes)"); + send_attrd_message(NULL, sync); + } + free_xml(sync); +} + void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter) { @@ -876,6 +942,9 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter) crm_trace("Unchanged %s[%s] from %s is %s", attr, host, 
peer->uname, value); } + /* Set the seen flag for attribute processing held only in the own node. */ + v->seen = TRUE; + /* If this is a cluster node whose node ID we are learning, remember it */ if ((v->nodeid == 0) && (v->is_remote == FALSE) && (crm_element_value_int(xml, F_ATTRD_HOST_ID, (int*)&v->nodeid) == 0)) { diff --git a/attrd/internal.h b/attrd/internal.h index 23bcbda751f..1b2c8d891f7 100644 --- a/attrd/internal.h +++ b/attrd/internal.h @@ -43,6 +43,7 @@ typedef struct attribute_value_s { char *nodename; char *current; char *requested; + gboolean seen; } attribute_value_t; crm_cluster_t *attrd_cluster; From 8e155be7f99b2ad55f1471367731a0194d4def2d Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 15 Dec 2017 14:41:02 -0600 Subject: [PATCH 027/812] Low: libpe_status: recover after failed demote when appropriate Previously, the PE would always do a full stop after any failed demote. However, if the resource needed recovery, the necessary start wouldn't be scheduled until the next transition. Now, we skip the forced stop. It wasn't needed because native_create_actions() does the right thing already. If the resource's next role is stopped, it will schedule the right actions to get to stopped. If the resource's next role is something else, it will force a stop and start for master/slave resources when both a current node and a chosen node have been set (indicating recovery). 
--- lib/pengine/unpack.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 71307790e4e..ac1784d882b 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -2662,11 +2662,6 @@ unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { - /* - * staying in role=master ends up putting the PE/TE into a loop - * setting role=slave is not dangerous because no master will be - * promoted until the failed resource has been fully stopped - */ if (action->on_fail == action_fail_block) { rsc->role = RSC_ROLE_MASTER; rsc->next_role = RSC_ROLE_STOPPED; @@ -2675,9 +2670,13 @@ unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, rsc->role = RSC_ROLE_STOPPED; } else { - crm_warn("Forcing %s to stop after a failed demote action", rsc->id); + /* + * Staying in master role would put the PE/TE into a loop. Setting + * slave role is not dangerous because the resource will be stopped + * as part of recovery, and any master promotion will be ordered + * after that stop. + */ rsc->role = RSC_ROLE_SLAVE; - rsc->next_role = RSC_ROLE_STOPPED; } } else if (compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) { From 9a5c842f2f150729b4bcc37d7b8048a3aedfd396 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 26 Feb 2018 18:22:12 -0600 Subject: [PATCH 028/812] Low: tools: warn if crm_resource --wait called in mixed-version cluster --- tools/crm_resource_runtime.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 189d1b3e070..5e54f9e320c 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1435,6 +1435,7 @@ wait_till_stable(int timeout_ms, cib_t * cib) int timeout_s = timeout_ms? 
((timeout_ms + 999) / 1000) : WAIT_DEFAULT_TIMEOUT_S; time_t expire_time = time(NULL) + timeout_s; time_t time_diff; + bool printed_version_warning = BE_QUIET; // i.e. don't print if quiet set_working_set_defaults(&data_set); do { @@ -1461,6 +1462,24 @@ wait_till_stable(int timeout_ms, cib_t * cib) } do_calculations(&data_set, data_set.input, NULL); + if (!printed_version_warning) { + /* If the DC has a different version than the local node, the two + * could come to different conclusions about what actions need to be + * done. Warn the user in this case. + * + * @TODO A possible long-term solution would be to reimplement the + * wait as a new crmd operation that would be forwarded to the DC. + * However, that would have potential problems of its own. + */ + const char *dc_version = g_hash_table_lookup(data_set.config_hash, + "dc-version"); + + if (safe_str_neq(dc_version, PACEMAKER_VERSION "-" BUILD_VERSION)) { + printf("warning: --wait command may not work properly in mixed-version cluster\n"); + printed_version_warning = TRUE; + } + } + } while (actions_are_pending(data_set.actions)); return pcmk_ok; From 5a35a34d7e230f6f4ee090768bca9c2fef744bda Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 2 Mar 2018 09:01:47 -0600 Subject: [PATCH 029/812] Build: systemd: update documentation link in unit file --- mcp/pacemaker.service.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mcp/pacemaker.service.in b/mcp/pacemaker.service.in index e532ea29c99..7e5ecf60745 100644 --- a/mcp/pacemaker.service.in +++ b/mcp/pacemaker.service.in @@ -1,6 +1,7 @@ [Unit] Description=Pacemaker High Availability Cluster Manager -Documentation=man:pacemakerd http://clusterlabs.org/doc/en-US/Pacemaker/1.1-pcs/html/Pacemaker_Explained/index.html +Documentation=man:pacemakerd +Documentation=https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/1.1/html-single/Pacemaker_Explained/index.html # DefaultDependencies takes care of sysinit.target, # basic.target, and 
shutdown.target From 380a0368175d11750eba3182a6f021e4cc11f6be Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 19 Feb 2018 13:19:06 -0600 Subject: [PATCH 030/812] Feature: libpe_status: deprecate stonith-action=poweroff --- lib/pengine/common.c | 4 ++-- lib/pengine/unpack.c | 6 ++++++ lib/pengine/unpack.h | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/pengine/common.c b/lib/pengine/common.c index 4b748635ffa..c54bc442bf0 100644 --- a/lib/pengine/common.c +++ b/lib/pengine/common.c @@ -105,8 +105,8 @@ pe_cluster_option pe_opts[] = { /* Stonith Options */ { "stonith-enabled", "stonith_enabled", "boolean", NULL, "true", &check_boolean, "Failed nodes are STONITH'd", NULL }, - { "stonith-action", "stonith_action", "enum", "reboot, poweroff, off", "reboot", &check_stonith_action, - "Action to send to STONITH device", NULL }, + { "stonith-action", "stonith_action", "enum", "reboot, off, poweroff", "reboot", &check_stonith_action, + "Action to send to STONITH device ('poweroff' is a deprecated alias for 'off')", NULL }, { "stonith-timeout", NULL, "time", NULL, "60s", &check_timer, "How long to wait for the STONITH action (reboot,on,off) to complete", NULL }, { XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL, "false", &check_boolean, diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index ac1784d882b..80ef1787563 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -210,6 +210,12 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set) is_set(data_set->flags, pe_flag_stonith_enabled) ? 
"enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); + if (!strcmp(data_set->stonith_action, "poweroff")) { + pe_warn_once(pe_wo_poweroff_off, + "Support for stonith-action of 'poweroff' is deprecated " + "and will be removed in a future release (use 'off' instead)"); + data_set->stonith_action = "off"; + } crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing); diff --git a/lib/pengine/unpack.h b/lib/pengine/unpack.h index 02600ddf3b9..31acfbebd68 100644 --- a/lib/pengine/unpack.h +++ b/lib/pengine/unpack.h @@ -108,6 +108,7 @@ enum pe_warn_once_e { pe_wo_rsc_failstick = 0x0200, pe_wo_default_rscfs = 0x0400, pe_wo_legacy_notifs = 0x0800, + pe_wo_poweroff_off = 0x1000, }; extern uint32_t pe_wo; From 85a1b1264a4d350320183764c574d520527e85c1 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 5 Mar 2018 16:50:07 -0600 Subject: [PATCH 031/812] Build: gitignore: ignore 2.0-built files in 1.1 branch --- .gitignore | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.gitignore b/.gitignore index e7fb484243a..af50b7395e6 100644 --- a/.gitignore +++ b/.gitignore @@ -152,6 +152,16 @@ lib/gnu/libgnu.a lib/gnu/stdalign.h *.coverity +# Built only in 2.0 branch (makes switching branches easier) +/cts/cts-coverage +/cts/cts-lrmd +/cts/cts-pengine +/cts/cts-stonithd +/cts/fence_dummy +/cts/pengine/ +/doc/Pacemaker_Administration.build +/doc/Pacemaker_Administration/ + #Other mock HTML From 72a19dce7f8b5492d133d9aa437edc8c8be264bd Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 26 Feb 2018 11:44:19 -0600 Subject: [PATCH 032/812] Low: crmd: validate CIB diffs better This contains refactoring and logging changes to make the diff processing code more readable, validate CIB diffs better, and log diff issues consistently. 
--- crmd/te_callbacks.c | 432 ++++++++++++++++++++++++-------------------- 1 file changed, 240 insertions(+), 192 deletions(-) diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c index 2273b86de73..a8e7091e993 100644 --- a/crmd/te_callbacks.c +++ b/crmd/te_callbacks.c @@ -66,7 +66,7 @@ update_stonith_max_attempts(const char* value) } } static void -te_legacy_update_diff(const char *event, xmlNode * diff) +te_update_diff_v1(const char *event, xmlNode *diff) { int lpc, max; xmlXPathObject *xpathObj = NULL; @@ -144,46 +144,40 @@ te_legacy_update_diff(const char *event, xmlNode * diff) freeXpathObject(xpathObj); /* - * Check for and fast-track the processing of LRM refreshes - * In large clusters this can result in _huge_ speedups + * Updates by, or in response to, TE actions will never contain updates + * for more than one resource at a time, so such updates indicate an + * LRM refresh. * - * Unfortunately we can only do so when there are no pending actions - * Otherwise we could miss updates we're waiting for and stall + * In that case, start a new transition rather than check each result + * individually, which can result in _huge_ speedups in large clusters. * + * Unfortunately, we can only do so when there are no pending actions. + * Otherwise, we could mistakenly throw away those results here, and + * the cluster will stall waiting for them and time out the operation. 
*/ - xpathObj = NULL; if (transition_graph->pending == 0) { - xpathObj = - xpath_search(diff, - "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" - XML_LRM_TAG_RESOURCE); - } - - max = numXpathResults(xpathObj); - if (max > 1) { - /* Updates by, or in response to, TE actions will never contain updates - * for more than one resource at a time - */ - crm_debug("Detected LRM refresh - %d resources updated: Skipping all resource events", max); - crm_log_xml_trace(diff, "lrm-refresh"); - abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL); - goto bail; + xpathObj = xpath_search(diff, + "//" F_CIB_UPDATE_RESULT + "//" XML_TAG_DIFF_ADDED + "//" XML_LRM_TAG_RESOURCE); + max = numXpathResults(xpathObj); + if (max > 1) { + crm_debug("Ignoring resource operation updates due to LRM refresh of %d resources", + max); + crm_log_xml_trace(diff, "lrm-refresh"); + abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL); + goto bail; + } + freeXpathObject(xpathObj); } - freeXpathObject(xpathObj); /* Process operation updates */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); - if (numXpathResults(xpathObj)) { -/* - - - - - -*/ - int lpc = 0, max = numXpathResults(xpathObj); + max = numXpathResults(xpathObj); + if (max > 0) { + int lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); @@ -241,38 +235,50 @@ te_legacy_update_diff(const char *event, xmlNode * diff) freeXpathObject(xpathObj); } -static void process_resource_updates( - const char *node, xmlNode *xml, xmlNode *change, const char *op, const char *xpath) +static void +process_lrm_resource_diff(xmlNode *lrm_resource, const char *node) +{ + for (xmlNode *rsc_op = __xml_first_child(lrm_resource); rsc_op != NULL; + rsc_op = __xml_next(rsc_op)) { + process_graph_event(rsc_op, node); + } +} + +static void +process_resource_updates(const char *node, xmlNode *xml, xmlNode *change, + const char *op, const char *xpath) { 
xmlNode *cIter = NULL; xmlNode *rsc = NULL; - xmlNode *rsc_op = NULL; int num_resources = 0; - if(xml == NULL) { + if (xml == NULL) { return; - } else if(strcmp((const char*)xml->name, XML_CIB_TAG_LRM) == 0) { + } else if (strcmp((const char*)xml->name, XML_CIB_TAG_LRM) == 0) { xml = first_named_child(xml, XML_LRM_TAG_RESOURCES); crm_trace("Got %p in %s", xml, XML_CIB_TAG_LRM); } CRM_ASSERT(strcmp((const char*)xml->name, XML_LRM_TAG_RESOURCES) == 0); - for(cIter = xml->children; cIter; cIter = cIter->next) { + for (cIter = xml->children; cIter; cIter = cIter->next) { num_resources++; } - if(num_resources > 1) { + if (num_resources > 1) { /* - * Check for and fast-track the processing of LRM refreshes - * In large clusters this can result in _huge_ speedups + * Updates by, or in response to, TE actions will never contain updates + * for more than one resource at a time, so such updates indicate an + * LRM refresh. * - * Unfortunately we can only do so when there are no pending actions - * Otherwise we could miss updates we're waiting for and stall + * In that case, start a new transition rather than check each result + * individually, which can result in _huge_ speedups in large clusters. * + * Unfortunately, we can only do so when there are no pending actions. + * Otherwise, we could mistakenly throw away those results here, and + * the cluster will stall waiting for them and time out the operation. 
*/ - crm_debug("Detected LRM refresh - %d resources updated", num_resources); crm_log_xml_trace(change, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL); @@ -281,10 +287,7 @@ static void process_resource_updates( for (rsc = __xml_first_child(xml); rsc != NULL; rsc = __xml_next(rsc)) { crm_trace("Processing %s", ID(rsc)); - for (rsc_op = __xml_first_child(rsc); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { - crm_trace("Processing %s", ID(rsc_op)); - process_graph_event(rsc_op, node); - } + process_lrm_resource_diff(rsc, node); } } @@ -361,84 +364,142 @@ abort_unless_down(const char *xpath, const char *op, xmlNode *change, free(node_uuid); } -void -te_update_diff(const char *event, xmlNode * msg) +static void +process_op_deletion(const char *xpath, xmlNode *change) { - int rc = -EINVAL; - int format = 1; - xmlNode *change = NULL; - const char *op = NULL; - - xmlNode *diff = NULL; + char *mutable_key = strdup(xpath); + char *key; + char *node_uuid; + crm_action_t *cancel = NULL; + + // Extract the part of xpath between last pair of single quotes + key = strrchr(mutable_key, '\''); + if (key != NULL) { + *key = '\0'; + key = strrchr(mutable_key, '\''); + } + if (key == NULL) { + crm_warn("Ignoring malformed CIB update (resource deletion of %s)", + xpath); + free(mutable_key); + return; + } + ++key; - int p_add[] = { 0, 0, 0 }; - int p_del[] = { 0, 0, 0 }; + node_uuid = extract_node_uuid(xpath); + cancel = get_cancel_action(key, node_uuid); + if (cancel) { + crm_info("Cancellation of %s on %s confirmed (%d)", + key, node_uuid, cancel->id); + stop_te_timer(cancel->timer); + te_action_confirmed(cancel); + update_graph(transition_graph, cancel); + trigger_graph(); + } else { + abort_transition(INFINITY, tg_restart, "Resource operation removal", + change); + } + free(mutable_key); + free(node_uuid); +} - CRM_CHECK(msg != NULL, return); - crm_element_value_int(msg, F_CIB_RC, &rc); +static void +process_delete_diff(const char *xpath, const char 
*op, xmlNode *change) +{ + if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) { + process_op_deletion(xpath, change); - if (transition_graph == NULL) { - crm_trace("No graph"); - return; + } else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) { + abort_unless_down(xpath, op, change, "Resource state removal"); - } else if (rc < pcmk_ok) { - crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); - return; + } else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) { + abort_unless_down(xpath, op, change, "Node state removal"); - } else if (transition_graph->complete == TRUE - && fsa_state != S_IDLE - && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { - crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state), - transition_graph->complete); - return; + } else { + crm_trace("Ignoring delete of %s", xpath); } +} - op = crm_element_value(msg, F_CIB_OPERATION); - diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); +static void +process_node_state_diff(xmlNode *state, xmlNode *change, const char *op, + const char *xpath) +{ + xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM); - xml_patch_versions(diff, p_add, p_del); - crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op, - p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2], - fsa_state2string(fsa_state)); + process_resource_updates(ID(state), lrm, change, op, xpath); +} - crm_element_value_int(diff, "format", &format); - switch(format) { - case 1: - te_legacy_update_diff(event, diff); - return; - case 2: - /* Cool, we know what to do here */ - crm_log_xml_trace(diff, "Patch:Raw"); - break; - default: - crm_warn("Unknown patch format: %d", format); - return; +static void +process_status_diff(xmlNode *status, xmlNode *change, const char *op, + const char *xpath) +{ + for (xmlNode *state = __xml_first_child(status); state != NULL; + state = __xml_next(state)) { + process_node_state_diff(state, change, op, xpath); + } +} + +static void +process_cib_diff(xmlNode *cib, xmlNode 
*change, const char *op, + const char *xpath) +{ + xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS); + xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION); + + if (status) { + process_status_diff(status, change, op, xpath); } + if (config) { + abort_transition(INFINITY, tg_restart, + "Non-status-only change", change); + } +} - for (change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) { +static void +te_update_diff_v2(xmlNode *diff) +{ + crm_log_xml_trace(diff, "Patch:Raw"); + + for (xmlNode *change = __xml_first_child(diff); change != NULL; + change = __xml_next(change)) { + + xmlNode *match = NULL; const char *name = NULL; - const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); - xmlNode *match = NULL; - const char *node = NULL; - if(op == NULL) { + // Possible ops: create, modify, delete, move + const char *op = crm_element_value(change, XML_DIFF_OP); + + // Ignore uninteresting updates + if (op == NULL) { continue; - } else if(strcmp(op, "create") == 0) { - match = change->children; + } else if (xpath == NULL) { + crm_trace("Ignoring %s change for version field", op); + continue; - } else if(strcmp(op, "move") == 0) { + } else if (strcmp(op, "move") == 0) { + crm_trace("Ignoring move change at %s", xpath); continue; + } - } else if(strcmp(op, "modify") == 0) { + // Find the result of create/modify ops + if (strcmp(op, "create") == 0) { + match = change->children; + + } else if (strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } + + } else if (strcmp(op, "delete") != 0) { + crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)", + op, xpath); + continue; } - if(match) { + if (match) { if (match->type == XML_COMMENT_NODE) { crm_trace("Ignoring %s operation for comment at %s", op, xpath); continue; @@ -449,130 +510,117 @@ te_update_diff(const char *event, 
xmlNode * msg) crm_trace("Handling %s operation for %s%s%s", op, (xpath? xpath : "CIB"), (name? " matched by " : ""), (name? name : "")); - if(xpath == NULL) { - /* Version field, ignore */ - } else if(strstr(xpath, "/cib/configuration")) { - abort_transition(INFINITY, tg_restart, "Configuration change", change); - break; /* Won't be packaged with any resource operations we may be waiting for */ + if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) { + abort_transition(INFINITY, tg_restart, "Configuration change", + change); + break; // Won't be packaged with operation results we may be waiting for - } else if(strstr(xpath, "/"XML_CIB_TAG_TICKETS) || safe_str_eq(name, XML_CIB_TAG_TICKETS)) { + } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS) + || safe_str_eq(name, XML_CIB_TAG_TICKETS)) { abort_transition(INFINITY, tg_restart, "Ticket attribute change", change); - break; /* Won't be packaged with any resource operations we may be waiting for */ + break; // Won't be packaged with operation results we may be waiting for - } else if(strstr(xpath, "/"XML_TAG_TRANSIENT_NODEATTRS"[") || safe_str_eq(name, XML_TAG_TRANSIENT_NODEATTRS)) { + } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[") + || safe_str_eq(name, XML_TAG_TRANSIENT_NODEATTRS)) { abort_unless_down(xpath, op, change, "Transient attribute change"); - break; /* Won't be packaged with any resource operations we may be waiting for */ - - } else if(strstr(xpath, "/"XML_LRM_TAG_RSC_OP"[") && safe_str_eq(op, "delete")) { - crm_action_t *cancel = NULL; - char *mutable_key = strdup(xpath); - char *key, *node_uuid; - - /* Extract the part of xpath between last pair of single quotes */ - key = strrchr(mutable_key, '\''); - if (key != NULL) { - *key = '\0'; - key = strrchr(mutable_key, '\''); - } - if (key == NULL) { - crm_warn("Ignoring malformed CIB update (resource deletion)"); - free(mutable_key); - continue; - } - ++key; - - node_uuid = extract_node_uuid(xpath); - cancel = 
get_cancel_action(key, node_uuid); - if (cancel == NULL) { - abort_transition(INFINITY, tg_restart, "Resource operation removal", change); + break; // Won't be packaged with operation results we may be waiting for - } else { - crm_info("Cancellation of %s on %s confirmed (%d)", key, node_uuid, cancel->id); - stop_te_timer(cancel->timer); - te_action_confirmed(cancel); - - update_graph(transition_graph, cancel); - trigger_graph(); - - } - free(mutable_key); - free(node_uuid); - - } else if(strstr(xpath, "/"XML_CIB_TAG_LRM"[") && safe_str_eq(op, "delete")) { - abort_unless_down(xpath, op, change, "Resource state removal"); - - } else if(strstr(xpath, "/"XML_CIB_TAG_STATE"[") && safe_str_eq(op, "delete")) { - abort_unless_down(xpath, op, change, "Node state removal"); - - } else if(name == NULL) { - crm_debug("No result for %s operation to %s", op, xpath); - CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0); - - } else if(strcmp(name, XML_TAG_CIB) == 0) { - xmlNode *state = NULL; - xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS); - xmlNode *config = first_named_child(match, XML_CIB_TAG_CONFIGURATION); - - for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { - xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM); - - node = ID(state); - process_resource_updates(node, lrm, change, op, xpath); - } - - if(config) { - abort_transition(INFINITY, tg_restart, "Non-status-only change", change); - } + } else if (strcmp(op, "delete") == 0) { + process_delete_diff(xpath, op, change); - } else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) { - xmlNode *state = NULL; + } else if (name == NULL) { + crm_warn("Ignoring malformed CIB update (%s at %s has no result)", + op, xpath); - for (state = __xml_first_child(match); state != NULL; state = __xml_next(state)) { - xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM); - - node = ID(state); - process_resource_updates(node, lrm, change, op, xpath); - } + } else if 
(strcmp(name, XML_TAG_CIB) == 0) { + process_cib_diff(match, change, op, xpath); - } else if(strcmp(name, XML_CIB_TAG_STATE) == 0) { - xmlNode *lrm = first_named_child(match, XML_CIB_TAG_LRM); + } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) { + process_status_diff(match, change, op, xpath); - node = ID(match); - process_resource_updates(node, lrm, change, op, xpath); + } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) { + process_node_state_diff(match, change, op, xpath); - } else if(strcmp(name, XML_CIB_TAG_LRM) == 0) { - node = ID(match); - process_resource_updates(node, match, change, op, xpath); + } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) { + process_resource_updates(ID(match), match, change, op, xpath); - } else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { + } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = get_node_from_xpath(xpath); process_resource_updates(local_node, match, change, op, xpath); free(local_node); - } else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { - - xmlNode *rsc_op; + } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { char *local_node = get_node_from_xpath(xpath); - for (rsc_op = __xml_first_child(match); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { - process_graph_event(rsc_op, local_node); - } + process_lrm_resource_diff(match, local_node); free(local_node); - } else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { + } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = get_node_from_xpath(xpath); process_graph_event(match, local_node); free(local_node); } else { - crm_err("Ignoring %s operation for %s %p, %s", op, xpath, match, name); + crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)", + op, xpath, name); } } } +void +te_update_diff(const char *event, xmlNode * msg) +{ + xmlNode *diff = NULL; + const char *op = NULL; + int rc = -EINVAL; + int format = 1; + int p_add[] = { 0, 0, 0 }; + int p_del[] = { 0, 0, 0 }; + + CRM_CHECK(msg != NULL, return); + 
crm_element_value_int(msg, F_CIB_RC, &rc); + + if (transition_graph == NULL) { + crm_trace("No graph"); + return; + + } else if (rc < pcmk_ok) { + crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); + return; + + } else if (transition_graph->complete + && fsa_state != S_IDLE + && fsa_state != S_TRANSITION_ENGINE + && fsa_state != S_POLICY_ENGINE) { + crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state), + transition_graph->complete); + return; + } + + op = crm_element_value(msg, F_CIB_OPERATION); + diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + xml_patch_versions(diff, p_add, p_del); + crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op, + p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2], + fsa_state2string(fsa_state)); + + crm_element_value_int(diff, "format", &format); + switch (format) { + case 1: + te_update_diff_v1(event, diff); + break; + case 2: + te_update_diff_v2(diff); + break; + default: + crm_warn("Ignoring malformed CIB update (unknown patch format %d)", + format); + } +} gboolean process_te_message(xmlNode * msg, xmlNode * xml_data) From ccf9c38731ffeaec2d353cf95d564bffc7f6985d Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 27 Feb 2018 12:37:10 -0600 Subject: [PATCH 033/812] Low: crmd: don't abort for v2 diff LRM refresh if actions pending Already the case for v1 diffs, and should apply to v2 as well. 
--- crmd/te_callbacks.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c index a8e7091e993..1941cd7348e 100644 --- a/crmd/te_callbacks.c +++ b/crmd/te_callbacks.c @@ -248,9 +248,7 @@ static void process_resource_updates(const char *node, xmlNode *xml, xmlNode *change, const char *op, const char *xpath) { - xmlNode *cIter = NULL; xmlNode *rsc = NULL; - int num_resources = 0; if (xml == NULL) { return; @@ -262,24 +260,21 @@ process_resource_updates(const char *node, xmlNode *xml, xmlNode *change, CRM_ASSERT(strcmp((const char*)xml->name, XML_LRM_TAG_RESOURCES) == 0); - for (cIter = xml->children; cIter; cIter = cIter->next) { - num_resources++; - } + /* + * Updates by, or in response to, TE actions will never contain updates + * for more than one resource at a time, so such updates indicate an + * LRM refresh. + * + * In that case, start a new transition rather than check each result + * individually, which can result in _huge_ speedups in large clusters. + * + * Unfortunately, we can only do so when there are no pending actions. + * Otherwise, we could mistakenly throw away those results here, and + * the cluster will stall waiting for them and time out the operation. + */ + if ((transition_graph->pending == 0) + && xml->children && xml->children->next) { - if (num_resources > 1) { - /* - * Updates by, or in response to, TE actions will never contain updates - * for more than one resource at a time, so such updates indicate an - * LRM refresh. - * - * In that case, start a new transition rather than check each result - * individually, which can result in _huge_ speedups in large clusters. - * - * Unfortunately, we can only do so when there are no pending actions. - * Otherwise, we could mistakenly throw away those results here, and - * the cluster will stall waiting for them and time out the operation. 
- */ - crm_debug("Detected LRM refresh - %d resources updated", num_resources); crm_log_xml_trace(change, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL); return; From c07d403fa4d3ce4049c64844656abba160b38684 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 26 Feb 2018 14:48:57 -0600 Subject: [PATCH 034/812] Refactor: crmd: don't need return value from process_graph_event() --- crmd/te_events.c | 14 ++++---------- crmd/tengine.h | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/crmd/te_events.c b/crmd/te_events.c index ed091a98535..944bb601b21 100644 --- a/crmd/te_events.c +++ b/crmd/te_events.c @@ -436,8 +436,8 @@ match_down_event(const char *target, bool quiet) return match; } -gboolean -process_graph_event(xmlNode * event, const char *event_node) +void +process_graph_event(xmlNode *event, const char *event_node) { int rc = -1; int status = -1; @@ -450,7 +450,6 @@ process_graph_event(xmlNode * event, const char *event_node) int transition_num = -1; char *update_te_uuid = NULL; - gboolean stop_early = FALSE; gboolean ignore_failures = FALSE; const char *id = NULL; const char *desc = NULL; @@ -470,14 +469,14 @@ process_graph_event(xmlNode * event, const char *event_node) magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY); if (magic == NULL) { /* non-change */ - return FALSE; + return; } if (decode_transition_key(magic, &update_te_uuid, &transition_num, &action_num, &target_rc) == FALSE) { crm_err("Invalid event %s.%d detected: %s", id, callid, magic); abort_transition(INFINITY, tg_restart, "Bad event", event); - return FALSE; + return; } if (status == PCMK_LRM_OP_PENDING) { @@ -491,12 +490,10 @@ process_graph_event(xmlNode * event, const char *event_node) } else if ((action_num < 0) || (crm_str_eq(update_te_uuid, te_uuid, TRUE) == FALSE)) { desc = "initiated by a different node"; abort_transition(INFINITY, tg_restart, "Foreign event", event); - stop_early = TRUE; /* This could be an lrm status refresh */ } 
else if (transition_graph->id != transition_num) { desc = "arrived really late"; abort_transition(INFINITY, tg_restart, "Old event", event); - stop_early = TRUE; /* This could be an lrm status refresh */ } else if (transition_graph->complete) { desc = "arrived late"; @@ -521,8 +518,6 @@ process_graph_event(xmlNode * event, const char *event_node) } else { if (update_failcount(event, event_node, rc, target_rc, (transition_num == -1), ignore_failures)) { - /* Turns out this wasn't an lrm status refresh update afterall */ - stop_early = FALSE; desc = "failed"; } crm_info("Detected action (%d.%d) %s.%d=%s: %s", transition_num, @@ -531,5 +526,4 @@ process_graph_event(xmlNode * event, const char *event_node) bail: free(update_te_uuid); - return stop_early; } diff --git a/crmd/tengine.h b/crmd/tengine.h index 6a75a08c5cf..df49731c244 100644 --- a/crmd/tengine.h +++ b/crmd/tengine.h @@ -38,7 +38,7 @@ extern crm_action_t *get_cancel_action(const char *id, const char *node); extern gboolean cib_action_update(crm_action_t * action, int status, int op_rc); extern gboolean fail_incompletable_actions(crm_graph_t * graph, const char *down_node); -extern gboolean process_graph_event(xmlNode * event, const char *event_node); +void process_graph_event(xmlNode *event, const char *event_node); /* utils */ extern crm_action_t *get_action(int id, gboolean confirmed); From ace236de4371202d8043cfac9e781b9ddab2acc9 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 27 Feb 2018 15:51:25 -0600 Subject: [PATCH 035/812] Refactor: crmd: functionize pieces of do_lrm_invoke() more readable --- crmd/lrm.c | 584 +++++++++++++++++++++++++++++------------------------ 1 file changed, 318 insertions(+), 266 deletions(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index ef86cb88c51..a6366b86860 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1366,41 +1366,42 @@ get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) } static void -force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char 
*from_host, const char *user_name, gboolean is_remote_node) +force_reprobe(lrm_state_t *lrm_state, const char *from_sys, + const char *from_host, const char *user_name, + gboolean is_remote_node) { - GHashTableIter gIter; - rsc_history_t *entry = NULL; - - - crm_info("clearing resource history on node %s", lrm_state->node_name); - g_hash_table_iter_init(&gIter, lrm_state->resource_history); - while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { - /* only unregister the resource during a reprobe if it is not a remote connection - * resource. otherwise unregistering the connection will terminate remote-node - * membership */ - gboolean unregister = TRUE; - - if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { - lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); - if (remote_lrm_state) { - /* when forcing a reprobe, make sure to clear remote node before - * clearing the remote node's connection resource */ - force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); - } - unregister = FALSE; - } + GHashTableIter gIter; + rsc_history_t *entry = NULL; - delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host, - user_name, NULL, unregister); + crm_info("Clearing resource history on node %s", lrm_state->node_name); + g_hash_table_iter_init(&gIter, lrm_state->resource_history); + while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { + /* only unregister the resource during a reprobe if it is not a remote connection + * resource. 
otherwise unregistering the connection will terminate remote-node + * membership */ + gboolean unregister = TRUE; + + if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { + lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); + if (remote_lrm_state) { + /* when forcing a reprobe, make sure to clear remote node before + * clearing the remote node's connection resource */ + force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); + } + unregister = FALSE; } - /* Now delete the copy in the CIB */ - erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); + delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host, + user_name, NULL, unregister); + } + + /* Now delete the copy in the CIB */ + erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); - /* And finally, _delete_ the value in attrd - * Setting it to FALSE results in the PE sending us back here again - */ - update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); + /* And finally, _delete_ the value in attrd + * Setting it to FALSE results in the PE sending us back here again + */ + update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } static void @@ -1463,6 +1464,267 @@ synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) lrmd_free_event(op); } +/*! 
+ * \internal + * \brief Get target of an LRM operation + * + * \param[in] xml LRM operation data XML + * + * \return LRM operation target node name (local node or Pacemaker Remote node) + */ +static const char * +lrm_op_target(xmlNode *xml) +{ + const char *target = NULL; + + if (xml) { + target = crm_element_value(xml, XML_LRM_ATTR_TARGET); + } + if (target == NULL) { + target = fsa_our_uname; + } + return target; +} + +static void +fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, + const char *from_host, const char *from_sys) +{ + lrmd_event_data_t *op = NULL; + lrmd_rsc_info_t *rsc = NULL; + xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE); + + CRM_CHECK(xml_rsc != NULL, return); + + /* The lrmd simply executes operations and reports the results, without any + * concept of success or failure, so to fail a resource, we must fake what a + * failure looks like. + * + * To do this, we create a fake lrmd operation event for the resource, and + * pass that event to the lrmd client callback so it will be processed as if + * it came from the lrmd. 
+ */ + op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); + CRM_ASSERT(op != NULL); + + free((char*) op->user_data); + op->user_data = NULL; + op->call_id = get_fake_call_id(lrm_state, op->rsc_id); + op->interval = 0; + op->op_status = PCMK_LRM_OP_DONE; + op->rc = PCMK_OCF_UNKNOWN_ERROR; + op->t_run = time(NULL); + op->t_rcchange = op->t_run; + +#if ENABLE_ACL + if (user_name && is_privileged(user_name) == FALSE) { + crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); + send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); + lrmd_free_event(op); + return; + } +#endif + + rsc = get_lrm_resource(lrm_state, xml_rsc, xml, TRUE); + if (rsc) { + crm_info("Failing resource %s...", rsc->id); + process_lrm_event(lrm_state, op, NULL); + op->op_status = PCMK_LRM_OP_DONE; + op->rc = PCMK_OCF_OK; + lrmd_free_rsc_info(rsc); + } else { + crm_info("Cannot find/create resource in order to fail it..."); + crm_log_xml_warn(xml, "bad input"); + } + + send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); + lrmd_free_event(op); +} + +static void +handle_refresh_op(lrm_state_t *lrm_state, const char *user_name, + const char *from_host, const char *from_sys) +{ + int rc = pcmk_ok; + xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); + + fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); + crm_info("Forced a local LRM refresh: call=%d", rc); + + if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) { + xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host, + from_sys, CRM_SYSTEM_LRMD, + fsa_our_uuid); + + crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); + + if (relay_message(reply, TRUE) == FALSE) { + crm_log_xml_err(reply, "Unable to route reply"); + } + free_xml(reply); + } + + free_xml(fragment); +} + +static void +handle_query_op(xmlNode *msg, lrm_state_t *lrm_state) +{ + xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); + xmlNode *reply = 
create_reply(msg, data); + + if (relay_message(reply, TRUE) == FALSE) { + crm_err("Unable to route reply"); + crm_log_xml_err(reply, "reply"); + } + free_xml(reply); + free_xml(data); +} + +static void +handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, + const char *from_host, const char *user_name, + gboolean is_remote_node) +{ + crm_notice("Forcing the status of all resources to be redetected"); + force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); + + if (safe_str_neq(CRM_SYSTEM_PENGINE, from_sys) + && safe_str_neq(CRM_SYSTEM_TENGINE, from_sys)) { + + xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host, + from_sys, CRM_SYSTEM_LRMD, + fsa_our_uuid); + + crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); + + if (relay_message(reply, TRUE) == FALSE) { + crm_log_xml_err(reply, "Unable to route reply"); + } + free_xml(reply); + } +} + +static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, + lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) +{ + char *op_key = NULL; + char *meta_key = NULL; + int call = 0; + const char *call_id = NULL; + const char *op_task = NULL; + const char *op_interval = NULL; + gboolean in_progress = FALSE; + xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); + + CRM_CHECK(params != NULL, return FALSE); + + meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL); + op_interval = crm_element_value(params, meta_key); + free(meta_key); + CRM_CHECK(op_interval != NULL, return FALSE); + + meta_key = crm_meta_name(XML_LRM_ATTR_TASK); + op_task = crm_element_value(params, meta_key); + free(meta_key); + CRM_CHECK(op_task != NULL, return FALSE); + + meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); + call_id = crm_element_value(params, meta_key); + free(meta_key); + + op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0")); + + crm_debug("PE requested op %s (call=%s) be cancelled", + op_key, (call_id? 
call_id : "NA")); + call = crm_parse_int(call_id, "0"); + if (call == 0) { + /* the normal case when the PE cancels a recurring op */ + in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); + + } else { + /* the normal case when the PE cancels an orphan op */ + in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); + } + + // Acknowledge cancellation operation if for a remote connection resource + if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { + char *op_id = make_stop_id(rsc->id, call); + + if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { + crm_info("Nothing known about operation %d for %s", call, op_key); + } + erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call); + send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, + from_host, from_sys); + + /* needed at least for cancellation of a remote operation */ + g_hash_table_remove(lrm_state->pending_ops, op_id); + free(op_id); + + } else { + /* No ack is needed since abcdaa8, but peers with older versions + * in a rolling upgrade need one. We didn't bump the feature set + * at that commit, so we can only compare against the previous + * CRM version (3.0.8). If any peers have feature set 3.0.9 but + * not abcdaa8, they will time out waiting for the ack (no + * released versions of Pacemaker are affected). 
+ */ + const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); + + if (compare_version(peer_version, "3.0.8") <= 0) { + crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", + op_key, from_host, peer_version); + send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, + from_host, from_sys); + } + } + + free(op_key); + return TRUE; +} + +static void +do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, + lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, + bool crm_rsc_delete, const char *user_name) +{ + gboolean unregister = TRUE; + +#if ENABLE_ACL + int cib_rc = delete_rsc_status(lrm_state, rsc->id, + cib_dryrun|cib_sync_call, user_name); + + if (cib_rc != pcmk_ok) { + lrmd_event_data_t *op = NULL; + + crm_err("Could not delete resource status of %s for %s (user %s) on %s: %s" + CRM_XS " rc=%d", + rsc->id, from_sys, (user_name? user_name : "unknown"), + from_host, pcmk_strerror(cib_rc), cib_rc); + + op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE); + op->op_status = PCMK_LRM_OP_ERROR; + + if (cib_rc == -EACCES) { + op->rc = PCMK_OCF_INSUFFICIENT_PRIV; + } else { + op->rc = PCMK_OCF_UNKNOWN_ERROR; + } + send_direct_ack(from_host, from_sys, NULL, op, rsc->id); + lrmd_free_event(op); + lrmd_free_rsc_info(rsc); + return; + } +#endif + + if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { + unregister = FALSE; + } + + delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, + user_name, input, unregister); +} /* A_LRM_INVOKE */ void @@ -1471,7 +1733,6 @@ do_lrm_invoke(long long action, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { - gboolean create_rsc = TRUE; lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; @@ -1481,21 +1742,13 @@ do_lrm_invoke(long long action, const char *user_name = NULL; const char *target_node = NULL; gboolean is_remote_node = FALSE; - gboolean 
crm_rsc_delete = FALSE; + bool crm_rsc_delete = FALSE; - if (input->xml != NULL) { - /* Remote node operations are routed here to their remote connections */ - target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); - } - if (target_node == NULL) { - target_node = fsa_our_uname; - } else if (safe_str_neq(target_node, fsa_our_uname)) { - is_remote_node = TRUE; - } + target_node = lrm_op_target(input->xml); + is_remote_node = safe_str_neq(target_node, fsa_our_uname); lrm_state = lrm_state_find(target_node); - - if (lrm_state == NULL && is_remote_node) { + if ((lrm_state == NULL) && is_remote_node) { crm_err("Failing action because remote node %s has no connection to cluster node %s", target_node, fsa_our_uname); @@ -1503,7 +1756,6 @@ do_lrm_invoke(long long action, synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED); return; } - CRM_ASSERT(lrm_state != NULL); #if ENABLE_ACL @@ -1516,68 +1768,15 @@ do_lrm_invoke(long long action, if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } - - crm_trace("LRM command from: %s", from_sys); + crm_trace("LRM %s command from %s", crm_op, from_sys); if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { - /* remember this delete op came from crm_resource */ - crm_rsc_delete = TRUE; + crm_rsc_delete = TRUE; // Only crm_resource uses this op operation = CRMD_ACTION_DELETE; - } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { - operation = CRM_OP_LRM_REFRESH; - } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { - lrmd_event_data_t *op = NULL; - lrmd_rsc_info_t *rsc = NULL; - xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); - - CRM_CHECK(xml_rsc != NULL, return); - - /* The lrmd can not fail a resource, it does not understand the - * concept of success or failure in relation to a resource, it simply - * executes operations and reports the results. We determine what a failure is. 
- * Because of this, if we want to fail a resource we have to fake what we - * understand a failure to look like. - * - * To do this we create a fake lrmd operation event for the resource - * we want to fail. We then pass that event to the lrmd client callback - * so it will be processed as if it actually came from the lrmd. */ - op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon"); - CRM_ASSERT(op != NULL); - - free((char *)op->user_data); - op->user_data = NULL; - op->call_id = get_fake_call_id(lrm_state, op->rsc_id); - op->interval = 0; - op->op_status = PCMK_LRM_OP_DONE; - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->t_run = time(NULL); - op->t_rcchange = op->t_run; - -#if ENABLE_ACL - if (user_name && is_privileged(user_name) == FALSE) { - crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); - send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); - lrmd_free_event(op); - return; - } -#endif - - rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); - if (rsc) { - crm_info("Failing resource %s...", rsc->id); - process_lrm_event(lrm_state, op, NULL); - op->op_status = PCMK_LRM_OP_DONE; - op->rc = PCMK_OCF_OK; - lrmd_free_rsc_info(rsc); - } else { - crm_info("Cannot find/create resource in order to fail it..."); - crm_log_xml_warn(input->msg, "bad input"); - } - - send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); - lrmd_free_event(op); + fail_lrm_resource(input->xml, lrm_state, user_name, from_host, + from_sys); return; } else if (input->xml != NULL) { @@ -1585,81 +1784,34 @@ do_lrm_invoke(long long action, } if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { - int rc = pcmk_ok; - xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); - - fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); - crm_info("Forced a local LRM refresh: call=%d", rc); - - if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) { - xmlNode *reply = create_request( - CRM_OP_INVOKE_LRM, fragment, - 
from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); - - crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); - - if (relay_message(reply, TRUE) == FALSE) { - crm_log_xml_err(reply, "Unable to route reply"); - } - free_xml(reply); - } - - free_xml(fragment); + handle_refresh_op(lrm_state, user_name, from_host, from_sys); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { - xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); - xmlNode *reply = create_reply(input->msg, data); - - if (relay_message(reply, TRUE) == FALSE) { - crm_err("Unable to route reply"); - crm_log_xml_err(reply, "reply"); - } - free_xml(reply); - free_xml(data); + handle_query_op(input->msg, lrm_state); } else if (safe_str_eq(operation, CRM_OP_PROBED)) { - update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); - - } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { - crm_notice("Forcing the status of all resources to be redetected"); - - force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); + update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, + user_name, is_remote_node); - if (safe_str_neq(CRM_SYSTEM_PENGINE, from_sys) - && safe_str_neq(CRM_SYSTEM_TENGINE, from_sys)) { - - xmlNode *reply = create_request( - CRM_OP_INVOKE_LRM, NULL, - from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); - - crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); - - if (relay_message(reply, TRUE) == FALSE) { - crm_log_xml_err(reply, "Unable to route reply"); - } - free_xml(reply); - } + } else if (safe_str_eq(operation, CRM_OP_REPROBE) + || safe_str_eq(crm_op, CRM_OP_REPROBE)) { + handle_reprobe_op(lrm_state, from_sys, from_host, user_name, + is_remote_node); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; - xmlNode *params = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); + gboolean create_rsc = 
safe_str_neq(operation, CRMD_ACTION_DELETE); CRM_CHECK(xml_rsc != NULL, return); - params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); - - if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { - create_rsc = FALSE; - } - - if(lrm_state_is_connected(lrm_state) == FALSE) { + if (lrm_state_is_connected(lrm_state) == FALSE) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED); return; } rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); - if (rsc == NULL && create_rsc) { + if ((rsc == NULL) && create_rsc) { crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "bad input"); @@ -1668,7 +1820,9 @@ do_lrm_invoke(long long action, synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED); } else if (rsc == NULL) { - crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml)); + crm_notice("Not creating %s resource for a %s event " + CRM_XS " transition key %s", + ID(xml_rsc), operation, ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); /* Deleting something that does not exist is a success */ @@ -1676,115 +1830,13 @@ do_lrm_invoke(long long action, from_host, from_sys); } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { - char *op_key = NULL; - char *meta_key = NULL; - int call = 0; - const char *call_id = NULL; - const char *op_task = NULL; - const char *op_interval = NULL; - gboolean in_progress = FALSE; - - CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command"); - lrmd_free_rsc_info(rsc); return); - - meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL); - op_interval = crm_element_value(params, meta_key); - free(meta_key); - - meta_key = crm_meta_name(XML_LRM_ATTR_TASK); - op_task = crm_element_value(params, meta_key); - free(meta_key); - - meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); - call_id = crm_element_value(params, meta_key); - free(meta_key); - - CRM_CHECK(op_task != NULL, 
crm_log_xml_warn(input->xml, "Bad command"); - lrmd_free_rsc_info(rsc); return); - CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command"); - lrmd_free_rsc_info(rsc); return); - - op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0")); - - crm_debug("PE requested op %s (call=%s) be cancelled", - op_key, call_id ? call_id : "NA"); - call = crm_parse_int(call_id, "0"); - if (call == 0) { - /* the normal case when the PE cancels a recurring op */ - in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); - - } else { - /* the normal case when the PE cancels an orphan op */ - in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); - } - - /* Acknowledge the cancellation operation if it's for a remote connection resource */ - if (in_progress == FALSE || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { - char *op_id = make_stop_id(rsc->id, call); - - if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { - crm_info("Nothing known about operation %d for %s", call, op_key); - } - erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call); - send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, - from_host, from_sys); - - /* needed at least for cancellation of a remote operation */ - g_hash_table_remove(lrm_state->pending_ops, op_id); - free(op_id); - - } else { - /* No ack is needed since abcdaa8, but peers with older versions - * in a rolling upgrade need one. We didn't bump the feature set - * at that commit, so we can only compare against the previous - * CRM version (3.0.8). If any peers have feature set 3.0.9 but - * not abcdaa8, they will time out waiting for the ack (no - * released versions of Pacemaker are affected). 
- */ - const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); - - if (compare_version(peer_version, "3.0.8") <= 0) { - crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", - op_key, from_host, peer_version); - send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, - from_host, from_sys); - } + if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { + crm_log_xml_warn(input->xml, "Bad command"); } - free(op_key); - } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { - gboolean unregister = TRUE; - -#if ENABLE_ACL - int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name); - if (cib_rc != pcmk_ok) { - lrmd_event_data_t *op = NULL; - - crm_err - ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s", - rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc, - pcmk_strerror(cib_rc)); - - op = construct_op(lrm_state, input->xml, rsc->id, operation); - op->op_status = PCMK_LRM_OP_ERROR; - - if (cib_rc == -EACCES) { - op->rc = PCMK_OCF_INSUFFICIENT_PRIV; - } else { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - } - send_direct_ack(from_host, from_sys, NULL, op, rsc->id); - lrmd_free_event(op); - lrmd_free_rsc_info(rsc); - return; - } -#endif - if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { - unregister = FALSE; - } - - delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister); + do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, + crm_rsc_delete, user_name); } else { do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg); From a468b8c4f137f7895152c274e4b137e693832f84 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 22 Feb 2018 10:27:36 -0600 Subject: [PATCH 036/812] Fix: crmd: match only executed down events Before, if a downed node had more than one event that could make it down (e.g. 
a remote node with a fence and a connection stop), the crmd could match an action that hadn't yet been initiated, and thus abort the transition unnecessarily. --- crmd/te_callbacks.c | 2 +- crmd/te_events.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c index 1941cd7348e..d67dabc1897 100644 --- a/crmd/te_callbacks.c +++ b/crmd/te_callbacks.c @@ -350,7 +350,7 @@ abort_unless_down(const char *xpath, const char *op, xmlNode *change, } down = match_down_event(node_uuid, TRUE); - if(down == NULL || down->executed == false) { + if (down == NULL) { crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath); abort_transition(INFINITY, tg_restart, reason, change); } else { diff --git a/crmd/te_events.c b/crmd/te_events.c index 944bb601b21..28a8ab287c3 100644 --- a/crmd/te_events.c +++ b/crmd/te_events.c @@ -414,11 +414,16 @@ match_down_event(const char *target, bool quiet) gIter2 = gIter2->next) { match = (crm_action_t*)gIter2->data; - xpath_ret = xpath_search(match->xml, xpath); - if (numXpathResults(xpath_ret) < 1) { + if (match->executed) { + xpath_ret = xpath_search(match->xml, xpath); + if (numXpathResults(xpath_ret) < 1) { + match = NULL; + } + freeXpathObject(xpath_ret); + } else { + // Only actions that were actually started can match match = NULL; } - freeXpathObject(xpath_ret); } } From caf6eb19c6b9ca96c0eed05aacdfb2b71b2daceb Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 28 Feb 2018 14:58:04 -0600 Subject: [PATCH 037/812] Refactor: crmd: return rich error codes from get_lrm_resource() will allow for improved error handling --- crmd/lrm.c | 107 +++++++++++++++++++++++++++++++---------------- crmd/lrm_state.c | 17 ++++---- 2 files changed, 79 insertions(+), 45 deletions(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index a6366b86860..5d0fcc0bbbe 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1256,49 +1256,77 @@ cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char 
*key, g return data.done; } -static lrmd_rsc_info_t * -get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg, gboolean do_create) +/*! + * \internal + * \brief Retrieve resource information from LRM + * + * \param[in] lrm_state LRM connection to use + * \param[in] rsc_xml XML containing resource configuration + * \param[in] do_create If true, register resource with LRM if not already + * \param[out] rsc_info Where to store resource information obtained from LRM + * + * \retval pcmk_ok Success (and rsc_info holds newly allocated result) + * \retval -EINVAL Required information is missing from arguments + * \retval -ENOTCONN No active connection to LRM + * \retval -ENODEV Resource not found + * \retval -errno Error communicating with lrmd when registering resource + * + * \note Caller is responsible for freeing result on success. + */ +static int +get_lrm_resource(lrm_state_t *lrm_state, xmlNode *rsc_xml, gboolean do_create, + lrmd_rsc_info_t **rsc_info) { - lrmd_rsc_info_t *rsc = NULL; - const char *id = ID(resource); - const char *type = crm_element_value(resource, XML_ATTR_TYPE); - const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS); - const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER); - const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG); - - crm_trace("Retrieving %s from the LRM.", id); - CRM_CHECK(id != NULL, return NULL); + const char *id = ID(rsc_xml); - rsc = lrm_state_get_rsc_info(lrm_state, id, 0); + CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); + CRM_CHECK(id, return -EINVAL); - if (!rsc && long_id) { - rsc = lrm_state_get_rsc_info(lrm_state, long_id, 0); + if (lrm_state_is_connected(lrm_state) == FALSE) { + return -ENOTCONN; } - if (!rsc && do_create) { - CRM_CHECK(class != NULL, return NULL); - CRM_CHECK(type != NULL, return NULL); + crm_trace("Retrieving resource information for %s from the LRM", id); + *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 
0); - crm_trace("Adding rsc %s before operation", id); + // If resource isn't known by ID, try clone name, if provided + if (!*rsc_info) { + const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG); - lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); + if (long_id) { + *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); + } + } - rsc = lrm_state_get_rsc_info(lrm_state, id, 0); + if ((*rsc_info == NULL) && do_create) { + const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS); + const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER); + const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE); + int rc; - if (!rsc) { + crm_trace("Registering resource %s with LRM", id); + rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, + lrmd_opt_drop_recurring); + if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; - crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name); - /* only register this as a internal error if this involves the local - * lrmd. Otherwise we're likely dealing with an unresponsive remote-node - * which is not a FSA failure. */ + crm_err("Could not register resource %s with LRM on %s: %s " + CRM_XS " rc=%d", + id, lrm_state->node_name, pcmk_strerror(rc), rc); + + /* Register this as an internal error if this involves the local + * lrmd. Otherwise, we're likely dealing with an unresponsive remote + * node, which is not an FSA failure. + */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } + return rc; } - } - return rsc; + *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); + } + return *rsc_info? 
pcmk_ok : -ENODEV; } static void @@ -1525,13 +1553,13 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, } #endif - rsc = get_lrm_resource(lrm_state, xml_rsc, xml, TRUE); - if (rsc) { + if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); process_lrm_event(lrm_state, op, NULL); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; lrmd_free_rsc_info(rsc); + } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(xml, "bad input"); @@ -1802,24 +1830,27 @@ do_lrm_invoke(long long action, lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); gboolean create_rsc = safe_str_neq(operation, CRMD_ACTION_DELETE); + int rc; - CRM_CHECK(xml_rsc != NULL, return); + // We can't return anything meaningful without a resource ID + CRM_CHECK(xml_rsc && ID(xml_rsc), return); - if (lrm_state_is_connected(lrm_state) == FALSE) { - synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED); + rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); + if (rc == -ENOTCONN) { + synthesize_lrmd_failure(lrm_state, input->xml, + PCMK_OCF_CONNECTION_DIED); return; - } - rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); - if ((rsc == NULL) && create_rsc) { + } else if ((rc < 0) && create_rsc) { crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "bad input"); /* if the operation couldn't complete because we can't register * the resource, return a generic error */ synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED); + return; - } else if (rsc == NULL) { + } else if (rc < 0) { crm_notice("Not creating %s resource for a %s event " CRM_XS " transition key %s", ID(xml_rsc), operation, ID(input->xml)); @@ -1828,8 +1859,10 @@ do_lrm_invoke(long long action, /* Deleting something that does not exist is a success */ 
send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation, from_host, from_sys); + return; + } - } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { + if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c index 972584502aa..23d8ae677df 100644 --- a/crmd/lrm_state.c +++ b/crmd/lrm_state.c @@ -684,20 +684,21 @@ lrm_state_register_rsc(lrm_state_t * lrm_state, const char *class, const char *provider, const char *agent, enum lrmd_call_options options) { - if (!lrm_state->conn) { + lrmd_t *conn = (lrmd_t *) lrm_state->conn; + + if (conn == NULL) { return -ENOTCONN; } - /* optimize this... this function is a synced round trip from client to daemon. - * The crmd/lrm.c code path should be re-factored to allow the register of resources - * to be performed async. The lrmd client api needs to make an async version - * of register available. */ if (is_remote_lrmd_ra(agent, provider, NULL)) { - return lrm_state_find_or_create(rsc_id) ? pcmk_ok : -1; + return lrm_state_find_or_create(rsc_id)? pcmk_ok : -EINVAL; } - return ((lrmd_t *) lrm_state->conn)->cmds->register_rsc(lrm_state->conn, rsc_id, class, - provider, agent, options); + /* @TODO Implement an asynchronous version of this (currently a blocking + * call to the lrmd). + */ + return conn->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider, + agent, options); } int From 1c6677e39157ac9580b207825be014b290bd2fca Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 1 Mar 2018 10:42:58 -0600 Subject: [PATCH 038/812] Low: crmd: improve lrmd failure handling Previously, do_lrm_invoke() treated all get_lrm_resource() failures as malformed resource configuration. Now, it distinguishes bad configuration (fatal) from lrmd communication errors (hard). 
--- crmd/lrm.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index 5d0fcc0bbbe..10b7b8698aa 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1841,24 +1841,35 @@ do_lrm_invoke(long long action, PCMK_OCF_CONNECTION_DIED); return; - } else if ((rc < 0) && create_rsc) { - crm_err("Invalid resource definition for %s", ID(xml_rsc)); - crm_log_xml_warn(input->msg, "bad input"); + } else if (!create_rsc) { + /* Delete of malformed or nonexistent resource + * (deleting something that does not exist is a success) + */ + crm_notice("Not registering resource '%s' for a %s event " + CRM_XS " get-rc=%d (%s) transition-key=%s", + ID(xml_rsc), operation, + rc, pcmk_strerror(rc), ID(input->xml)); + delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, + user_name); + send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation, + from_host, from_sys); + return; - /* if the operation couldn't complete because we can't register - * the resource, return a generic error */ - synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED); + } else if (rc == -EINVAL) { + // Resource operation on malformed resource + crm_err("Invalid resource definition for %s", ID(xml_rsc)); + crm_log_xml_warn(input->msg, "invalid resource"); + synthesize_lrmd_failure(lrm_state, input->xml, + PCMK_OCF_NOT_CONFIGURED); // fatal error return; } else if (rc < 0) { - crm_notice("Not creating %s resource for a %s event " - CRM_XS " transition key %s", - ID(xml_rsc), operation, ID(input->xml)); - delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); - - /* Deleting something that does not exist is a success */ - send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation, - from_host, from_sys); + // Error communicating with lrmd + crm_err("Could not register resource '%s' with lrmd: %s " CRM_XS " rc=%d", + ID(xml_rsc), pcmk_strerror(rc), rc); + crm_log_xml_warn(input->msg, "failed 
registration"); + synthesize_lrmd_failure(lrm_state, input->xml, + PCMK_OCF_INVALID_PARAM); // hard error return; } From bce8b2fbe1c30935ea984282eefe020c30fd9032 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 1 Mar 2018 11:05:21 -0600 Subject: [PATCH 039/812] Low: crmd: avoid core dump if remote connection doesn't exist do_lrm_invoke() calls synthesize_lrmd_failure() with a NULL lrm_state if asked to perform an operation on a remote node that has never been connected locally. Previously, this would cause a null dereference in construct_op(). Now, construct_op() handles the situation reasonably. --- crmd/lrm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index 10b7b8698aa..2116f788530 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1946,7 +1946,7 @@ construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, cons #if ENABLE_VERSIONED_ATTRS // Resolve any versioned parameters - if (safe_str_neq(op->op_type, RSC_METADATA) + if (lrm_state && safe_str_neq(op->op_type, RSC_METADATA) && safe_str_neq(op->op_type, CRMD_ACTION_DELETE) && !is_remote_lrmd_ra(NULL, NULL, rsc_id)) { @@ -2006,7 +2006,11 @@ construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, cons op->params = params; } else { - rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); + rsc_history_t *entry = NULL; + + if (lrm_state) { + entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); + } /* If we do not have stop parameters cached, use * whatever we are given */ From 785a826ae09197c2fbf4aec56e6968f1c9a72877 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 1 Mar 2018 11:09:35 -0600 Subject: [PATCH 040/812] Low: crmd: always assert when operation can't be created Previously, all callers but one asserted if construct_op() returned NULL. Now, the assert is done in construct_op() to reduce code duplication and chance for mistakes. 
--- crmd/lrm.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index 2116f788530..b9f81aee529 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -295,7 +295,6 @@ send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input, { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); - CRM_ASSERT(op != NULL); op->rc = PCMK_OCF_OK; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id); @@ -886,7 +885,6 @@ notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_ ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); - CRM_ASSERT(op != NULL); if (rc == pcmk_ok) { op->op_status = PCMK_LRM_OP_DONE; @@ -1454,7 +1452,6 @@ synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) } op = construct_op(lrm_state, action, ID(xml_rsc), operation); - CRM_ASSERT(op != NULL); op->call_id = get_fake_call_id(lrm_state, op->rsc_id); if(safe_str_eq(operation, RSC_NOTIFY)) { @@ -1533,7 +1530,6 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, * it came from the lrmd. 
*/ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); - CRM_ASSERT(op != NULL); free((char*) op->user_data); op->user_data = NULL; @@ -1905,9 +1901,11 @@ construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, cons const char *transition = NULL; - CRM_ASSERT(rsc_id != NULL); + CRM_ASSERT(rsc_id && operation); op = calloc(1, sizeof(lrmd_event_data_t)); + CRM_ASSERT(op != NULL); + op->type = lrmd_event_exec_complete; op->op_type = strdup(operation); op->op_status = PCMK_LRM_OP_PENDING; From 6cf2ff94760650356fdf4b0360197e5eafae411a Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 1 Mar 2018 11:37:55 -0600 Subject: [PATCH 041/812] Refactor: crmd: functionize setting fake operation status reduces code duplication and enhances readability --- crmd/lrm.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index b9f81aee529..d39d13b78f6 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1391,6 +1391,17 @@ get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) return call_id; } +static void +fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, + enum ocf_exitcode op_exitcode) +{ + op->call_id = get_fake_call_id(lrm_state, op->rsc_id); + op->t_run = time(NULL); + op->t_rcchange = op->t_run; + op->op_status = op_status; + op->rc = op_exitcode; +} + static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, @@ -1453,18 +1464,11 @@ synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) op = construct_op(lrm_state, action, ID(xml_rsc), operation); - op->call_id = get_fake_call_id(lrm_state, op->rsc_id); - if(safe_str_eq(operation, RSC_NOTIFY)) { - /* Notifications can't fail yet */ - op->op_status = PCMK_LRM_OP_DONE; - op->rc = PCMK_OCF_OK; - + if (safe_str_eq(operation, RSC_NOTIFY)) { // Notifications can't fail + fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, 
PCMK_OCF_OK); } else { - op->op_status = PCMK_LRM_OP_ERROR; - op->rc = rc; + fake_op_status(lrm_state, op, PCMK_LRM_OP_ERROR, rc); } - op->t_run = time(NULL); - op->t_rcchange = op->t_run; crm_info("Faking result %d for %s_%s_%d on %s (%p)", op->rc, op->rsc_id, op->op_type, op->interval, target_node, lrm_state); @@ -1530,15 +1534,11 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, * it came from the lrmd. */ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); + fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR); free((char*) op->user_data); op->user_data = NULL; - op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->interval = 0; - op->op_status = PCMK_LRM_OP_DONE; - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->t_run = time(NULL); - op->t_rcchange = op->t_run; #if ENABLE_ACL if (user_name && is_privileged(user_name) == FALSE) { @@ -2311,13 +2311,9 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operat register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if (call_id <= 0) { - - crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id); - op->call_id = get_fake_call_id(lrm_state, rsc->id); - op->op_status = PCMK_LRM_OP_DONE; - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->t_run = time(NULL); - op->t_rcchange = op->t_run; + crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", + operation, rsc->id, lrm_state->node_name, call_id); + fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR); process_lrm_event(lrm_state, op, NULL); } else { From aedf69545c09b5ce03771b1f36c966136d0ee22c Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 1 Mar 2018 12:36:23 -0600 Subject: [PATCH 042/812] Refactor: liblrmd: add function to create resource info structure reduces code duplication, enhances readability, and allows reuse --- include/crm/lrmd.h | 2 ++ lib/lrmd/lrmd_client.c | 45 
+++++++++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h index 3398c6e484c..5c1947efc4b 100644 --- a/include/crm/lrmd.h +++ b/include/crm/lrmd.h @@ -264,6 +264,8 @@ typedef struct lrmd_rsc_info_s { char *provider; } lrmd_rsc_info_t; +lrmd_rsc_info_t *lrmd_new_rsc_info(const char *rsc_id, const char *standard, + const char *provider, const char *type); lrmd_rsc_info_t *lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info); void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info); diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index b91f7eb6c46..3fd64792e63 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1455,20 +1455,36 @@ lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_option } lrmd_rsc_info_t * -lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) +lrmd_new_rsc_info(const char *rsc_id, const char *standard, + const char *provider, const char *type) { - lrmd_rsc_info_t *copy = NULL; - - copy = calloc(1, sizeof(lrmd_rsc_info_t)); + lrmd_rsc_info_t *rsc_info = calloc(1, sizeof(lrmd_rsc_info_t)); - copy->id = strdup(rsc_info->id); - copy->type = strdup(rsc_info->type); - copy->class = strdup(rsc_info->class); - if (rsc_info->provider) { - copy->provider = strdup(rsc_info->provider); + CRM_ASSERT(rsc_info); + if (rsc_id) { + rsc_info->id = strdup(rsc_id); + CRM_ASSERT(rsc_info->id); + } + if (standard) { + rsc_info->class = strdup(standard); + CRM_ASSERT(rsc_info->class); } + if (provider) { + rsc_info->provider = strdup(provider); + CRM_ASSERT(rsc_info->provider); + } + if (type) { + rsc_info->type = strdup(type); + CRM_ASSERT(rsc_info->type); + } + return rsc_info; +} - return copy; +lrmd_rsc_info_t * +lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) +{ + return lrmd_new_rsc_info(rsc_info->id, rsc_info->class, + rsc_info->provider, rsc_info->type); } void @@ -1515,14 +1531,7 @@ lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum 
lrmd_call_options return NULL; } - rsc_info = calloc(1, sizeof(lrmd_rsc_info_t)); - rsc_info->id = strdup(rsc_id); - rsc_info->class = strdup(class); - if (provider) { - rsc_info->provider = strdup(provider); - } - rsc_info->type = strdup(type); - + rsc_info = lrmd_new_rsc_info(rsc_id, class, provider, type); free_xml(output); return rsc_info; } From 8ad7b137f4e932f972276b27a1e976de32c143d0 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 1 Mar 2018 15:31:37 -0600 Subject: [PATCH 043/812] Fix: crmd: always write faked failures to CIB whenever possible Previously, when the crmd had to fake an LRM result, it would call process_lrm_event() as long as an lrm_state was available. However, if the lrm_state was disconnected and did not have the resource info cached (e.g. when attempting to recover a resource on a remote node whose connection has just died), then the eventual call to do_update_resource() would be unable to write the result to the CIB, and the PE would never learn about it. Now, when faking a result, we check that the resource info is available from the lrm_state before attempting to process the event normally. If not, we call do_update_resource() directly with created resource info, the same as is done when an lrm_state is not available. 
--- crmd/lrm.c | 59 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index d39d13b78f6..41fd9a833a4 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1445,20 +1445,22 @@ static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) { lrmd_event_data_t *op = NULL; + lrmd_rsc_info_t *rsc_info = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); - if(xml_rsc == NULL) { + if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) { /* @TODO Should we do something else, like direct ack? */ - crm_info("Skipping %s=%d on %s (%p): no resource", - crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node, lrm_state); + crm_info("Can't fake %s failure (%d) on %s without resource configuration", + crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, + target_node); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ - crm_info("Skipping %s=%d on %s (%p): no operation", - crm_element_value(action, XML_ATTR_TRANSITION_KEY), rc, target_node, lrm_state); + crm_info("Can't fake %s failure (%d) on %s without operation", + ID(xml_rsc), rc, target_node); return; } @@ -1470,25 +1472,36 @@ synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) fake_op_status(lrm_state, op, PCMK_LRM_OP_ERROR, rc); } - crm_info("Faking result %d for %s_%s_%d on %s (%p)", op->rc, op->rsc_id, op->op_type, op->interval, target_node, lrm_state); + crm_info("Faking %s_%s_%d result (%d) on %s", + op->rsc_id, op->op_type, op->interval, op->rc, target_node); - if(lrm_state) { + /* Process the result as if it came from the LRM, if possible + * (i.e. resource info can be obtained from the lrm_state). 
+ */ + if (lrm_state) { + rsc_info = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); + } + if (rsc_info) { process_lrm_event(lrm_state, op, NULL); } else { - lrmd_rsc_info_t rsc; - - rsc.id = strdup(op->rsc_id); - rsc.type = crm_element_value_copy(xml_rsc, XML_ATTR_TYPE); - rsc.class = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_CLASS); - rsc.provider = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_PROVIDER); - - do_update_resource(target_node, &rsc, op); - - free(rsc.id); - free(rsc.type); - free(rsc.class); - free(rsc.provider); + /* If we can't process the result normally, at least write it to the CIB + * if possible, so the PE can act on it. + */ + char *standard = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_CLASS); + char *provider = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_PROVIDER); + char *type = crm_element_value_copy(xml_rsc, XML_ATTR_TYPE); + + if (standard && type) { + rsc_info = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); + do_update_resource(target_node, rsc_info, op); + lrmd_free_rsc_info(rsc_info); + } else { + // @TODO Should we direct ack? 
+ crm_info("Can't fake %s failure (%d) on %s without resource standard and type", + crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, + target_node); + } } lrmd_free_event(op); } @@ -1773,10 +1786,8 @@ do_lrm_invoke(long long action, lrm_state = lrm_state_find(target_node); if ((lrm_state == NULL) && is_remote_node) { - crm_err("Failing action because remote node %s has no connection to cluster node %s", - target_node, fsa_our_uname); - - /* The action must be recorded here and in the CIB as failed */ + crm_err("Failing action because local node has never had connection to remote node %s", + target_node); synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED); return; } From 55d551977b4e4bd03bfd3a4e525b9b8198f7c747 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 26 Feb 2018 16:35:07 -0600 Subject: [PATCH 044/812] Build: spec: update SuSE spec modifications for recent changes --- GNUmakefile | 29 +++++++++++++++-------------- pacemaker.spec.in | 2 +- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 5752276efb7..b854665aa18 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -154,20 +154,21 @@ $(PACKAGE)-suse.spec: $(PACKAGE).spec.in GNUmakefile git show $(TAG):$(PACKAGE).spec.in >> $@; \ echo "Rebuilt $@ from $(TAG)"; \ fi - sed -i s:%{_docdir}/%{name}:%{_docdir}/%{name}-%{version}:g $@ - sed -i s:corosynclib:libcorosync:g $@ - sed -i s:libexecdir}/lcrso:libdir}/lcrso:g $@ - sed -i 's:%{name}-libs:lib%{name}3:g' $@ - sed -i s:cluster-glue-libs:libglue:g $@ - sed -i s:bzip2-devel:libbz2-devel:g $@ - sed -i s:docbook-style-xsl:docbook-xsl-stylesheets:g $@ - sed -i s:libtool-ltdl-devel::g $@ - sed -i s:publican::g $@ - sed -i s:byacc::g $@ - sed -i s:gnutls-devel:libgnutls-devel:g $@ - sed -i s:189:90:g $@ - sed -i 's:python-devel:python-curses python-xml python-devel:' $@ - sed -i 's@Requires: python@Requires: python-curses python-xml python@' $@ + sed -i \ + -e 
's:%{_docdir}/%{name}:%{_docdir}/%{name}-%{version}:g' \ + -e 's:%{name}-libs:lib%{name}3:g' \ + -e 's@Requires: python@Requires: python-curses python-xml python@' \ + -e 's:libtool-ltdl-devel::g $@' \ + -e 's:bzip2-devel:libbz2-devel:g' \ + -e 's:docbook-style-xsl:docbook-xsl-stylesheets:g' \ + -e 's: byacc::g' \ + -e 's:gnutls-devel:libgnutls-devel:g' \ + -e 's:corosynclib:libcorosync:g' \ + -e 's:cluster-glue-libs:libglue:g' \ + -e 's: publican::g' \ + -e 's: 189: 90:g' \ + -e 's:%{_libexecdir}/lcrso:%{_libdir}/lcrso:g' \ + $@ @echo "Applied SUSE-specific modifications" diff --git a/pacemaker.spec.in b/pacemaker.spec.in index 5c9ce98c7db..f8b1a6757ee 100644 --- a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -227,7 +227,7 @@ BuildRequires: cluster-glue-libs-devel ## (note no avoiding effect when building through non-customized mock) %if !%{bleeding} %if %{with doc} -BuildRequires: publican inkscape asciidoc +BuildRequires: inkscape asciidoc publican %endif %endif From 11a019b78e2ab28e36e415fc2374d72dce7b6020 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 9 Mar 2018 12:13:07 -0600 Subject: [PATCH 045/812] Build: libcrmcommon: OpenBSD doesn't support some POSIX errno's --- lib/common/logging.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/common/logging.c b/lib/common/logging.c index deb39413483..1197090ceb5 100644 --- a/lib/common/logging.c +++ b/lib/common/logging.c @@ -1021,7 +1021,9 @@ pcmk_errorname(int rc) case EMFILE: return "EMFILE"; case EMLINK: return "EMLINK"; case EMSGSIZE: return "EMSGSIZE"; +#ifdef EMULTIHOP // Not available on OpenBSD case EMULTIHOP: return "EMULTIHOP"; +#endif case ENAMETOOLONG: return "ENAMETOOLONG"; case ENETDOWN: return "ENETDOWN"; case ENETRESET: return "ENETRESET"; @@ -1034,7 +1036,9 @@ pcmk_errorname(int rc) case ENOEXEC: return "ENOEXEC"; case ENOKEY: return "ENOKEY"; case ENOLCK: return "ENOLCK"; +#ifdef ENOLINK // Not available on OpenBSD case ENOLINK: return "ENOLINK"; +#endif case ENOMEM: return 
"ENOMEM"; case ENOMSG: return "ENOMSG"; case ENOPROTOOPT: return "ENOPROTOOPT"; From 783f54fa3300f2ad00f2aadba69eb1caca3a1f1c Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 12 Mar 2018 09:18:09 -0500 Subject: [PATCH 046/812] Build: spec: make shadow-utils dependency explicit needed for useradd/groupadd --- GNUmakefile | 1 + pacemaker.spec.in | 1 + 2 files changed, 2 insertions(+) diff --git a/GNUmakefile b/GNUmakefile index b854665aa18..7ea15aea75e 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -165,6 +165,7 @@ $(PACKAGE)-suse.spec: $(PACKAGE).spec.in GNUmakefile -e 's:gnutls-devel:libgnutls-devel:g' \ -e 's:corosynclib:libcorosync:g' \ -e 's:cluster-glue-libs:libglue:g' \ + -e 's:shadow-utils:shadow:g' \ -e 's: publican::g' \ -e 's: 189: 90:g' \ -e 's:%{_libexecdir}/lcrso:%{_libdir}/lcrso:g' \ diff --git a/pacemaker.spec.in b/pacemaker.spec.in index f8b1a6757ee..7c3ff768ed9 100644 --- a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -265,6 +265,7 @@ be part of the cluster. License: GPLv2+ and LGPLv2+ Summary: Core Pacemaker libraries Group: System Environment/Daemons +Requires: shadow-utils %description -n %{name}-libs Pacemaker is an advanced, scalable High-Availability cluster resource From d5931476cfb3c292376acf3640e7a3acd511f242 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 12 Mar 2018 09:41:15 -0500 Subject: [PATCH 047/812] Build: lrmd: guard conditional struct member fix OpenBSD build --- lrmd/lrmd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index 918261c8820..306260fc27b 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -582,8 +582,10 @@ cmd_reset(lrmd_cmd_t * cmd) { cmd->lrmd_op_status = 0; cmd->last_pid = 0; +#ifdef HAVE_SYS_TIMEB_H memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); +#endif free(cmd->exit_reason); cmd->exit_reason = NULL; free(cmd->output); From a947d675552285ecf74c499897e8eeef9def1604 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 12 Mar 2018 
09:12:49 -0500 Subject: [PATCH 048/812] Low: libcrmcommon: missing include file needed explicitly on OpenBSD --- lib/common/watchdog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c index 7e143495312..a9e51b0c8e9 100644 --- a/lib/common/watchdog.c +++ b/lib/common/watchdog.c @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef _POSIX_MEMLOCK # include From dba271f9690d9aedeb1c17d5b022549ee3aa20d1 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 12 Mar 2018 13:45:39 -0500 Subject: [PATCH 049/812] Low: crmd: complete e8e7660 for 1.1 branch --- crmd/corosync.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/crmd/corosync.c b/crmd/corosync.c index 1316d30c947..8d6a115c230 100644 --- a/crmd/corosync.c +++ b/crmd/corosync.c @@ -145,12 +145,19 @@ crmd_cman_dispatch(unsigned long long seq, gboolean quorate) return TRUE; } +// OpenBSD doesn't have ENOLINK +#ifdef ENOLINK +#define CRM_EX_NOLINK ENOLINK +#else +#define CRM_EX_NOLINK ENOTCONN +#endif + static void crmd_quorum_destroy(gpointer user_data) { if (is_not_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); - crmd_exit(ENOLINK); + crmd_exit(CRM_EX_NOLINK); } else { crm_info("connection closed"); @@ -162,7 +169,7 @@ crmd_cs_destroy(gpointer user_data) { if (is_not_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); - crmd_exit(ENOLINK); + crmd_exit(CRM_EX_NOLINK); } else { crm_info("connection closed"); @@ -175,7 +182,7 @@ crmd_cman_destroy(gpointer user_data) { if (is_not_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); - crmd_exit(ENOLINK); + crmd_exit(CRM_EX_NOLINK); } else { crm_info("connection closed"); From eb76099e924b787ddc026441d87f77871ead9884 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 13 Feb 2018 17:21:37 -0600 Subject: [PATCH 050/812] Low: libcrmcommon: use /tmp when creating temporary XML file ... 
instead of CRM_STATE_DIR, which may not be usable from an ordinary user's account, thus generating incorrect CLI regression test output when run from a checkout. --- lib/common/schemas.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/common/schemas.c b/lib/common/schemas.c index b529ff5d83c..7e76af30e36 100644 --- a/lib/common/schemas.c +++ b/lib/common/schemas.c @@ -625,9 +625,13 @@ validate_xml_verbose(xmlNode *xml_blob) xmlDoc *doc = NULL; xmlNode *xml = NULL; gboolean rc = FALSE; - char *filename = strdup(CRM_STATE_DIR "/cib-invalid.XXXXXX"); + const char *tmpdir = getenv("TMPDIR"); + char *filename = NULL; - CRM_CHECK(filename != NULL, return FALSE); + if ((tmpdir == NULL) || (*tmpdir != '/')) { + tmpdir = "/tmp"; + } + filename = crm_strdup_printf("%s/cib-invalid.XXXXXX", tmpdir); umask(S_IWGRP | S_IWOTH | S_IROTH); fd = mkstemp(filename); From 48c9a80a7c3a621bd606ffcc14ae8a86072e41e0 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 13 Feb 2018 17:24:04 -0600 Subject: [PATCH 051/812] Test: tools: update regression test with correct output Verbose XML wasn't been shown from an ordinary user's checkout previously due to a bug that has since been fixed. --- tools/regression.validity.exp | 46 +++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tools/regression.validity.exp b/tools/regression.validity.exp index 74747462acc..75574543c73 100644 --- a/tools/regression.validity.exp +++ b/tools/regression.validity.exp @@ -3,6 +3,21 @@ Setting up shadow instance A new shadow instance was created. 
To begin using it paste the following into your shell: CIB_shadow=tools-regression ; export CIB_shadow =#=#=#= Begin test: Try to make resulting CIB invalid (enum violation) =#=#=#= + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 Call failed: Update does not conform to the configured schema =#=#=#= Current cib after: Try to make resulting CIB invalid (enum violation) =#=#=#= @@ -78,6 +93,21 @@ Your current configuration pacemaker-1.2 could not validate with any schema in r =#=#=#= End test: Run crm_simulate with invalid CIB (enum violation) - Required key not available (126) =#=#=#= * Passed: crm_simulate - Run crm_simulate with invalid CIB (enum violation) =#=#=#= Begin test: Try to make resulting CIB invalid (unrecognized validate-with) =#=#=#= + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 Call failed: Update does not conform to the configured schema =#=#=#= Current cib after: Try to make resulting CIB invalid (unrecognized validate-with) =#=#=#= @@ -161,6 +191,22 @@ Your current configuration pacemaker-9999.0 could not validate with any schema i =#=#=#= End test: Run crm_simulate with invalid CIB (unrecognized validate-with) - Required key not available (126) =#=#=#= * Passed: crm_simulate - Run crm_simulate with invalid CIB (unrecognized validate-with) =#=#=#= Begin test: Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1) =#=#=#= + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 Call failed: Update does not conform to the configured schema =#=#=#= Current cib after: Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1) =#=#=#= From c0fc7e1a5a996b967a038fcbab575cf4faa8138b Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 12 Mar 2018 11:35:11 -0500 Subject: [PATCH 052/812] Fix: libpe_status: handle unique bundle children correctly fixes segfault introduced by 4b53b163, when processing resource history for a unique clone instance inside a bundle, 
and the particular instance isn't found but the clone name is --- include/crm/pengine/internal.h | 2 +- lib/pengine/container.c | 40 ++++++++++++++-------------- lib/pengine/unpack.c | 48 ++++++++++++++++++---------------- 3 files changed, 45 insertions(+), 45 deletions(-) diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h index 44aef048fef..e9d7582dd75 100644 --- a/include/crm/pengine/internal.h +++ b/include/crm/pengine/internal.h @@ -319,7 +319,7 @@ node_t *pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); bool remote_id_conflict(const char *remote_name, pe_working_set_t *data); void common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *node, long options, void *print_data); -resource_t *find_container_child(const char *stem, resource_t * rsc, node_t *node); +resource_t *find_container_child(const resource_t *bundle, const node_t *node); bool container_fix_remote_addr(resource_t *rsc); const char *container_fix_remote_addr_in(resource_t *rsc, xmlNode *xml, const char *field); const char *pe_node_attribute_calculated(pe_node_t *node, const char *name, resource_t *rsc); diff --git a/lib/pengine/container.c b/lib/pengine/container.c index 4d2d876a5b2..1dd2f66f15d 100644 --- a/lib/pengine/container.c +++ b/lib/pengine/container.c @@ -1164,34 +1164,32 @@ container_active(resource_t * rsc, gboolean all) return all; } +/*! 
+ * \internal + * \brief Find the container child corresponding to a given node + * + * \param[in] bundle Top-level bundle resource + * \param[in] node Node to search for + * + * \return Container child if found, NULL otherwise + */ resource_t * -find_container_child(const char *stem, resource_t * rsc, node_t *node) +find_container_child(const resource_t *bundle, const node_t *node) { container_variant_data_t *container_data = NULL; - resource_t *parent = uber_parent(rsc); - CRM_ASSERT(parent->parent); - - parent = parent->parent; - get_container_variant_data(container_data, parent); + CRM_ASSERT(bundle && node); - if (is_not_set(rsc->flags, pe_rsc_unique)) { - for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { - container_grouping_t *tuple = (container_grouping_t *)gIter->data; + get_container_variant_data(container_data, bundle); + for (GListPtr gIter = container_data->tuples; gIter != NULL; + gIter = gIter->next) { + container_grouping_t *tuple = (container_grouping_t *)gIter->data; - CRM_ASSERT(tuple); - if(tuple->node->details == node->details) { - rsc = tuple->child; - break; - } + CRM_ASSERT(tuple && tuple->node); + if (tuple->node->details == node->details) { + return tuple->child; } } - - if (rsc && safe_str_neq(stem, rsc->id)) { - free(rsc->clone_name); - rsc->clone_name = strdup(stem); - } - - return rsc; + return NULL; } static void diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 80ef1787563..355cdcf4ca8 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1774,13 +1774,6 @@ find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * pa pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, node->details->uname); } - - if (safe_str_neq(rsc_id, rsc->id)) { - pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", - rsc_id, node->details->uname, rsc->id, - is_set(rsc->flags, pe_rsc_orphan) ? 
" (ORPHAN)" : ""); - } - return rsc; } @@ -1794,20 +1787,25 @@ unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, rsc_id); - /* no match */ if (rsc == NULL) { - /* Even when clone-max=0, we still create a single :0 orphan to match against */ - char *tmp = clone_zero(rsc_id); - resource_t *clone0 = pe_find_resource(data_set->resources, tmp); + /* If we didn't find the resource by its name in the operation history, + * check it again as a clone instance. Even when clone-max=0, we create + * a single :0 orphan to match against here. + */ + char *clone0_id = clone_zero(rsc_id); + resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id); if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; } else { - crm_trace("%s is not known as %s either", rsc_id, tmp); + crm_trace("%s is not known as %s either", rsc_id, clone0_id); } + /* Grab the parent clone even if this a different unique instance, + * so we can remember the clone name, which will be the same. + */ parent = uber_parent(clone0); - free(tmp); + free(clone0_id); crm_trace("%s not found: %s", rsc_id, parent ? 
parent->id : "orphan"); @@ -1820,24 +1818,28 @@ unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc parent = uber_parent(rsc); } - if(parent && parent->parent) { - rsc = find_container_child(rsc_id, rsc, node); + if (pe_rsc_is_anon_clone(parent)) { - } else if (pe_rsc_is_clone(parent)) { - if (is_not_set(parent->flags, pe_rsc_unique)) { + if (parent && parent->parent) { + rsc = find_container_child(parent->parent, node); + } else { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(data_set, node, parent, base); - CRM_ASSERT(rsc != NULL); free(base); - } - - if (rsc && safe_str_neq(rsc_id, rsc->id)) { - free(rsc->clone_name); - rsc->clone_name = strdup(rsc_id); + CRM_ASSERT(rsc != NULL); } } + if (rsc && safe_str_neq(rsc_id, rsc->id) + && safe_str_neq(rsc_id, rsc->clone_name)) { + + free(rsc->clone_name); + rsc->clone_name = strdup(rsc_id); + pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", + rsc_id, node->details->uname, rsc->id, + (is_set(rsc->flags, pe_rsc_orphan)? 
" (ORPHAN)" : "")); + } return rsc; } From d1894774e5b982fc63071ca828772d5f49480763 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 12 Mar 2018 11:52:10 -0500 Subject: [PATCH 053/812] Test: cts-pengine: add regression test for changing bundle replica count --- pengine/regression.sh | 1 + pengine/test10/bundle-replicas-change.dot | 107 ++++ pengine/test10/bundle-replicas-change.exp | 586 ++++++++++++++++++ pengine/test10/bundle-replicas-change.scores | 37 ++ pengine/test10/bundle-replicas-change.summary | 76 +++ pengine/test10/bundle-replicas-change.xml | 73 +++ 6 files changed, 880 insertions(+) create mode 100644 pengine/test10/bundle-replicas-change.dot create mode 100644 pengine/test10/bundle-replicas-change.exp create mode 100644 pengine/test10/bundle-replicas-change.scores create mode 100644 pengine/test10/bundle-replicas-change.summary create mode 100644 pengine/test10/bundle-replicas-change.xml diff --git a/pengine/regression.sh b/pengine/regression.sh index cf1824abdcd..5ba64a1a5e8 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -832,6 +832,7 @@ do_test bundle-probe-order-1 "order 1" do_test bundle-probe-order-2 "order 2" do_test bundle-probe-order-3 "order 3" do_test bundle-probe-remotes "Ensure remotes get probed too" +do_test bundle-replicas-change "Change bundle from 1 replica to multiple" echo "" do_test whitebox-fail1 "Fail whitebox container rsc." 
diff --git a/pengine/test10/bundle-replicas-change.dot b/pengine/test10/bundle-replicas-change.dot new file mode 100644 index 00000000000..be8554cdba3 --- /dev/null +++ b/pengine/test10/bundle-replicas-change.dot @@ -0,0 +1,107 @@ +digraph "g" { +"all_stopped" [ style=bold color="green" fontcolor="orange"] +"httpd-bundle-0_monitor_30000 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-0_start_0 rh74-test" -> "httpd-bundle-0_monitor_30000 rh74-test" [ style = bold] +"httpd-bundle-0_start_0 rh74-test" -> "httpd:0_monitor_0 httpd-bundle-0" [ style = bold] +"httpd-bundle-0_start_0 rh74-test" -> "httpd:0_monitor_10000 httpd-bundle-0" [ style = bold] +"httpd-bundle-0_start_0 rh74-test" -> "httpd:0_start_0 httpd-bundle-0" [ style = bold] +"httpd-bundle-0_start_0 rh74-test" -> "httpd_delete_0 httpd-bundle-0" [ style = bold] +"httpd-bundle-0_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-0_stop_0 rh74-test" -> "all_stopped" [ style = bold] +"httpd-bundle-0_stop_0 rh74-test" -> "httpd-bundle-0_start_0 rh74-test" [ style = bold] +"httpd-bundle-0_stop_0 rh74-test" -> "httpd-bundle-docker-0_stop_0 rh74-test" [ style = bold] +"httpd-bundle-0_stop_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-1_monitor_0 rh74-test" -> "httpd-bundle-1_start_0 rh74-test" [ style = bold] +"httpd-bundle-1_monitor_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-1_monitor_30000 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-1_start_0 rh74-test" -> "httpd-bundle-1_monitor_30000 rh74-test" [ style = bold] +"httpd-bundle-1_start_0 rh74-test" -> "httpd:1_monitor_10000 httpd-bundle-1" [ style = bold] +"httpd-bundle-1_start_0 rh74-test" -> "httpd:1_start_0 httpd-bundle-1" [ style = bold] +"httpd-bundle-1_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-2_monitor_0 rh74-test" -> "httpd-bundle-2_start_0 rh74-test" [ style = bold] 
+"httpd-bundle-2_monitor_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-2_monitor_30000 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-2_start_0 rh74-test" -> "httpd-bundle-2_monitor_30000 rh74-test" [ style = bold] +"httpd-bundle-2_start_0 rh74-test" -> "httpd:2_monitor_10000 httpd-bundle-2" [ style = bold] +"httpd-bundle-2_start_0 rh74-test" -> "httpd:2_start_0 httpd-bundle-2" [ style = bold] +"httpd-bundle-2_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-clone_running_0" -> "httpd-bundle_running_0" [ style = bold] +"httpd-bundle-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"httpd-bundle-clone_start_0" -> "httpd-bundle-clone_running_0" [ style = bold] +"httpd-bundle-clone_start_0" -> "httpd:0_start_0 httpd-bundle-0" [ style = bold] +"httpd-bundle-clone_start_0" -> "httpd:1_start_0 httpd-bundle-1" [ style = bold] +"httpd-bundle-clone_start_0" -> "httpd:2_start_0 httpd-bundle-2" [ style = bold] +"httpd-bundle-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"httpd-bundle-docker-0_monitor_60000 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-docker-0_start_0 rh74-test" -> "httpd-bundle-0_start_0 rh74-test" [ style = bold] +"httpd-bundle-docker-0_start_0 rh74-test" -> "httpd-bundle-docker-0_monitor_60000 rh74-test" [ style = bold] +"httpd-bundle-docker-0_start_0 rh74-test" -> "httpd-bundle_running_0" [ style = bold] +"httpd-bundle-docker-0_start_0 rh74-test" -> "httpd:0_start_0 httpd-bundle-0" [ style = bold] +"httpd-bundle-docker-0_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-docker-0_stop_0 rh74-test" -> "all_stopped" [ style = bold] +"httpd-bundle-docker-0_stop_0 rh74-test" -> "httpd-bundle-docker-0_start_0 rh74-test" [ style = bold] +"httpd-bundle-docker-0_stop_0 rh74-test" -> "httpd-bundle_stopped_0" [ style = bold] +"httpd-bundle-docker-0_stop_0 rh74-test" [ style=bold color="green" 
fontcolor="black"] +"httpd-bundle-docker-1_monitor_0 rh74-test" -> "httpd-bundle-clone_start_0" [ style = bold] +"httpd-bundle-docker-1_monitor_0 rh74-test" -> "httpd-bundle-docker-1_start_0 rh74-test" [ style = bold] +"httpd-bundle-docker-1_monitor_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-docker-1_monitor_60000 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-docker-1_start_0 rh74-test" -> "httpd-bundle-1_monitor_0 rh74-test" [ style = bold] +"httpd-bundle-docker-1_start_0 rh74-test" -> "httpd-bundle-1_start_0 rh74-test" [ style = bold] +"httpd-bundle-docker-1_start_0 rh74-test" -> "httpd-bundle-docker-1_monitor_60000 rh74-test" [ style = bold] +"httpd-bundle-docker-1_start_0 rh74-test" -> "httpd-bundle_running_0" [ style = bold] +"httpd-bundle-docker-1_start_0 rh74-test" -> "httpd:1_start_0 httpd-bundle-1" [ style = bold] +"httpd-bundle-docker-1_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-docker-2_monitor_0 rh74-test" -> "httpd-bundle-clone_start_0" [ style = bold] +"httpd-bundle-docker-2_monitor_0 rh74-test" -> "httpd-bundle-docker-2_start_0 rh74-test" [ style = bold] +"httpd-bundle-docker-2_monitor_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-docker-2_monitor_60000 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-docker-2_start_0 rh74-test" -> "httpd-bundle-2_monitor_0 rh74-test" [ style = bold] +"httpd-bundle-docker-2_start_0 rh74-test" -> "httpd-bundle-2_start_0 rh74-test" [ style = bold] +"httpd-bundle-docker-2_start_0 rh74-test" -> "httpd-bundle-docker-2_monitor_60000 rh74-test" [ style = bold] +"httpd-bundle-docker-2_start_0 rh74-test" -> "httpd-bundle_running_0" [ style = bold] +"httpd-bundle-docker-2_start_0 rh74-test" -> "httpd:2_start_0 httpd-bundle-2" [ style = bold] +"httpd-bundle-docker-2_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-ip-192.168.20.189_monitor_0 rh74-test" -> 
"httpd-bundle-ip-192.168.20.189_start_0 rh74-test" [ style = bold] +"httpd-bundle-ip-192.168.20.189_monitor_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-ip-192.168.20.189_monitor_60000 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-ip-192.168.20.189_start_0 rh74-test" -> "httpd-bundle-docker-1_start_0 rh74-test" [ style = bold] +"httpd-bundle-ip-192.168.20.189_start_0 rh74-test" -> "httpd-bundle-ip-192.168.20.189_monitor_60000 rh74-test" [ style = bold] +"httpd-bundle-ip-192.168.20.189_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-ip-192.168.20.190_monitor_0 rh74-test" -> "httpd-bundle-ip-192.168.20.190_start_0 rh74-test" [ style = bold] +"httpd-bundle-ip-192.168.20.190_monitor_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-ip-192.168.20.190_monitor_60000 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-ip-192.168.20.190_start_0 rh74-test" -> "httpd-bundle-docker-2_start_0 rh74-test" [ style = bold] +"httpd-bundle-ip-192.168.20.190_start_0 rh74-test" -> "httpd-bundle-ip-192.168.20.190_monitor_60000 rh74-test" [ style = bold] +"httpd-bundle-ip-192.168.20.190_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle_running_0" [ style=bold color="green" fontcolor="orange"] +"httpd-bundle_start_0" -> "httpd-bundle-clone_start_0" [ style = bold] +"httpd-bundle_start_0" -> "httpd-bundle-docker-0_start_0 rh74-test" [ style = bold] +"httpd-bundle_start_0" -> "httpd-bundle-docker-1_start_0 rh74-test" [ style = bold] +"httpd-bundle_start_0" -> "httpd-bundle-docker-2_start_0 rh74-test" [ style = bold] +"httpd-bundle_start_0" [ style=bold color="green" fontcolor="orange"] +"httpd-bundle_stop_0" -> "httpd-bundle-docker-0_stop_0 rh74-test" [ style = bold] +"httpd-bundle_stop_0" [ style=bold color="green" fontcolor="orange"] +"httpd-bundle_stopped_0" [ style=bold color="green" fontcolor="orange"] +"httpd:0_monitor_0 
httpd-bundle-0" -> "httpd-bundle-clone_start_0" [ style = bold] +"httpd:0_monitor_0 httpd-bundle-0" [ style=bold color="green" fontcolor="black"] +"httpd:0_monitor_10000 httpd-bundle-0" [ style=bold color="green" fontcolor="black"] +"httpd:0_start_0 httpd-bundle-0" -> "httpd-bundle-clone_running_0" [ style = bold] +"httpd:0_start_0 httpd-bundle-0" -> "httpd:0_monitor_10000 httpd-bundle-0" [ style = bold] +"httpd:0_start_0 httpd-bundle-0" -> "httpd:1_start_0 httpd-bundle-1" [ style = bold] +"httpd:0_start_0 httpd-bundle-0" [ style=bold color="green" fontcolor="black"] +"httpd:1_monitor_10000 httpd-bundle-1" [ style=bold color="green" fontcolor="black"] +"httpd:1_start_0 httpd-bundle-1" -> "httpd-bundle-clone_running_0" [ style = bold] +"httpd:1_start_0 httpd-bundle-1" -> "httpd:1_monitor_10000 httpd-bundle-1" [ style = bold] +"httpd:1_start_0 httpd-bundle-1" -> "httpd:2_start_0 httpd-bundle-2" [ style = bold] +"httpd:1_start_0 httpd-bundle-1" [ style=bold color="green" fontcolor="black"] +"httpd:2_monitor_10000 httpd-bundle-2" [ style=bold color="green" fontcolor="black"] +"httpd:2_start_0 httpd-bundle-2" -> "httpd-bundle-clone_running_0" [ style = bold] +"httpd:2_start_0 httpd-bundle-2" -> "httpd:2_monitor_10000 httpd-bundle-2" [ style = bold] +"httpd:2_start_0 httpd-bundle-2" [ style=bold color="green" fontcolor="black"] +"httpd_delete_0 httpd-bundle-0" [ style=bold color="green" fontcolor="black"] +"httpd_stop_0 httpd-bundle-0" -> "all_stopped" [ style = bold] +"httpd_stop_0 httpd-bundle-0" -> "httpd-bundle-0_stop_0 rh74-test" [ style = bold] +"httpd_stop_0 httpd-bundle-0" -> "httpd_delete_0 httpd-bundle-0" [ style = bold] +"httpd_stop_0 httpd-bundle-0" [ style=bold color="green" fontcolor="black"] +} diff --git a/pengine/test10/bundle-replicas-change.exp b/pengine/test10/bundle-replicas-change.exp new file mode 100644 index 00000000000..138dfcd1fa8 --- /dev/null +++ b/pengine/test10/bundle-replicas-change.exp @@ -0,0 +1,586 @@ + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/bundle-replicas-change.scores b/pengine/test10/bundle-replicas-change.scores new file mode 100644 index 00000000000..8b195314f24 --- /dev/null +++ b/pengine/test10/bundle-replicas-change.scores @@ -0,0 +1,37 @@ +Allocation scores: +clone_color: httpd-bundle-clone allocation score on httpd-bundle-0: 0 +clone_color: httpd-bundle-clone allocation score on httpd-bundle-1: 0 +clone_color: httpd-bundle-clone allocation score on httpd-bundle-2: 0 +clone_color: httpd-bundle-clone allocation score on rh74-test: -INFINITY +clone_color: httpd:0 allocation score on httpd-bundle-0: INFINITY +clone_color: httpd:1 allocation score on httpd-bundle-1: INFINITY +clone_color: httpd:2 allocation score on httpd-bundle-2: INFINITY +container_color: httpd-bundle allocation score on rh74-test: 0 +container_color: httpd-bundle-0 allocation score on 
rh74-test: INFINITY +container_color: httpd-bundle-1 allocation score on rh74-test: 0 +container_color: httpd-bundle-2 allocation score on rh74-test: 0 +container_color: httpd-bundle-clone allocation score on httpd-bundle-0: -INFINITY +container_color: httpd-bundle-clone allocation score on httpd-bundle-1: -INFINITY +container_color: httpd-bundle-clone allocation score on httpd-bundle-2: -INFINITY +container_color: httpd-bundle-clone allocation score on rh74-test: 0 +container_color: httpd-bundle-docker-0 allocation score on rh74-test: INFINITY +container_color: httpd-bundle-docker-1 allocation score on rh74-test: 0 +container_color: httpd-bundle-docker-2 allocation score on rh74-test: 0 +container_color: httpd-bundle-ip-192.168.20.188 allocation score on rh74-test: INFINITY +container_color: httpd-bundle-ip-192.168.20.189 allocation score on rh74-test: 0 +container_color: httpd-bundle-ip-192.168.20.190 allocation score on rh74-test: 0 +container_color: httpd:0 allocation score on httpd-bundle-0: 500 +container_color: httpd:1 allocation score on httpd-bundle-1: 500 +container_color: httpd:2 allocation score on httpd-bundle-2: 500 +native_color: httpd-bundle-0 allocation score on rh74-test: INFINITY +native_color: httpd-bundle-1 allocation score on rh74-test: 10000 +native_color: httpd-bundle-2 allocation score on rh74-test: 10000 +native_color: httpd-bundle-docker-0 allocation score on rh74-test: INFINITY +native_color: httpd-bundle-docker-1 allocation score on rh74-test: 0 +native_color: httpd-bundle-docker-2 allocation score on rh74-test: 0 +native_color: httpd-bundle-ip-192.168.20.188 allocation score on rh74-test: INFINITY +native_color: httpd-bundle-ip-192.168.20.189 allocation score on rh74-test: 0 +native_color: httpd-bundle-ip-192.168.20.190 allocation score on rh74-test: 0 +native_color: httpd:0 allocation score on httpd-bundle-0: INFINITY +native_color: httpd:1 allocation score on httpd-bundle-1: INFINITY +native_color: httpd:2 allocation score on 
httpd-bundle-2: INFINITY diff --git a/pengine/test10/bundle-replicas-change.summary b/pengine/test10/bundle-replicas-change.summary new file mode 100644 index 00000000000..05b1bb47a79 --- /dev/null +++ b/pengine/test10/bundle-replicas-change.summary @@ -0,0 +1,76 @@ + +Current cluster status: +Online: [ rh74-test ] +Containers: [ httpd-bundle-0:httpd-bundle-docker-0 ] + + Docker container set: httpd-bundle [pcmktest:http] (unique) + httpd-bundle-0 (192.168.20.188) (ocf::heartbeat:apache): Stopped rh74-test + httpd-bundle-1 (192.168.20.189) (ocf::heartbeat:apache): Stopped + httpd-bundle-2 (192.168.20.190) (ocf::heartbeat:apache): Stopped + httpd (ocf::heartbeat:apache): ORPHANED Started httpd-bundle-0 + +Transition Summary: + * Restart httpd-bundle-docker-0 ( rh74-test ) + * Restart httpd-bundle-0 ( rh74-test ) due to required httpd-bundle-docker-0 start + * Start httpd:0 ( httpd-bundle-0 ) + * Start httpd-bundle-ip-192.168.20.189 ( rh74-test ) + * Start httpd-bundle-docker-1 ( rh74-test ) + * Start httpd-bundle-1 ( rh74-test ) + * Start httpd:1 ( httpd-bundle-1 ) + * Start httpd-bundle-ip-192.168.20.190 ( rh74-test ) + * Start httpd-bundle-docker-2 ( rh74-test ) + * Start httpd-bundle-2 ( rh74-test ) + * Start httpd:2 ( httpd-bundle-2 ) + * Stop httpd ( httpd-bundle-0 ) due to node availability + +Executing cluster transition: + * Resource action: httpd-bundle-ip-192.168.20.189 monitor on rh74-test + * Resource action: httpd-bundle-docker-1 monitor on rh74-test + * Resource action: httpd-bundle-ip-192.168.20.190 monitor on rh74-test + * Resource action: httpd-bundle-docker-2 monitor on rh74-test + * Resource action: httpd stop on httpd-bundle-0 + * Pseudo action: httpd-bundle_stop_0 + * Pseudo action: httpd-bundle_start_0 + * Resource action: httpd-bundle-0 stop on rh74-test + * Resource action: httpd-bundle-ip-192.168.20.189 start on rh74-test + * Resource action: httpd-bundle-docker-1 start on rh74-test + * Resource action: httpd-bundle-1 monitor on rh74-test + 
* Resource action: httpd-bundle-ip-192.168.20.190 start on rh74-test + * Resource action: httpd-bundle-docker-2 start on rh74-test + * Resource action: httpd-bundle-2 monitor on rh74-test + * Resource action: httpd-bundle-docker-0 stop on rh74-test + * Resource action: httpd-bundle-docker-0 start on rh74-test + * Resource action: httpd-bundle-docker-0 monitor=60000 on rh74-test + * Resource action: httpd-bundle-0 start on rh74-test + * Resource action: httpd-bundle-0 monitor=30000 on rh74-test + * Resource action: httpd-bundle-ip-192.168.20.189 monitor=60000 on rh74-test + * Resource action: httpd-bundle-docker-1 monitor=60000 on rh74-test + * Resource action: httpd-bundle-1 start on rh74-test + * Resource action: httpd-bundle-ip-192.168.20.190 monitor=60000 on rh74-test + * Resource action: httpd-bundle-docker-2 monitor=60000 on rh74-test + * Resource action: httpd-bundle-2 start on rh74-test + * Resource action: httpd delete on httpd-bundle-0 + * Pseudo action: httpd-bundle_stopped_0 + * Pseudo action: all_stopped + * Resource action: httpd:0 monitor on httpd-bundle-0 + * Pseudo action: httpd-bundle-clone_start_0 + * Resource action: httpd-bundle-1 monitor=30000 on rh74-test + * Resource action: httpd-bundle-2 monitor=30000 on rh74-test + * Resource action: httpd:0 start on httpd-bundle-0 + * Resource action: httpd:1 start on httpd-bundle-1 + * Resource action: httpd:2 start on httpd-bundle-2 + * Pseudo action: httpd-bundle-clone_running_0 + * Pseudo action: httpd-bundle_running_0 + * Resource action: httpd:0 monitor=10000 on httpd-bundle-0 + * Resource action: httpd:1 monitor=10000 on httpd-bundle-1 + * Resource action: httpd:2 monitor=10000 on httpd-bundle-2 + +Revised cluster status: +Online: [ rh74-test ] +Containers: [ httpd-bundle-0:httpd-bundle-docker-0 httpd-bundle-1:httpd-bundle-docker-1 httpd-bundle-2:httpd-bundle-docker-2 ] + + Docker container set: httpd-bundle [pcmktest:http] (unique) + httpd-bundle-0 (192.168.20.188) (ocf::heartbeat:apache): Started 
rh74-test + httpd-bundle-1 (192.168.20.189) (ocf::heartbeat:apache): Started rh74-test + httpd-bundle-2 (192.168.20.190) (ocf::heartbeat:apache): Started rh74-test + diff --git a/pengine/test10/bundle-replicas-change.xml b/pengine/test10/bundle-replicas-change.xml new file mode 100644 index 00000000000..60b148b2ae8 --- /dev/null +++ b/pengine/test10/bundle-replicas-change.xml @@ -0,0 +1,73 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 495b1be3c82607b415a4d6c88a7261b7af0d6669 Mon Sep 17 00:00:00 2001 From: "Gao,Yan" Date: Thu, 22 Mar 2018 13:15:39 +0100 Subject: [PATCH 054/812] Build: spec: Make sure shadow package is installed before adding user and group Only a Requires(pre) can be guaranteed to be installed before the dependent package. --- pacemaker.spec.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pacemaker.spec.in b/pacemaker.spec.in index 7c3ff768ed9..37907b37f23 100644 --- a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -265,7 +265,7 @@ be part of the cluster. 
License: GPLv2+ and LGPLv2+ Summary: Core Pacemaker libraries Group: System Environment/Daemons -Requires: shadow-utils +Requires(pre): shadow-utils %description -n %{name}-libs Pacemaker is an advanced, scalable High-Availability cluster resource From 4dccd85e8760b7c292e7ef256cb0304e1435d1d2 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 14 Mar 2018 15:25:32 -0500 Subject: [PATCH 055/812] Build: spec: be specific about documentation license version --- pacemaker.spec.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pacemaker.spec.in b/pacemaker.spec.in index 37907b37f23..4487b8f04a6 100644 --- a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -354,7 +354,7 @@ Requires: systemd-python Test framework for cluster-related technologies like Pacemaker %package doc -License: CC-BY-SA +License: CC-BY-SA-4.0 Summary: Documentation for Pacemaker Group: Documentation From 0500959a1a61f16dc4347baf45e32616890cc38f Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 22 Mar 2018 11:46:02 -0500 Subject: [PATCH 056/812] Build: resources: avoid use of sh in Makefile Some of the agents require bash, and sh may not be bash. 
Fixes CLBZ#5343 --- extra/resources/ClusterMon | 0 extra/resources/Dummy | 0 extra/resources/HealthCPU | 0 extra/resources/HealthSMART | 0 extra/resources/Makefile.am | 2 +- extra/resources/Stateful | 0 extra/resources/SysInfo | 0 extra/resources/SystemHealth | 0 extra/resources/attribute | 0 extra/resources/controld | 0 extra/resources/ifspeed | 0 extra/resources/pingd | 0 extra/resources/remote | 0 13 files changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 extra/resources/ClusterMon mode change 100644 => 100755 extra/resources/Dummy mode change 100644 => 100755 extra/resources/HealthCPU mode change 100644 => 100755 extra/resources/HealthSMART mode change 100644 => 100755 extra/resources/Stateful mode change 100644 => 100755 extra/resources/SysInfo mode change 100644 => 100755 extra/resources/SystemHealth mode change 100644 => 100755 extra/resources/attribute mode change 100644 => 100755 extra/resources/controld mode change 100644 => 100755 extra/resources/ifspeed mode change 100644 => 100755 extra/resources/pingd mode change 100644 => 100755 extra/resources/remote diff --git a/extra/resources/ClusterMon b/extra/resources/ClusterMon old mode 100644 new mode 100755 diff --git a/extra/resources/Dummy b/extra/resources/Dummy old mode 100644 new mode 100755 diff --git a/extra/resources/HealthCPU b/extra/resources/HealthCPU old mode 100644 new mode 100755 diff --git a/extra/resources/HealthSMART b/extra/resources/HealthSMART old mode 100644 new mode 100755 diff --git a/extra/resources/Makefile.am b/extra/resources/Makefile.am index 0f3a67d7b5b..c84dfdf5880 100644 --- a/extra/resources/Makefile.am +++ b/extra/resources/Makefile.am @@ -49,7 +49,7 @@ man7_MANS = $(ocf_SCRIPTS:%=ocf_pacemaker_%.7) DBOOK_OPTS = --stringparam command.prefix ocf_pacemaker_ --stringparam variable.prefix OCF_RESKEY_ --param man.vol 7 ocf_pacemaker_%.xml: % - $(AM_V_GEN)OCF_FUNCTIONS=/dev/null OCF_ROOT=$(OCF_ROOT_DIR) sh $(abs_builddir)/$< meta-data > $@ + 
$(AM_V_GEN)OCF_FUNCTIONS=/dev/null OCF_ROOT=$(OCF_ROOT_DIR) $(abs_builddir)/$< meta-data > $@ endif diff --git a/extra/resources/Stateful b/extra/resources/Stateful old mode 100644 new mode 100755 diff --git a/extra/resources/SysInfo b/extra/resources/SysInfo old mode 100644 new mode 100755 diff --git a/extra/resources/SystemHealth b/extra/resources/SystemHealth old mode 100644 new mode 100755 diff --git a/extra/resources/attribute b/extra/resources/attribute old mode 100644 new mode 100755 diff --git a/extra/resources/controld b/extra/resources/controld old mode 100644 new mode 100755 diff --git a/extra/resources/ifspeed b/extra/resources/ifspeed old mode 100644 new mode 100755 diff --git a/extra/resources/pingd b/extra/resources/pingd old mode 100644 new mode 100755 diff --git a/extra/resources/remote b/extra/resources/remote old mode 100644 new mode 100755 From 2f9a164407c8e819964ceb8ee96b6e74cae27245 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 13 Mar 2018 15:22:08 -0500 Subject: [PATCH 057/812] Refactor: crmd,libcrmcommon: add conditional for procfs support --- crmd/throttle.c | 23 +++++------------------ include/portability.h | 31 ++++++++++++------------------- 2 files changed, 17 insertions(+), 37 deletions(-) diff --git a/crmd/throttle.c b/crmd/throttle.c index 90ddb909ed5..872b4cdb1e4 100644 --- a/crmd/throttle.c +++ b/crmd/throttle.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2013 Andrew Beekhof + * Copyright 2013-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include @@ -292,15 +281,12 @@ throttle_handle_load(float load, const char *desc, int cores) static enum throttle_state_e throttle_mode(void) { +#if SUPPORT_PROCFS unsigned int cores; float load; float thresholds[4]; enum throttle_state_e mode = throttle_none; -#if defined(ON_BSD) || defined(ON_SOLARIS) - return throttle_none; -#endif - cores = crm_procfs_num_cores(); if(throttle_cib_load(&load)) { float cib_max_cpu = 0.95; @@ -351,6 +337,7 @@ throttle_mode(void) } else if(mode & throttle_low) { return throttle_low; } +#endif // SUPPORT_PROCFS return throttle_none; } diff --git a/include/portability.h b/include/portability.h index 303cfb26f9c..50ba6fe095c 100644 --- a/include/portability.h +++ b/include/portability.h @@ -1,25 +1,11 @@ -#ifndef PORTABILITY_H -# define PORTABILITY_H - /* - * Copyright (C) 2001 Alan Robertson - * This software licensed under the GNU LGPL. - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
+ * Copyright 2001-2018 Alan Robertson * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ +#ifndef PORTABILITY_H +# define PORTABILITY_H # define EOS '\0' # define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) @@ -74,6 +60,13 @@ char *strndup(const char *str, size_t len); # define USE_GNU # endif +// This test could be better, but it covers platforms of interest +# if defined(ON_BSD) || defined(ON_SOLARIS) +# define SUPPORT_PROCFS 0 +# else +# define SUPPORT_PROCFS 1 +# endif + # include # if !GLIB_CHECK_VERSION(2,14,0) From 601c4a401bb2951920ebacf7bc512951fbd4c673 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 13 Mar 2018 15:32:23 -0500 Subject: [PATCH 058/812] Low: mcp: conditionalize procfs usage to avoid unnecessary log messages on OSes that don't support /proc fixes CLBZ#5344 --- mcp/pacemaker.c | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c index 242fd50887d..f57fc258962 100644 --- a/mcp/pacemaker.c +++ b/mcp/pacemaker.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2010 Andrew Beekhof + * Copyright 2010-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include @@ -722,19 +711,20 @@ check_active_before_startup_processes(gpointer user_data) return keep_tracking; } -static bool +static void find_and_track_existing_processes(void) { +#if SUPPORT_PROCFS DIR *dp; struct dirent *entry; - int start_tracker = 0; + bool start_tracker = FALSE; char entry_name[64]; dp = opendir("/proc"); if (!dp) { /* no proc directory to search through */ crm_notice("Can not read /proc directory to track existing components"); - return FALSE; + return; } while ((entry = readdir(dp)) != NULL) { @@ -758,7 +748,7 @@ find_and_track_existing_processes(void) crm_notice("Tracking existing %s process (pid=%d)", name, pid); pcmk_children[i].pid = pid; pcmk_children[i].active_before_startup = TRUE; - start_tracker = 1; + start_tracker = TRUE; break; } } @@ -769,8 +759,9 @@ find_and_track_existing_processes(void) NULL); } closedir(dp); - - return start_tracker; +#else + crm_notice("No procfs support, so skipping check for existing components"); +#endif // SUPPORT_PROCFS } static void @@ -1041,14 +1032,6 @@ main(int argc, char **argv) " Core files are an important diagnositic tool," " please consider enabling them by default."); } -#if 0 - /* system() is not thread-safe, can't call from here - * Actually, it's a pretty hacky way to try and achieve this anyway - */ - if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) { - crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid"); - } -#endif } rc = pcmk_ok; From d2b99041f8b6bcf006163e7407c828f0b9e5c5ca Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 13 Mar 2018 15:57:37 -0500 Subject: [PATCH 059/812] Low: libcrmcommon: conditionalize procfs 
usage to avoid unnecessary log messages on OSes that don't support /proc (CLBZ#5342) --- lib/common/watchdog.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c index a9e51b0c8e9..31edaa50e7f 100644 --- a/lib/common/watchdog.c +++ b/lib/common/watchdog.c @@ -1,6 +1,6 @@ /* - * Copyright (C) 2013 Lars Marowsky-Bree - * 2014 Andrew Beekhof + * Copyright 2013 Lars Marowsky-Bree + * 2014-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. @@ -38,6 +38,7 @@ enum pcmk_panic_flags void sysrq_init(void) { +#if SUPPORT_PROCFS static bool need_init = true; FILE* procf; int c; @@ -50,7 +51,7 @@ sysrq_init(void) procf = fopen(SYSRQ, "r"); if (!procf) { - crm_perror(LOG_ERR, "Cannot open "SYSRQ" for read"); + crm_perror(LOG_WARNING, "Cannot open "SYSRQ" for read"); return; } if (fscanf(procf, "%d", &c) != 1) { @@ -70,24 +71,27 @@ sysrq_init(void) } fprintf(procf, "%d", c); fclose(procf); +#endif // SUPPORT_PROCFS return; } static void sysrq_trigger(char t) { +#if SUPPORT_PROCFS FILE *procf; sysrq_init(); procf = fopen("/proc/sysrq-trigger", "a"); if (!procf) { - crm_perror(LOG_ERR, "Opening sysrq-trigger failed"); + crm_perror(LOG_WARNING, "Opening sysrq-trigger failed"); return; } crm_info("sysrq-trigger: %c", t); fprintf(procf, "%c\n", t); fclose(procf); +#endif // SUPPORT_PROCFS return; } @@ -110,9 +114,10 @@ pcmk_panic_local(void) return; } else if (uid != 0) { +#if SUPPORT_PROCFS /* - * No permissions and no pacemakerd parent to escalate to - * Track down the new pacakerd process and send a signal instead + * No permissions, and no pacemakerd parent to escalate to. + * Track down the new pacemakerd process and send a signal instead. 
*/ union sigval signal_value; @@ -123,6 +128,8 @@ pcmk_panic_local(void) if(ppid > 1 && sigqueue(ppid, SIGQUIT, signal_value) < 0) { crm_perror(LOG_EMERG, "Cannot signal pacemakerd(%d) to panic", ppid); } +#endif // SUPPORT_PROCFS + /* The best we can do now is die */ crm_exit(pcmk_err_panic); return; @@ -226,10 +233,12 @@ pcmk_locate_sbd(void) if(sbd_pid > 0) { crm_trace("SBD detected at pid=%d (file)", sbd_pid); +#if SUPPORT_PROCFS } else { /* Fall back to /proc for systems that support it */ sbd_pid = crm_procfs_pid_of("sbd"); crm_trace("SBD detected at pid=%d (proc)", sbd_pid); +#endif // SUPPORT_PROCFS } if(sbd_pid < 0) { From 252b4df0f3cba4a5f18e1cff3190f21ba87452bb Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 29 Mar 2018 09:58:47 -0500 Subject: [PATCH 060/812] Low: crmd: restore compiling without procfs support regression introduced in 73711ce --- crmd/throttle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crmd/throttle.c b/crmd/throttle.c index 872b4cdb1e4..a0729afe1da 100644 --- a/crmd/throttle.c +++ b/crmd/throttle.c @@ -46,6 +46,7 @@ static float throttle_load_target = 0.0; static GHashTable *throttle_records = NULL; static mainloop_timer_t *throttle_timer = NULL; +#if SUPPORT_PROCFS /*! 
* \internal * \brief Return name of /proc file containing the CIB deamon's load statistics @@ -277,6 +278,7 @@ throttle_handle_load(float load, const char *desc, int cores) return throttle_check_thresholds(load, desc, thresholds); } +#endif static enum throttle_state_e throttle_mode(void) From 191c5be52b1633a8642d28868505a9879b5d5622 Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Tue, 3 Apr 2018 13:56:22 +1000 Subject: [PATCH 061/812] Fix: rhbz#1545449 - Do not perform notifications for events we know wont be executed --- pengine/notif.c | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/pengine/notif.c b/pengine/notif.c index 39d8c72c1c8..7ce8f577446 100644 --- a/pengine/notif.c +++ b/pengine/notif.c @@ -498,12 +498,18 @@ collect_notification_data(resource_t * rsc, gboolean state, gboolean activity, action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE && op->node != NULL) { + task = text2task(op->task); + + if(task == stop_rsc && op->node->details->unclean) { + /* Create one anyway,, some additional noise if op->node cannot be fenced */ + } else if(is_not_set(op->flags, pe_action_runnable)) { + continue; + } entry = calloc(1, sizeof(notify_entry_t)); entry->node = op->node; entry->rsc = rsc; - task = text2task(op->task); switch (task) { case start_rsc: n_data->start = g_list_prepend(n_data->start, entry); @@ -656,8 +662,7 @@ create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t /* Copy notification details into standard ops */ - gIter = rsc->actions; - for (; gIter != NULL; gIter = gIter->next) { + for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE && op->node != NULL) { @@ -676,6 +681,35 @@ create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t } } + switch (task) { + case start_rsc: + 
if(g_list_length(n_data->start) == 0) { + pe_rsc_trace(rsc, "Skipping empty notification for: %s.%s (%s->%s)", + n_data->action, rsc->id, role2text(rsc->role), role2text(rsc->next_role)); + return; + } + break; + case action_promote: + if(g_list_length(n_data->promote) == 0) { + pe_rsc_trace(rsc, "Skipping empty notification for: %s.%s (%s->%s)", + n_data->action, rsc->id, role2text(rsc->role), role2text(rsc->next_role)); + return; + } + break; + case action_demote: + if(g_list_length(n_data->demote) == 0) { + pe_rsc_trace(rsc, "Skipping empty notification for: %s.%s (%s->%s)", + n_data->action, rsc->id, role2text(rsc->role), role2text(rsc->next_role)); + return; + } + break; + default: + /* We cannot do the same for stop_rsc/n_data->stop at it + * might be implied by fencing + */ + break; + } + pe_rsc_trace(rsc, "Creating notifications for: %s.%s (%s->%s)", n_data->action, rsc->id, role2text(rsc->role), role2text(rsc->next_role)); From 9124d820accfca98387ad714881c1ee7372269ae Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 6 Apr 2018 12:39:44 -0500 Subject: [PATCH 062/812] Test: pengine: fix test input from d1894774 for 1.1 --- pengine/test10/bundle-replicas-change.xml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pengine/test10/bundle-replicas-change.xml b/pengine/test10/bundle-replicas-change.xml index 60b148b2ae8..88d3c1c45f9 100644 --- a/pengine/test10/bundle-replicas-change.xml +++ b/pengine/test10/bundle-replicas-change.xml @@ -1,4 +1,4 @@ - + @@ -45,16 +45,16 @@ - - + + - - + + - - + + @@ -63,8 +63,8 @@ - - + + From ef683f4767d75aae7a9483b1c325b13fc39f2821 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 6 Apr 2018 12:53:26 -0500 Subject: [PATCH 063/812] Test: pengine: update regression tests for notification changes --- pengine/test10/a-demote-then-b-migrate.exp | 32 +- pengine/test10/a-promote-then-b-migrate.exp | 16 +- pengine/test10/bug-1572-2.exp | 28 +- pengine/test10/bug-1685.exp | 16 +- 
pengine/test10/bug-5143-ms-shuffle.exp | 16 +- pengine/test10/bug-cl-5168.exp | 16 +- pengine/test10/bug-cl-5212.exp | 12 +- pengine/test10/bug-cl-5247.exp | 12 +- pengine/test10/bug-lf-2153.exp | 12 +- pengine/test10/bug-lf-2317.exp | 16 +- pengine/test10/bundle-order-fencing.dot | 34 -- pengine/test10/bundle-order-fencing.exp | 527 ++++++------------ pengine/test10/bundle-order-fencing.summary | 10 - .../test10/bundle-order-partial-start-2.exp | 8 +- pengine/test10/bundle-order-partial-start.exp | 8 +- pengine/test10/bundle-order-partial-stop.dot | 3 - pengine/test10/bundle-order-partial-stop.exp | 14 +- pengine/test10/bundle-order-startup-clone.dot | 3 - pengine/test10/bundle-order-stop-clone.exp | 20 +- .../test10/bundle-order-stop-on-remote.exp | 44 +- pengine/test10/bundle-order-stop.dot | 3 - pengine/test10/bundle-order-stop.exp | 14 +- pengine/test10/bundle-replicas-change.dot | 14 +- pengine/test10/bundle-replicas-change.exp | 213 +++---- pengine/test10/bundle-replicas-change.summary | 7 +- pengine/test10/colo_master_w_native.exp | 32 +- pengine/test10/colo_slave_w_native.exp | 32 +- pengine/test10/group-dependents.exp | 32 +- pengine/test10/inc10.exp | 28 +- pengine/test10/interleave-pseudo-stop.exp | 12 +- pengine/test10/interleave-stop.exp | 36 +- pengine/test10/master-13.exp | 32 +- pengine/test10/master-demote.exp | 16 +- pengine/test10/master-dependent-ban.exp | 16 +- pengine/test10/master-failed-demote.exp | 36 +- pengine/test10/master-move.exp | 32 +- pengine/test10/master-notify.exp | 24 +- .../test10/master-partially-demoted-group.exp | 32 +- pengine/test10/novell-239082.exp | 36 +- pengine/test10/novell-252693.exp | 36 +- .../one-or-more-unrunnable-instances.exp | 12 +- .../test10/order_constraint_stops_master.dot | 1 - .../test10/order_constraint_stops_master.exp | 10 +- .../test10/order_constraint_stops_slave.dot | 1 - .../test10/order_constraint_stops_slave.exp | 4 +- pengine/test10/probe-0.exp | 8 +- pengine/test10/probe-2.exp | 48 +- 
pengine/test10/remote-recover-all.exp | 8 +- pengine/test10/remote-recover-connection.exp | 8 +- .../test10/remote-recover-no-resources.exp | 8 +- pengine/test10/remote-recover-unknown.exp | 8 +- pengine/test10/remote-recovery.exp | 8 +- 52 files changed, 726 insertions(+), 928 deletions(-) diff --git a/pengine/test10/a-demote-then-b-migrate.exp b/pengine/test10/a-demote-then-b-migrate.exp index 2b12db24a00..e31e1254953 100644 --- a/pengine/test10/a-demote-then-b-migrate.exp +++ b/pengine/test10/a-demote-then-b-migrate.exp @@ -1,7 +1,7 @@ - + @@ -14,7 +14,7 @@ - + @@ -27,7 +27,7 @@ - + @@ -40,7 +40,7 @@ - + @@ -97,7 +97,7 @@ - + @@ -110,7 +110,7 @@ - + @@ -123,7 +123,7 @@ - + @@ -136,7 +136,7 @@ - + @@ -202,10 +202,10 @@ - + - + @@ -235,10 +235,10 @@ - + - + @@ -288,10 +288,10 @@ - + - + @@ -321,10 +321,10 @@ - + - + diff --git a/pengine/test10/a-promote-then-b-migrate.exp b/pengine/test10/a-promote-then-b-migrate.exp index 26cd1b146cd..f5ef735f42f 100644 --- a/pengine/test10/a-promote-then-b-migrate.exp +++ b/pengine/test10/a-promote-then-b-migrate.exp @@ -1,7 +1,7 @@ - + @@ -14,7 +14,7 @@ - + @@ -27,7 +27,7 @@ - + @@ -40,7 +40,7 @@ - + @@ -103,10 +103,10 @@ - + - + @@ -136,10 +136,10 @@ - + - + diff --git a/pengine/test10/bug-1572-2.exp b/pengine/test10/bug-1572-2.exp index a854f629940..58885e27acd 100644 --- a/pengine/test10/bug-1572-2.exp +++ b/pengine/test10/bug-1572-2.exp @@ -1,7 +1,7 @@ - + @@ -14,7 +14,7 @@ - + @@ -27,7 +27,7 @@ - + @@ -53,7 +53,7 @@ - + @@ -66,7 +66,7 @@ - + @@ -79,7 +79,7 @@ - + @@ -92,7 +92,7 @@ - + @@ -127,10 +127,10 @@ - + - + @@ -160,10 +160,10 @@ - + - + @@ -216,7 +216,7 @@ - + @@ -246,10 +246,10 @@ - + - + diff --git a/pengine/test10/bug-1685.exp b/pengine/test10/bug-1685.exp index 8b46500927c..7d6f29b21f4 100644 --- a/pengine/test10/bug-1685.exp +++ b/pengine/test10/bug-1685.exp @@ -1,7 +1,7 @@ - + @@ -14,7 +14,7 @@ - + @@ -40,7 +40,7 @@ - + @@ -53,7 +53,7 @@ - + @@ -88,10 +88,10 @@ - + - + @@ -121,10 +121,10 @@ - + - + diff --git 
a/pengine/test10/bug-5143-ms-shuffle.exp b/pengine/test10/bug-5143-ms-shuffle.exp index e8fb58cf81b..caa14743cb7 100644 --- a/pengine/test10/bug-5143-ms-shuffle.exp +++ b/pengine/test10/bug-5143-ms-shuffle.exp @@ -1,7 +1,7 @@ - + @@ -14,7 +14,7 @@ - + @@ -40,7 +40,7 @@ - + @@ -53,7 +53,7 @@ - + @@ -104,10 +104,10 @@ - + - + @@ -137,10 +137,10 @@ - + - + diff --git a/pengine/test10/bug-cl-5168.exp b/pengine/test10/bug-cl-5168.exp index f4b3b79e073..729c7a64ce8 100644 --- a/pengine/test10/bug-cl-5168.exp +++ b/pengine/test10/bug-cl-5168.exp @@ -1,7 +1,7 @@ - + @@ -14,7 +14,7 @@ - + @@ -40,7 +40,7 @@ - + @@ -53,7 +53,7 @@ - + @@ -104,10 +104,10 @@ - + - + @@ -137,10 +137,10 @@ - + - + diff --git a/pengine/test10/bug-cl-5212.exp b/pengine/test10/bug-cl-5212.exp index 344711b650b..4ee6d8850f3 100644 --- a/pengine/test10/bug-cl-5212.exp +++ b/pengine/test10/bug-cl-5212.exp @@ -41,9 +41,9 @@ - + - + @@ -54,9 +54,9 @@ - + - + @@ -76,7 +76,7 @@ - + @@ -106,7 +106,7 @@ - + diff --git a/pengine/test10/bug-cl-5247.exp b/pengine/test10/bug-cl-5247.exp index 7f55c641cae..c21ed7d15a9 100644 --- a/pengine/test10/bug-cl-5247.exp +++ b/pengine/test10/bug-cl-5247.exp @@ -451,7 +451,7 @@ - + @@ -464,7 +464,7 @@ - + @@ -477,7 +477,7 @@ - + @@ -534,7 +534,7 @@ - + @@ -564,7 +564,7 @@ - + @@ -644,7 +644,7 @@ - + diff --git a/pengine/test10/bug-lf-2153.exp b/pengine/test10/bug-lf-2153.exp index abe155401ab..6b8713c3e89 100644 --- a/pengine/test10/bug-lf-2153.exp +++ b/pengine/test10/bug-lf-2153.exp @@ -1,7 +1,7 @@ - + @@ -27,7 +27,7 @@ - + @@ -40,7 +40,7 @@ - + @@ -62,7 +62,7 @@ - + @@ -92,10 +92,10 @@ - + - + diff --git a/pengine/test10/bug-lf-2317.exp b/pengine/test10/bug-lf-2317.exp index cf84cd50e17..a50bce3c311 100644 --- a/pengine/test10/bug-lf-2317.exp +++ b/pengine/test10/bug-lf-2317.exp @@ -1,7 +1,7 @@ - + @@ -14,7 +14,7 @@ - + @@ -27,7 +27,7 @@ - + @@ -40,7 +40,7 @@ - + @@ -103,10 +103,10 @@ - + - + @@ -136,10 +136,10 @@ - + - + diff --git 
a/pengine/test10/bundle-order-fencing.dot b/pengine/test10/bundle-order-fencing.dot index 980bab4b231..e53a06284bf 100644 --- a/pengine/test10/bundle-order-fencing.dot +++ b/pengine/test10/bundle-order-fencing.dot @@ -114,7 +114,6 @@ digraph "g" { "rabbitmq-bundle-1_monitor_0 controller-2" [ style=bold color="green" fontcolor="black"] "rabbitmq-bundle-2_monitor_0 controller-1" [ style=bold color="green" fontcolor="black"] "rabbitmq-bundle-clone_confirmed-post_notify_running_0" -> "rabbitmq-bundle_running_0" [ style = bold] -"rabbitmq-bundle-clone_confirmed-post_notify_running_0" -> "rabbitmq_monitor_10000 rabbitmq-bundle-0" [ style = dashed] "rabbitmq-bundle-clone_confirmed-post_notify_running_0" [ style=bold color="green" fontcolor="orange"] "rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" -> "all_stopped" [ style = bold] "rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" -> "rabbitmq-bundle-clone_pre_notify_start_0" [ style = bold] @@ -127,17 +126,12 @@ digraph "g" { "rabbitmq-bundle-clone_confirmed-pre_notify_stop_0" -> "rabbitmq-bundle-clone_stop_0" [ style = bold] "rabbitmq-bundle-clone_confirmed-pre_notify_stop_0" [ style=bold color="green" fontcolor="orange"] "rabbitmq-bundle-clone_post_notify_running_0" -> "rabbitmq-bundle-clone_confirmed-post_notify_running_0" [ style = bold] -"rabbitmq-bundle-clone_post_notify_running_0" -> "rabbitmq_post_notify_running_0 rabbitmq-bundle-0" [ style = bold] -"rabbitmq-bundle-clone_post_notify_running_0" -> "rabbitmq_post_notify_running_0 rabbitmq-bundle-1" [ style = bold] -"rabbitmq-bundle-clone_post_notify_running_0" -> "rabbitmq_post_notify_running_0 rabbitmq-bundle-2" [ style = bold] "rabbitmq-bundle-clone_post_notify_running_0" [ style=bold color="green" fontcolor="orange"] "rabbitmq-bundle-clone_post_notify_stopped_0" -> "rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" [ style = bold] "rabbitmq-bundle-clone_post_notify_stopped_0" -> "rabbitmq_post_notify_stonith_0 rabbitmq-bundle-1" [ style = bold] 
"rabbitmq-bundle-clone_post_notify_stopped_0" -> "rabbitmq_post_notify_stonith_0 rabbitmq-bundle-2" [ style = bold] "rabbitmq-bundle-clone_post_notify_stopped_0" [ style=bold color="green" fontcolor="orange"] "rabbitmq-bundle-clone_pre_notify_start_0" -> "rabbitmq-bundle-clone_confirmed-pre_notify_start_0" [ style = bold] -"rabbitmq-bundle-clone_pre_notify_start_0" -> "rabbitmq_pre_notify_start_0 rabbitmq-bundle-1" [ style = bold] -"rabbitmq-bundle-clone_pre_notify_start_0" -> "rabbitmq_pre_notify_start_0 rabbitmq-bundle-2" [ style = bold] "rabbitmq-bundle-clone_pre_notify_start_0" [ style=bold color="green" fontcolor="orange"] "rabbitmq-bundle-clone_pre_notify_stop_0" -> "rabbitmq-bundle-clone_confirmed-pre_notify_stop_0" [ style = bold] "rabbitmq-bundle-clone_pre_notify_stop_0" -> "rabbitmq_pre_notify_stop_0 rabbitmq-bundle-1" [ style = bold] @@ -168,12 +162,6 @@ digraph "g" { "rabbitmq_confirmed-post_notify_stonith_0" -> "all_stopped" [ style = bold] "rabbitmq_confirmed-post_notify_stonith_0" [ style=bold color="green" fontcolor="orange"] "rabbitmq_monitor_10000 rabbitmq-bundle-0" [ style=dashed color="red" fontcolor="black"] -"rabbitmq_post_notify_running_0 rabbitmq-bundle-0" -> "rabbitmq-bundle-clone_confirmed-post_notify_running_0" [ style = bold] -"rabbitmq_post_notify_running_0 rabbitmq-bundle-0" [ style=bold color="green" fontcolor="black"] -"rabbitmq_post_notify_running_0 rabbitmq-bundle-1" -> "rabbitmq-bundle-clone_confirmed-post_notify_running_0" [ style = bold] -"rabbitmq_post_notify_running_0 rabbitmq-bundle-1" [ style=bold color="green" fontcolor="black"] -"rabbitmq_post_notify_running_0 rabbitmq-bundle-2" -> "rabbitmq-bundle-clone_confirmed-post_notify_running_0" [ style = bold] -"rabbitmq_post_notify_running_0 rabbitmq-bundle-2" [ style=bold color="green" fontcolor="black"] "rabbitmq_post_notify_stonith_0 rabbitmq-bundle-1" -> "rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" [ style = bold] "rabbitmq_post_notify_stonith_0 rabbitmq-bundle-1" 
-> "rabbitmq_confirmed-post_notify_stonith_0" [ style = bold] "rabbitmq_post_notify_stonith_0 rabbitmq-bundle-1" [ style=bold color="green" fontcolor="black"] @@ -184,10 +172,6 @@ digraph "g" { "rabbitmq_post_notify_stonith_0" -> "rabbitmq_post_notify_stonith_0 rabbitmq-bundle-1" [ style = bold] "rabbitmq_post_notify_stonith_0" -> "rabbitmq_post_notify_stonith_0 rabbitmq-bundle-2" [ style = bold] "rabbitmq_post_notify_stonith_0" [ style=bold color="green" fontcolor="orange"] -"rabbitmq_pre_notify_start_0 rabbitmq-bundle-1" -> "rabbitmq-bundle-clone_confirmed-pre_notify_start_0" [ style = bold] -"rabbitmq_pre_notify_start_0 rabbitmq-bundle-1" [ style=bold color="green" fontcolor="black"] -"rabbitmq_pre_notify_start_0 rabbitmq-bundle-2" -> "rabbitmq-bundle-clone_confirmed-pre_notify_start_0" [ style = bold] -"rabbitmq_pre_notify_start_0 rabbitmq-bundle-2" [ style=bold color="green" fontcolor="black"] "rabbitmq_pre_notify_stop_0 rabbitmq-bundle-1" -> "rabbitmq-bundle-clone_confirmed-pre_notify_stop_0" [ style = bold] "rabbitmq_pre_notify_stop_0 rabbitmq-bundle-1" [ style=bold color="green" fontcolor="black"] "rabbitmq_pre_notify_stop_0 rabbitmq-bundle-2" -> "rabbitmq-bundle-clone_confirmed-pre_notify_stop_0" [ style = bold] @@ -231,9 +215,6 @@ digraph "g" { "redis-bundle-master_confirmed-post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] "redis-bundle-master_confirmed-post_notify_running_0" -> "redis-bundle-master_pre_notify_promote_0" [ style = bold] "redis-bundle-master_confirmed-post_notify_running_0" -> "redis-bundle_running_0" [ style = bold] -"redis-bundle-master_confirmed-post_notify_running_0" -> "redis_monitor_20000 redis-bundle-1" [ style = bold] -"redis-bundle-master_confirmed-post_notify_running_0" -> "redis_monitor_45000 redis-bundle-0" [ style = dashed] -"redis-bundle-master_confirmed-post_notify_running_0" -> "redis_monitor_60000 redis-bundle-0" [ style = dashed] "redis-bundle-master_confirmed-post_notify_running_0" [ style=bold 
color="green" fontcolor="orange"] "redis-bundle-master_confirmed-post_notify_stopped_0" -> "all_stopped" [ style = bold] "redis-bundle-master_confirmed-post_notify_stopped_0" -> "redis-bundle-master_pre_notify_promote_0" [ style = bold] @@ -271,9 +252,6 @@ digraph "g" { "redis-bundle-master_post_notify_promoted_0" -> "redis_post_notify_promoted_0 redis-bundle-2" [ style = bold] "redis-bundle-master_post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] "redis-bundle-master_post_notify_running_0" -> "redis-bundle-master_confirmed-post_notify_running_0" [ style = bold] -"redis-bundle-master_post_notify_running_0" -> "redis_post_notify_running_0 redis-bundle-0" [ style = bold] -"redis-bundle-master_post_notify_running_0" -> "redis_post_notify_running_0 redis-bundle-1" [ style = bold] -"redis-bundle-master_post_notify_running_0" -> "redis_post_notify_running_0 redis-bundle-2" [ style = bold] "redis-bundle-master_post_notify_running_0" [ style=bold color="green" fontcolor="orange"] "redis-bundle-master_post_notify_stopped_0" -> "redis-bundle-master_confirmed-post_notify_stopped_0" [ style = bold] "redis-bundle-master_post_notify_stopped_0" -> "redis_post_notify_stonith_0 redis-bundle-1" [ style = bold] @@ -289,8 +267,6 @@ digraph "g" { "redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-2" [ style = bold] "redis-bundle-master_pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] "redis-bundle-master_pre_notify_start_0" -> "redis-bundle-master_confirmed-pre_notify_start_0" [ style = bold] -"redis-bundle-master_pre_notify_start_0" -> "redis_pre_notify_start_0 redis-bundle-1" [ style = bold] -"redis-bundle-master_pre_notify_start_0" -> "redis_pre_notify_start_0 redis-bundle-2" [ style = bold] "redis-bundle-master_pre_notify_start_0" [ style=bold color="green" fontcolor="orange"] "redis-bundle-master_pre_notify_stop_0" -> "redis-bundle-master_confirmed-pre_notify_stop_0" [ style = bold] 
"redis-bundle-master_pre_notify_stop_0" -> "redis_pre_notify_stop_0 redis-bundle-1" [ style = bold] @@ -355,12 +331,6 @@ digraph "g" { "redis_post_notify_promoted_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] "redis_post_notify_promoted_0 redis-bundle-2" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] "redis_post_notify_promoted_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] -"redis_post_notify_running_0 redis-bundle-0" -> "redis-bundle-master_confirmed-post_notify_running_0" [ style = bold] -"redis_post_notify_running_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] -"redis_post_notify_running_0 redis-bundle-1" -> "redis-bundle-master_confirmed-post_notify_running_0" [ style = bold] -"redis_post_notify_running_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] -"redis_post_notify_running_0 redis-bundle-2" -> "redis-bundle-master_confirmed-post_notify_running_0" [ style = bold] -"redis_post_notify_running_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] "redis_post_notify_stonith_0 redis-bundle-1" -> "redis-bundle-master_confirmed-post_notify_stopped_0" [ style = bold] "redis_post_notify_stonith_0 redis-bundle-1" -> "redis_confirmed-post_notify_stonith_0" [ style = bold] "redis_post_notify_stonith_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] @@ -381,10 +351,6 @@ digraph "g" { "redis_pre_notify_promote_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] "redis_pre_notify_promote_0 redis-bundle-2" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] "redis_pre_notify_promote_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] -"redis_pre_notify_start_0 redis-bundle-1" -> "redis-bundle-master_confirmed-pre_notify_start_0" [ style = bold] -"redis_pre_notify_start_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] -"redis_pre_notify_start_0 redis-bundle-2" -> 
"redis-bundle-master_confirmed-pre_notify_start_0" [ style = bold] -"redis_pre_notify_start_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] "redis_pre_notify_stop_0 redis-bundle-1" -> "redis-bundle-master_confirmed-pre_notify_stop_0" [ style = bold] "redis_pre_notify_stop_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] "redis_pre_notify_stop_0 redis-bundle-2" -> "redis-bundle-master_confirmed-pre_notify_stop_0" [ style = bold] diff --git a/pengine/test10/bundle-order-fencing.exp b/pengine/test10/bundle-order-fencing.exp index dc4c5c99108..599c299db13 100644 --- a/pengine/test10/bundle-order-fencing.exp +++ b/pengine/test10/bundle-order-fencing.exp @@ -1,18 +1,5 @@ - - - - - - - - - - - - - @@ -30,7 +17,7 @@ - + @@ -42,10 +29,10 @@ - + - + @@ -57,11 +44,11 @@ - + - + - + @@ -70,37 +57,11 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + @@ -112,11 +73,11 @@ - + - + - + @@ -125,37 +86,11 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + @@ -167,7 +102,7 @@ - + @@ -185,7 +120,7 @@ - + @@ -200,7 +135,7 @@ - + @@ -211,14 +146,14 @@ - + - + - + @@ -226,7 +161,7 @@ - + @@ -241,7 +176,7 @@ - + @@ -259,7 +194,7 @@ - + @@ -269,18 +204,9 @@ - - - - - - - - - - + @@ -295,7 +221,7 @@ - + @@ -305,15 +231,9 @@ - - - - - - - + @@ -325,7 +245,7 @@ - + @@ -337,7 +257,7 @@ - + @@ -355,7 +275,7 @@ - + @@ -373,7 +293,7 @@ - + @@ -381,7 +301,7 @@ - + @@ -390,7 +310,7 @@ - + @@ -399,7 +319,7 @@ - + @@ -408,7 +328,7 @@ - + @@ -417,7 +337,7 @@ - + @@ -435,7 +355,7 @@ - + @@ -447,7 +367,7 @@ - + @@ -462,7 +382,7 @@ - + @@ -474,7 +394,7 @@ - + @@ -489,7 +409,7 @@ - + @@ -507,7 +427,7 @@ - + @@ -519,7 +439,7 @@ - + @@ -537,7 +457,7 @@ - + @@ -555,7 +475,7 @@ - + @@ -563,7 +483,7 @@ - + @@ -572,7 +492,7 @@ - + @@ -581,7 +501,7 @@ - + @@ -590,7 +510,7 @@ - + @@ -599,11 +519,11 @@ - + - + - + @@ -612,11 +532,11 @@ - + - + - + @@ -625,20 +545,7 @@ - - - - - - - - - - - - - - + @@ -656,7 +563,7 @@ - + @@ -668,10 +575,10 @@ - + - + @@ 
-686,10 +593,10 @@ - + - + @@ -698,11 +605,11 @@ - + - + - + @@ -711,11 +618,11 @@ - + - + - + @@ -724,11 +631,11 @@ - + - + - + @@ -737,11 +644,11 @@ - + - + - + @@ -750,11 +657,11 @@ - + - + - + @@ -763,37 +670,11 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + @@ -805,7 +686,7 @@ - + @@ -816,9 +697,6 @@ - - - @@ -833,11 +711,11 @@ - + - + @@ -855,7 +733,7 @@ - + @@ -864,7 +742,7 @@ - + @@ -873,11 +751,11 @@ - + - + - + @@ -886,11 +764,11 @@ - + - + - + @@ -899,11 +777,11 @@ - + - + - + @@ -912,11 +790,11 @@ - + - + - + @@ -925,11 +803,11 @@ - + - + - + @@ -938,37 +816,11 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + @@ -980,7 +832,7 @@ - + @@ -991,14 +843,14 @@ - + - + - + @@ -1013,7 +865,7 @@ - + @@ -1024,14 +876,14 @@ - + - + - + @@ -1039,7 +891,7 @@ - + @@ -1054,7 +906,7 @@ - + @@ -1069,7 +921,7 @@ - + @@ -1080,17 +932,17 @@ - + - + - + - + @@ -1105,7 +957,7 @@ - + @@ -1116,17 +968,17 @@ - + - + - + - + @@ -1144,7 +996,7 @@ - + @@ -1156,7 +1008,7 @@ - + @@ -1180,7 +1032,7 @@ - + @@ -1198,7 +1050,7 @@ - + @@ -1213,7 +1065,7 @@ - + @@ -1224,14 +1076,14 @@ - + - + - + @@ -1243,7 +1095,7 @@ - + @@ -1258,7 +1110,7 @@ - + @@ -1279,7 +1131,7 @@ - + @@ -1289,18 +1141,9 @@ - - - - - - - - - - + @@ -1315,7 +1158,7 @@ - + @@ -1325,15 +1168,9 @@ - - - - - - - + @@ -1348,7 +1185,7 @@ - + @@ -1360,7 +1197,7 @@ - + @@ -1381,7 +1218,7 @@ - + @@ -1399,7 +1236,7 @@ - + @@ -1407,7 +1244,7 @@ - + @@ -1416,7 +1253,7 @@ - + @@ -1425,7 +1262,7 @@ - + @@ -1434,7 +1271,7 @@ - + @@ -1443,7 +1280,7 @@ - + @@ -1456,7 +1293,7 @@ - + @@ -1472,7 +1309,7 @@ - + @@ -1487,7 +1324,7 @@ - + @@ -1500,7 +1337,7 @@ - + @@ -1516,7 +1353,7 @@ - + @@ -1531,7 +1368,7 @@ - + @@ -1544,7 +1381,7 @@ - + @@ -1560,7 +1397,7 @@ - + @@ -1575,7 +1412,7 @@ - + @@ -1590,7 +1427,7 @@ - + @@ -1603,7 +1440,7 @@ - + @@ -1616,7 +1453,7 @@ - + @@ -1624,7 +1461,7 @@ - + @@ -1637,7 +1474,7 @@ - + @@ -1650,7 +1487,7 @@ - + @@ -1658,7 +1495,7 @@ - + @@ -1673,7 +1510,7 @@ - + @@ -1688,7 
+1525,7 @@ - + @@ -1703,7 +1540,7 @@ - + @@ -1724,7 +1561,7 @@ - + @@ -1736,7 +1573,7 @@ - + @@ -1744,7 +1581,7 @@ - + @@ -1759,7 +1596,7 @@ - + @@ -1767,7 +1604,7 @@ - + @@ -1779,7 +1616,7 @@ - + @@ -1797,7 +1634,7 @@ - + @@ -1812,7 +1649,7 @@ - + @@ -1824,7 +1661,7 @@ - + @@ -1836,7 +1673,7 @@ - + @@ -1851,7 +1688,7 @@ - + @@ -1866,7 +1703,7 @@ - + @@ -1874,7 +1711,7 @@ - + @@ -1889,7 +1726,7 @@ - + @@ -1901,7 +1738,7 @@ - + @@ -1913,7 +1750,7 @@ - + @@ -1928,7 +1765,7 @@ - + @@ -1943,7 +1780,7 @@ - + @@ -1951,7 +1788,7 @@ - + @@ -1963,7 +1800,7 @@ - + @@ -1971,7 +1808,7 @@ - + @@ -2040,7 +1877,7 @@ - + diff --git a/pengine/test10/bundle-order-fencing.summary b/pengine/test10/bundle-order-fencing.summary index 0457f833ba5..d398a120c64 100644 --- a/pengine/test10/bundle-order-fencing.summary +++ b/pengine/test10/bundle-order-fencing.summary @@ -147,8 +147,6 @@ Executing cluster transition: * Pseudo action: redis-bundle-docker-0_stop_0 * Pseudo action: rabbitmq-bundle_stopped_0 * Pseudo action: rabbitmq_notified_0 - * Resource action: rabbitmq notify on rabbitmq-bundle-1 - * Resource action: rabbitmq notify on rabbitmq-bundle-2 * Pseudo action: rabbitmq-bundle-clone_confirmed-pre_notify_start_0 * Pseudo action: rabbitmq-bundle-clone_start_0 * Pseudo action: galera-bundle-master_running_0 @@ -164,22 +162,14 @@ Executing cluster transition: * Pseudo action: redis-bundle-master_pre_notify_start_0 * Pseudo action: redis-bundle_stopped_0 * Pseudo action: redis-bundle_start_0 - * Resource action: rabbitmq notify on rabbitmq-bundle-0 - * Resource action: rabbitmq notify on rabbitmq-bundle-1 - * Resource action: rabbitmq notify on rabbitmq-bundle-2 * Pseudo action: rabbitmq-bundle-clone_confirmed-post_notify_running_0 * Pseudo action: redis_notified_0 - * Resource action: redis notify on redis-bundle-1 - * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-pre_notify_start_0 * Pseudo action: redis-bundle-master_start_0 * Pseudo 
action: rabbitmq-bundle_running_0 * Pseudo action: all_stopped * Pseudo action: redis-bundle-master_running_0 * Pseudo action: redis-bundle-master_post_notify_running_0 - * Resource action: redis notify on redis-bundle-0 - * Resource action: redis notify on redis-bundle-1 - * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-post_notify_running_0 * Pseudo action: redis-bundle_running_0 * Pseudo action: redis-bundle-master_pre_notify_promote_0 diff --git a/pengine/test10/bundle-order-partial-start-2.exp b/pengine/test10/bundle-order-partial-start-2.exp index afba801dfb4..bf9a0b034f0 100644 --- a/pengine/test10/bundle-order-partial-start-2.exp +++ b/pengine/test10/bundle-order-partial-start-2.exp @@ -315,7 +315,7 @@ - + @@ -328,7 +328,7 @@ - + @@ -379,7 +379,7 @@ - + @@ -409,7 +409,7 @@ - + diff --git a/pengine/test10/bundle-order-partial-start.exp b/pengine/test10/bundle-order-partial-start.exp index cb10bad09a5..8e28f19057b 100644 --- a/pengine/test10/bundle-order-partial-start.exp +++ b/pengine/test10/bundle-order-partial-start.exp @@ -296,7 +296,7 @@ - + @@ -309,7 +309,7 @@ - + @@ -360,7 +360,7 @@ - + @@ -390,7 +390,7 @@ - + diff --git a/pengine/test10/bundle-order-partial-stop.dot b/pengine/test10/bundle-order-partial-stop.dot index 5d0c03956c9..6bad4aba66e 100644 --- a/pengine/test10/bundle-order-partial-stop.dot +++ b/pengine/test10/bundle-order-partial-stop.dot @@ -112,7 +112,6 @@ digraph "g" { "rabbitmq-bundle-0_stop_0 undercloud" -> "do_shutdown undercloud" [ style = bold] "rabbitmq-bundle-0_stop_0 undercloud" -> "rabbitmq-bundle-docker-0_stop_0 undercloud" [ style = bold] "rabbitmq-bundle-0_stop_0 undercloud" [ style=bold color="green" fontcolor="black"] -"rabbitmq-bundle-clone_confirmed-post_notify_running_0" -> "rabbitmq_monitor_10000 rabbitmq-bundle-0" [ style = dashed] "rabbitmq-bundle-clone_confirmed-post_notify_running_0" [ style=dashed color="red" fontcolor="orange"] 
"rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" -> "all_stopped" [ style = bold] "rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" -> "rabbitmq-bundle-clone_pre_notify_start_0" [ style = dashed] @@ -178,8 +177,6 @@ digraph "g" { "redis-bundle-master_confirmed-post_notify_demoted_0" -> "redis_monitor_45000 redis-bundle-0" [ style = dashed] "redis-bundle-master_confirmed-post_notify_demoted_0" -> "redis_monitor_60000 redis-bundle-0" [ style = dashed] "redis-bundle-master_confirmed-post_notify_demoted_0" [ style=bold color="green" fontcolor="orange"] -"redis-bundle-master_confirmed-post_notify_running_0" -> "redis_monitor_45000 redis-bundle-0" [ style = dashed] -"redis-bundle-master_confirmed-post_notify_running_0" -> "redis_monitor_60000 redis-bundle-0" [ style = dashed] "redis-bundle-master_confirmed-post_notify_running_0" [ style=dashed color="red" fontcolor="orange"] "redis-bundle-master_confirmed-post_notify_stopped_0" -> "all_stopped" [ style = bold] "redis-bundle-master_confirmed-post_notify_stopped_0" -> "redis-bundle-master_pre_notify_start_0" [ style = dashed] diff --git a/pengine/test10/bundle-order-partial-stop.exp b/pengine/test10/bundle-order-partial-stop.exp index 611995037cd..89d87aa9f67 100644 --- a/pengine/test10/bundle-order-partial-stop.exp +++ b/pengine/test10/bundle-order-partial-stop.exp @@ -3,7 +3,7 @@ - + @@ -16,7 +16,7 @@ - + @@ -280,7 +280,7 @@ - + @@ -293,7 +293,7 @@ - + @@ -306,7 +306,7 @@ - + @@ -319,7 +319,7 @@ - + @@ -338,7 +338,7 @@ - + diff --git a/pengine/test10/bundle-order-startup-clone.dot b/pengine/test10/bundle-order-startup-clone.dot index 8426bd22168..a23a2feb940 100644 --- a/pengine/test10/bundle-order-startup-clone.dot +++ b/pengine/test10/bundle-order-startup-clone.dot @@ -118,9 +118,6 @@ digraph "g" { "redis:0_start_0 redis-bundle-0" -> "redis:0_monitor_60000 redis-bundle-0" [ style = bold] "redis:0_start_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] 
"storage-clone_confirmed-post_notify_running_0" -> "galera-bundle_start_0" [ style = dashed] -"storage-clone_confirmed-post_notify_running_0" -> "storage:0_monitor_30000 metal-1" [ style = dashed] -"storage-clone_confirmed-post_notify_running_0" -> "storage:1_monitor_30000 metal-2" [ style = dashed] -"storage-clone_confirmed-post_notify_running_0" -> "storage:2_monitor_30000 metal-3" [ style = dashed] "storage-clone_confirmed-post_notify_running_0" [ style=dashed color="red" fontcolor="orange"] "storage-clone_confirmed-pre_notify_start_0" -> "storage-clone_post_notify_running_0" [ style = dashed] "storage-clone_confirmed-pre_notify_start_0" -> "storage-clone_start_0" [ style = dashed] diff --git a/pengine/test10/bundle-order-stop-clone.exp b/pengine/test10/bundle-order-stop-clone.exp index 3e3fe779139..3e66f54bf34 100644 --- a/pengine/test10/bundle-order-stop-clone.exp +++ b/pengine/test10/bundle-order-stop-clone.exp @@ -1,7 +1,7 @@ - + @@ -27,7 +27,7 @@ - + @@ -40,7 +40,7 @@ - + @@ -53,7 +53,7 @@ - + @@ -66,7 +66,7 @@ - + @@ -88,10 +88,10 @@ - + - + @@ -121,13 +121,13 @@ - + - + - + diff --git a/pengine/test10/bundle-order-stop-on-remote.exp b/pengine/test10/bundle-order-stop-on-remote.exp index db5386b1f39..96588dc9998 100644 --- a/pengine/test10/bundle-order-stop-on-remote.exp +++ b/pengine/test10/bundle-order-stop-on-remote.exp @@ -588,7 +588,7 @@ - + @@ -601,7 +601,7 @@ - + @@ -614,7 +614,7 @@ - + @@ -627,7 +627,7 @@ - + @@ -675,7 +675,7 @@ - + @@ -688,7 +688,7 @@ - + @@ -701,7 +701,7 @@ - + @@ -780,7 +780,7 @@ - + @@ -793,7 +793,7 @@ - + @@ -806,7 +806,7 @@ - + @@ -819,7 +819,7 @@ - + @@ -841,13 +841,13 @@ - + - + - + @@ -877,13 +877,13 @@ - + - + - + @@ -940,13 +940,13 @@ - + - + - + @@ -976,10 +976,10 @@ - + - + diff --git a/pengine/test10/bundle-order-stop.dot b/pengine/test10/bundle-order-stop.dot index 5d0c03956c9..6bad4aba66e 100644 --- a/pengine/test10/bundle-order-stop.dot +++ b/pengine/test10/bundle-order-stop.dot @@ -112,7 +112,6 @@ digraph "g" { 
"rabbitmq-bundle-0_stop_0 undercloud" -> "do_shutdown undercloud" [ style = bold] "rabbitmq-bundle-0_stop_0 undercloud" -> "rabbitmq-bundle-docker-0_stop_0 undercloud" [ style = bold] "rabbitmq-bundle-0_stop_0 undercloud" [ style=bold color="green" fontcolor="black"] -"rabbitmq-bundle-clone_confirmed-post_notify_running_0" -> "rabbitmq_monitor_10000 rabbitmq-bundle-0" [ style = dashed] "rabbitmq-bundle-clone_confirmed-post_notify_running_0" [ style=dashed color="red" fontcolor="orange"] "rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" -> "all_stopped" [ style = bold] "rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" -> "rabbitmq-bundle-clone_pre_notify_start_0" [ style = dashed] @@ -178,8 +177,6 @@ digraph "g" { "redis-bundle-master_confirmed-post_notify_demoted_0" -> "redis_monitor_45000 redis-bundle-0" [ style = dashed] "redis-bundle-master_confirmed-post_notify_demoted_0" -> "redis_monitor_60000 redis-bundle-0" [ style = dashed] "redis-bundle-master_confirmed-post_notify_demoted_0" [ style=bold color="green" fontcolor="orange"] -"redis-bundle-master_confirmed-post_notify_running_0" -> "redis_monitor_45000 redis-bundle-0" [ style = dashed] -"redis-bundle-master_confirmed-post_notify_running_0" -> "redis_monitor_60000 redis-bundle-0" [ style = dashed] "redis-bundle-master_confirmed-post_notify_running_0" [ style=dashed color="red" fontcolor="orange"] "redis-bundle-master_confirmed-post_notify_stopped_0" -> "all_stopped" [ style = bold] "redis-bundle-master_confirmed-post_notify_stopped_0" -> "redis-bundle-master_pre_notify_start_0" [ style = dashed] diff --git a/pengine/test10/bundle-order-stop.exp b/pengine/test10/bundle-order-stop.exp index 611995037cd..89d87aa9f67 100644 --- a/pengine/test10/bundle-order-stop.exp +++ b/pengine/test10/bundle-order-stop.exp @@ -3,7 +3,7 @@ - + @@ -16,7 +16,7 @@ - + @@ -280,7 +280,7 @@ - + @@ -293,7 +293,7 @@ - + @@ -306,7 +306,7 @@ - + @@ -319,7 +319,7 @@ - + @@ -338,7 +338,7 @@ - + diff --git 
a/pengine/test10/bundle-replicas-change.dot b/pengine/test10/bundle-replicas-change.dot index be8554cdba3..fc6ecbad33a 100644 --- a/pengine/test10/bundle-replicas-change.dot +++ b/pengine/test10/bundle-replicas-change.dot @@ -1,27 +1,29 @@ digraph "g" { +"Cancel httpd-bundle-0_monitor_30000 rh74-test" [ style=bold color="green" fontcolor="black"] "all_stopped" [ style=bold color="green" fontcolor="orange"] -"httpd-bundle-0_monitor_30000 rh74-test" [ style=bold color="green" fontcolor="black"] -"httpd-bundle-0_start_0 rh74-test" -> "httpd-bundle-0_monitor_30000 rh74-test" [ style = bold] +"httpd-bundle-0_monitor_60000 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-0_start_0 rh74-test" -> "httpd-bundle-0_monitor_60000 rh74-test" [ style = bold] "httpd-bundle-0_start_0 rh74-test" -> "httpd:0_monitor_0 httpd-bundle-0" [ style = bold] "httpd-bundle-0_start_0 rh74-test" -> "httpd:0_monitor_10000 httpd-bundle-0" [ style = bold] "httpd-bundle-0_start_0 rh74-test" -> "httpd:0_start_0 httpd-bundle-0" [ style = bold] "httpd-bundle-0_start_0 rh74-test" -> "httpd_delete_0 httpd-bundle-0" [ style = bold] "httpd-bundle-0_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-0_stop_0 rh74-test" -> "Cancel httpd-bundle-0_monitor_30000 rh74-test" [ style = bold] "httpd-bundle-0_stop_0 rh74-test" -> "all_stopped" [ style = bold] "httpd-bundle-0_stop_0 rh74-test" -> "httpd-bundle-0_start_0 rh74-test" [ style = bold] "httpd-bundle-0_stop_0 rh74-test" -> "httpd-bundle-docker-0_stop_0 rh74-test" [ style = bold] "httpd-bundle-0_stop_0 rh74-test" [ style=bold color="green" fontcolor="black"] "httpd-bundle-1_monitor_0 rh74-test" -> "httpd-bundle-1_start_0 rh74-test" [ style = bold] "httpd-bundle-1_monitor_0 rh74-test" [ style=bold color="green" fontcolor="black"] -"httpd-bundle-1_monitor_30000 rh74-test" [ style=bold color="green" fontcolor="black"] -"httpd-bundle-1_start_0 rh74-test" -> "httpd-bundle-1_monitor_30000 rh74-test" [ style = 
bold] +"httpd-bundle-1_monitor_60000 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-1_start_0 rh74-test" -> "httpd-bundle-1_monitor_60000 rh74-test" [ style = bold] "httpd-bundle-1_start_0 rh74-test" -> "httpd:1_monitor_10000 httpd-bundle-1" [ style = bold] "httpd-bundle-1_start_0 rh74-test" -> "httpd:1_start_0 httpd-bundle-1" [ style = bold] "httpd-bundle-1_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] "httpd-bundle-2_monitor_0 rh74-test" -> "httpd-bundle-2_start_0 rh74-test" [ style = bold] "httpd-bundle-2_monitor_0 rh74-test" [ style=bold color="green" fontcolor="black"] -"httpd-bundle-2_monitor_30000 rh74-test" [ style=bold color="green" fontcolor="black"] -"httpd-bundle-2_start_0 rh74-test" -> "httpd-bundle-2_monitor_30000 rh74-test" [ style = bold] +"httpd-bundle-2_monitor_60000 rh74-test" [ style=bold color="green" fontcolor="black"] +"httpd-bundle-2_start_0 rh74-test" -> "httpd-bundle-2_monitor_60000 rh74-test" [ style = bold] "httpd-bundle-2_start_0 rh74-test" -> "httpd:2_monitor_10000 httpd-bundle-2" [ style = bold] "httpd-bundle-2_start_0 rh74-test" -> "httpd:2_start_0 httpd-bundle-2" [ style = bold] "httpd-bundle-2_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] diff --git a/pengine/test10/bundle-replicas-change.exp b/pengine/test10/bundle-replicas-change.exp index 138dfcd1fa8..e7e68214586 100644 --- a/pengine/test10/bundle-replicas-change.exp +++ b/pengine/test10/bundle-replicas-change.exp @@ -1,7 +1,7 @@ - + @@ -11,13 +11,13 @@ - + - + @@ -30,7 +30,7 @@ - + @@ -49,104 +49,104 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -161,7 +161,7 @@ - + @@ -177,7 +177,7 @@ - + @@ -193,7 +193,7 @@ - + @@ -211,6 +211,19 @@ + + + + + + + + + + + + + @@ -226,7 +239,7 @@ - + @@ -238,39 +251,39 @@ - + - + - + - + - + - + - + - + - + - + @@ -281,7 +294,7 @@ - + @@ -290,22 +303,22 @@ - + - + - + - + - + @@ -315,14 +328,14 @@ - + - + - + @@ -331,22 +344,22 @@ - + - 
+ - + - + - + - + @@ -356,39 +369,39 @@ - + - + - + - + - + - + - + - + - + @@ -399,7 +412,7 @@ - + @@ -408,22 +421,22 @@ - + - + - + - + - + @@ -433,14 +446,14 @@ - + - + - + @@ -449,22 +462,22 @@ - + - + - + - + - + - + @@ -474,33 +487,33 @@ - + - + - + - + - + - + - + @@ -512,13 +525,13 @@ - + - + - + @@ -528,17 +541,17 @@ - + - + - + - + @@ -547,25 +560,25 @@ - + - + - + - + - + - + @@ -579,7 +592,7 @@ - + diff --git a/pengine/test10/bundle-replicas-change.summary b/pengine/test10/bundle-replicas-change.summary index 05b1bb47a79..01e80f3d943 100644 --- a/pengine/test10/bundle-replicas-change.summary +++ b/pengine/test10/bundle-replicas-change.summary @@ -32,6 +32,7 @@ Executing cluster transition: * Pseudo action: httpd-bundle_stop_0 * Pseudo action: httpd-bundle_start_0 * Resource action: httpd-bundle-0 stop on rh74-test + * Resource action: httpd-bundle-0 cancel=30000 on rh74-test * Resource action: httpd-bundle-ip-192.168.20.189 start on rh74-test * Resource action: httpd-bundle-docker-1 start on rh74-test * Resource action: httpd-bundle-1 monitor on rh74-test @@ -42,7 +43,6 @@ Executing cluster transition: * Resource action: httpd-bundle-docker-0 start on rh74-test * Resource action: httpd-bundle-docker-0 monitor=60000 on rh74-test * Resource action: httpd-bundle-0 start on rh74-test - * Resource action: httpd-bundle-0 monitor=30000 on rh74-test * Resource action: httpd-bundle-ip-192.168.20.189 monitor=60000 on rh74-test * Resource action: httpd-bundle-docker-1 monitor=60000 on rh74-test * Resource action: httpd-bundle-1 start on rh74-test @@ -54,8 +54,9 @@ Executing cluster transition: * Pseudo action: all_stopped * Resource action: httpd:0 monitor on httpd-bundle-0 * Pseudo action: httpd-bundle-clone_start_0 - * Resource action: httpd-bundle-1 monitor=30000 on rh74-test - * Resource action: httpd-bundle-2 monitor=30000 on rh74-test + * Resource action: httpd-bundle-0 monitor=60000 on rh74-test + * Resource action: httpd-bundle-1 monitor=60000 on rh74-test 
+ * Resource action: httpd-bundle-2 monitor=60000 on rh74-test * Resource action: httpd:0 start on httpd-bundle-0 * Resource action: httpd:1 start on httpd-bundle-1 * Resource action: httpd:2 start on httpd-bundle-2 diff --git a/pengine/test10/colo_master_w_native.exp b/pengine/test10/colo_master_w_native.exp index 28038797d5c..ffa773fe4bb 100644 --- a/pengine/test10/colo_master_w_native.exp +++ b/pengine/test10/colo_master_w_native.exp @@ -1,7 +1,7 @@ - + @@ -14,7 +14,7 @@ - + @@ -27,7 +27,7 @@ - + @@ -40,7 +40,7 @@ - + @@ -85,7 +85,7 @@ - + @@ -98,7 +98,7 @@ - + @@ -111,7 +111,7 @@ - + @@ -124,7 +124,7 @@ - + @@ -171,10 +171,10 @@ - + - + @@ -204,10 +204,10 @@ - + - + @@ -257,10 +257,10 @@ - + - + @@ -290,10 +290,10 @@ - + - + diff --git a/pengine/test10/colo_slave_w_native.exp b/pengine/test10/colo_slave_w_native.exp index e5ec984666c..a548923ca41 100644 --- a/pengine/test10/colo_slave_w_native.exp +++ b/pengine/test10/colo_slave_w_native.exp @@ -36,7 +36,7 @@ - + @@ -49,7 +49,7 @@ - + @@ -62,7 +62,7 @@ - + @@ -75,7 +75,7 @@ - + @@ -120,7 +120,7 @@ - + @@ -133,7 +133,7 @@ - + @@ -146,7 +146,7 @@ - + @@ -159,7 +159,7 @@ - + @@ -206,10 +206,10 @@ - + - + @@ -239,10 +239,10 @@ - + - + @@ -292,10 +292,10 @@ - + - + @@ -325,10 +325,10 @@ - + - + diff --git a/pengine/test10/group-dependents.exp b/pengine/test10/group-dependents.exp index 607e9d711d9..0df62f0b0d6 100644 --- a/pengine/test10/group-dependents.exp +++ b/pengine/test10/group-dependents.exp @@ -1262,7 +1262,7 @@ - + @@ -1275,7 +1275,7 @@ - + @@ -1288,7 +1288,7 @@ - + @@ -1301,7 +1301,7 @@ - + @@ -1346,7 +1346,7 @@ - + @@ -1359,7 +1359,7 @@ - + @@ -1372,7 +1372,7 @@ - + @@ -1385,7 +1385,7 @@ - + @@ -1432,10 +1432,10 @@ - + - + @@ -1465,10 +1465,10 @@ - + - + @@ -1521,10 +1521,10 @@ - + - + @@ -1554,10 +1554,10 @@ - + - + diff --git a/pengine/test10/inc10.exp b/pengine/test10/inc10.exp index 37792872aee..742d1242eb6 100644 --- a/pengine/test10/inc10.exp +++ b/pengine/test10/inc10.exp @@ -37,7 +37,7 @@ - + @@ 
-50,7 +50,7 @@ - + @@ -63,7 +63,7 @@ - + @@ -89,7 +89,7 @@ - + @@ -102,7 +102,7 @@ - + @@ -115,7 +115,7 @@ - + @@ -128,7 +128,7 @@ - + @@ -150,13 +150,13 @@ - + - + - + @@ -186,16 +186,16 @@ - + - + - + - + diff --git a/pengine/test10/interleave-pseudo-stop.exp b/pengine/test10/interleave-pseudo-stop.exp index 63217e8c1b0..97faf4624ca 100644 --- a/pengine/test10/interleave-pseudo-stop.exp +++ b/pengine/test10/interleave-pseudo-stop.exp @@ -43,7 +43,7 @@ - + @@ -159,7 +159,7 @@ - + @@ -209,7 +209,7 @@ - + @@ -319,7 +319,7 @@ - + @@ -363,7 +363,7 @@ - + @@ -473,7 +473,7 @@ - + diff --git a/pengine/test10/interleave-stop.exp b/pengine/test10/interleave-stop.exp index a4c524f762f..35a842832c2 100644 --- a/pengine/test10/interleave-stop.exp +++ b/pengine/test10/interleave-stop.exp @@ -37,7 +37,7 @@ - + @@ -50,7 +50,7 @@ - + @@ -63,7 +63,7 @@ - + @@ -104,7 +104,7 @@ - + @@ -134,10 +134,10 @@ - + - + @@ -184,7 +184,7 @@ - + @@ -197,7 +197,7 @@ - + @@ -210,7 +210,7 @@ - + @@ -245,7 +245,7 @@ - + @@ -275,10 +275,10 @@ - + - + @@ -319,7 +319,7 @@ - + @@ -332,7 +332,7 @@ - + @@ -345,7 +345,7 @@ - + @@ -380,7 +380,7 @@ - + @@ -410,10 +410,10 @@ - + - + diff --git a/pengine/test10/master-13.exp b/pengine/test10/master-13.exp index 1e09ee1557e..94b8d59e7b3 100644 --- a/pengine/test10/master-13.exp +++ b/pengine/test10/master-13.exp @@ -1,7 +1,7 @@ - + @@ -14,7 +14,7 @@ - + @@ -27,7 +27,7 @@ - + @@ -40,7 +40,7 @@ - + @@ -97,7 +97,7 @@ - + @@ -110,7 +110,7 @@ - + @@ -123,7 +123,7 @@ - + @@ -136,7 +136,7 @@ - + @@ -202,10 +202,10 @@ - + - + @@ -235,10 +235,10 @@ - + - + @@ -288,10 +288,10 @@ - + - + @@ -321,10 +321,10 @@ - + - + diff --git a/pengine/test10/master-demote.exp b/pengine/test10/master-demote.exp index 8909056bd1f..91cd0bcda69 100644 --- a/pengine/test10/master-demote.exp +++ b/pengine/test10/master-demote.exp @@ -23,7 +23,7 @@ - + @@ -36,7 +36,7 @@ - + @@ -49,7 +49,7 @@ - + @@ -62,7 +62,7 @@ - + @@ -113,10 +113,10 @@ - + - + @@ -146,10 +146,10 @@ - + - + diff --git 
a/pengine/test10/master-dependent-ban.exp b/pengine/test10/master-dependent-ban.exp index 99a6e946c94..caea6e33dd4 100644 --- a/pengine/test10/master-dependent-ban.exp +++ b/pengine/test10/master-dependent-ban.exp @@ -1,7 +1,7 @@ - + @@ -14,7 +14,7 @@ - + @@ -56,7 +56,7 @@ - + @@ -69,7 +69,7 @@ - + @@ -104,10 +104,10 @@ - + - + @@ -137,10 +137,10 @@ - + - + diff --git a/pengine/test10/master-failed-demote.exp b/pengine/test10/master-failed-demote.exp index 1d27dc561f7..7cbfdfa1dbe 100644 --- a/pengine/test10/master-failed-demote.exp +++ b/pengine/test10/master-failed-demote.exp @@ -28,7 +28,7 @@ - + @@ -81,7 +81,7 @@ - + @@ -94,7 +94,7 @@ - + @@ -107,7 +107,7 @@ - + @@ -120,7 +120,7 @@ - + @@ -177,7 +177,7 @@ - + @@ -190,7 +190,7 @@ - + @@ -203,7 +203,7 @@ - + @@ -216,7 +216,7 @@ - + @@ -285,10 +285,10 @@ - + - + @@ -318,10 +318,10 @@ - + - + @@ -375,10 +375,10 @@ - + - + @@ -408,13 +408,13 @@ - + - + - + diff --git a/pengine/test10/master-move.exp b/pengine/test10/master-move.exp index e77005f8d8c..536e5835773 100644 --- a/pengine/test10/master-move.exp +++ b/pengine/test10/master-move.exp @@ -182,7 +182,7 @@ - + @@ -195,7 +195,7 @@ - + @@ -208,7 +208,7 @@ - + @@ -221,7 +221,7 @@ - + @@ -278,7 +278,7 @@ - + @@ -291,7 +291,7 @@ - + @@ -304,7 +304,7 @@ - + @@ -317,7 +317,7 @@ - + @@ -383,10 +383,10 @@ - + - + @@ -416,10 +416,10 @@ - + - + @@ -472,10 +472,10 @@ - + - + @@ -505,10 +505,10 @@ - + - + diff --git a/pengine/test10/master-notify.exp b/pengine/test10/master-notify.exp index 0f6295c7003..7a73136a4a3 100644 --- a/pengine/test10/master-notify.exp +++ b/pengine/test10/master-notify.exp @@ -1,7 +1,7 @@ - + @@ -14,7 +14,7 @@ - + @@ -40,7 +40,7 @@ - + @@ -53,7 +53,7 @@ - + @@ -66,7 +66,7 @@ - + @@ -79,7 +79,7 @@ - + @@ -101,13 +101,13 @@ - + - + - + @@ -137,13 +137,13 @@ - + - + - + diff --git a/pengine/test10/master-partially-demoted-group.exp b/pengine/test10/master-partially-demoted-group.exp index fecf3c5ab8c..32a30fe15fc 100644 --- 
a/pengine/test10/master-partially-demoted-group.exp +++ b/pengine/test10/master-partially-demoted-group.exp @@ -493,7 +493,7 @@ - + @@ -506,7 +506,7 @@ - + @@ -519,7 +519,7 @@ - + @@ -532,7 +532,7 @@ - + @@ -577,7 +577,7 @@ - + @@ -590,7 +590,7 @@ - + @@ -603,7 +603,7 @@ - + @@ -616,7 +616,7 @@ - + @@ -670,10 +670,10 @@ - + - + @@ -703,10 +703,10 @@ - + - + @@ -759,10 +759,10 @@ - + - + @@ -792,10 +792,10 @@ - + - + diff --git a/pengine/test10/novell-239082.exp b/pengine/test10/novell-239082.exp index 94536565ae2..c6d0221c6f8 100644 --- a/pengine/test10/novell-239082.exp +++ b/pengine/test10/novell-239082.exp @@ -26,7 +26,7 @@ - + @@ -39,7 +39,7 @@ - + @@ -52,7 +52,7 @@ - + @@ -65,7 +65,7 @@ - + @@ -78,7 +78,7 @@ - + @@ -91,7 +91,7 @@ - + @@ -117,7 +117,7 @@ - + @@ -130,7 +130,7 @@ - + @@ -143,7 +143,7 @@ - + @@ -194,10 +194,10 @@ - + - + @@ -227,10 +227,10 @@ - + - + @@ -283,7 +283,7 @@ - + @@ -313,7 +313,7 @@ - + @@ -373,7 +373,7 @@ - + @@ -403,10 +403,10 @@ - + - + diff --git a/pengine/test10/novell-252693.exp b/pengine/test10/novell-252693.exp index 43da979e870..82486d2a6ef 100644 --- a/pengine/test10/novell-252693.exp +++ b/pengine/test10/novell-252693.exp @@ -77,7 +77,7 @@ - + @@ -90,7 +90,7 @@ - + @@ -103,7 +103,7 @@ - + @@ -138,7 +138,7 @@ - + @@ -168,10 +168,10 @@ - + - + @@ -218,7 +218,7 @@ - + @@ -231,7 +231,7 @@ - + @@ -244,7 +244,7 @@ - + @@ -279,7 +279,7 @@ - + @@ -309,10 +309,10 @@ - + - + @@ -353,7 +353,7 @@ - + @@ -366,7 +366,7 @@ - + @@ -379,7 +379,7 @@ - + @@ -414,7 +414,7 @@ - + @@ -444,10 +444,10 @@ - + - + diff --git a/pengine/test10/one-or-more-unrunnable-instances.exp b/pengine/test10/one-or-more-unrunnable-instances.exp index 45ae3b6381c..ff156a4d872 100644 --- a/pengine/test10/one-or-more-unrunnable-instances.exp +++ b/pengine/test10/one-or-more-unrunnable-instances.exp @@ -4300,7 +4300,7 @@ - + @@ -4345,7 +4345,7 @@ - + @@ -4390,7 +4390,7 @@ - + @@ -4444,13 +4444,13 @@ - + - + - + diff --git 
a/pengine/test10/order_constraint_stops_master.dot b/pengine/test10/order_constraint_stops_master.dot index b2ebc9f5986..359d3fe14e2 100644 --- a/pengine/test10/order_constraint_stops_master.dot +++ b/pengine/test10/order_constraint_stops_master.dot @@ -3,7 +3,6 @@ "MASTER_RSC_A_confirmed-post_notify_demoted_0" -> "MASTER_RSC_A_pre_notify_stop_0" [ style = bold] "MASTER_RSC_A_confirmed-post_notify_demoted_0" -> "NATIVE_RSC_A:0_monitor_20000 fc16-builder" [ style = dashed] "MASTER_RSC_A_confirmed-post_notify_demoted_0" [ style=bold color="green" fontcolor="orange"] -"MASTER_RSC_A_confirmed-post_notify_running_0" -> "NATIVE_RSC_A:0_monitor_20000 fc16-builder" [ style = dashed] "MASTER_RSC_A_confirmed-post_notify_running_0" [ style=dashed color="red" fontcolor="orange"] "MASTER_RSC_A_confirmed-post_notify_stopped_0" -> "MASTER_RSC_A_pre_notify_start_0" [ style = dashed] "MASTER_RSC_A_confirmed-post_notify_stopped_0" -> "NATIVE_RSC_B_stop_0 fc16-builder2" [ style = bold] diff --git a/pengine/test10/order_constraint_stops_master.exp b/pengine/test10/order_constraint_stops_master.exp index 551e1864967..22355bc0ba0 100644 --- a/pengine/test10/order_constraint_stops_master.exp +++ b/pengine/test10/order_constraint_stops_master.exp @@ -3,7 +3,7 @@ - + @@ -16,7 +16,7 @@ - + @@ -29,7 +29,7 @@ - + @@ -42,7 +42,7 @@ - + @@ -58,7 +58,7 @@ - + diff --git a/pengine/test10/order_constraint_stops_slave.dot b/pengine/test10/order_constraint_stops_slave.dot index 90e13115279..b7a48eb5b41 100644 --- a/pengine/test10/order_constraint_stops_slave.dot +++ b/pengine/test10/order_constraint_stops_slave.dot @@ -1,5 +1,4 @@ digraph "g" { -"MASTER_RSC_A_confirmed-post_notify_running_0" -> "NATIVE_RSC_A:0_monitor_30000 fc16-builder" [ style = dashed] "MASTER_RSC_A_confirmed-post_notify_running_0" [ style=dashed color="red" fontcolor="orange"] "MASTER_RSC_A_confirmed-post_notify_stopped_0" -> "MASTER_RSC_A_pre_notify_start_0" [ style = dashed] "MASTER_RSC_A_confirmed-post_notify_stopped_0" -> 
"NATIVE_RSC_B_stop_0 fc16-builder" [ style = bold] diff --git a/pengine/test10/order_constraint_stops_slave.exp b/pengine/test10/order_constraint_stops_slave.exp index 1a7d5da5b17..96c7a7272d4 100644 --- a/pengine/test10/order_constraint_stops_slave.exp +++ b/pengine/test10/order_constraint_stops_slave.exp @@ -3,7 +3,7 @@ - + @@ -16,7 +16,7 @@ - + diff --git a/pengine/test10/probe-0.exp b/pengine/test10/probe-0.exp index fe866a4dfcd..d65acb688ee 100644 --- a/pengine/test10/probe-0.exp +++ b/pengine/test10/probe-0.exp @@ -1,7 +1,7 @@ - + @@ -36,7 +36,7 @@ - + @@ -80,10 +80,10 @@ - + - + diff --git a/pengine/test10/probe-2.exp b/pengine/test10/probe-2.exp index e8fbcffd040..72cc32ac167 100644 --- a/pengine/test10/probe-2.exp +++ b/pengine/test10/probe-2.exp @@ -1,7 +1,7 @@ - + @@ -14,7 +14,7 @@ - + @@ -27,7 +27,7 @@ - + @@ -40,7 +40,7 @@ - + @@ -53,7 +53,7 @@ - + @@ -66,7 +66,7 @@ - + @@ -126,7 +126,7 @@ - + @@ -139,7 +139,7 @@ - + @@ -152,7 +152,7 @@ - + @@ -203,10 +203,10 @@ - + - + @@ -236,10 +236,10 @@ - + - + @@ -292,7 +292,7 @@ - + @@ -322,7 +322,7 @@ - + @@ -382,7 +382,7 @@ - + @@ -412,10 +412,10 @@ - + - + @@ -666,7 +666,7 @@ - + @@ -679,7 +679,7 @@ - + @@ -692,7 +692,7 @@ - + @@ -727,7 +727,7 @@ - + @@ -757,10 +757,10 @@ - + - + diff --git a/pengine/test10/remote-recover-all.exp b/pengine/test10/remote-recover-all.exp index 556ccfd7d67..fd2cf8295ec 100644 --- a/pengine/test10/remote-recover-all.exp +++ b/pengine/test10/remote-recover-all.exp @@ -310,7 +310,7 @@ - + @@ -339,7 +339,7 @@ - + @@ -410,10 +410,10 @@ - + - + diff --git a/pengine/test10/remote-recover-connection.exp b/pengine/test10/remote-recover-connection.exp index 40338b406b1..a9dd475c10a 100644 --- a/pengine/test10/remote-recover-connection.exp +++ b/pengine/test10/remote-recover-connection.exp @@ -187,7 +187,7 @@ - + @@ -216,7 +216,7 @@ - + @@ -287,10 +287,10 @@ - + - + diff --git a/pengine/test10/remote-recover-no-resources.exp b/pengine/test10/remote-recover-no-resources.exp index 
0a57e2737a1..09b012b3321 100644 --- a/pengine/test10/remote-recover-no-resources.exp +++ b/pengine/test10/remote-recover-no-resources.exp @@ -221,7 +221,7 @@ - + @@ -250,7 +250,7 @@ - + @@ -321,10 +321,10 @@ - + - + diff --git a/pengine/test10/remote-recover-unknown.exp b/pengine/test10/remote-recover-unknown.exp index 0d7b318d098..0a1d17019de 100644 --- a/pengine/test10/remote-recover-unknown.exp +++ b/pengine/test10/remote-recover-unknown.exp @@ -221,7 +221,7 @@ - + @@ -250,7 +250,7 @@ - + @@ -321,10 +321,10 @@ - + - + diff --git a/pengine/test10/remote-recovery.exp b/pengine/test10/remote-recovery.exp index 40338b406b1..a9dd475c10a 100644 --- a/pengine/test10/remote-recovery.exp +++ b/pengine/test10/remote-recovery.exp @@ -187,7 +187,7 @@ - + @@ -216,7 +216,7 @@ - + @@ -287,10 +287,10 @@ - + - + From b2ab0f457e4fc594d98379abf2bd92ddc0bc018c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= Date: Tue, 3 Apr 2018 23:53:44 +0200 Subject: [PATCH 064/812] High: pengine: fix swapped warning message arguments leading to segfault ...when triggered. Present since 9cf01f5f9 (or since 1.1.17). 
--- lib/pengine/failcounts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pengine/failcounts.c b/lib/pengine/failcounts.c index 53c0f142d77..e217176e1b5 100644 --- a/lib/pengine/failcounts.c +++ b/lib/pengine/failcounts.c @@ -268,7 +268,7 @@ pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, && block_failure(node, rsc, xml_op, data_set)) { pe_warn("Ignoring failure timeout %d for %s because it conflicts with on-fail=block", - rsc->id, rsc->failure_timeout); + rsc->failure_timeout, rsc->id); rsc->failure_timeout = 0; } From d273895199d9597f167b5fcb4bb73186952d36e5 Mon Sep 17 00:00:00 2001 From: "Gao,Yan" Date: Sat, 24 Mar 2018 11:42:33 +0100 Subject: [PATCH 065/812] Fix: pengine: Avoid potential use-of-NULL in unpack_simple_rsc_order() --- pengine/constraints.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pengine/constraints.c b/pengine/constraints.c index b2afa18c08b..04d770bffaf 100644 --- a/pengine/constraints.c +++ b/pengine/constraints.c @@ -268,20 +268,23 @@ unpack_simple_rsc_order(xmlNode * xml_obj, pe_working_set_t * data_set) const char *instance_first = NULL; const char *require_all_s = NULL; - const char *id = crm_element_value(xml_obj, XML_ATTR_ID); - const char *invert = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL); - - crm_str_to_boolean(invert, &invert_bool); + const char *id = NULL; + const char *invert = NULL; if (xml_obj == NULL) { crm_config_err("No constraint object to process."); return FALSE; + } - } else if (id == NULL) { + id = crm_element_value(xml_obj, XML_ATTR_ID); + if (id == NULL) { crm_config_err("%s constraint must have an id", crm_element_name(xml_obj)); return FALSE; } + invert = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL); + crm_str_to_boolean(invert, &invert_bool); + id_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN); id_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST); From bcf5d64f7becd94ad4c5486a71a55fe421a5489d Mon 
Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 2 Apr 2018 18:03:57 -0500 Subject: [PATCH 066/812] Build: lrmd: allow compiling on systems without timeb.h --- lrmd/lrmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index 306260fc27b..28274ae22d2 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -946,8 +946,8 @@ action_complete(svc_action_t * action) * obsolete, we should eventually prefer a clock_gettime() implementation * (wrapped in its own ifdef) with timeb as a fallback. */ -#ifdef HAVE_SYS_TIMEB_H if(goagain) { +#ifdef HAVE_SYS_TIMEB_H int time_sum = time_diff_ms(NULL, &cmd->t_first_run); int timeout_left = cmd->timeout_orig - time_sum; int delay = cmd->timeout_orig / 10; @@ -990,8 +990,8 @@ action_complete(svc_action_t * action) cmd->exec_rc = PCMK_OCF_TIMEOUT; cmd_original_times(cmd); } - } #endif + } if (action->stderr_data) { cmd->output = strdup(action->stderr_data); From 61feadecbd4b8743a8221754d707ee0baf224254 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 25 Jan 2018 15:08:21 -0600 Subject: [PATCH 067/812] Build: spec: correct misspelling should have been done with 38c9f8d9 --- pacemaker.spec.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pacemaker.spec.in b/pacemaker.spec.in index 4487b8f04a6..bc419045839 100644 --- a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -418,7 +418,7 @@ sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool make %{_smp_mflags} V=1 all %check -{ pengine/regression.sh --run one-or-more-unrunnnable-instances \ +{ pengine/regression.sh --run one-or-more-unrunnable-instances \ && tools/regression.sh \ && touch .CHECKED } 2>&1 | sed 's/[fF]ail/faiil/g' # prevent false positives in rpmlint From db2f717332164246af37560abbf13c1152e93442 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 4 Apr 2018 14:29:46 -0500 Subject: [PATCH 068/812] Test: all: don't run cppcheck and clang checks by default This gets "make check" working again. 
--- GNUmakefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 7ea15aea75e..c2bba1fe272 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -364,8 +364,6 @@ rel-tags: tags CLANG_analyzer = $(shell which scan-build) CLANG_checkers = -check: clang cppcheck - # Extra cppcheck options: --enable=all --inconclusive --std=posix cppcheck: for d in replace lib mcp attrd pengine cib crmd fencing lrmd tools; do cppcheck -q $$d; done From 05e112ac380124a77cc74f020fd91394bc483a7b Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 4 Apr 2018 18:09:26 -0500 Subject: [PATCH 069/812] Low: various: improve NULL checks Check for NULL *before* trying to dereference a pointer, and avoid redundant NULL checks. --- crmd/join_client.c | 27 ++++++++++----------------- crmd/lrm.c | 21 +++++---------------- fencing/commands.c | 21 +++++---------------- lib/pengine/clone.c | 20 ++++---------------- lib/pengine/group.c | 21 ++++----------------- lib/pengine/native.c | 21 ++++++--------------- lrmd/lrmd.c | 2 +- pengine/allocate.c | 20 +++++--------------- pengine/clone.c | 23 ++++++----------------- pengine/constraints.c | 21 +++------------------ pengine/group.c | 27 +++++++-------------------- pengine/notif.c | 21 +++++---------------- tools/crm_simulate.c | 23 +++++------------------ 13 files changed, 66 insertions(+), 202 deletions(-) diff --git a/crmd/join_client.c b/crmd/join_client.c index 75411645c6b..2142d21dc98 100644 --- a/crmd/join_client.c +++ b/crmd/join_client.c @@ -1,20 +1,10 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. 
- * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. */ + #include #include @@ -120,8 +110,10 @@ do_cl_join_offer_respond(long long action, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); - const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); - const char *join_id = crm_element_value(input->msg, F_CRM_JOIN_ID); + const char *welcome_from; + const char *join_id; + + CRM_CHECK(input != NULL, return); #if 0 if (we are sick) { @@ -132,6 +124,8 @@ do_cl_join_offer_respond(long long action, } #endif + welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); + join_id = crm_element_value(input->msg, F_CRM_JOIN_ID); crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s", welcome_from, crm_element_value(input->msg, F_CRM_JOIN_ID)); @@ -150,7 +144,6 @@ do_cl_join_offer_respond(long long action, update_dc_expected(input->msg); - CRM_LOG_ASSERT(input != NULL); query_call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local | cib_no_children); fsa_register_cib_callback(query_call_id, FALSE, strdup(join_id), join_query_callback); diff --git a/crmd/lrm.c b/crmd/lrm.c index 41fd9a833a4..976f7a9e37d 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under 
the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include @@ -689,7 +678,7 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_ return TRUE; } - if ((rsc == NULL) || (op == NULL) || (op->params == NULL) + if ((rsc == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->class, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", @@ -2588,7 +2577,7 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr /* The tengine canceled this op, we have been waiting for the cancel to finish. */ erase_lrm_history_by_op(lrm_state, op); - } else if (pending && op->rsc_deleted) { + } else if (op->rsc_deleted) { /* The tengine initiated this op, but it was cancelled outside of the * tengine's control during a resource cleanup/re-probe request. 
The tengine * must be alerted that this operation completed, otherwise the tengine diff --git a/fencing/commands.c b/fencing/commands.c index 074e00ad9f5..3095d483367 100644 --- a/fencing/commands.c +++ b/fencing/commands.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2009 Andrew Beekhof + * Copyright 2009-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include @@ -1626,7 +1615,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc if (dev->targets == NULL || dev->targets_age + 60 < now) { crm_trace("Running %s command to see if %s can fence %s (%s)", - check_type, dev?dev->id:"N/A", search->host, search->action); + check_type, dev->id, search->host, search->action); schedule_internal_command(__FUNCTION__, dev, "list", NULL, search->per_device_timeout, search, dynamic_list_search_cb); @@ -1641,7 +1630,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc } else if (safe_str_eq(check_type, "status")) { crm_trace("Running %s command to see if %s can fence %s (%s)", - check_type, dev?dev->id:"N/A", search->host, search->action); + check_type, dev->id, search->host, search->action); schedule_internal_command(__FUNCTION__, dev, "status", search->host, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index 00dd5ca0bf3..b6473d267a8 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include @@ -584,14 +573,13 @@ clone_print(resource_t * rsc, const char *pre_text, long options, void *print_da void clone_free(resource_t * rsc) { - GListPtr gIter = rsc->children; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_trace(rsc, "Freeing %s", rsc->id); - for (; gIter != NULL; gIter = gIter->next) { + for (GListPtr gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; CRM_ASSERT(child_rsc); diff --git a/lib/pengine/group.c b/lib/pengine/group.c index ec999064695..258c6b52177 100644 --- a/lib/pengine/group.c +++ b/lib/pengine/group.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
*/ #include @@ -194,13 +183,11 @@ group_print(resource_t * rsc, const char *pre_text, long options, void *print_da void group_free(resource_t * rsc) { - GListPtr gIter = rsc->children; - CRM_CHECK(rsc != NULL, return); pe_rsc_trace(rsc, "Freeing %s", rsc->id); - for (; gIter != NULL; gIter = gIter->next) { + for (GListPtr gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; CRM_ASSERT(child_rsc); diff --git a/lib/pengine/native.c b/lib/pengine/native.c index 7f219a63277..defed0b4deb 100644 --- a/lib/pengine/native.c +++ b/lib/pengine/native.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
*/ #include @@ -244,7 +233,7 @@ native_parameter(resource_t * rsc, node_t * node, gboolean create, const char *n { char *value_copy = NULL; const char *value = NULL; - GHashTable *hash = rsc->parameters; + GHashTable *hash = NULL; GHashTable *local_hash = NULL; CRM_CHECK(rsc != NULL, return NULL); @@ -264,6 +253,8 @@ native_parameter(resource_t * rsc, node_t * node, gboolean create, const char *n get_rsc_attributes(local_hash, rsc, node, data_set); hash = local_hash; + } else { + hash = rsc->parameters; } value = g_hash_table_lookup(hash, name); diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index 28274ae22d2..6e687cd277a 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -354,7 +354,7 @@ schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) } /* crmd expects lrmd to automatically cancel recurring ops before rsc stops. */ - if (rsc && safe_str_eq(cmd->action, "stop")) { + if (safe_str_eq(cmd->action, "stop")) { cancel_all_recurring(rsc, NULL); } diff --git a/pengine/allocate.c b/pengine/allocate.c index 7ae4e025e61..09ca1ef6ab1 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include @@ -1706,11 +1695,12 @@ rsc_order_then(action_t * lh_action, resource_t * rsc, order_constraint_t * orde GListPtr gIter = NULL; GListPtr rh_actions = NULL; action_t *rh_action = NULL; - enum pe_ordering type = order->type; + enum pe_ordering type; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); + type = order->type; rh_action = order->rh_action; crm_trace("Processing RH of ordering constraint %d", order->id); diff --git a/pengine/clone.c b/pengine/clone.c index e81dbc85d37..31924129999 100644 --- a/pengine/clone.c +++ b/pengine/clone.c @@ -1,19 +1,8 @@ -/* - * Copyright (C) 2004 Andrew Beekhof - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +/* + * Copyright 2004-2018 Andrew Beekhof + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include @@ -449,7 +438,7 @@ color_instance(resource_t * rsc, node_t * prefer, gboolean all_coloc, int limit, chosen = rsc->cmds->allocate(rsc, prefer, data_set); if (chosen) { node_t *local_node = parent_node_instance(rsc, chosen); - if (prefer && chosen && chosen->details != prefer->details) { + if (prefer && (chosen->details != prefer->details)) { crm_notice("Pre-allocation failed: got %s instead of %s", chosen->details->uname, prefer->details->uname); g_hash_table_destroy(rsc->allowed_nodes); diff --git a/pengine/constraints.c b/pengine/constraints.c index 04d770bffaf..82f5bc492e4 100644 --- a/pengine/constraints.c +++ b/pengine/constraints.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include @@ -2225,10 +2214,6 @@ unpack_colocation_set(xmlNode * set, int score, pe_working_set_t * data_set) if (crm_str_eq((const char *)xml_rsc_with->name, XML_TAG_RESOURCE_REF, TRUE)) { if (safe_str_eq(resource->id, ID(xml_rsc_with))) { break; - } else if (resource == NULL) { - crm_config_err("%s: No resource found for %s", set_id, - ID(xml_rsc_with)); - return FALSE; } EXPAND_CONSTRAINT_IDREF(set_id, with, ID(xml_rsc_with)); pe_rsc_trace(resource, "Anti-Colocating %s with %s", resource->id, diff --git a/pengine/group.c b/pengine/group.c index 7c5d5b4f55e..dccb4f84f0e 100644 --- a/pengine/group.c +++ b/pengine/group.c @@ -1,19 +1,8 @@ -/* - * Copyright (C) 2004 Andrew Beekhof - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +/* + * Copyright 2004-2018 Andrew Beekhof + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include @@ -465,14 +454,12 @@ group_rsc_location(resource_t * rsc, rsc_to_node_t * constraint) void group_expand(resource_t * rsc, pe_working_set_t * data_set) { - GListPtr gIter = rsc->children; + CRM_CHECK(rsc != NULL, return); pe_rsc_trace(rsc, "Processing actions from %s", rsc->id); - - CRM_CHECK(rsc != NULL, return); native_expand(rsc, data_set); - for (; gIter != NULL; gIter = gIter->next) { + for (GListPtr gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); diff --git a/pengine/notif.c b/pengine/notif.c index 7ce8f577446..3013ee03143 100644 --- a/pengine/notif.c +++ b/pengine/notif.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include @@ -198,7 +187,7 @@ expand_list(GListPtr list, char **rsc_list, char **node_list) int existing_len = 0; int len = 2 + strlen(rsc_id); /* +1 space, +1 EOS */ - if (rsc_list && *rsc_list) { + if (*rsc_list) { existing_len = strlen(*rsc_list); } @@ -215,7 +204,7 @@ expand_list(GListPtr list, char **rsc_list, char **node_list) int existing_len = 0; int len = 2 + strlen(uname); - if (node_list && *node_list) { + if (*node_list) { existing_len = strlen(*node_list); } diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c index 5473e316d8e..0d48315427f 100644 --- a/tools/crm_simulate.c +++ b/tools/crm_simulate.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2009 Andrew Beekhof + * Copyright 2009-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include @@ -374,10 +363,8 @@ create_dotfile(pe_working_set_t * data_set, const char *dot_file, gboolean all_a } fprintf(dot_strm, "}\n"); - if (dot_strm != NULL) { - fflush(dot_strm); - fclose(dot_strm); - } + fflush(dot_strm); + fclose(dot_strm); } static void From a603f83c6802eb1980056abd3624e849eb50bf98 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 4 Apr 2018 19:07:46 -0500 Subject: [PATCH 070/812] Low: cib: avoid file descriptor leak on temporary file creation The previous lack of parentheses in this assignment plus test: (new_fd = mkstemp(new) < 0) meant that new_fd was always 0 (if mkstemp() succeeded) or 1 (if it failed). The only practical effect was that the file descriptor would not be closed for a successful creation (regression introduced in 1.1.13 via 006e98338). --- cib/io.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/cib/io.c b/cib/io.c index d4515cfe0e9..e449bd6e039 100644 --- a/cib/io.c +++ b/cib/io.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include @@ -58,7 +47,8 @@ cib_rename(const char *old) crm_err("Archiving unusable file %s as %s", old, new); umask(S_IWGRP | S_IWOTH | S_IROTH); - if ((new_fd = mkstemp(new) < 0) || (rename(old, new) < 0)) { + new_fd = mkstemp(new); + if ((new_fd < 0) || (rename(old, new) < 0)) { crm_perror(LOG_ERR, "Couldn't rename %s as %s", old, new); crm_err("Disabling disk writes and continuing"); cib_writes_enabled = FALSE; From 509770e749e1d89e713638eb4195342d60add06f Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 5 Apr 2018 16:55:07 -0500 Subject: [PATCH 071/812] Low: crmd: avoid memory leak when synthesizing operation result introduced by b3f9a5bb --- crmd/lrm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index 976f7a9e37d..c9214261685 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1477,9 +1477,9 @@ synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) /* If we can't process the result normally, at least write it to the CIB * if possible, so the PE can act on it. */ - char *standard = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_CLASS); - char *provider = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_PROVIDER); - char *type = crm_element_value_copy(xml_rsc, XML_ATTR_TYPE); + const char *standard = crm_element_value(xml_rsc, XML_AGENT_ATTR_CLASS); + const char *provider = crm_element_value(xml_rsc, XML_AGENT_ATTR_PROVIDER); + const char *type = crm_element_value(xml_rsc, XML_ATTR_TYPE); if (standard && type) { rsc_info = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); From c6c18f59ad008b88684838637cbb699abc22c08a Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 9 Apr 2018 15:20:01 -0500 Subject: [PATCH 072/812] High: crmd: delete resource from lrmd when appropriate Regression introduced in e8802834 (2.0.0-rc2): a missing return value check meant that crmd would always delete resource from its LRM state only, and not from the lrmd itself. 
As a side effect, this also introduced a memory leak when deleting a resource. --- crmd/lrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index c9214261685..dca94df06aa 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1837,7 +1837,7 @@ do_lrm_invoke(long long action, PCMK_OCF_CONNECTION_DIED); return; - } else if (!create_rsc) { + } else if ((rc < 0) && !create_rsc) { /* Delete of malformed or nonexistent resource * (deleting something that does not exist is a success) */ From 90dde63c3ef8ba99ad80883ae5970e4268b9a667 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 10 Apr 2018 15:19:11 -0500 Subject: [PATCH 073/812] Low: fencing: avoid memory leaks when freeing remote operation --- fencing/remote.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fencing/remote.c b/fencing/remote.c index ace814d8585..03949c84ffe 100644 --- a/fencing/remote.c +++ b/fencing/remote.c @@ -238,6 +238,7 @@ free_remote_op(gpointer data) free(op->id); free(op->action); + free(op->delegate); free(op->target); free(op->client_id); free(op->client_name); @@ -255,6 +256,7 @@ free_remote_op(gpointer data) op->devices_list = NULL; } g_list_free_full(op->automatic_list, free); + g_list_free(op->duplicates); free(op); } From 74171d10462e11e46c67c3aabbbc4e1185a334d4 Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Wed, 11 Apr 2018 11:48:13 +1000 Subject: [PATCH 074/812] Fix: rhbz#1565187 - Ensure failures that cause fencing are not removed until after fencing completes --- lib/pengine/unpack.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 355cdcf4ca8..3a4a7ccfdbf 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -2949,6 +2949,7 @@ static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNod } if (clear_reason != NULL) { + node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); 
action_t *clear_op = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); @@ -2957,6 +2958,17 @@ static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNod crm_notice("Clearing failure of %s on %s because %s " CRM_XS " %s", rsc->id, node->details->uname, clear_reason, clear_op->uuid); + + if (is_set(data_set->flags, pe_flag_stonith_enabled) + && rsc->remote_reconnect_interval + && remote_node + && remote_node->details->unclean) { + + action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, data_set); + crm_notice("Waiting for %s to complete before clearing %s failure for remote node %s", fence?fence->uuid:"nil", task, rsc->id); + + order_actions(fence, clear_op, pe_order_implies_then); + } } crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); From c00a98b4cc00a801deee2f5118cc800d24cc1c7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= Date: Mon, 9 Apr 2018 22:41:41 +0200 Subject: [PATCH 075/812] Build: spec: fix malformed conditional macros for systemd-less cases Guilty of introducing these typos in commits aa2bc6a839 and 222318d30a. --- pacemaker.spec.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pacemaker.spec.in b/pacemaker.spec.in index bc419045839..1f6ea9a07ee 100644 --- a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -76,8 +76,8 @@ ## propagated by systemd project ## - when not good enough, there's always a possibility to check ## particular distro-specific macros (incl. version comparison) -%define systemd_native (%{?_unitdir:1}%{?!_unitdir:0}%{nil \ - } || %{?__transaction_systemd_inhibit:1}%{?!__transaction_systemd_inhibit:0}%{nil \ +%define systemd_native (%{?_unitdir:1}%{!?_unitdir:0}%{nil \ + } || %{?__transaction_systemd_inhibit:1}%{!?__transaction_systemd_inhibit:0}%{nil \ } || %(test -f /usr/lib/os-release; test $? -ne 0; echo $?)) @@ -380,7 +380,7 @@ find . -exec touch \{\} \; # Early versions of autotools (e.g. 
RHEL <= 5) do not support --docdir export docdir=%{pcmk_docdir} -export systemdunitdir=%{?_unitdir}%{?!_unitdir:no} +export systemdunitdir=%{?_unitdir}%{!?_unitdir:no} %if %{with hardening} # prefer distro-provided hardening flags in case they are defined From 0e00e5274d86c9ce0dd947e8b041ecf4a5a0f44b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= Date: Mon, 9 Apr 2018 22:45:38 +0200 Subject: [PATCH 076/812] Build: spec: prevent -remote pkg dragging systemd into system without it At least since 1.1.17, pacemaker_remoted can stand for full-fledged PID1 hence it doubly does not make sense for its installation, e.g. in the container, to bring systemd in (it may be blocked by other means, but still). No need to modify anything about "%{post,...} remote" as "%systemd_{post,...}" are resolved to silent, non-fatal systemctl invocations. Also simplify apply-when-defined for the main package in a similar way already applied. References: https://github.com/systemd/systemd/commit/2424b6bd7 https://github.com/systemd/systemd/pull/3776 --- pacemaker.spec.in | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pacemaker.spec.in b/pacemaker.spec.in index 1f6ea9a07ee..ee9fc351439 100644 --- a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -176,10 +176,7 @@ Requires: resource-agents Requires: %{name}-libs = %{version}-%{release} Requires: %{name}-cluster-libs = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} - -%if %{defined systemd_requires} -%systemd_requires -%endif +%{?systemd_requires} # Pacemaker targets compatibility with python 2.6+ and 3.2+ Requires: python >= 2.6 @@ -299,9 +296,8 @@ Group: System Environment/Daemons Requires: %{name}-libs = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} Requires: resource-agents -%if %{defined systemd_requires} -%systemd_requires -%endif +# -remote can be fully independent of systemd +%{?systemd_ordering}%{!?systemd_ordering:%{?systemd_requires}} %description 
remote Pacemaker is an advanced, scalable High-Availability cluster resource From a5c0782c0012b812ad0939a4aec5842f05dc1129 Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Sun, 15 Apr 2018 20:41:01 +1000 Subject: [PATCH 077/812] Fix: crm_diff: rhbz#1561617 - Ignore attribute placement when comparing in 'cib' mode --- include/crm/common/xml.h | 1 + lib/common/xml.c | 28 ++++++++++++++++++++++++---- tools/crm_diff.c | 6 +++++- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/include/crm/common/xml.h b/include/crm/common/xml.h index 8297bb20ba3..69ad4bd1144 100644 --- a/include/crm/common/xml.h +++ b/include/crm/common/xml.h @@ -380,6 +380,7 @@ bool xml_tracking_changes(xmlNode * xml); bool xml_document_dirty(xmlNode *xml); void xml_track_changes(xmlNode * xml, const char *user, xmlNode *acl_source, bool enforce_acls); void xml_calculate_changes(xmlNode * old, xmlNode * new); /* For comparing two documents after the fact */ +void xml_calculate_significant_changes(xmlNode *old_xml, xmlNode *new_xml); void xml_accept_changes(xmlNode * xml); void xml_log_changes(uint8_t level, const char *function, xmlNode *xml); void xml_log_patchset(uint8_t level, const char *function, xmlNode *xml); diff --git a/lib/common/xml.c b/lib/common/xml.c index 947ddfe6f5e..8fd47789843 100644 --- a/lib/common/xml.c +++ b/lib/common/xml.c @@ -72,6 +72,7 @@ enum xml_private_flags { xpf_acl_create = 0x1000, xpf_acl_denied = 0x2000, + xpf_lazy = 0x4000, }; typedef struct xml_private_s @@ -115,10 +116,22 @@ static inline bool TRACKING_CHANGES(xmlNode *xml) { if(xml == NULL || xml->doc == NULL || xml->doc->_private == NULL) { return FALSE; - } else if(is_set(((xml_private_t *)xml->doc->_private)->flags, xpf_tracking)) { - return TRUE; + } else if(is_not_set(((xml_private_t *)xml->doc->_private)->flags, xpf_tracking)) { + return FALSE; } - return FALSE; + return TRUE; +} + +static inline bool TRACKING_CHANGES_LAZY(xmlNode *xml) +{ + if(xml == NULL || xml->doc == NULL || 
xml->doc->_private == NULL) { + return FALSE; + } else if(is_not_set(((xml_private_t *)xml->doc->_private)->flags, xpf_tracking)) { + return FALSE; + } else if(is_not_set(((xml_private_t *)xml->doc->_private)->flags, xpf_lazy)) { + return FALSE; + } + return TRUE; } #define buffer_print(buffer, max, offset, fmt, args...) do { \ @@ -4084,7 +4097,7 @@ __xml_diff_object(xmlNode * old, xmlNode * new) crm_xml_add(new, name, vcopy); free(vcopy); - } else if(p_old != p_new) { + } else if(p_old != p_new && TRACKING_CHANGES_LAZY(new) == FALSE) { crm_info("Moved %s@%s (%d -> %d)", old->name, name, p_old, p_new); __xml_node_dirty(new); p->flags |= xpf_dirty|xpf_moved; @@ -4183,6 +4196,13 @@ __xml_diff_object(xmlNode * old, xmlNode * new) } } +void +xml_calculate_significant_changes(xmlNode *old_xml, xmlNode *new_xml) +{ + set_doc_flag(new_xml, xpf_lazy); + xml_calculate_changes(old_xml, new_xml); +} + void xml_calculate_changes(xmlNode * old, xmlNode * new) { diff --git a/tools/crm_diff.c b/tools/crm_diff.c index 20e7a2755bf..0ec8e44deac 100644 --- a/tools/crm_diff.c +++ b/tools/crm_diff.c @@ -190,7 +190,11 @@ generate_patch(xmlNode *object_1, xmlNode *object_2, const char *xml_file_2, } xml_track_changes(object_2, NULL, object_2, FALSE); - xml_calculate_changes(object_1, object_2); + if(as_cib) { + xml_calculate_significant_changes(object_1, object_2); + } else { + xml_calculate_changes(object_1, object_2); + } crm_log_xml_debug(object_2, (xml_file_2? 
xml_file_2: "target")); output = xml_create_patchset(0, object_1, object_2, NULL, FALSE); From c224148cb55d337ef545d65c576f8124f9c2bb67 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 17 Apr 2018 16:13:34 -0500 Subject: [PATCH 078/812] Build: .gitignore: ignore more 2.0 material --- .gitignore | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index af50b7395e6..f66bef880b4 100644 --- a/.gitignore +++ b/.gitignore @@ -152,12 +152,16 @@ lib/gnu/libgnu.a lib/gnu/stdalign.h *.coverity -# Built only in 2.0 branch (makes switching branches easier) +# Built or present only in 2.0 branch (makes switching branches easier) +/cts/cts-cli /cts/cts-coverage /cts/cts-lrmd /cts/cts-pengine +/cts/cts-regression /cts/cts-stonithd /cts/fence_dummy +/cts/pacemaker-cts-dummyd +/cts/pacemaker-cts-dummyd@.service /cts/pengine/ /doc/Pacemaker_Administration.build /doc/Pacemaker_Administration/ From a081f9bc127efdcb3b65ae954e025055ee7deae1 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 20 Apr 2018 20:26:46 -0500 Subject: [PATCH 079/812] Build: .gitignore: ignore 2.0 pacemakerd directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f66bef880b4..9efb3f5f6c8 100644 --- a/.gitignore +++ b/.gitignore @@ -165,6 +165,7 @@ lib/gnu/stdalign.h /cts/pengine/ /doc/Pacemaker_Administration.build /doc/Pacemaker_Administration/ +/pacemakerd/ #Other mock From 160dd1d5e9ecd76c1c089a3a9f2f5b1f67716e81 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 18 Apr 2018 11:35:08 -0500 Subject: [PATCH 080/812] Refactor: controld: consistently indent with spaces instead of tabs --- extra/resources/controld | 109 +++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 55 deletions(-) diff --git a/extra/resources/controld b/extra/resources/controld index ddb514a6e74..733999a5340 100755 --- a/extra/resources/controld +++ b/extra/resources/controld @@ -1,6 +1,6 @@ #!/bin/sh # -# Resource 
Agent for managing the DLM controld process. +# OCF resource agent for managing the DLM controld process # # Copyright (c) 2009 Novell, Inc # All Rights Reserved. @@ -41,7 +41,7 @@ if [ -e "$OCF_ROOT/resource.d/heartbeat/controld" ]; then fi meta_data() { - cat < @@ -112,7 +112,7 @@ END DLM_SYSFS_DIR="/sys/kernel/dlm" controld_usage() { - cat < /dev/null - if [ $? != 0 ]; then - mount -t configfs none $OCF_RESKEY_configdir - fi - - if [ ! -e $OCF_RESKEY_configdir/dlm ]; then - modprobe dlm - if [ ! -e $OCF_RESKEY_configdir/dlm ]; then - ocf_log err "$OCF_RESKEY_configdir/dlm not available" - return $OCF_ERR_INSTALLED - fi - fi + ocf_log err "$OCF_RESKEY_configdir not available" + return $OCF_ERR_INSTALLED + fi + fi + + mount | grep "type configfs" > /dev/null + if [ $? != 0 ]; then + mount -t configfs none $OCF_RESKEY_configdir + fi + + if [ ! -e $OCF_RESKEY_configdir/dlm ]; then + modprobe dlm + if [ ! -e $OCF_RESKEY_configdir/dlm ]; then + ocf_log err "$OCF_RESKEY_configdir/dlm not available" + return $OCF_ERR_INSTALLED + fi + fi if ! ocf_is_true "$OCF_RESKEY_allow_stonith_disabled" && \ - ! ocf_is_true "`crm_attribute --type=crm_config --name=stonith-enabled --query --quiet --default=true`"; then + ! ocf_is_true "`crm_attribute --type=crm_config --name=stonith-enabled --query --quiet --default=true`"; then ocf_log err "The cluster property stonith-enabled may not be deactivated to use the DLM" return $OCF_ERR_CONFIGURED fi @@ -201,23 +201,23 @@ controld_stop() { controld_monitor; rc=$? if [ $rc = $OCF_NOT_RUNNING ]; then - return $OCF_SUCCESS + return $OCF_SUCCESS fi killall -TERM ${OCF_RESKEY_daemon}; rc=$? if [ $rc != 0 ]; then - return $OCF_ERR_GENERIC + return $OCF_ERR_GENERIC fi rc=$OCF_SUCCESS while [ $rc = $OCF_SUCCESS ]; do - controld_monitor; rc=$? - sleep 1 + controld_monitor; rc=$? 
+ sleep 1 done if [ $rc = $OCF_NOT_RUNNING ]; then - rc=$OCF_SUCCESS + rc=$OCF_SUCCESS fi return $rc @@ -233,7 +233,7 @@ controld_monitor() { ocf_log err "DLM status is: stateful_merge_wait" rc=$OCF_ERR_GENERIC elif [ -z "$smw" ] && dlm_tool ls | grep -q "wait fencing" && \ - ! stonith_admin -H '*' -V | grep -q "wishes to"; then + ! stonith_admin -H '*' -V | grep -q "wishes to"; then ocf_log err "DLM status is: wait fencing" rc=$OCF_ERR_GENERIC else @@ -258,10 +258,10 @@ controld_validate() { check_binary ${OCF_RESKEY_daemon} case ${OCF_RESKEY_CRM_meta_globally_unique} in - yes|Yes|true|True|1) - ocf_log err "$OCF_RESOURCE_INSTANCE must be configured with the globally_unique=false meta attribute" - exit $OCF_ERR_CONFIGURED - ;; + yes|Yes|true|True|1) + ocf_log err "$OCF_RESOURCE_INSTANCE must be configured with the globally_unique=false meta attribute" + exit $OCF_ERR_CONFIGURED + ;; esac [ -d /var/run/cluster ] || mkdir /var/run/cluster @@ -280,34 +280,33 @@ esac case "$OCF_RESOURCE_INSTANCE" in *[gG][fF][sS]*) - : ${OCF_RESKEY_args=-g 0} - : ${OCF_RESKEY_daemon=gfs_controld${daemon_ext}} - ;; + : ${OCF_RESKEY_args=-g 0} + : ${OCF_RESKEY_daemon=gfs_controld${daemon_ext}} + ;; *[dD][lL][mM]*) - : ${OCF_RESKEY_args=-s 0} - : ${OCF_RESKEY_daemon=dlm_controld${daemon_ext}} - ;; + : ${OCF_RESKEY_args=-s 0} + : ${OCF_RESKEY_daemon=dlm_controld${daemon_ext}} + ;; *) - : ${OCF_RESKEY_args=-s 0} - : ${OCF_RESKEY_daemon=dlm_controld${daemon_ext}} + : ${OCF_RESKEY_args=-s 0} + : ${OCF_RESKEY_daemon=dlm_controld${daemon_ext}} esac case $__OCF_ACTION in -meta-data) meta_data - exit $OCF_SUCCESS - ;; -start) controld_validate; controld_start;; -stop) controld_stop;; -monitor) controld_validate; controld_monitor;; -validate-all) controld_validate;; -usage|help) controld_usage - exit $OCF_SUCCESS - ;; -*) controld_usage - exit $OCF_ERR_UNIMPLEMENTED - ;; +meta-data) meta_data + exit $OCF_SUCCESS + ;; +start) controld_validate; controld_start;; +stop) controld_stop;; +monitor) 
controld_validate; controld_monitor;; +validate-all) controld_validate;; +usage|help) controld_usage + exit $OCF_SUCCESS + ;; +*) controld_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; esac rc=$? exit $rc - From b65defeb1bc2c8e019f2e61169579d95e9e2bb12 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 18 Apr 2018 12:03:52 -0500 Subject: [PATCH 081/812] Low: controld: deprecate and ignore configdir parameter DLM itself never supported an alternate location for configfs, and even the controld agent had one hardcoded instance of the standard location. Bump the resource agent version from 0.9 to 1.0, as it is widely used in production. The deprecation is intended only for backporting to the Pacemaker 1.1 release series; the parameter will be removed entirely for the 2.0 release series. --- extra/resources/controld | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/extra/resources/controld b/extra/resources/controld index 733999a5340..8e3a5281335 100755 --- a/extra/resources/controld +++ b/extra/resources/controld @@ -44,7 +44,7 @@ meta_data() { cat < - + 1.0 @@ -66,10 +66,10 @@ Any additional options to start the dlm_controld service with -The location where configfs is or should be mounted +This parameter is deprecated and ignored -Location of configfs - +Deprecated and ignored + @@ -109,6 +109,8 @@ END ####################################################################### +CONFIGFS_DIR="/sys/kernel/config" +DLM_CONFIGFS_DIR="${CONFIGFS_DIR}/dlm" DLM_SYSFS_DIR="/sys/kernel/dlm" controld_usage() { @@ -143,23 +145,24 @@ controld_start() { *) return $OCF_ERR_GENERIC;; esac - if [ ! -e $OCF_RESKEY_configdir ]; then + # Ensure configfs is mounted + if [ ! -e "$CONFIGFS_DIR" ]; then modprobe configfs - if [ ! -e $OCF_RESKEY_configdir ]; then - ocf_log err "$OCF_RESKEY_configdir not available" + if [ ! 
-e "$CONFIGFS_DIR" ]; then + ocf_log err "$CONFIGFS_DIR not available" return $OCF_ERR_INSTALLED fi fi - - mount | grep "type configfs" > /dev/null - if [ $? != 0 ]; then - mount -t configfs none $OCF_RESKEY_configdir + mount -t configfs | grep " $CONFIGFS_DIR " >/dev/null 2>/dev/null + if [ $? -ne 0 ]; then + mount -t configfs none "$CONFIGFS_DIR" fi - if [ ! -e $OCF_RESKEY_configdir/dlm ]; then + # Ensure DLM is available + if [ ! -e "$DLM_CONFIGFS_DIR" ]; then modprobe dlm - if [ ! -e $OCF_RESKEY_configdir/dlm ]; then - ocf_log err "$OCF_RESKEY_configdir/dlm not available" + if [ ! -e "$DLM_CONFIGFS_DIR" ]; then + ocf_log err "$DLM_CONFIGFS_DIR not available" return $OCF_ERR_INSTALLED fi fi @@ -180,7 +183,7 @@ controld_start() { case $rc in $OCF_SUCCESS) local addr_list - addr_list="$(cat /sys/kernel/config/dlm/cluster/comms/*/addr_list 2>/dev/null)" + addr_list="$(cat "${DLM_CONFIGFS_DIR}"/cluster/comms/*/addr_list 2>/dev/null)" if [ $? -eq 0 ] && [ -n "$addr_list" ]; then return $OCF_SUCCESS fi @@ -270,7 +273,6 @@ controld_validate() { } : ${OCF_RESKEY_sctp=false} -: ${OCF_RESKEY_configdir=/sys/kernel/config} : ${OCF_RESKEY_CRM_meta_globally_unique:="false"} case "$HA_quorum_type" in From 8e9a9d1aa3cceb8f310ed3db9b31b2eab9490330 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 19 Apr 2018 17:17:51 -0500 Subject: [PATCH 082/812] Fix: libcrmcommon: don't record pending notify actions as completed ... especially relevant now that record-pending defaults to true. --- lib/common/operations.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/common/operations.c b/lib/common/operations.c index 4afef80fc81..3ca59399ac2 100644 --- a/lib/common/operations.c +++ b/lib/common/operations.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004-2017 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
@@ -513,9 +513,15 @@ create_operation_update(xmlNode * parent, lrmd_event_data_t * op, const char * c CRM_LOG_ASSERT(n_task != NULL); op_id = generate_notify_key(op->rsc_id, n_type, n_task); - /* these are not yet allowed to fail */ - op->op_status = PCMK_LRM_OP_DONE; - op->rc = 0; + if (op->op_status != PCMK_LRM_OP_PENDING) { + /* Ignore notify errors. + * + * @TODO We really should keep the actual result here, and ignore it + * when processing the CIB diff. + */ + op->op_status = PCMK_LRM_OP_DONE; + op->rc = 0; + } } else if (did_rsc_op_fail(op, target_rc)) { op_id = generate_op_key(op->rsc_id, "last_failure", 0); From 14a647dd7a43bc9b450ae76ca64c5e820a8b81dc Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 17 Apr 2018 12:35:43 -0500 Subject: [PATCH 083/812] Test: cts: ignore Dummy error when intentionally failing it CTS picks up log message with recent resource-agents (RemoteDriver *should* be using ocf:pacemaker:Dummy instead of ocf:heartbeat:Dummy, but that's a bigger change for later) --- cts/CTStests.py | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/cts/CTStests.py b/cts/CTStests.py index 8e3e3faf7f4..d6b3ba76646 100644 --- a/cts/CTStests.py +++ b/cts/CTStests.py @@ -1,30 +1,14 @@ -'''CTS: Cluster Testing System: Tests module +""" Test-specific classes for Pacemaker's Cluster Test Suite (CTS) +""" -There are a few things we want to do here: - - ''' - -__copyright__ = ''' -Copyright (C) 2000, 2001 Alan Robertson -Licensed under the GNU GPL. 
+# Pacemaker targets compatibility with Python 2.6+ and 3.2+ +from __future__ import print_function, unicode_literals, absolute_import, division +__copyright__ = """Copyright (C) 2000, 2001 Alan Robertson Add RecourceRecover testcase Zhao Kai -''' +""" -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" # # SPECIAL NOTE: @@ -3162,6 +3146,7 @@ def __call__(self, node): def errorstoignore(self): ignore_pats = [ r"pengine.*: Recover remote-rsc\s*\(.*\)", + r"Dummy.*: No process state file found", ] ignore_pats.extend(RemoteDriver.errorstoignore(self)) From 52a009f471e7d1558aa9212177f5fcef41a1d6f9 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 20 Apr 2018 18:19:05 -0500 Subject: [PATCH 084/812] Test: cts: ignore systemctl output when checking for systemd We weren't using it anyway, and it can cause problems if it contains a non-ASCII character (such as the bullet indicating a failed service). 
--- cts/environment.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cts/environment.py b/cts/environment.py index 6c4831c3896..d7fa25939f3 100644 --- a/cts/environment.py +++ b/cts/environment.py @@ -282,8 +282,9 @@ def discover(self): self["cts-master"] = master if not "have_systemd" in self.data: - self["have_systemd"] = not self.rsh(self.target, "systemctl list-units") - + self["have_systemd"] = not self.rsh(self.target, + "systemctl list-units", + silent=True) self.detect_syslog() self.detect_at_boot() self.detect_ip_offset() From 66831acfe0c0b47bef81c7cd80bd67b137e23054 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 25 Apr 2018 07:19:05 -0500 Subject: [PATCH 085/812] Fix: crmd: avoid double free after ACL rejection of resource delete --- crmd/lrm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index dca94df06aa..8f1deb6d8c2 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1739,7 +1739,6 @@ do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, } send_direct_ack(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); - lrmd_free_rsc_info(rsc); return; } #endif From 96c8d58f49b6b8d035eea707764f174d5be67009 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 27 Apr 2018 15:35:05 -0500 Subject: [PATCH 086/812] Low: libcrmcommon: check file/directory writability better This replaces crm_is_writable() with a new similar function pcmk__daemon_can_write() (starting a new convention of prefixing internal library API functions with "pcmk__"). The old function was called with need_both=FALSE so that both user and group write permissions were not required. However, it was implemented such that the check would pass even if neither could write. The new function fixes that and simplifies the interface by restricting it to the only case we're interested in (either CRM_DAEMON_USER or CRM_DAEMON_GROUP owns and can write the target). It also gives more detailed log messages when something doesn't match. 
--- cib/io.c | 2 +- cib/main.c | 7 +- crmd/main.c | 15 +-- include/crm/common/internal.h | 4 +- lib/common/io.c | 172 +++++++++++++++++++++------------- pengine/main.c | 7 +- 6 files changed, 123 insertions(+), 84 deletions(-) diff --git a/cib/io.c b/cib/io.c index e449bd6e039..7d2ab3cd42f 100644 --- a/cib/io.c +++ b/cib/io.c @@ -195,7 +195,7 @@ readCibXmlFile(const char *dir, const char *file, gboolean discard_status) xmlNode *root = NULL; xmlNode *status = NULL; - if (!crm_is_writable(dir, file, CRM_DAEMON_USER, NULL, FALSE)) { + if (pcmk__daemon_can_write(dir, file) == FALSE) { cib_status = -EACCES; return NULL; } diff --git a/cib/main.c b/cib/main.c index 2f43e30ab14..5473d409584 100644 --- a/cib/main.c +++ b/cib/main.c @@ -230,9 +230,10 @@ main(int argc, char **argv) crm_notice("Using custom config location: %s", cib_root); } - if (crm_is_writable(cib_root, NULL, CRM_DAEMON_USER, CRM_DAEMON_GROUP, FALSE) == FALSE) { - crm_err("Bad permissions on %s. Terminating", cib_root); - fprintf(stderr, "ERROR: Bad permissions on %s. See logs for details\n", cib_root); + if (pcmk__daemon_can_write(cib_root, NULL) == FALSE) { + crm_err("Terminating due to bad permissions on %s", cib_root); + fprintf(stderr, "ERROR: Bad permissions on %s (see logs for details)\n", + cib_root); fflush(stderr); return 100; } diff --git a/crmd/main.c b/crmd/main.c index c06dd8f5302..e8baa125e94 100644 --- a/crmd/main.c +++ b/crmd/main.c @@ -104,16 +104,17 @@ main(int argc, char **argv) crm_help('?', EX_USAGE); } - if (crm_is_writable(PE_STATE_DIR, NULL, CRM_DAEMON_USER, CRM_DAEMON_GROUP, FALSE) == FALSE) { - crm_err("Bad permissions on " PE_STATE_DIR ". Terminating"); - fprintf(stderr, "ERROR: Bad permissions on " PE_STATE_DIR ". 
See logs for details\n"); + if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { + crm_err("Terminating due to bad permissions on " PE_STATE_DIR); + fprintf(stderr, + "ERROR: Bad permissions on " PE_STATE_DIR " (see logs for details)\n"); fflush(stderr); return 100; - } else if (crm_is_writable(CRM_CONFIG_DIR, NULL, CRM_DAEMON_USER, CRM_DAEMON_GROUP, FALSE) == - FALSE) { - crm_err("Bad permissions on " CRM_CONFIG_DIR ". Terminating"); - fprintf(stderr, "ERROR: Bad permissions on " CRM_CONFIG_DIR ". See logs for details\n"); + } else if (pcmk__daemon_can_write(CRM_CONFIG_DIR, NULL) == FALSE) { + crm_err("Terminating due to bad permissions on " CRM_CONFIG_DIR); + fprintf(stderr, + "ERROR: Bad permissions on " CRM_CONFIG_DIR " (see logs for details)\n"); fflush(stderr); return 100; } diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h index 006a1a56d04..a350de292c5 100644 --- a/include/crm/common/internal.h +++ b/include/crm/common/internal.h @@ -35,9 +35,7 @@ int get_last_sequence(const char *directory, const char *series); void write_last_sequence(const char *directory, const char *series, int sequence, int max); int crm_chown_last_sequence(const char *directory, const char *series, uid_t uid, gid_t gid); -gboolean crm_is_writable(const char *dir, const char *file, const char *user, const char *group, - gboolean need_both); - +bool pcmk__daemon_can_write(const char *dir, const char *file); void crm_sync_directory(const char *name); char *crm_read_contents(const char *filename); diff --git a/lib/common/io.c b/lib/common/io.c index 7329fe94f74..e296ff3ff3f 100644 --- a/lib/common/io.c +++ b/lib/common/io.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your 
option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include @@ -263,88 +252,137 @@ crm_chown_last_sequence(const char *directory, const char *series, uid_t uid, gi return rc; } +static bool +pcmk__daemon_user_can_write(const char *target_name, struct stat *target_stat) +{ + struct passwd *sys_user = NULL; + + errno = 0; + sys_user = getpwnam(CRM_DAEMON_USER); + if (sys_user == NULL) { + crm_notice("Could not find user %s: %s", + CRM_DAEMON_USER, pcmk_strerror(errno)); + return FALSE; + } + if (target_stat->st_uid != sys_user->pw_uid) { + crm_notice("%s is not owned by user %s " CRM_XS " uid %d != %d", + target_name, CRM_DAEMON_USER, sys_user->pw_uid, + target_stat->st_uid); + return FALSE; + } + if ((target_stat->st_mode & (S_IRUSR | S_IWUSR)) == 0) { + crm_notice("%s is not readable and writable by user %s " + CRM_XS " st_mode=0%lo", + target_name, CRM_DAEMON_USER, + (unsigned long) target_stat->st_mode); + return FALSE; + } + return TRUE; +} + +static bool +pcmk__daemon_group_can_write(const char *target_name, struct stat *target_stat) +{ + struct group *sys_grp = NULL; + + errno = 0; + sys_grp = getgrnam(CRM_DAEMON_GROUP); + if (sys_grp == NULL) { + crm_notice("Could not find group %s: %s", + CRM_DAEMON_GROUP, pcmk_strerror(errno)); + return FALSE; + } + + if (target_stat->st_gid != sys_grp->gr_gid) { + crm_notice("%s is not owned by group %s " CRM_XS " uid %d != %d", + 
target_name, CRM_DAEMON_GROUP, + sys_grp->gr_gid, target_stat->st_gid); + return FALSE; + } + + if ((target_stat->st_mode & (S_IRGRP | S_IWGRP)) == 0) { + crm_notice("%s is not readable and writable by group %s " + CRM_XS " st_mode=0%lo", + target_name, CRM_DAEMON_GROUP, + (unsigned long) target_stat->st_mode); + return FALSE; + } + return TRUE; +} + /*! * \internal - * \brief Return whether a directory or file is writable by a user/group + * \brief Check whether a directory or file is writable by the cluster daemon + * + * Return TRUE if either the cluster daemon user or cluster daemon group has + * write permission on a specified file or directory. * - * \param[in] dir Directory to check or that contains file - * \param[in] file File name to check (or NULL to check directory) - * \param[in] user Name of user that should have write permission - * \param[in] group Name of group that should have write permission - * \param[in] need_both Whether both user and group must be able to write + * \param[in] dir Directory to check (this argument must be specified, and + * the directory must exist) + * \param[in] file File to check (only the directory will be checked if this + * argument is not specified or the file does not exist) * - * \return TRUE if permissions match, FALSE if they don't or on error + * \return TRUE if target is writable by cluster daemon, FALSE otherwise */ -gboolean -crm_is_writable(const char *dir, const char *file, - const char *user, const char *group, gboolean need_both) +bool +pcmk__daemon_can_write(const char *dir, const char *file) { - int s_res = -1; + int s_res = 0; struct stat buf; char *full_file = NULL; const char *target = NULL; - gboolean pass = TRUE; - gboolean readwritable = FALSE; - + // Caller must supply directory CRM_ASSERT(dir != NULL); + + // If file is given, check whether it exists as a regular file if (file != NULL) { full_file = crm_concat(dir, file, '/'); target = full_file; + s_res = stat(full_file, &buf); - if (s_res == 0 
&& S_ISREG(buf.st_mode) == FALSE) { - crm_err("%s must be a regular file", target); - pass = FALSE; - goto out; + if (s_res < 0) { + crm_notice("%s not found: %s", target, pcmk_strerror(errno)); + free(full_file); + full_file = NULL; + target = NULL; + + } else if (S_ISREG(buf.st_mode) == FALSE) { + crm_err("%s must be a regular file " CRM_XS " st_mode=0%lo", + target, (unsigned long) buf.st_mode); + free(full_file); + return FALSE; } } - if (s_res != 0) { + // If file is not given, ensure dir exists as directory + if (target == NULL) { target = dir; s_res = stat(dir, &buf); - if (s_res != 0) { - crm_err("%s must exist and be a directory", dir); - pass = FALSE; - goto out; + if (s_res < 0) { + crm_err("%s not found: %s", dir, pcmk_strerror(errno)); + return FALSE; } else if (S_ISDIR(buf.st_mode) == FALSE) { - crm_err("%s must be a directory", dir); - pass = FALSE; + crm_err("%s must be a directory " CRM_XS " st_mode=0%lo", + dir, (unsigned long) buf.st_mode); + return FALSE; } } - if (user) { - struct passwd *sys_user = NULL; + if (!pcmk__daemon_user_can_write(target, &buf) + && !pcmk__daemon_group_can_write(target, &buf)) { - sys_user = getpwnam(user); - readwritable = (sys_user != NULL - && buf.st_uid == sys_user->pw_uid && (buf.st_mode & (S_IRUSR | S_IWUSR))); - if (readwritable == FALSE) { - crm_err("%s must be owned and r/w by user %s", target, user); - if (need_both) { - pass = FALSE; - } - } - } - - if (group) { - struct group *sys_grp = getgrnam(group); - - readwritable = (sys_grp != NULL - && buf.st_gid == sys_grp->gr_gid && (buf.st_mode & (S_IRGRP | S_IWGRP))); - if (readwritable == FALSE) { - if (need_both || user == NULL) { - pass = FALSE; - crm_err("%s must be owned and r/w by group %s", target, group); - } else { - crm_warn("%s should be owned and r/w by group %s", target, group); - } - } + crm_err("%s must be owned and writable by either user %s or group %s " + CRM_XS " st_mode=0%lo", + target, CRM_DAEMON_USER, CRM_DAEMON_GROUP, + (unsigned long) 
buf.st_mode); + free(full_file); + return FALSE; } - out: free(full_file); - return pass; + return TRUE; } /*! diff --git a/pengine/main.c b/pengine/main.c index 01d4eb6455c..51131608bda 100644 --- a/pengine/main.c +++ b/pengine/main.c @@ -163,9 +163,10 @@ main(int argc, char **argv) } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - if (crm_is_writable(PE_STATE_DIR, NULL, CRM_DAEMON_USER, CRM_DAEMON_GROUP, FALSE) == FALSE) { - crm_err("Bad permissions on " PE_STATE_DIR ". Terminating"); - fprintf(stderr, "ERROR: Bad permissions on " PE_STATE_DIR ". See logs for details\n"); + if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { + crm_err("Terminating due to bad permissions on " PE_STATE_DIR); + fprintf(stderr, + "ERROR: Bad permissions on " PE_STATE_DIR " (see logs for details)\n"); fflush(stderr); return 100; } From 76e356e19902b0d5804c3c763950212340554d21 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 27 Apr 2018 20:05:45 -0500 Subject: [PATCH 087/812] Doc: pacemaker_remote: correct documentation URL in systemd unit file --- lrmd/pacemaker_remote.service.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lrmd/pacemaker_remote.service.in b/lrmd/pacemaker_remote.service.in index 1c596e1005b..fc7d57e74ac 100644 --- a/lrmd/pacemaker_remote.service.in +++ b/lrmd/pacemaker_remote.service.in @@ -1,6 +1,7 @@ [Unit] Description=Pacemaker Remote Service -Documentation=man:pacemaker_remoted http://clusterlabs.org/doc/en-US/Pacemaker/1.1-pcs/html/Pacemaker_Remote/index.html +Documentation=man:pacemaker-remoted +Documentation=https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/1.1/html-single/Pacemaker_Remote/index.html # See main pacemaker unit file for descriptions of why these are needed After=network.target From 502f9bae0f6380c7464f1a06be092aa4b35fbc48 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 1 May 2018 20:38:33 -0500 Subject: [PATCH 088/812] Build: .gitignore: ignore more 2.0 files --- .gitignore | 14 
+++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 9efb3f5f6c8..5d4505f398a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ TAGS Makefile Makefile.in .deps +.dirstamp .libs *.pc *.pyc @@ -155,17 +156,20 @@ lib/gnu/stdalign.h # Built or present only in 2.0 branch (makes switching branches easier) /cts/cts-cli /cts/cts-coverage -/cts/cts-lrmd -/cts/cts-pengine +/cts/cts-exec +/cts/cts-fencing +/cts/cts-log-watcher /cts/cts-regression -/cts/cts-stonithd +/cts/cts-scheduler +/cts/cts-support /cts/fence_dummy /cts/pacemaker-cts-dummyd /cts/pacemaker-cts-dummyd@.service -/cts/pengine/ +/cts/scheduler/ +/daemons /doc/Pacemaker_Administration.build /doc/Pacemaker_Administration/ -/pacemakerd/ +/tools/stonith_admin #Other mock From 9bdb34f8f5e02b71812720e23f80bc271b1ca48e Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 2 May 2018 18:48:59 -0500 Subject: [PATCH 089/812] Low: controller: don't abort after delay if we're no longer DC 93b77f40 aborts transition 5 seconds after quorum is gained, if transition has not already been aborted. If the node relinquishes DC in that time, it would cause an assertion (though otherwise be harmless), so check first. 
--- crmd/te_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crmd/te_utils.c b/crmd/te_utils.c index 8d105dc31b5..ed98d66f233 100644 --- a/crmd/te_utils.c +++ b/crmd/te_utils.c @@ -541,7 +541,7 @@ static struct abort_timer_s { static gboolean abort_timer_popped(gpointer data) { - if (abort_timer.aborted == FALSE) { + if (AM_I_DC && (abort_timer.aborted == FALSE)) { abort_transition(abort_timer.priority, abort_timer.action, abort_timer.text, NULL); } From ea1e4c6327c12b912fa8fb6666e90d1505fe9992 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 2 May 2018 20:27:42 -0500 Subject: [PATCH 090/812] Low: controller,scheduler: guard hash table deletes In particular, it is possible for the controller to exit and call throttle_fini() before throttle_init() had a chance to be called. Without a guard, g_hash_table_destroy() would log an (otherwise harmless) assertion. --- crmd/throttle.c | 10 ++++++++-- lib/pengine/container.c | 4 +++- lib/pengine/native.c | 4 +++- lib/pengine/unpack.c | 4 +++- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/crmd/throttle.c | index a0729afe1da..d781188f5b7 100644 --- a/crmd/throttle.c +++ b/crmd/throttle.c @@ -449,8 +449,14 @@ throttle_init(void) void throttle_fini(void) { - mainloop_timer_del(throttle_timer); throttle_timer = NULL; - g_hash_table_destroy(throttle_records); throttle_records = NULL; + if (throttle_timer != NULL) { + mainloop_timer_del(throttle_timer); + throttle_timer = NULL; + } + if (throttle_records != NULL) { + g_hash_table_destroy(throttle_records); + throttle_records = NULL; + } } int diff --git a/lib/pengine/container.c b/lib/pengine/container.c index 1dd2f66f15d..b5340bf5a2b 100644 --- a/lib/pengine/container.c +++ b/lib/pengine/container.c @@ -610,7 +610,9 @@ create_remote_resource( tuple->node->rsc_discover_mode = pe_discover_exclusive; /* Ensure the node shows up as allowed and with the correct discovery set */ - 
g_hash_table_destroy(tuple->child->allowed_nodes); + if (tuple->child->allowed_nodes != NULL) { + g_hash_table_destroy(tuple->child->allowed_nodes); + } tuple->child->allowed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); g_hash_table_insert(tuple->child->allowed_nodes, (gpointer) tuple->node->details->id, node_copy(tuple->node)); diff --git a/lib/pengine/native.c b/lib/pengine/native.c index defed0b4deb..f6d1653ad33 100644 --- a/lib/pengine/native.c +++ b/lib/pengine/native.c @@ -66,7 +66,9 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) node_t *local_node = NULL; /* make sure it doesn't come up again */ - g_hash_table_destroy(rsc->allowed_nodes); + if (rsc->allowed_nodes != NULL) { + g_hash_table_destroy(rsc->allowed_nodes); + } rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_iter_init(&gIter, rsc->allowed_nodes); while (g_hash_table_iter_next(&gIter, NULL, (void **)&local_node)) { diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 3a4a7ccfdbf..48e7f2ad14d 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -2729,7 +2729,9 @@ unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, } crm_warn("Making sure %s doesn't come up again", fail_rsc->id); /* make sure it doesn't come up again */ - g_hash_table_destroy(fail_rsc->allowed_nodes); + if (fail_rsc->allowed_nodes != NULL) { + g_hash_table_destroy(fail_rsc->allowed_nodes); + } fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } From 9c5eb57ca810ad31b32f4fea5ec14167f7da27d4 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 7 May 2018 12:15:24 -0500 Subject: [PATCH 091/812] Fix: execd: handle systemd actions correctly when used with "service:" Previously, this would incorrectly attempt to determine the standard of "service" rather than the resource type. 
That would lead to not properly waiting for starts and stops of systemd units to complete, and to logging a spurious warning for upstart jobs. --- lrmd/lrmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index 6e687cd277a..6e0c252dd03 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -879,7 +879,7 @@ action_complete(svc_action_t * action) rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)) { - rclass = resources_find_service_class(rsc->class); + rclass = resources_find_service_class(rsc->type); } else if(rsc) { rclass = rsc->class; } From 472d8106dc5da080ceedf32d87c70ac61f713cf0 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 16 May 2018 17:41:58 -0500 Subject: [PATCH 092/812] Fix: libcrmservice: find absolute paths when used with "service:" Previously, "service:" would work correctly only when specified with a path relative to the LSB init directory. Now, it also works when the agent type is an absolute path. equivalent of f9739f8f5 for 1.1 code base --- lib/services/services.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/lib/services/services.c b/lib/services/services.c index 5f0546bbc66..4250f09f32a 100644 --- a/lib/services/services.c +++ b/lib/services/services.c @@ -55,6 +55,25 @@ services_action_create(const char *name, const char *action, int interval, int t action, interval, timeout, NULL, 0); } +static char * +services__lsb_agent_path(const char *agent) +{ + return (*agent == '/')? strdup(agent) + : crm_strdup_printf("%s/%s", LSB_ROOT_DIR, agent); +} + +static bool +services__lsb_agent_exists(const char *agent) +{ + bool rc = FALSE; + struct stat st; + char *path = services__lsb_agent_path(agent); + + rc = (stat(path, &st) == 0); + free(path); + return rc; +} + /*! 
* \brief Find first service class that can provide a specified agent * @@ -74,18 +93,9 @@ resources_find_service_class(const char *agent) * - systemd * - upstart */ - int rc = 0; - struct stat st; - char *path = NULL; - -#ifdef LSB_ROOT_DIR - rc = asprintf(&path, "%s/%s", LSB_ROOT_DIR, agent); - if (rc > 0 && stat(path, &st) == 0) { - free(path); + if (services__lsb_agent_exists(agent)) { return PCMK_RESOURCE_CLASS_LSB; } - free(path); -#endif #if SUPPORT_SYSTEMD if (systemd_unit_exists(agent)) { @@ -235,14 +245,7 @@ resources_action_create(const char *name, const char *standard, const char *prov op->opaque->args[1] = strdup(action); } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_LSB) == 0) { - if (op->agent[0] == '/') { - /* if given an absolute path, use that instead - * of tacking on the LSB_ROOT_DIR path to the front */ - op->opaque->exec = strdup(op->agent); - } else if (asprintf(&op->opaque->exec, "%s/%s", LSB_ROOT_DIR, op->agent) == -1) { - crm_err("Internal error: cannot create agent path"); - goto return_error; - } + op->opaque->exec = services__lsb_agent_path(op->agent); op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(op->action); op->opaque->args[2] = NULL; From 57800a9239fb3b9506cbd6e0f741fe08075769a2 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 2 May 2018 18:23:58 -0500 Subject: [PATCH 093/812] Log: scheduler: improve failed op message Particularly, if a probe fails due to an error (as opposed to an unexpected running status), point the user to the resource-discovery option. 
--- cts/CTStests.py | 4 ++-- lib/pengine/unpack.c | 18 +++++++++++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/cts/CTStests.py b/cts/CTStests.py index d6b3ba76646..9334a8358b5 100644 --- a/cts/CTStests.py +++ b/cts/CTStests.py @@ -1096,7 +1096,7 @@ def toggleMaintenanceMode(self, node, action): # fail the resource right after turning Maintenance mode on # verify it is not recovered until maintenance mode is turned off if action == "On": - pats.append(r"pengine.*:\s+warning:.*Processing failed op %s for %s on" % (self.action, self.rid)) + pats.append(r"pengine.*:\s+warning:.*Processing failed %s of %s on" % (self.action, self.rid)) else: pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid)) pats.append(self.templates["Pat:RscOpOK"] % ("start", self.rid)) @@ -1305,7 +1305,7 @@ def __call__(self, node): self.debug("Shooting %s aka. %s" % (rsc.clone_id, rsc.id)) pats = [] - pats.append(r"pengine.*:\s+warning:.*Processing failed op %s for (%s|%s) on" % (self.action, + pats.append(r"pengine.*:\s+warning:.*Processing failed %s of (%s|%s) on" % (self.action, rsc.id, rsc.clone_id)) if rsc.managed(): diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 48e7f2ad14d..1b8ca229493 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -2638,9 +2638,21 @@ unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, } if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) { - crm_warn("Processing failed op %s for %s on %s: %s (%d)", - task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), - rc); + crm_warn("Processing failed %s of %s on %s: %s " CRM_XS " rc=%d", + (is_probe? 
"probe" : task), rsc->id, node->details->uname, + services_ocf_exitcode_str(rc), rc); + + if (is_probe && (rc != PCMK_OCF_OK) + && (rc != PCMK_OCF_NOT_RUNNING) + && (rc != PCMK_OCF_RUNNING_MASTER)) { + + /* A failed (not just unexpected) probe result could mean the user + * didn't know resources will be probed even where they can't run. + */ + crm_notice("If it is not possible for %s to run on %s, see " + "the resource-discovery option for location constraints", + rsc->id, node->details->uname); + } record_failed_op(xml_op, node, rsc, data_set); From 3b607c839ae1fdcd94e26935f0e9c34f5d897da5 Mon Sep 17 00:00:00 2001 From: "Gao,Yan" Date: Wed, 9 May 2018 14:34:00 +0200 Subject: [PATCH 094/812] Fix: tools: cibsecret --help/--version doesn't require cluster to be running --- tools/cibsecret.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/cibsecret.in b/tools/cibsecret.in index c15071af2d5..6fea8768af6 100644 --- a/tools/cibsecret.in +++ b/tools/cibsecret.in @@ -347,8 +347,6 @@ cibsecret_sync() { sync_files } -check_env - MAGIC="lrm://" umask 0077 @@ -377,6 +375,8 @@ case "$cmd" in *) usage 1; esac +check_env + # we'll need these two often current=`get_cib_param $rsc $param` current_local=`get_local_param $rsc $param` From 67796b5198dc02ee682cabd9b2ac12994f9bb622 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 9 May 2018 16:04:10 -0500 Subject: [PATCH 095/812] Low: libcrmservice: don't consider a cancelled recurring operation as failed This takes care of an recurring (pun intended) and annoying regression test issue when cancelling LSB monitors. If an instance of the recurring operation was *not* in-flight at the time, services_action_cancel() would set the operation status to cancelled and leave the rc alone (which would be 0). 
If an instance *was* in-flight, the instance would be killed, and the child exit handler would set the status to error and the rc to PCMK_OCF_SIGNAL (which would get mapped to PCMK_OCF_UNKNOWN_ERROR for client notification). The result would cause the regression test ignore the notification and get a timeout instead. Now, the exit handler also sets status cancelled and rc 0 in this situation. This should benefit actual cluster usage in the same situation, though those effects have not been extensively traced. The behavioral difference would be in process_lrm_event(). Most likely it avoids spurious monitor failures. --- lib/services/services_linux.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index 3549b8f953c..d95995e9398 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -290,9 +290,16 @@ operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exi op->status = PCMK_LRM_OP_TIMEOUT; op->rc = PCMK_OCF_TIMEOUT; + } else if (op->cancel) { + /* If an in-flight recurring operation was killed because it was + * cancelled, don't treat that as a failure. + */ + crm_info("%s - terminated with signal %d", prefix, signo); + op->status = PCMK_LRM_OP_CANCELLED; + op->rc = PCMK_OCF_OK; + } else { - do_crm_log_unlikely((op->cancel) ? 
LOG_INFO : LOG_WARNING, - "%s - terminated with signal %d", prefix, signo); + crm_warn("%s - terminated with signal %d", prefix, signo); op->status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_SIGNAL; } From eefeb6933e841e8616b35bff48f911d06c357553 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 10 May 2018 12:51:22 -0500 Subject: [PATCH 096/812] Low: tools: use output redirection correctly in crm_standby --- tools/crm_standby | 2 +- tools/regression.tools.exp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/crm_standby b/tools/crm_standby index 0b30eb65087..952f9ae2827 100755 --- a/tools/crm_standby +++ b/tools/crm_standby @@ -73,7 +73,7 @@ if [ $lifetime -eq 0 ]; then g) # For query, report the forever entry if one exists, otherwise # report the reboot entry if one exists, otherwise report off. - crm_attribute $options -l forever 2>&1 > /dev/null + crm_attribute $options -l forever >/dev/null 2>&1 if [ $? -eq 0 ]; then options="$options -l forever" else diff --git a/tools/regression.tools.exp b/tools/regression.tools.exp index e71e12be3b1..e0587f1106e 100644 --- a/tools/regression.tools.exp +++ b/tools/regression.tools.exp @@ -588,7 +588,6 @@ Call failed: Update was older than existing configuration =#=#=#= End test: Replace operation should fail - Update was older than existing configuration (205) =#=#=#= * Passed: cibadmin - Replace operation should fail =#=#=#= Begin test: Default standby value =#=#=#= -Error performing operation: No such device or address scope=status name=standby value=off =#=#=#= Current cib after: Default standby value =#=#=#= From e939f42f67044269010827c672717e1d3a931946 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 11 May 2018 12:50:29 -0500 Subject: [PATCH 097/812] Low: controld: avoid memory leak when synthesizing failure introduced by b3f9a5bbb --- crmd/lrm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crmd/lrm.c b/crmd/lrm.c index 8f1deb6d8c2..467bf7fff88 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c 
@@ -1471,6 +1471,7 @@ synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) rsc_info = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); } if (rsc_info) { + lrmd_free_rsc_info(rsc_info); process_lrm_event(lrm_state, op, NULL); } else { From 88ae46f7caa4926bd07284bb0ecc7860f9317c6d Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 11 May 2018 21:34:21 -0500 Subject: [PATCH 098/812] Low: execd: avoid memory leak when testing remote key --- lrmd/tls_backend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lrmd/tls_backend.c b/lrmd/tls_backend.c index bc57ee701c3..e63e892edae 100644 --- a/lrmd/tls_backend.c +++ b/lrmd/tls_backend.c @@ -324,6 +324,7 @@ lrmd_init_remote_tls_server() if (rc != 0) { crm_warn("A cluster connection will not be possible until the key is available"); } + gnutls_free(psk_key.data); memset(&hints, 0, sizeof(struct addrinfo)); /* Bind to the wildcard address (INADDR_ANY or IN6ADDR_ANY_INIT). From 7618c29761368262fd8d633992816b52755ec028 Mon Sep 17 00:00:00 2001 From: Hideo Yamauchi Date: Thu, 26 Apr 2018 12:51:06 +0900 Subject: [PATCH 099/812] Mid: lib: Changed to lowercase comparison. 
--- lib/cib/cib_attrs.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/cib/cib_attrs.c b/lib/cib/cib_attrs.c index 8287a44aa5f..060d830b371 100644 --- a/lib/cib/cib_attrs.c +++ b/lib/cib/cib_attrs.c @@ -471,17 +471,19 @@ get_uuid_from_result(xmlNode *result, char **uuid, int *is_remote) * - guest node in resources section * - orphaned remote node or bundle guest node in status section */ +#define XPATH_UPPER_TRANS "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +#define XPATH_LOWER_TRANS "abcdefghijklmnopqrstuvwxyz" #define XPATH_NODE \ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES \ - "/" XML_CIB_TAG_NODE "[@" XML_ATTR_UNAME "='%s']" \ + "/" XML_CIB_TAG_NODE "[translate(@" XML_ATTR_UNAME ",'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ "/" XML_CIB_TAG_RESOURCE \ - "[@class='ocf'][@provider='pacemaker'][@type='remote'][@id='%s']" \ + "[@class='ocf'][@provider='pacemaker'][@type='remote'][translate(@id,'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ "/" XML_CIB_TAG_RESOURCE "/" XML_TAG_META_SETS "/" XML_CIB_TAG_NVPAIR \ - "[@name='" XML_RSC_ATTR_REMOTE_NODE "'][@value='%s']" \ + "[@name='" XML_RSC_ATTR_REMOTE_NODE "'][translate(@value,'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS "/" XML_CIB_TAG_STATE \ - "[@" XML_NODE_IS_REMOTE "='true'][@" XML_ATTR_UUID "='%s']" + "[@" XML_NODE_IS_REMOTE "='true'][translate(@" XML_ATTR_UUID ",'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" int query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_node) @@ -489,6 +491,7 @@ query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_ int rc = pcmk_ok; char *xpath_string; xmlNode *xml_search = NULL; + char *host_lowercase = g_ascii_strdown(uname, -1); CRM_ASSERT(uname 
!= NULL); @@ -499,7 +502,7 @@ query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_ *is_remote_node = FALSE; } - xpath_string = crm_strdup_printf(XPATH_NODE, uname, uname, uname, uname); + xpath_string = crm_strdup_printf(XPATH_NODE, host_lowercase, host_lowercase, host_lowercase, host_lowercase); if (cib_internal_op(the_cib, CIB_OP_QUERY, NULL, xpath_string, NULL, &xml_search, cib_sync_call|cib_scope_local|cib_xpath, NULL) == pcmk_ok) { @@ -509,6 +512,7 @@ query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_ } free(xpath_string); free_xml(xml_search); + free(host_lowercase); if (rc != pcmk_ok) { crm_debug("Could not map node name '%s' to a UUID: %s", From db7014dc33aa934fc865b5e01ed849ec543ec560 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 10 May 2018 12:23:15 -0500 Subject: [PATCH 100/812] Low: tools: handle multiple values properly in crm_attribute Regression introduced in db20d02 (1.1.10) --- tools/crm_attribute.c | 5 +++-- tools/regression.tools.exp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c index 49c69c407af..24713e8365e 100644 --- a/tools/crm_attribute.c +++ b/tools/crm_attribute.c @@ -323,7 +323,8 @@ main(int argc, char **argv) crm_info("Read %s=%s %s%s", attr_name, crm_str(read_value), set_name ? "in " : "", set_name ? 
set_name : ""); - if (rc == -EINVAL) { + if (rc == -ENOTUNIQ) { + // Multiple matches (already displayed) are not error for queries rc = pcmk_ok; } else if (BE_QUIET == FALSE) { @@ -339,7 +340,7 @@ main(int argc, char **argv) free(read_value); } - if (rc == -EINVAL) { + if (rc == -ENOTUNIQ) { printf("Please choose from one of the matches above and supply the 'id' with --attr-id\n"); } else if (rc != pcmk_ok) { diff --git a/tools/regression.tools.exp b/tools/regression.tools.exp index e0587f1106e..096d51d4117 100644 --- a/tools/regression.tools.exp +++ b/tools/regression.tools.exp @@ -256,10 +256,10 @@ Call failed: Name not unique on network =#=#=#= End test: Set duplicate cluster option - OK (0) =#=#=#= * Passed: crm_attribute - Set duplicate cluster option =#=#=#= Begin test: Setting multiply defined cluster option should fail =#=#=#= -Error performing operation: Name not unique on network Multiple attributes match name=cluster-delay Value: 60s (id=cib-bootstrap-options-cluster-delay) Value: 40s (id=duplicate-cluster-delay) +Please choose from one of the matches above and supply the 'id' with --attr-id =#=#=#= Current cib after: Setting multiply defined cluster option should fail =#=#=#= From aa2e91f0df7ac10470db6c96ebf52b329d4aaca2 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 17 May 2018 10:39:19 -0500 Subject: [PATCH 101/812] Log: executor: give more detail when TLS handshake fails Also refactor for readability --- lrmd/tls_backend.c | 63 ++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/lrmd/tls_backend.c b/lrmd/tls_backend.c index e63e892edae..57beeb06a31 100644 --- a/lrmd/tls_backend.c +++ b/lrmd/tls_backend.c @@ -49,6 +49,43 @@ debug_log(int level, const char *str) fputs(str, stderr); } +/*! 
+ * \internal + * \brief Read (more) TLS handshake data from client + */ +static int +remoted__read_handshake_data(crm_client_t *client) +{ + int rc = 0; + + do { + rc = gnutls_handshake(*client->remote->tls_session); + } while (rc == GNUTLS_E_INTERRUPTED); + + if (rc == GNUTLS_E_AGAIN) { + /* No more data is available at the moment. Just return for now; + * we'll get invoked again once the client sends more. + */ + return 0; + } else if (rc != GNUTLS_E_SUCCESS) { + crm_err("TLS handshake with Pacemaker Remote failed: %s " + CRM_XS " rc=%d", gnutls_strerror(rc), rc); + return -1; + } + + if (client->remote->auth_timeout) { + g_source_remove(client->remote->auth_timeout); + } + client->remote->auth_timeout = 0; + + client->remote->tls_handshake_complete = TRUE; + crm_debug("TLS handshake with Pacemaker Remote completed"); + + // Alert other clients of the new connection + notify_of_new_client(client); + return 0; +} + static int lrmd_remote_client_msg(gpointer data) { @@ -59,31 +96,7 @@ lrmd_remote_client_msg(gpointer data) crm_client_t *client = data; if (client->remote->tls_handshake_complete == FALSE) { - int rc = 0; - - /* Muliple calls to handshake will be required, this callback - * will be invoked once the client sends more handshake data. 
*/ - do { - rc = gnutls_handshake(*client->remote->tls_session); - - if (rc < 0 && rc != GNUTLS_E_AGAIN) { - crm_err("Remote lrmd tls handshake failed"); - return -1; - } - } while (rc == GNUTLS_E_INTERRUPTED); - - if (rc == 0) { - crm_debug("Remote lrmd tls handshake completed"); - client->remote->tls_handshake_complete = TRUE; - if (client->remote->auth_timeout) { - g_source_remove(client->remote->auth_timeout); - } - client->remote->auth_timeout = 0; - - /* Alert other clients of the new connection */ - notify_of_new_client(client); - } - return 0; + return remoted__read_handshake_data(client); } rc = crm_remote_ready(client->remote, 0); From 8b2c56ac718eee6d67b18d542d015ccfcf4846f1 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 18 May 2018 12:27:56 -0500 Subject: [PATCH 102/812] Low: liblrmd: improve TLS key reading Don't overallocate memory, and log a message if the read gets an error. Also refactor for simplicity. --- lib/lrmd/lrmd_client.c | 85 +++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index 3fd64792e63..e3fb20e8552 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -990,37 +990,46 @@ lrmd_ipc_connect(lrmd_t * lrmd, int *fd) } #ifdef HAVE_GNUTLS_GNUTLS_H +static void +copy_gnutls_datum(gnutls_datum_t *dest, gnutls_datum_t *source) +{ + dest->data = gnutls_malloc(source->size); + CRM_ASSERT(dest->data); + memcpy(dest->data, source->data, source->size); + dest->size = source->size; +} + +static void +clear_gnutls_datum(gnutls_datum_t *datum) +{ + gnutls_free(datum->data); + datum->data = NULL; + datum->size = 0; +} + +#define KEY_READ_LEN 256 + static int set_key(gnutls_datum_t * key, const char *location) { FILE *stream; - int read_len = 256; - int cur_len = 0; - int buf_len = read_len; - static char *key_cache = NULL; - static size_t key_cache_len = 0; - static time_t key_cache_updated; + size_t buf_len = KEY_READ_LEN; + 
static gnutls_datum_t key_cache = { 0, }; + static time_t key_cache_updated = 0; if (location == NULL) { return -1; } - if (key_cache) { - time_t now = time(NULL); - - if ((now - key_cache_updated) < 60) { - key->data = gnutls_malloc(key_cache_len + 1); - key->size = key_cache_len; - memcpy(key->data, key_cache, key_cache_len); - - crm_debug("using cached LRMD key"); + if (key_cache.data != NULL) { + if ((time(NULL) - key_cache_updated) < 60) { + copy_gnutls_datum(key, &key_cache); + crm_debug("Using cached Pacemaker Remote key"); return 0; } else { - key_cache_len = 0; + clear_gnutls_datum(&key_cache); key_cache_updated = 0; - free(key_cache); - key_cache = NULL; - crm_debug("clearing lrmd key cache"); + crm_debug("Cleared Pacemaker Remote key cache"); } } @@ -1029,37 +1038,35 @@ set_key(gnutls_datum_t * key, const char *location) return -1; } - key->data = gnutls_malloc(read_len); + key->data = gnutls_malloc(buf_len); + key->size = 0; while (!feof(stream)) { - int next; + int next = fgetc(stream); - if (cur_len == buf_len) { - buf_len = cur_len + read_len; - key->data = gnutls_realloc(key->data, buf_len); - } - next = fgetc(stream); - if (next == EOF && feof(stream)) { + if (next == EOF) { + if (!feof(stream)) { + crm_err("Error reading Pacemaker Remote key; copy in memory may be corrupted"); + } break; } - - key->data[cur_len] = next; - cur_len++; + if (key->size == buf_len) { + buf_len = key->size + KEY_READ_LEN; + key->data = gnutls_realloc(key->data, buf_len); + CRM_ASSERT(key->data); + } + key->data[key->size++] = (unsigned char) next; } fclose(stream); - key->size = cur_len; - if (!cur_len) { - gnutls_free(key->data); - key->data = 0; + if (key->size == 0) { + clear_gnutls_datum(key); return -1; } - if (!key_cache) { - key_cache = calloc(1, key->size + 1); - memcpy(key_cache, key->data, key->size); - - key_cache_len = key->size; + if (key_cache.data == NULL) { + copy_gnutls_datum(&key_cache, key); key_cache_updated = time(NULL); + crm_debug("Cached 
Pacemaker Remote key"); } return 0; From 8127d43a28ee5ce74bb583d29853db39b167d706 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 23 May 2018 16:33:46 -0500 Subject: [PATCH 103/812] Low: libcrmcommon: use proper IPC buffer size when reading Using a smaller maximum message size shouldn't have caused any problems, but it's not necessary. --- lib/common/ipc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/common/ipc.c b/lib/common/ipc.c index c238bca239b..8f6ab067787 100644 --- a/lib/common/ipc.c +++ b/lib/common/ipc.c @@ -1058,7 +1058,8 @@ crm_ipc_read(crm_ipc_t * client) crm_ipc_init(); client->buffer[0] = 0; - client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, client->buf_size - 1, 0); + client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, + client->buf_size, 0); if (client->msg_size >= 0) { int rc = crm_ipc_decompress(client); From 8a9186927ff8da42196de39b5222be3c2d34f7bb Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 18 May 2018 16:26:07 -0500 Subject: [PATCH 104/812] Log: scheduler: update wiki URL in log message --- pengine/native.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pengine/native.c b/pengine/native.c index d4f1ff7bd1b..e3e0c594183 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1234,8 +1234,7 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) } else { pe_proc_err("Resource %s (%s::%s) is active on %d nodes %s", rsc->id, class, type, num_active_nodes, recovery2text(rsc->recovery_type)); - crm_warn("See %s for more information.", - "http://clusterlabs.org/wiki/FAQ#Resource_is_Too_Active"); + crm_warn("See https://wiki.clusterlabs.org/wiki/FAQ#Resource_is_Too_Active for more information"); } if (rsc->recovery_type == recovery_stop_start) { From 85a3a174e1fc4cd4b055eb22827c1c3d0b288a85 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 22 May 2018 11:00:22 -0500 Subject: [PATCH 105/812] Low: libpe_status: handle pending migrations 
correctly This is mainly a refactor of unpack_rsc_migration() for readability. The one significant change is that previously, a migrate_from operation that was *recorded* as pending (record-pending=true) was treated differently from an unrecorded pending migrate_from (record-pending=false). --- include/crm/pengine/status.h | 3 + lib/pengine/unpack.c | 162 ++++++++++++++++++++--------------- 2 files changed, 94 insertions(+), 71 deletions(-) diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index fca7f127c6e..a8c90e23a72 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -30,6 +30,9 @@ typedef struct pe_action_s pe_action_t; typedef struct resource_s resource_t; typedef struct ticket_s ticket_t; +// forward-compatible with Pacemaker 2.0.0 +typedef struct resource_s pe_resource_t; + typedef enum no_quorum_policy_e { no_quorum_freeze, no_quorum_stop, diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 1b8ca229493..73bbe274f72 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -2414,94 +2414,114 @@ find_lrm_op(const char *resource, const char *op, const char *node, const char * return get_xpath_object(xpath, data_set->input, LOG_DEBUG); } +static bool +stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_working_set_t *data_set) +{ + xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, + NULL, data_set); + + if (stop_op) { + int stop_id = 0; + int task_id = 0; + + crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); + crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); + if (stop_id > task_id) { + return TRUE; + } + } + return FALSE; +} + static void unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { - - /* - * The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src) - * - * So if a migrate_to is followed by a stop, then we don't need to care what - * 
happened on the target node + /* A successful migration sequence is: + * migrate_to on source node + * migrate_from on target node + * stop on source node * - * Without the stop, we need to look for a successful migrate_from. - * This would also imply we're no longer running on the source + * If a migrate_to is followed by a stop, the entire migration (successful + * or failed) is complete, and we don't care what happened on the target. * - * Without the stop, and without a migrate_from op we make sure the resource - * gets stopped on both source and target (assuming the target is up) + * If no migrate_from has happened, the migration is considered to be + * "partial". If the migrate_from failed, make sure the resource gets + * stopped on both source and target (if up). * + * If the migrate_to and migrate_from both succeeded (which also implies the + * resource is no longer running on the source), but there is no stop, the + * migration is considered to be "dangling". */ - int stop_id = 0; - int task_id = 0; - xmlNode *stop_op = - find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set); - - if (stop_op) { - crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); + int from_rc = 0; + int from_status = 0; + const char *migrate_source = NULL; + const char *migrate_target = NULL; + pe_node_t *target = NULL; + pe_node_t *source = NULL; + xmlNode *migrate_from = NULL; + + if (stop_happened_after(rsc, node, xml_op, data_set)) { + return; } - crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); + // Clones are not allowed to migrate, so role can't be master + rsc->role = RSC_ROLE_STARTED; - if (stop_op == NULL || stop_id < task_id) { - int from_rc = 0, from_status = 0; - const char *migrate_source = - crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); - const char *migrate_target = - crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); + migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); + migrate_target = 
crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); - node_t *target = pe_find_node(data_set->nodes, migrate_target); - node_t *source = pe_find_node(data_set->nodes, migrate_source); - xmlNode *migrate_from = - find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, - data_set); + target = pe_find_node(data_set->nodes, migrate_target); + source = pe_find_node(data_set->nodes, migrate_source); - rsc->role = RSC_ROLE_STARTED; /* can be master? */ - if (migrate_from) { - crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); - crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); - pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", - ID(migrate_from), migrate_target, from_status, from_rc); - } - - if (migrate_from && from_rc == PCMK_OCF_OK - && from_status == PCMK_LRM_OP_DONE) { - pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), - migrate_source); + // Check whether there was a migrate_from action + migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, + migrate_source, data_set); + if (migrate_from) { + crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); + crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); + pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", + ID(migrate_from), migrate_target, from_status, from_rc); + } - /* all good - * just need to arrange for the stop action to get sent - * but _without_ affecting the target somehow - */ - rsc->role = RSC_ROLE_STOPPED; - rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); + if (migrate_from && from_rc == PCMK_OCF_OK + && from_status == PCMK_LRM_OP_DONE) { + /* The migrate_to and migrate_from both succeeded, so mark the migration + * as "dangling". This will be used to schedule a stop action on the + * source without affecting the target. 
+ */ + pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), + migrate_source); + rsc->role = RSC_ROLE_STOPPED; + rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); - } else if (migrate_from) { /* Failed */ - if (target && target->details->online) { - pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, - target->details->online); - native_add_running(rsc, target, data_set); - } + } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed + if (target && target->details->online) { + pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, + target->details->online); + native_add_running(rsc, target, data_set); + } - } else { /* Pending or complete but erased */ - if (target && target->details->online) { - pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, - target->details->online); + } else { // Pending, or complete but erased + if (target && target->details->online) { + pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, + target->details->online); - native_add_running(rsc, target, data_set); - if (source && source->details->online) { - /* If we make it here we have a partial migration. The migrate_to - * has completed but the migrate_from on the target has not. Hold on - * to the target and source on the resource. Later on if we detect that - * the resource is still going to run on that target, we may continue - * the migration */ - rsc->partial_migration_target = target; - rsc->partial_migration_source = source; - } - } else { - /* Consider it failed here - forces a restart, prevents migration */ - set_bit(rsc->flags, pe_rsc_failed); - clear_bit(rsc->flags, pe_rsc_allow_migrate); + native_add_running(rsc, target, data_set); + if (source && source->details->online) { + /* This is a partial migration: the migrate_to completed + * successfully on the source, but the migrate_from has not + * completed. 
Remember the source and target; if the newly + * chosen target remains the same when we schedule actions + * later, we may continue with the migration. + */ + rsc->partial_migration_target = target; + rsc->partial_migration_source = source; } + } else { + /* Consider it failed here - forces a restart, prevents migration */ + set_bit(rsc->flags, pe_rsc_failed); + clear_bit(rsc->flags, pe_rsc_allow_migrate); } } } From 37913a1dec2bda66476bddb5559817d23058be59 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 21 May 2018 12:43:09 -0500 Subject: [PATCH 106/812] Refactor: libpe_status: new functions for finding resource's active nodes Existing code often grabs rsc->running_on->data (i.e. the first node in the list) as the resource's current node, and often uses g_list_length(rsc->running_on). However, if the resource is in the middle of a partial migration, the migration source should be preferred as the current node. Also, if a resource has "requires" set to "nothing" or "quorum", a clean, online node should be preferred as the current node, and a caller should ignore unclean and offline nodes when counting in certain cases. These functions will allow those issues to be addressed in later commits. --- include/crm/pengine/internal.h | 34 +++++---- lib/pengine/complex.c | 121 +++++++++++++++++++++++++++++---- 2 files changed, 127 insertions(+), 28 deletions(-) diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h index e9d7582dd75..fe8f6a112a0 100644 --- a/include/crm/pengine/internal.h +++ b/include/crm/pengine/internal.h @@ -1,20 +1,10 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. 
- * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ + #ifndef PE_INTERNAL__H # define PE_INTERNAL__H # include @@ -125,6 +115,22 @@ int pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set); + +/* Functions for finding/counting a resource's active nodes */ + +pe_node_t *pe__find_active_on(const resource_t *rsc, + unsigned int *count_all, + unsigned int *count_clean); +pe_node_t *pe__find_active_requires(const resource_t *rsc, + unsigned int *count); + +static inline pe_node_t * +pe__current_node(const resource_t *rsc) +{ + return pe__find_active_on(rsc, NULL, NULL); +} + + /* Binary like operators for lists of nodes */ extern void node_list_exclude(GHashTable * list, GListPtr list2, gboolean merge_scores); extern GListPtr node_list_dup(GListPtr list, gboolean reset, gboolean filter); diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c index 86f290c342c..cdd409a6083 100644 --- a/lib/pengine/complex.c +++ b/lib/pengine/complex.c @@ -1,19 +1,8 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include @@ -981,3 +970,107 @@ common_free(resource_t * rsc) free(rsc->pending_task); free(rsc); } + +/*! + * \brief + * \internal Find a node (and optionally count all) where resource is active + * + * \param[in] rsc Resource to check + * \param[out] count_all If not NULL, will be set to count of active nodes + * \param[out] count_clean If not NULL, will be set to count of clean nodes + * + * \return An active node (or NULL if resource is not active anywhere) + * + * \note The order of preference is: an active node that is the resource's + * partial migration source; if the resource's "requires" is "quorum" or + * "nothing", the first active node in the list that is clean and online; + * the first active node in the list. 
+ */ +pe_node_t * +pe__find_active_on(const resource_t *rsc, unsigned int *count_all, + unsigned int *count_clean) +{ + pe_node_t *active = NULL; + pe_node_t *node = NULL; + bool keep_looking = FALSE; + bool is_happy = FALSE; + + if (count_all) { + *count_all = 0; + } + if (count_clean) { + *count_clean = 0; + } + if (rsc == NULL) { + return NULL; + } + + for (GList *node_iter = rsc->running_on; node_iter != NULL; + node_iter = node_iter->next) { + + node = node_iter->data; + keep_looking = FALSE; + + is_happy = node->details->online && !node->details->unclean; + + if (count_all) { + ++*count_all; + } + if (count_clean && is_happy) { + ++*count_clean; + } + if (count_all || count_clean) { + // If we're counting, we need to go through entire list + keep_looking = TRUE; + } + + if (rsc->partial_migration_source != NULL) { + if (node->details == rsc->partial_migration_source->details) { + // This is the migration source + active = node; + } else { + keep_looking = TRUE; + } + } else if (is_not_set(rsc->flags, pe_rsc_needs_fencing)) { + if (is_happy && (!active || !active->details->online + || active->details->unclean)) { + // This is the first clean node + active = node; + } else { + keep_looking = TRUE; + } + } + if (active == NULL) { + // This is first node in list + active = node; + } + + if (keep_looking == FALSE) { + // Don't waste time iterating if we don't have to + break; + } + } + return active; +} + +/*! + * \brief + * \internal Find and count active nodes according to "requires" + * + * \param[in] rsc Resource to check + * \param[out] count If not NULL, will be set to count of active nodes + * + * \return An active node (or NULL if resource is not active anywhere) + * + * \note This is a convenience wrapper for pe__find_active_on() where the count + * of all active nodes or only clean active nodes is desired according to + * the "requires" meta-attribute. 
+ */ +pe_node_t * +pe__find_active_requires(const resource_t *rsc, unsigned int *count) +{ + if (rsc && is_not_set(rsc->flags, pe_rsc_needs_fencing)) { + return pe__find_active_on(rsc, NULL, count); + } + return pe__find_active_on(rsc, count, NULL); +} From e752fcfa10ee68f8a8de48122ae0f73190ae30af Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 21 May 2018 09:36:00 -0500 Subject: [PATCH 107/812] Fix: libpe_status: find active instances properly according to requires If a resource has "requires" set to "nothing" or "quorum", that means we can properly start it elsewhere, even if the node believed to be initially running the resource is unclean and waiting to be fenced. Previously, if we did start the resource elsewhere before fencing completed, the cluster would then consider the resource multiply active, and recover it. Now, we don't consider such a resource multiply active if it's active on only one clean node. Status displays still show the resource as started on the unclean node, to give the administrator a better idea of the actual situation. However, the clean node will be considered the "current" node. --- lib/pengine/native.c | 21 +++++++-- pengine/native.c | 107 +++++++++++++++++++++---------------------- 2 files changed, 70 insertions(+), 58 deletions(-) diff --git a/lib/pengine/native.c b/lib/pengine/native.c index f6d1653ad33..e01ef17f9e1 100644 --- a/lib/pengine/native.c +++ b/lib/pengine/native.c @@ -17,6 +17,21 @@ #define VARIANT_NATIVE 1 #include "./variant.h" +/*! 
+ * \internal + * \brief Check whether a resource is active on multiple nodes + */ +static bool +is_multiply_active(pe_resource_t *rsc) +{ + unsigned int count = 0; + + if (rsc->variant == pe_native) { + pe__find_active_requires(rsc, &count); + } + return count > 1; +} + void native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { @@ -58,7 +73,7 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) return; } - if (rsc->variant == pe_native && g_list_length(rsc->running_on) > 1) { + if (is_multiply_active(rsc)) { switch (rsc->recovery_type) { case recovery_stop_only: { @@ -99,8 +114,8 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) } break; } - crm_debug("%s is active on %d nodes including %s: %s", - rsc->id, g_list_length(rsc->running_on), node->details->uname, + crm_debug("%s is active on multiple nodes including %s: %s", + rsc->id, node->details->uname, recovery2text(rsc->recovery_type)); } else { diff --git a/pengine/native.c b/pengine/native.c index e3e0c594183..37ac2e4899c 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1163,7 +1163,9 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) gboolean allow_migrate = is_set(rsc->flags, pe_rsc_allow_migrate) ? 
TRUE : FALSE; GListPtr gIter = NULL; - int num_active_nodes = 0; + unsigned int num_all_active = 0; + unsigned int num_clean_active = 0; + bool multiply_active = FALSE; enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; @@ -1181,18 +1183,7 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) pe_rsc_trace(rsc, "Processing state transition for %s %p: %s->%s", rsc->id, rsc, role2text(rsc->role), role2text(rsc->next_role)); - if (rsc->running_on) { - current = rsc->running_on->data; - } - - for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { - node_t *n = (node_t *) gIter->data; - if (rsc->partial_migration_source && - (n->details == rsc->partial_migration_source->details)) { - current = rsc->partial_migration_source; - } - num_active_nodes++; - } + current = pe__find_active_on(rsc, &num_all_active, &num_clean_active); for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; @@ -1207,46 +1198,57 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) } } - if (num_active_nodes > 1) { + if ((num_all_active == 2) && (num_clean_active == 2) && chosen + && rsc->partial_migration_source && rsc->partial_migration_target + && (current->details == rsc->partial_migration_source->details) + && (chosen->details == rsc->partial_migration_target->details)) { - if (num_active_nodes == 2 - && chosen - && rsc->partial_migration_target - && rsc->partial_migration_source - && (current->details == rsc->partial_migration_source->details) - && (chosen->details == rsc->partial_migration_target->details)) { - /* Here the chosen node is still the migration target from a partial - * migration. Attempt to continue the migration instead of recovering - * by stopping the resource everywhere and starting it on a single node. 
*/ - pe_rsc_trace(rsc, - "Will attempt to continue with a partial migration to target %s from %s", - rsc->partial_migration_target->details->id, - rsc->partial_migration_source->details->id); - } else { - const char *type = crm_element_value(rsc->xml, XML_ATTR_TYPE); - const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + /* The chosen node is still the migration target from a partial + * migration. Attempt to continue the migration instead of recovering + * by stopping the resource everywhere and starting it on a single node. + */ + pe_rsc_trace(rsc, + "Will attempt to continue with a partial migration to target %s from %s", + rsc->partial_migration_target->details->id, + rsc->partial_migration_source->details->id); + + } else if (is_not_set(rsc->flags, pe_rsc_needs_fencing)) { + /* If a resource has "requires" set to nothing or quorum, don't consider + * it active on unclean nodes (similar to how all resources behave when + * stonith-enabled is false). We can start such resources elsewhere + * before fencing completes, and if we considered the resource active on + * the failed node, we would attempt recovery for being active on + * multiple nodes. + */ + multiply_active = (num_clean_active > 1); + } else { + multiply_active = (num_all_active > 1); + } - if(rsc->partial_migration_target && rsc->partial_migration_source) { - crm_notice("Resource %s can no longer migrate to %s. Stopping on %s too", rsc->id, - rsc->partial_migration_target->details->uname, - rsc->partial_migration_source->details->uname); + if (multiply_active) { + if (rsc->partial_migration_target && rsc->partial_migration_source) { + // Migration was in progress, but we've chosen a different target + crm_notice("Resource %s can no longer migrate to %s. 
Stopping on %s too", + rsc->id, rsc->partial_migration_target->details->uname, + rsc->partial_migration_source->details->uname); - } else { - pe_proc_err("Resource %s (%s::%s) is active on %d nodes %s", - rsc->id, class, type, num_active_nodes, recovery2text(rsc->recovery_type)); - crm_warn("See https://wiki.clusterlabs.org/wiki/FAQ#Resource_is_Too_Active for more information"); - } - - if (rsc->recovery_type == recovery_stop_start) { - need_stop = TRUE; - } + } else { + // Resource was incorrectly multiply active + pe_proc_err("Resource %s is active on %u nodes (%s)", + rsc->id, num_all_active, + recovery2text(rsc->recovery_type)); + crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ#Resource_is_Too_Active for more information"); + } - /* If by chance a partial migration is in process, - * but the migration target is not chosen still, clear all - * partial migration data. */ - rsc->partial_migration_source = rsc->partial_migration_target = NULL; - allow_migrate = FALSE; + if (rsc->recovery_type == recovery_stop_start) { + need_stop = TRUE; } + + /* If by chance a partial migration is in process, but the migration + * target is not chosen still, clear all partial migration data. 
+ */ + rsc->partial_migration_source = rsc->partial_migration_target = NULL; + allow_migrate = FALSE; } if (is_set(rsc->flags, pe_rsc_start_pending)) { @@ -1339,7 +1341,7 @@ native_create_actions(resource_t * rsc, pe_working_set_t * data_set) is_not_set(rsc->flags, pe_rsc_managed) || is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending) || - (current->details->unclean == TRUE) || + (current && current->details->unclean) || rsc->next_role < RSC_ROLE_STARTED) { allow_migrate = FALSE; @@ -2329,12 +2331,7 @@ LogActions(resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) next = rsc->allocated_to; if (rsc->running_on) { - if (g_list_length(rsc->running_on) > 1 && rsc->partial_migration_source) { - current = rsc->partial_migration_source; - } else { - current = rsc->running_on->data; - } - + current = pe__current_node(rsc); if (rsc->role == RSC_ROLE_STOPPED) { /* * This can occur when resources are being recovered From 76fd8c326f38a7427b2d878959bc5da268940fea Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 30 May 2018 17:49:25 -0500 Subject: [PATCH 108/812] Refactor: libcrmcommon: improve parse_op_key() efficiency - Check error conditions more strictly - Allow output variables to be NULL if caller if not interested in them - Don't over-allocate memory - Ensure output variables need to be freed only if return value is TRUE --- crmd/te_events.c | 2 -- lib/common/operations.c | 48 +++++++++++++++++++++++++++-------------- pengine/allocate.c | 11 ++-------- pengine/constraints.c | 13 +---------- tools/crm_mon.c | 3 +-- 5 files changed, 36 insertions(+), 41 deletions(-) diff --git a/crmd/te_events.c b/crmd/te_events.c index 28a8ab287c3..7b5ca2a4900 100644 --- a/crmd/te_events.c +++ b/crmd/te_events.c @@ -147,8 +147,6 @@ update_failcount(xmlNode * event, const char *event_node_uuid, int rc, CRM_CHECK(on_uname != NULL, return TRUE); CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval), crm_err("Couldn't parse: %s", ID(event)); goto 
bail); - CRM_CHECK(task != NULL, goto bail); - CRM_CHECK(rsc_id != NULL, goto bail); /* Decide whether update is necessary and what value to use */ if ((interval > 0) || safe_str_eq(task, CRMD_ACTION_PROMOTE) diff --git a/lib/common/operations.c b/lib/common/operations.c index 3ca59399ac2..26a8f655e13 100644 --- a/lib/common/operations.c +++ b/lib/common/operations.c @@ -49,10 +49,22 @@ parse_op_key(const char *key, char **rsc_id, char **op_type, int *interval) char *mutable_key = NULL; char *mutable_key_ptr = NULL; int len = 0, offset = 0, ch = 0; + int local_interval_ms = 0; - CRM_CHECK(key != NULL, return FALSE); + // Initialize output variables in case of early return + if (rsc_id) { + *rsc_id = NULL; + } + if (op_type) { + *op_type = NULL; + } + if (interval) { + *interval = 0; + } - *interval = 0; + CRM_CHECK(key && *key, return FALSE); + + // Parse interval at end of string len = strlen(key); offset = len - 1; @@ -68,36 +80,36 @@ parse_op_key(const char *key, char **rsc_id, char **op_type, int *interval) digits--; ch = ch * 10; } - *interval += ch; + local_interval_ms += ch; offset--; } + crm_trace("Operation key '%s' has interval %ums", key, local_interval_ms); + if (interval) { + *interval = local_interval_ms; + } + + CRM_CHECK((offset != (len - 1)) && (key[offset] == '_'), return FALSE); - crm_trace(" Interval: %d", *interval); - CRM_CHECK(key[offset] == '_', return FALSE); - - mutable_key = strdup(key); - mutable_key[offset] = 0; + mutable_key = strndup(key, offset); offset--; while (offset > 0 && key[offset] != '_') { offset--; } - CRM_CHECK(key[offset] == '_', free(mutable_key); - return FALSE); + CRM_CHECK(key[offset] == '_', + free(mutable_key); return FALSE); mutable_key_ptr = mutable_key + offset + 1; crm_trace(" Action: %s", mutable_key_ptr); - - *op_type = strdup(mutable_key_ptr); + if (op_type) { + *op_type = strdup(mutable_key_ptr); + } mutable_key[offset] = 0; offset--; - CRM_CHECK(mutable_key != mutable_key_ptr, free(mutable_key); - return 
FALSE); - notify = strstr(mutable_key, "_post_notify"); if (notify && safe_str_eq(notify, "_post_notify")) { notify[0] = 0; @@ -109,7 +121,11 @@ parse_op_key(const char *key, char **rsc_id, char **op_type, int *interval) } crm_trace(" Resource: %s", mutable_key); - *rsc_id = mutable_key; + if (rsc_id) { + *rsc_id = mutable_key; + } else { + free(mutable_key); + } return TRUE; } diff --git a/pengine/allocate.c b/pengine/allocate.c index 09ca1ef6ab1..724736ce5fe 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1666,15 +1666,11 @@ find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_ke if (list == NULL) { /* we're potentially searching a child of the original resource */ char *key = NULL; - char *tmp = NULL; char *task = NULL; int interval = 0; - if (parse_op_key(original_key, &tmp, &task, &interval)) { + if (parse_op_key(original_key, NULL, &task, &interval)) { key = generate_op_key(rsc->id, task, interval); - /* crm_err("looking up %s instead of %s", key, original_key); */ - /* slist_iter(action, action_t, actions, lpc, */ - /* crm_err(" - %s", action->uuid)); */ list = find_actions(actions, key, NULL); } else { @@ -1682,7 +1678,6 @@ find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_ke } free(key); - free(tmp); free(task); } @@ -1764,11 +1759,10 @@ rsc_order_first(resource_t * lh_rsc, order_constraint_t * order, pe_working_set_ if (lh_actions == NULL && lh_rsc != rh_rsc) { char *key = NULL; - char *rsc_id = NULL; char *op_type = NULL; int interval = 0; - parse_op_key(order->lh_action_task, &rsc_id, &op_type, &interval); + parse_op_key(order->lh_action_task, NULL, &op_type, &interval); key = generate_op_key(lh_rsc->id, op_type, interval); if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) { @@ -1789,7 +1783,6 @@ rsc_order_first(resource_t * lh_rsc, order_constraint_t * order, pe_working_set_ } free(op_type); - free(rsc_id); } gIter = lh_actions; diff --git 
a/pengine/constraints.c b/pengine/constraints.c index 82f5bc492e4..7c777575c14 100644 --- a/pengine/constraints.c +++ b/pengine/constraints.c @@ -1311,23 +1311,12 @@ static char * task_from_action_or_key(action_t *action, const char *key) { char *res = NULL; - char *rsc_id = NULL; - char *op_type = NULL; - int interval = 0; if (action) { res = strdup(action->task); } else if (key) { - int rc = 0; - rc = parse_op_key(key, &rsc_id, &op_type, &interval); - if (rc == TRUE) { - res = op_type; - op_type = NULL; - } - free(rsc_id); - free(op_type); + parse_op_key(key, NULL, &res, NULL); } - return res; } diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 7fe0fb598d6..824b12fb1fa 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -3793,7 +3793,6 @@ handle_rsc_op(xmlNode * xml, const char *node_id) int rc = -1; int status = -1; int action = -1; - int interval = 0; int target_rc = -1; int transition_num = -1; gboolean notify = TRUE; @@ -3837,7 +3836,7 @@ handle_rsc_op(xmlNode * xml, const char *node_id) return; } - if (parse_op_key(id, &rsc, &task, &interval) == FALSE) { + if (parse_op_key(id, &rsc, &task, NULL) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } From f37c5e2c0d9fa26938563bdb5b857b8801f4794f Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 5 Dec 2017 18:26:23 -0600 Subject: [PATCH 109/812] Fix: libpe_status: fix precedence of operation meta-attributes Operations were unpacked such that op_defaults were the base, then any XML properties of the tag itself took precedence over that, and then any meta_attributes and instance_attributes beneath the tag had the lowest precedence. This changes it so that the tag has the highest precedence, then , then , then (which is deprecated for setting meta-attributes). 
--- lib/pengine/utils.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index a875226e000..304b77f1427 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -925,9 +925,21 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, CRM_CHECK(action->rsc != NULL, return); + // Cluster-wide unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); + // take precedence over defaults + unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, + NULL, action->meta, NULL, TRUE, data_set->now); + + // have lowest precedence (deprecated) + unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, + NULL, action->meta, NULL, FALSE, data_set->now); + + /* Anything set as an XML property has highest precedence. + * This ensures we use the name and interval from the tag. + */ if (xml_obj) { xmlAttrPtr xIter = NULL; @@ -939,12 +951,6 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, } } - unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, - NULL, action->meta, NULL, FALSE, data_set->now); - - unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, - NULL, action->meta, NULL, FALSE, data_set->now); - #if ENABLE_VERSIONED_ATTRS rsc_details = pe_rsc_action_details(action); pe_unpack_versioned_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, From 8e50fdd1088fe300f1f7a497b979b7abc8547cc3 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 6 Dec 2017 11:22:34 -0600 Subject: [PATCH 110/812] Fix: libpe_status: use correct default timeout for probes Previously, if timeout were set in op_defaults, probes without an explicitly configured timeout would use that value rather than the minimum-interval monitor's. 
This was contrary the behavior of default-action-timeout, which would not override the minimum-interval monitor's value. Now, timeout in op_defaults behaves the same as default-action-timeout. --- lib/pengine/utils.c | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 304b77f1427..3f123be571f 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -805,19 +805,6 @@ unpack_timeout(const char *value, action_t *action, xmlNode *xml_obj, { int timeout = 0; - if (value == NULL && xml_obj == NULL && action && - safe_str_eq(action->task, RSC_STATUS) && interval == 0) { - - xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); - - if (min_interval_mon) { - value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); - pe_rsc_trace(action->rsc, - "\t%s uses the timeout value '%s' from the minimum interval monitor", - action->uuid, value); - } - } - if (value == NULL && config_hash) { value = pe_pref(config_hash, "default-action-timeout"); if (value) { @@ -910,6 +897,18 @@ unpack_versioned_meta(xmlNode *versioned_meta, xmlNode *xml_obj, unsigned long l } #endif +/*! + * \brief Unpack operation XML into an action structure + * + * Unpack an operation's meta-attributes (normalizing the interval, timeout, + * and start delay values as integer milliseconds), requirements, and + * failure policy. 
+ * + * \param[in,out] action Action to unpack into + * \param[in] xml_obj Operation XML (or NULL if all defaults) + * \param[in] container Resource that contains affected resource, if any + * \param[in] data_set Cluster state + */ void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * data_set) @@ -925,6 +924,23 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, CRM_CHECK(action->rsc != NULL, return); + // Probe timeouts default to minimum-interval monitor's + if ((xml_obj == NULL) && action && + safe_str_eq(action->task, RSC_STATUS) && (interval == 0)) { + + xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); + + if (min_interval_mon) { + value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); + if (value) { + crm_trace("\t%s defaults to minimum-interval monitor's timeout '%s'", + action->uuid, value); + g_hash_table_insert(action->meta, strdup(XML_ATTR_TIMEOUT), + strdup(value)); + } + } + } + // Cluster-wide unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); From 13801fee75425449a2716506c295b41cc759a12d Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 6 Mar 2018 18:35:01 -0600 Subject: [PATCH 111/812] Fix: libpe_status: use correct default timeout for monitors really this time (attempted fix in 78802ff was incorrect) --- lib/pengine/utils.c | 75 +++++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 3f123be571f..0d16faa3a5e 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -918,33 +918,24 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, char *value_ms = NULL; const char *value = NULL; const char *field = NULL; + char *default_timeout = NULL; #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t *rsc_details = NULL; #endif - 
CRM_CHECK(action->rsc != NULL, return); - - // Probe timeouts default to minimum-interval monitor's - if ((xml_obj == NULL) && action && - safe_str_eq(action->task, RSC_STATUS) && (interval == 0)) { - - xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); - - if (min_interval_mon) { - value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); - if (value) { - crm_trace("\t%s defaults to minimum-interval monitor's timeout '%s'", - action->uuid, value); - g_hash_table_insert(action->meta, strdup(XML_ATTR_TIMEOUT), - strdup(value)); - } - } - } + CRM_CHECK(action && action->rsc, return); // Cluster-wide unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); + // Probe timeouts default differently, so handle timeout default later + default_timeout = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT); + if (default_timeout) { + default_timeout = strdup(default_timeout); + g_hash_table_remove(action->meta, XML_ATTR_TIMEOUT); + } + // take precedence over defaults unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, TRUE, data_set->now); @@ -953,6 +944,14 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, FALSE, data_set->now); +#if ENABLE_VERSIONED_ATTRS + rsc_details = pe_rsc_action_details(action); + pe_unpack_versioned_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, + rsc_details->versioned_parameters, data_set->now); + pe_unpack_versioned_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, NULL, + rsc_details->versioned_meta, data_set->now); +#endif + /* Anything set as an XML property has highest precedence. * This ensures we use the name and interval from the tag. 
*/ @@ -967,16 +966,9 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, } } -#if ENABLE_VERSIONED_ATTRS - rsc_details = pe_rsc_action_details(action); - pe_unpack_versioned_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, - rsc_details->versioned_parameters, data_set->now); - pe_unpack_versioned_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, NULL, - rsc_details->versioned_meta, data_set->now); -#endif - g_hash_table_remove(action->meta, "id"); + // Normalize interval to milliseconds field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { @@ -998,6 +990,33 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, " (use 'requires' resource meta-attribute instead)"); } + // Handle timeout default, now that we know the interval + if (g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT)) { + free(default_timeout); + + } else { + // Probe timeouts default to minimum-interval monitor's + if (safe_str_eq(action->task, RSC_STATUS) && (interval == 0)) { + + xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); + + if (min_interval_mon) { + value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); + if (value) { + crm_trace("\t%s defaults to minimum-interval monitor's timeout '%s'", + action->uuid, value); + free(default_timeout); + default_timeout = strdup(value); + } + } + } + + if (default_timeout) { + g_hash_table_insert(action->meta, strdup(XML_ATTR_TIMEOUT), + default_timeout); + } + } + if (safe_str_neq(action->task, RSC_START) && safe_str_neq(action->task, RSC_PROMOTE)) { action->needs = rsc_req_nothing; @@ -1166,7 +1185,6 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task, role2text(action->fail_role)); - field = XML_OP_ATTR_START_DELAY; value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY); if (value) { 
unpack_start_delay(value, action->meta); @@ -1175,8 +1193,7 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, unpack_interval_origin(value, action->meta, xml_obj, interval, data_set->now); } - field = XML_ATTR_TIMEOUT; - value = g_hash_table_lookup(action->meta, field); + value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT); timeout = unpack_timeout(value, action, xml_obj, interval, data_set->config_hash); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), crm_itoa(timeout)); From d07c2f2b4894e3a75a0e51a991c5df54f4242a7b Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 30 May 2018 17:52:08 -0500 Subject: [PATCH 112/812] Fix: scheduler: ensure orphaned recurring monitors have interval set Normally, unpack_operation() sets a recurring monitor's interval in the action meta-data based on the action XML. However, orphaned recurring monitors will not have any XML. In that case, set the interval based on the operation key. This fixes an issue where orphaned recurring monitors could not be cancelled if the resource was unmanaged, because the action would not be detected as a recurring monitor. 
--- lib/pengine/utils.c | 55 ++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 0d16faa3a5e..1edead67e06 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -936,28 +936,33 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, g_hash_table_remove(action->meta, XML_ATTR_TIMEOUT); } - // take precedence over defaults - unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, - NULL, action->meta, NULL, TRUE, data_set->now); + if (xml_obj) { + xmlAttrPtr xIter = NULL; + + // take precedence over defaults + unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, + NULL, action->meta, NULL, TRUE, + data_set->now); // have lowest precedence (deprecated) unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, FALSE, data_set->now); #if ENABLE_VERSIONED_ATTRS - rsc_details = pe_rsc_action_details(action); - pe_unpack_versioned_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, - rsc_details->versioned_parameters, data_set->now); - pe_unpack_versioned_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, NULL, - rsc_details->versioned_meta, data_set->now); + rsc_details = pe_rsc_action_details(action); + pe_unpack_versioned_attributes(data_set->input, xml_obj, + XML_TAG_ATTR_SETS, NULL, + rsc_details->versioned_parameters, + data_set->now); + pe_unpack_versioned_attributes(data_set->input, xml_obj, + XML_TAG_META_SETS, NULL, + rsc_details->versioned_meta, + data_set->now); #endif - /* Anything set as an XML property has highest precedence. - * This ensures we use the name and interval from the tag. - */ - if (xml_obj) { - xmlAttrPtr xIter = NULL; - + /* Anything set as an XML property has highest precedence. + * This ensures we use the name and interval from the tag. 
+ */ for (xIter = xml_obj->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_obj, prop_name); @@ -973,13 +978,23 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, value = g_hash_table_lookup(action->meta, field); if (value != NULL) { interval = crm_get_interval(value); - if (interval > 0) { - value_ms = crm_itoa(interval); - g_hash_table_replace(action->meta, strdup(field), value_ms); - } else { - g_hash_table_remove(action->meta, field); - } + } else if ((xml_obj == NULL) && !strcmp(action->task, RSC_STATUS)) { + int interval_ms = 0; + + /* An orphaned recurring monitor will not have any XML. However, we + * want the interval to be set, so the action can be properly detected + * as a recurring monitor. Parse it from the key in this case. + */ + parse_op_key(action->uuid, NULL, NULL, &interval_ms); + interval = interval_ms; + } + if (interval > 0) { + value_ms = crm_itoa(interval); + g_hash_table_replace(action->meta, strdup(field), value_ms); + + } else if (value) { + g_hash_table_remove(action->meta, field); } /* @COMPAT data sets < 1.1.10 ("requires" on start action not resource) */ From a337225d56a3ef26f6a0250e63ec22fa20332d43 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 31 May 2018 20:06:40 -0500 Subject: [PATCH 113/812] Test: pengine: update regression tests for orphaned monitor change --- pengine/test10/bundle-replicas-change.dot | 2 ++ pengine/test10/coloc-negative-group.dot | 1 + pengine/test10/coloc-negative-group.exp | 9 +++++++++ pengine/test10/coloc-negative-group.summary | 1 + 4 files changed, 13 insertions(+) diff --git a/pengine/test10/bundle-replicas-change.dot b/pengine/test10/bundle-replicas-change.dot index fc6ecbad33a..f7a32fd2481 100644 --- a/pengine/test10/bundle-replicas-change.dot +++ b/pengine/test10/bundle-replicas-change.dot @@ -7,6 +7,7 @@ digraph "g" { "httpd-bundle-0_start_0 rh74-test" -> 
"httpd:0_monitor_10000 httpd-bundle-0" [ style = bold] "httpd-bundle-0_start_0 rh74-test" -> "httpd:0_start_0 httpd-bundle-0" [ style = bold] "httpd-bundle-0_start_0 rh74-test" -> "httpd_delete_0 httpd-bundle-0" [ style = bold] +"httpd-bundle-0_start_0 rh74-test" -> "httpd_monitor_10000 httpd-bundle-0" [ style = dashed] "httpd-bundle-0_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] "httpd-bundle-0_stop_0 rh74-test" -> "Cancel httpd-bundle-0_monitor_30000 rh74-test" [ style = bold] "httpd-bundle-0_stop_0 rh74-test" -> "all_stopped" [ style = bold] @@ -102,6 +103,7 @@ digraph "g" { "httpd:2_start_0 httpd-bundle-2" -> "httpd:2_monitor_10000 httpd-bundle-2" [ style = bold] "httpd:2_start_0 httpd-bundle-2" [ style=bold color="green" fontcolor="black"] "httpd_delete_0 httpd-bundle-0" [ style=bold color="green" fontcolor="black"] +"httpd_monitor_10000 httpd-bundle-0" [ style=dashed color="red" fontcolor="black"] "httpd_stop_0 httpd-bundle-0" -> "all_stopped" [ style = bold] "httpd_stop_0 httpd-bundle-0" -> "httpd-bundle-0_stop_0 rh74-test" [ style = bold] "httpd_stop_0 httpd-bundle-0" -> "httpd_delete_0 httpd-bundle-0" [ style = bold] diff --git a/pengine/test10/coloc-negative-group.dot b/pengine/test10/coloc-negative-group.dot index 1b04509b4f9..69c5e4ddcb5 100644 --- a/pengine/test10/coloc-negative-group.dot +++ b/pengine/test10/coloc-negative-group.dot @@ -1,4 +1,5 @@ digraph "g" { "Cancel res_Dummy_1_monitor_10000 lenny-b" [ style=bold color="green" fontcolor="black" ] +"Cancel res_Dummy_2_monitor_10000 lenny-b" [ style=bold color="green" fontcolor="black"] "Cancel res_Dummy_3_monitor_10000 lenny-a" [ style=bold color="green" fontcolor="black" ] } diff --git a/pengine/test10/coloc-negative-group.exp b/pengine/test10/coloc-negative-group.exp index 3fd9d300475..5ceed820c26 100644 --- a/pengine/test10/coloc-negative-group.exp +++ b/pengine/test10/coloc-negative-group.exp @@ -9,6 +9,15 @@ + + + + + + + + + diff --git 
a/pengine/test10/coloc-negative-group.summary b/pengine/test10/coloc-negative-group.summary index 1165026ebe6..6b9f552642d 100644 --- a/pengine/test10/coloc-negative-group.summary +++ b/pengine/test10/coloc-negative-group.summary @@ -11,6 +11,7 @@ Transition Summary: Executing cluster transition: * Resource action: res_Dummy_1 cancel=10000 on lenny-b + * Resource action: res_Dummy_2 cancel=10000 on lenny-b * Resource action: res_Dummy_3 cancel=10000 on lenny-a Revised cluster status: From 355461723733acc0f6f9d9cc1318c91ba2a0ae6c Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 22 May 2018 15:55:14 -0500 Subject: [PATCH 114/812] Fix: all: prefer appropriate node when multiply active --- lib/pengine/container.c | 8 ++---- lib/pengine/native.c | 8 ++---- pengine/allocate.c | 20 ++++++-------- pengine/clone.c | 51 ++++++++++++++++------------------ pengine/graph.c | 26 ++++++++++-------- pengine/native.c | 17 ++++++------ pengine/notif.c | 2 +- tools/crm_mon.c | 14 +++------- tools/crm_resource.c | 21 ++++++++------ tools/crm_resource_print.c | 16 +++++------ tools/crm_resource_runtime.c | 53 ++++++++++++++++++------------------ 11 files changed, 112 insertions(+), 124 deletions(-) diff --git a/lib/pengine/container.c b/lib/pengine/container.c index b5340bf5a2b..d82948a409e 100644 --- a/lib/pengine/container.c +++ b/lib/pengine/container.c @@ -807,11 +807,11 @@ container_fix_remote_addr_in(resource_t *rsc, xmlNode *xml, const char *field) } node = tuple->docker->allocated_to; - if(node == NULL && tuple->docker->running_on) { + if (node == NULL) { /* If it won't be running anywhere after the * transition, go with where it's running now. 
*/ - node = tuple->docker->running_on->data; + node = pe__current_node(tuple->docker); } if(node == NULL) { @@ -1289,9 +1289,7 @@ tuple_print(container_grouping_t * tuple, const char *pre_text, long options, vo offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", tuple->ipaddr); } - if (tuple->docker->running_on) { - node = tuple->docker->running_on->data; - } + node = pe__current_node(tuple->docker); common_print(rsc, pre_text, buffer, node, options, print_data); } diff --git a/lib/pengine/native.c b/lib/pengine/native.c index e01ef17f9e1..eda0355e2d3 100644 --- a/lib/pengine/native.c +++ b/lib/pengine/native.c @@ -457,7 +457,7 @@ native_print_xml(resource_t * rsc, const char *pre_text, long options, void *pri if (options & pe_print_rsconly) { status_print("/>\n"); /* do nothing */ - } else if (g_list_length(rsc->running_on) > 0) { + } else if (rsc->running_on != NULL) { GListPtr gIter = rsc->running_on; status_print(">\n"); @@ -529,7 +529,7 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n } else if (is_set(rsc->flags, pe_rsc_failed)) { status_print(""); - } else if (rsc->variant == pe_native && g_list_length(rsc->running_on) == 0) { + } else if (rsc->variant == pe_native && (rsc->running_on == NULL)) { status_print(""); } else if (g_list_length(rsc->running_on) > 1) { @@ -742,9 +742,7 @@ native_print(resource_t * rsc, const char *pre_text, long options, void *print_d return; } - if (rsc->running_on != NULL) { - node = rsc->running_on->data; - } + node = pe__current_node(rsc); common_print(rsc, pre_text, rsc_printable_id(rsc), node, options, print_data); } diff --git a/pengine/allocate.c b/pengine/allocate.c index 724736ce5fe..427575ba202 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1101,14 +1101,14 @@ sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data) r2_weight = -INFINITY; if (resource1->running_on) { - r1_node = g_list_nth_data(resource1->running_on, 0); + r1_node = 
pe__current_node(resource1); r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id); if (r1_node != NULL) { r1_weight = r1_node->weight; } } if (resource2->running_on) { - r2_node = g_list_nth_data(resource2->running_on, 0); + r2_node = pe__current_node(resource2); r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id); if (r2_node != NULL) { r2_weight = r2_node->weight; @@ -1925,10 +1925,7 @@ get_remote_node_state(pe_node_t *node) remote_rsc = node->details->remote_rsc; CRM_ASSERT(remote_rsc); - if(remote_rsc->running_on) { - cluster_node = remote_rsc->running_on->data; - } - + cluster_node = pe__current_node(remote_rsc); /* If the cluster node the remote connection resource resides on * is unclean or went offline, we can't process any operations @@ -1989,11 +1986,14 @@ get_remote_node_state(pe_node_t *node) return remote_state_alive; } +/*! + * \internal + * \brief Order actions on remote node relative to actions for the connection + */ static void apply_remote_ordering(action_t *action, pe_working_set_t *data_set) { resource_t *remote_rsc = NULL; - node_t *cluster_node = NULL; enum action_tasks task = text2task(action->task); enum remote_connection_state state = get_remote_node_state(action->node); @@ -2009,10 +2009,6 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set) remote_rsc = action->node->details->remote_rsc; CRM_ASSERT(remote_rsc); - if(remote_rsc->running_on) { - cluster_node = remote_rsc->running_on->data; - } - crm_trace("Order %s action %s relative to %s%s (state: %s)", action->task, action->uuid, is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "", @@ -2093,6 +2089,8 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set) pe_order_implies_then, data_set); } else { + node_t *cluster_node = pe__current_node(remote_rsc); + if(task == monitor_rsc && state == remote_state_failed) { /* We would only be here if we do not know the * state of the resource on the remote node. 
diff --git a/pengine/clone.c b/pengine/clone.c index 31924129999..1de2661533a 100644 --- a/pengine/clone.c +++ b/pengine/clone.c @@ -69,6 +69,10 @@ sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) int rc = 0; node_t *node1 = NULL; node_t *node2 = NULL; + node_t *current_node1 = NULL; + node_t *current_node2 = NULL; + unsigned int nnodes1 = 0; + unsigned int nnodes2 = 0; gboolean can1 = TRUE; gboolean can2 = TRUE; @@ -87,24 +91,22 @@ sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) * - inactive instances */ - if (resource1->running_on && resource2->running_on) { - if (g_list_length(resource1->running_on) < g_list_length(resource2->running_on)) { + current_node1 = pe__find_active_on(resource1, &nnodes1, NULL); + current_node2 = pe__find_active_on(resource2, &nnodes2, NULL); + + if (nnodes1 && nnodes2) { + if (nnodes1 < nnodes2) { crm_trace("%s < %s: running_on", resource1->id, resource2->id); return -1; - } else if (g_list_length(resource1->running_on) > g_list_length(resource2->running_on)) { + } else if (nnodes1 > nnodes2) { crm_trace("%s > %s: running_on", resource1->id, resource2->id); return 1; } } - if (resource1->running_on) { - node1 = resource1->running_on->data; - } - if (resource2->running_on) { - node2 = resource2->running_on->data; - } - + node1 = current_node1; + node2 = current_node2; if (node1) { node_t *match = pe_hash_table_lookup(resource1->allowed_nodes, node1->details->id); @@ -216,10 +218,10 @@ sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) GHashTable *hash2 = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); - n = node_copy(resource1->running_on->data); + n = node_copy(current_node1); g_hash_table_insert(hash1, (gpointer) n->details->id, n); - n = node_copy(resource2->running_on->data); + n = node_copy(current_node2); g_hash_table_insert(hash2, (gpointer) n->details->id, n); if(resource1->parent) { @@ -267,11 +269,8 @@ 
sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) } /* Current location score */ - node1 = g_list_nth_data(resource1->running_on, 0); - node1 = g_hash_table_lookup(hash1, node1->details->id); - - node2 = g_list_nth_data(resource2->running_on, 0); - node2 = g_hash_table_lookup(hash2, node2->details->id); + node1 = g_hash_table_lookup(hash1, current_node1->details->id); + node2 = g_hash_table_lookup(hash2, current_node2->details->id); if (node1->weight < node2->weight) { if (node1->weight < 0) { @@ -295,12 +294,8 @@ sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) list1 = g_hash_table_get_values(hash1); list2 = g_hash_table_get_values(hash2); - list1 = - g_list_sort_with_data(list1, sort_node_weight, - g_list_nth_data(resource1->running_on, 0)); - list2 = - g_list_sort_with_data(list2, sort_node_weight, - g_list_nth_data(resource2->running_on, 0)); + list1 = g_list_sort_with_data(list1, sort_node_weight, current_node1); + list2 = g_list_sort_with_data(list2, sort_node_weight, current_node2); max = g_list_length(list1); if (max < g_list_length(list2)) { max = g_list_length(list2); @@ -528,8 +523,8 @@ distribute_children(resource_t *rsc, GListPtr children, GListPtr nodes, if (child->running_on && is_set(child->flags, pe_rsc_provisional) && is_not_set(child->flags, pe_rsc_failed)) { - node_t *child_node = child->running_on->data; - node_t *local_node = parent_node_instance(child, child->running_on->data); + node_t *child_node = pe__current_node(child); + node_t *local_node = parent_node_instance(child, child_node); pe_rsc_trace(rsc, "Checking pre-allocation of %s to %s (%d remaining of %d)", child->id, child_node->details->uname, max - allocated, max); @@ -556,9 +551,9 @@ distribute_children(resource_t *rsc, GListPtr children, GListPtr nodes, for (GListPtr gIter = children; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; - if (g_list_length(child->running_on) > 0) { - node_t *child_node 
= child->running_on->data; - node_t *local_node = parent_node_instance(child, child->running_on->data); + if (child->running_on != NULL) { + node_t *child_node = pe__current_node(child); + node_t *local_node = parent_node_instance(child, child_node); if (local_node == NULL) { crm_err("%s is running on %s which isn't allowed", diff --git a/pengine/graph.c b/pengine/graph.c index 6d4e4c7ebf3..236b2784b1d 100644 --- a/pengine/graph.c +++ b/pengine/graph.c @@ -783,6 +783,7 @@ get_router_node(action_t *action) node_t *began_on = NULL; node_t *ended_on = NULL; node_t *router_node = NULL; + bool partial_migration = FALSE; if (safe_str_eq(action->task, CRM_OP_FENCE) || is_remote_node(action->node) == FALSE) { return NULL; @@ -790,10 +791,13 @@ get_router_node(action_t *action) CRM_ASSERT(action->node->details->remote_rsc != NULL); - if (action->node->details->remote_rsc->running_on) { - began_on = action->node->details->remote_rsc->running_on->data; - } + began_on = pe__current_node(action->node->details->remote_rsc); ended_on = action->node->details->remote_rsc->allocated_to; + if (action->node->details->remote_rsc + && (action->node->details->remote_rsc->container == NULL) + && action->node->details->remote_rsc->partial_migration_target) { + partial_migration = TRUE; + } /* if there is only one location to choose from, * this is easy. Check for those conditions first */ @@ -817,6 +821,10 @@ get_router_node(action_t *action) * are all required before the remote rsc stop action can occur.) In * this case, we know these actions have to be routed through the initial * cluster node the connection resource lived on before the move takes place. + * The exception is a partial migration of a (non-guest) remote + * connection resource; in that case, all actions (even these) will be + * ordered after the connection's pseudo-start on the migration target, + * so the target is the router node. * * 2. 
Everything else (start, promote, monitor, probe, refresh, clear failcount * delete ....) must occur after the resource starts on the node it is @@ -824,10 +832,10 @@ get_router_node(action_t *action) */ /* 1. before connection rsc moves. */ - if (safe_str_eq(action->task, "stop") || + if ((safe_str_eq(action->task, "stop") || safe_str_eq(action->task, "demote") || safe_str_eq(action->task, "migrate_from") || - safe_str_eq(action->task, "migrate_to")) { + safe_str_eq(action->task, "migrate_to")) && !partial_migration) { router_node = began_on; @@ -1234,18 +1242,14 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) case stopped_rsc: case action_demote: case action_demoted: - if(action->node->details->remote_rsc->container->running_on) { - host = action->node->details->remote_rsc->container->running_on->data; - } + host = pe__current_node(action->node->details->remote_rsc->container); break; case start_rsc: case started_rsc: case monitor_rsc: case action_promote: case action_promoted: - if(action->node->details->remote_rsc->container->allocated_to) { - host = action->node->details->remote_rsc->container->allocated_to; - } + host = action->node->details->remote_rsc->container->allocated_to; break; default: break; diff --git a/pengine/native.c b/pengine/native.c index 37ac2e4899c..1c26642dd86 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -102,7 +102,7 @@ native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_se if (length > 0) { nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = g_list_sort_with_data(nodes, sort_node_weight, - g_list_nth_data(rsc->running_on, 0)); + pe__current_node(rsc)); // First node in sorted list has the best score best = g_list_nth_data(nodes, 0); @@ -158,7 +158,7 @@ native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_se * remaining unallocated instances to prefer a node that's already * running another instance. 
*/ - node_t *running = g_list_nth_data(rsc->running_on, 0); + node_t *running = pe__current_node(rsc); if (running && (can_run_resources(running) == FALSE)) { pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources", @@ -534,16 +534,14 @@ native_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) node_t *assign_to = NULL; rsc->next_role = rsc->role; - if (rsc->running_on == NULL) { + assign_to = pe__current_node(rsc); + if (assign_to == NULL) { reason = "inactive"; } else if (rsc->role == RSC_ROLE_MASTER) { - assign_to = rsc->running_on->data; reason = "master"; } else if (is_set(rsc->flags, pe_rsc_failed)) { - assign_to = rsc->running_on->data; reason = "failed"; } else { - assign_to = rsc->running_on->data; reason = "active"; } pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id, @@ -1834,7 +1832,9 @@ rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working rsc_lh->id, rsc_ticket->ticket->id, rsc_ticket->id, role2text(rsc_ticket->role_lh)); - if (rsc_ticket->ticket->granted == FALSE && g_list_length(rsc_lh->running_on) > 0) { + if ((rsc_ticket->ticket->granted == FALSE) + && (rsc_lh->running_on != NULL)) { + GListPtr gIter = NULL; switch (rsc_ticket->loss_policy) { @@ -1867,7 +1867,7 @@ rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } - if (g_list_length(rsc_lh->running_on) > 0) { + if (rsc_lh->running_on != NULL) { clear_bit(rsc_lh->flags, pe_rsc_managed); set_bit(rsc_lh->flags, pe_rsc_block); } @@ -1919,7 +1919,6 @@ native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_ } else if ((then_rsc_role >= RSC_ROLE_STARTED) && safe_str_eq(then->task, RSC_START) && then->node - && then_rsc->running_on && g_list_length(then_rsc->running_on) == 1 && then->node->details == ((node_t *) then_rsc->running_on->data)->details) { /* ignore... 
if 'then' is supposed to be started after 'first', but diff --git a/pengine/notif.c b/pengine/notif.c index 3013ee03143..49132490167 100644 --- a/pengine/notif.c +++ b/pengine/notif.c @@ -113,7 +113,7 @@ expand_node_list(GListPtr list, char **uname, char **metal) if(node->details->remote_rsc && node->details->remote_rsc->container && node->details->remote_rsc->container->running_on) { - node = node->details->remote_rsc->container->running_on->data; + node = pe__current_node(node->details->remote_rsc->container); } if (node->details->uname == NULL) { diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 824b12fb1fa..7c638039a1b 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1953,16 +1953,10 @@ get_node_display_name(node_t *node) /* Host is displayed only if this is a guest node */ if (is_container_remote_node(node)) { - if (node->details->remote_rsc->running_on) { - /* running_on is a list, but guest nodes will have exactly one entry - * unless they are in the process of migrating, in which case they - * will have two; either way, we can use the first item in the list - */ - node_t *host_node = (node_t *) node->details->remote_rsc->running_on->data; - - if (host_node && host_node->details) { - node_host = host_node->details->uname; - } + node_t *host_node = pe__current_node(node->details->remote_rsc); + + if (host_node && host_node->details) { + node_host = host_node->details->uname; } if (node_host == NULL) { node_host = ""; /* so we at least get "uname@" to indicate guest */ diff --git a/tools/crm_resource.c b/tools/crm_resource.c index c64432ec65e..0557892c0e6 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -1015,23 +1015,27 @@ main(int argc, char **argv) rc = cli_resource_ban(rsc_id, dest->details->uname, NULL, cib_conn); } else if (rsc_cmd == 'B' || rsc_cmd == 'M') { + pe_node_t *current = NULL; + unsigned int nactive = 0; + rc = -EINVAL; - if (g_list_length(rsc->running_on) == 1) { - node_t *current = rsc->running_on->data; + current = 
pe__find_active_requires(rsc, &nactive); + + if (nactive == 1) { rc = cli_resource_ban(rsc_id, current->details->uname, NULL, cib_conn); } else if(rsc->variant == pe_master) { int count = 0; GListPtr iter = NULL; - node_t *current = NULL; + current = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if(child_role == RSC_ROLE_MASTER) { count++; - current = child->running_on->data; + current = pe__current_node(child); } } @@ -1039,14 +1043,15 @@ main(int argc, char **argv) rc = cli_resource_ban(rsc_id, current->details->uname, NULL, cib_conn); } else { - CMD_ERR("Resource '%s' not moved: active in %d locations (promoted in %d).", rsc_id, g_list_length(rsc->running_on), count); + CMD_ERR("Resource '%s' not moved: active in %d locations (promoted in %d).", + rsc_id, nactive, count); CMD_ERR("You can prevent '%s' from running on a specific location with: --ban --node ", rsc_id); CMD_ERR("You can prevent '%s' from being promoted at a specific location with:" " --ban --master --node ", rsc_id); } } else { - CMD_ERR("Resource '%s' not moved: active in %d locations.", rsc_id, g_list_length(rsc->running_on)); + CMD_ERR("Resource '%s' not moved: active in %d locations.", rsc_id, nactive); CMD_ERR("You can prevent '%s' from running on a specific location with: --ban --node ", rsc_id); } @@ -1164,12 +1169,12 @@ main(int argc, char **argv) node_t *node = pe_find_node(data_set.nodes, host_uname); if (node && is_remote_node(node)) { - if (node->details->remote_rsc == NULL || node->details->remote_rsc->running_on == NULL) { + node = pe__current_node(node->details->remote_rsc); + if (node == NULL) { CMD_ERR("No lrmd connection detected to remote node %s", host_uname); rc = -ENXIO; goto bail; } - node = node->details->remote_rsc->running_on->data; router_node = node->details->uname; attr_options |= attrd_opt_remote; } diff --git a/tools/crm_resource_print.c 
b/tools/crm_resource_print.c index d066c42bb2b..2463fb53858 100644 --- a/tools/crm_resource_print.c +++ b/tools/crm_resource_print.c @@ -68,6 +68,7 @@ cli_resource_print_cts(resource_t * rsc) const char *rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); const char *rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + pe_node_t *node = pe__current_node(rsc); if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH)) { xmlNode *op = NULL; @@ -90,10 +91,8 @@ cli_resource_print_cts(resource_t * rsc) } } - if (rsc->running_on != NULL && g_list_length(rsc->running_on) == 1) { - node_t *tmp = rsc->running_on->data; - - host = tmp->details->uname; + if (node != NULL) { + host = node->details->uname; } printf("Resource: %s %s %s %s %s %s %s %s %d %lld 0x%.16llx\n", @@ -315,16 +314,15 @@ int cli_resource_print_attribute(resource_t *rsc, const char *attr, pe_working_set_t * data_set) { int rc = -ENXIO; - node_t *current = NULL; + unsigned int count = 0; GHashTable *params = NULL; const char *value = NULL; + node_t *current = pe__find_active_on(rsc, &count, NULL); - if (g_list_length(rsc->running_on) == 1) { - current = rsc->running_on->data; - - } else if (g_list_length(rsc->running_on) > 1) { + if (count > 1) { CMD_ERR("%s is active on more than one node," " returning the default value for %s", rsc->id, crm_str(attr)); + current = NULL; } params = crm_str_table_new(); diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 5e54f9e320c..5004935384b 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -473,11 +473,11 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, node_t *node = pe_find_node(data_set->nodes, host_uname); if (node && is_remote_node(node)) { - if (node->details->remote_rsc == NULL || node->details->remote_rsc->running_on == NULL) { + node = pe__current_node(node->details->remote_rsc); + if (node == NULL) { CMD_ERR("No 
lrmd connection detected to remote node %s", host_uname); return -ENXIO; } - node = node->details->remote_rsc->running_on->data; router_node = node->details->uname; } } @@ -1648,11 +1648,16 @@ cli_resource_move(resource_t *rsc, const char *rsc_id, const char *host_name, cib_t *cib, pe_working_set_t *data_set) { int rc = -EINVAL; - int count = 0; + unsigned int count = 0; node_t *current = NULL; node_t *dest = pe_find_node(data_set->nodes, host_name); bool cur_is_dest = FALSE; + if (dest == NULL) { + CMD_ERR("Error performing operation: node '%s' is unknown", host_name); + return -ENXIO; + } + if (scope_master && rsc->variant != pe_master) { resource_t *p = uber_parent(rsc); if(p->variant == pe_master) { @@ -1667,8 +1672,12 @@ cli_resource_move(resource_t *rsc, const char *rsc_id, const char *host_name, } } + current = pe__find_active_requires(rsc, &count); + if(rsc->variant == pe_master) { GListPtr iter = NULL; + unsigned int master_count = 0; + pe_node_t *master_node = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; @@ -1676,37 +1685,27 @@ cli_resource_move(resource_t *rsc, const char *rsc_id, const char *host_name, if(child_role == RSC_ROLE_MASTER) { rsc = child; - count++; + master_node = pe__current_node(child); + master_count++; } } - - if(scope_master == FALSE && count == 0) { - count = g_list_length(rsc->running_on); + if (scope_master || master_count) { + count = master_count; + current = master_node; } - } else if (pe_rsc_is_clone(rsc)) { - count = g_list_length(rsc->running_on); - - } else if (g_list_length(rsc->running_on) > 1) { - CMD_ERR("Resource '%s' not moved: active on multiple nodes", rsc_id); - return rc; - } - - if(dest == NULL) { - CMD_ERR("Error performing operation: node '%s' is unknown", host_name); - return -ENXIO; } - if(g_list_length(rsc->running_on) == 1) { - current = rsc->running_on->data; + if (count > 1) { + if (pe_rsc_is_clone(rsc)) { + current = NULL; + } else { + 
CMD_ERR("Resource '%s' not moved: active on multiple nodes", rsc_id); + return rc; + } } - if(current == NULL) { - /* Nothing to check */ - - } else if(scope_master && rsc->fns->state(rsc, TRUE) != RSC_ROLE_MASTER) { - crm_trace("%s is already active on %s but not in correct state", rsc_id, dest->details->uname); - } else if (safe_str_eq(current->details->uname, dest->details->uname)) { + if (current && (current->details == dest->details)) { cur_is_dest = TRUE; if (do_force) { crm_info("%s is already %s on %s, reinforcing placement with location constraint.", @@ -1736,7 +1735,7 @@ cli_resource_move(resource_t *rsc, const char *rsc_id, const char *host_name, (void)cli_resource_ban(rsc_id, current->details->uname, NULL, cib); } else if(count > 1) { - CMD_ERR("Resource '%s' is currently %s in %d locations. One may now move one to %s", + CMD_ERR("Resource '%s' is currently %s in %d locations. One may now move to %s", rsc_id, scope_master?"promoted":"active", count, dest->details->uname); CMD_ERR("You can prevent '%s' from being %s at a specific location with:" " --ban %s--host ", rsc_id, scope_master?"promoted":"active", scope_master?"--master ":""); From 58a879cd3627ec27c544f2cc91bb9c6b0877d462 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 1 Jun 2018 18:03:51 -0500 Subject: [PATCH 115/812] Refactor: libstonithd: add enum for agent namespace more efficient than strings --- include/crm/stonith-ng.h | 16 ++++++++++++++ lib/fencing/st_client.c | 46 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h index 7bb9c788d58..00c15b58fc2 100644 --- a/include/crm/stonith-ng.h +++ b/include/crm/stonith-ng.h @@ -71,6 +71,22 @@ enum op_state st_failed, }; +// Supported fence agent interface standards +enum stonith_namespace { + st_namespace_invalid, + st_namespace_any, + st_namespace_internal, // Implemented internally by Pacemaker + + /* Neither of these projects are active any longer, 
but the fence agent + * interfaces they created are still in use and supported by Pacemaker. + */ + st_namespace_rhcs, // Red Hat Cluster Suite compatible + st_namespace_lha, // Linux-HA compatible +}; + +enum stonith_namespace stonith_text2namespace(const char *namespace_s); +const char *stonith_namespace2text(enum stonith_namespace namespace); + typedef struct stonith_key_value_s { char *key; char *value; diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 9ffffa77655..007e9f9ba50 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -158,6 +158,52 @@ static void stonith_send_notification(gpointer data, gpointer user_data); static int internal_stonith_action_execute(stonith_action_t * action); static void log_action(stonith_action_t *action, pid_t pid); +/*! + * \brief Get agent namespace by name + * + * \param[in] namespace_s Name of namespace as string + * + * \return Namespace as enum value + */ +enum stonith_namespace +stonith_text2namespace(const char *namespace_s) +{ + if ((namespace_s == NULL) || !strcmp(namespace_s, "any")) { + return st_namespace_any; + + } else if (!strcmp(namespace_s, "redhat") + || !strcmp(namespace_s, "stonith-ng")) { + return st_namespace_rhcs; + + } else if (!strcmp(namespace_s, "internal")) { + return st_namespace_internal; + + } else if (!strcmp(namespace_s, "heartbeat")) { + return st_namespace_lha; + } + return st_namespace_invalid; +} + +/*! 
+ * \brief Get agent namespace name + * + * \param[in] namespace Namespace as enum value + * + * \return Namespace name as string + */ +const char * +stonith_namespace2text(enum stonith_namespace namespace) +{ + switch (namespace) { + case st_namespace_any: return "any"; + case st_namespace_rhcs: return "stonith-ng"; + case st_namespace_internal: return "internal"; + case st_namespace_lha: return "heartbeat"; + default: break; + } + return "unsupported"; +} + static void log_action(stonith_action_t *action, pid_t pid) { From 5950986a65fa1e550fbe5bc371170515d4daee99 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 1 Jun 2018 18:13:59 -0500 Subject: [PATCH 116/812] Refactor: libstonithd: add function to get new namespace enum and deprecate get_stonith_provider() --- include/crm/stonith-ng.h | 3 + lib/fencing/st_client.c | 364 +++++++++++++++++++++------------------ 2 files changed, 196 insertions(+), 171 deletions(-) diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h index 00c15b58fc2..337dcac0e12 100644 --- a/include/crm/stonith-ng.h +++ b/include/crm/stonith-ng.h @@ -86,6 +86,8 @@ enum stonith_namespace { enum stonith_namespace stonith_text2namespace(const char *namespace_s); const char *stonith_namespace2text(enum stonith_namespace namespace); +enum stonith_namespace stonith_get_namespace(const char *agent, + const char *namespace_s); typedef struct stonith_key_value_s { char *key; @@ -393,6 +395,7 @@ void stonith_api_delete(stonith_t * st); void stonith_dump_pending_callbacks(stonith_t * st); +// deprecated (use stonith_get_namespace() instead) const char *get_stonith_provider(const char *agent, const char *provider); bool stonith_dispatch(stonith_t * st); diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 007e9f9ba50..aca76feb799 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -204,6 +204,56 @@ stonith_namespace2text(enum stonith_namespace namespace) return "unsupported"; } +/*! 
+ * \brief Determine namespace of a fence agent + * + * \param[in] agent Fence agent type + * \param[in] namespace_s Name of agent namespace as string, if known + * + * \return Namespace of specified agent, as enum value + */ +enum stonith_namespace +stonith_get_namespace(const char *agent, const char *namespace_s) +{ + if (safe_str_eq(namespace_s, "internal")) { + return st_namespace_internal; + } + + if (is_redhat_agent(agent)) { + return st_namespace_rhcs; + } + +#if HAVE_STONITH_STONITH_H + { + Stonith *stonith_obj = NULL; + + static gboolean need_init = TRUE; + static Stonith *(*st_new_fn) (const char *) = NULL; + static void (*st_del_fn) (Stonith *) = NULL; + + if (need_init) { + need_init = FALSE; + st_new_fn = + find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); + st_del_fn = + find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", + FALSE); + } + + if (lha_agents_lib && st_new_fn && st_del_fn) { + stonith_obj = (*st_new_fn) (agent); + if (stonith_obj) { + (*st_del_fn) (stonith_obj); + return st_namespace_lha; + } + } + } +#endif + + crm_err("Unknown fence agent: %s", agent); + return st_namespace_invalid; +} + static void log_action(stonith_action_t *action, pid_t pid) { @@ -255,8 +305,8 @@ create_device_registration_xml(const char *id, const char *namespace, const char xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); #if HAVE_STONITH_STONITH_H - namespace = get_stonith_provider(agent, namespace); - if (safe_str_eq(namespace, "heartbeat")) { + if (stonith_get_namespace(agent, namespace) == st_namespace_lha) { + namespace = stonith_text2namespace(st_namespace_lha); hash2field((gpointer) "plugin", (gpointer) agent, args); agent = "fence_legacy"; } @@ -1162,6 +1212,7 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names stonith_key_value_t ** devices, int timeout) { int count = 0; + enum stonith_namespace ns = stonith_text2namespace(namespace); if (devices == NULL) { 
crm_err("Parameter error: stonith_api_device_list"); @@ -1169,7 +1220,7 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names } /* Include Heartbeat agents */ - if (namespace == NULL || safe_str_eq("heartbeat", namespace)) { + if ((ns == st_namespace_any) || (ns == st_namespace_lha)) { #if HAVE_STONITH_STONITH_H static gboolean need_init = TRUE; @@ -1207,7 +1258,7 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names } /* Include Red Hat agents, basically: ls -1 @sbin_dir@/fence_* */ - if (namespace == NULL || safe_str_eq("redhat", namespace)) { + if ((ns == st_namespace_any) || (ns == st_namespace_rhcs)) { struct dirent **namelist; int file_num = scandir(RH_STONITH_DIR, &namelist, 0, alphasort); @@ -1281,160 +1332,165 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a { int rc = 0; char *buffer = NULL; - const char *provider = get_stonith_provider(agent, namespace); + enum stonith_namespace ns = stonith_get_namespace(agent, namespace); - crm_trace("looking up %s/%s metadata", agent, provider); + crm_trace("Looking up metadata for %s agent %s", + stonith_namespace2text(ns), agent); /* By having this in a library, we can access it from stonith_admin * when neither lrmd or stonith-ng are running * Important for the crm shell's validations... 
*/ - if (safe_str_eq(provider, "redhat")) { - stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, 5, NULL, NULL); - int exec_rc = stonith_action_execute(action, &rc, &buffer); - xmlNode *xml = NULL; - xmlNode *actions = NULL; - xmlXPathObject *xpathObj = NULL; - - if (exec_rc < 0 || rc != 0 || buffer == NULL) { - crm_warn("Could not obtain metadata for %s", agent); - crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer)); - free(buffer); /* Just in case */ - return -EINVAL; - } + switch (ns) { + case st_namespace_rhcs: + { + stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, 5, NULL, NULL); + int exec_rc = stonith_action_execute(action, &rc, &buffer); + xmlNode *xml = NULL; + xmlNode *actions = NULL; + xmlXPathObject *xpathObj = NULL; + + if (exec_rc < 0 || rc != 0 || buffer == NULL) { + crm_warn("Could not obtain metadata for %s", agent); + crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer)); + free(buffer); /* Just in case */ + return -EINVAL; + } - xml = string2xml(buffer); - if(xml == NULL) { - crm_warn("Metadata for %s is invalid", agent); - free(buffer); - return -EINVAL; - } + xml = string2xml(buffer); + if(xml == NULL) { + crm_warn("Metadata for %s is invalid", agent); + free(buffer); + return -EINVAL; + } - xpathObj = xpath_search(xml, "//actions"); - if (numXpathResults(xpathObj) > 0) { - actions = getXpathResult(xpathObj, 0); - } + xpathObj = xpath_search(xml, "//actions"); + if (numXpathResults(xpathObj) > 0) { + actions = getXpathResult(xpathObj, 0); + } - freeXpathObject(xpathObj); + freeXpathObject(xpathObj); - /* Now fudge the metadata so that the start/stop actions appear */ - xpathObj = xpath_search(xml, "//action[@name='stop']"); - if (numXpathResults(xpathObj) <= 0) { - xmlNode *tmp = NULL; + /* Now fudge the metadata so that the start/stop actions appear */ + xpathObj = xpath_search(xml, "//action[@name='stop']"); + if (numXpathResults(xpathObj) <= 0) { + xmlNode *tmp 
= NULL; - tmp = create_xml_node(actions, "action"); - crm_xml_add(tmp, "name", "stop"); - crm_xml_add(tmp, "timeout", CRM_DEFAULT_OP_TIMEOUT_S); + tmp = create_xml_node(actions, "action"); + crm_xml_add(tmp, "name", "stop"); + crm_xml_add(tmp, "timeout", CRM_DEFAULT_OP_TIMEOUT_S); - tmp = create_xml_node(actions, "action"); - crm_xml_add(tmp, "name", "start"); - crm_xml_add(tmp, "timeout", CRM_DEFAULT_OP_TIMEOUT_S); - } + tmp = create_xml_node(actions, "action"); + crm_xml_add(tmp, "name", "start"); + crm_xml_add(tmp, "timeout", CRM_DEFAULT_OP_TIMEOUT_S); + } - freeXpathObject(xpathObj); + freeXpathObject(xpathObj); - /* Now fudge the metadata so that the port isn't required in the configuration */ - xpathObj = xpath_search(xml, "//parameter[@name='port']"); - if (numXpathResults(xpathObj) > 0) { - /* We'll fill this in */ - xmlNode *tmp = getXpathResult(xpathObj, 0); + /* Now fudge the metadata so that the port isn't required in the configuration */ + xpathObj = xpath_search(xml, "//parameter[@name='port']"); + if (numXpathResults(xpathObj) > 0) { + /* We'll fill this in */ + xmlNode *tmp = getXpathResult(xpathObj, 0); - crm_xml_add(tmp, "required", "0"); - } + crm_xml_add(tmp, "required", "0"); + } - freeXpathObject(xpathObj); - free(buffer); - buffer = dump_xml_formatted_with_text(xml); - free_xml(xml); - if (!buffer) { - return -EINVAL; - } + freeXpathObject(xpathObj); + free(buffer); + buffer = dump_xml_formatted_with_text(xml); + free_xml(xml); + if (!buffer) { + return -EINVAL; + } + } + break; - } else { + case st_namespace_lha: #if !HAVE_STONITH_STONITH_H - return -EINVAL; /* Heartbeat agents not supported */ + return -EINVAL; /* Heartbeat agents not supported */ #else - int bufferlen = 0; - static const char *no_parameter_info = ""; - - Stonith *stonith_obj = NULL; - - static gboolean need_init = TRUE; - static Stonith *(*st_new_fn) (const char *) = NULL; - static const char *(*st_info_fn) (Stonith *, int) = NULL; - static void (*st_del_fn) (Stonith *) = 
NULL; - static void (*st_log_fn) (Stonith *, PILLogFun) = NULL; - - if (need_init) { - need_init = FALSE; - st_new_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); - st_del_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", - FALSE); - st_log_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_set_log", - FALSE); - st_info_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_get_info", - FALSE); - } - - if (lha_agents_lib && st_new_fn && st_del_fn && st_info_fn && st_log_fn) { - char *xml_meta_longdesc = NULL; - char *xml_meta_shortdesc = NULL; - - char *meta_param = NULL; - char *meta_longdesc = NULL; - char *meta_shortdesc = NULL; - - stonith_obj = (*st_new_fn) (agent); - if (stonith_obj) { - (*st_log_fn) (stonith_obj, (PILLogFun) & stonith_plugin); - meta_longdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEDESCR)); - if (meta_longdesc == NULL) { - crm_warn("no long description in %s's metadata.", agent); - meta_longdesc = strdup(no_parameter_info); - } - - meta_shortdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEID)); - if (meta_shortdesc == NULL) { - crm_warn("no short description in %s's metadata.", agent); - meta_shortdesc = strdup(no_parameter_info); + { + static const char *no_parameter_info = ""; + + Stonith *stonith_obj = NULL; + + static gboolean need_init = TRUE; + static Stonith *(*st_new_fn) (const char *) = NULL; + static const char *(*st_info_fn) (Stonith *, int) = NULL; + static void (*st_del_fn) (Stonith *) = NULL; + static void (*st_log_fn) (Stonith *, PILLogFun) = NULL; + + if (need_init) { + need_init = FALSE; + st_new_fn = + find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); + st_del_fn = + find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", + FALSE); + st_log_fn = + find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_set_log", + 
FALSE); + st_info_fn = + find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_get_info", + FALSE); } - meta_param = strdup_null((*st_info_fn) (stonith_obj, ST_CONF_XML)); - if (meta_param == NULL) { - crm_warn("no list of parameters in %s's metadata.", agent); - meta_param = strdup(no_parameter_info); + if (lha_agents_lib && st_new_fn && st_del_fn && st_info_fn && st_log_fn) { + char *xml_meta_longdesc = NULL; + char *xml_meta_shortdesc = NULL; + + char *meta_param = NULL; + char *meta_longdesc = NULL; + char *meta_shortdesc = NULL; + + stonith_obj = (*st_new_fn) (agent); + if (stonith_obj) { + (*st_log_fn) (stonith_obj, (PILLogFun) & stonith_plugin); + meta_longdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEDESCR)); + if (meta_longdesc == NULL) { + crm_warn("no long description in %s's metadata.", agent); + meta_longdesc = strdup(no_parameter_info); + } + + meta_shortdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEID)); + if (meta_shortdesc == NULL) { + crm_warn("no short description in %s's metadata.", agent); + meta_shortdesc = strdup(no_parameter_info); + } + + meta_param = strdup_null((*st_info_fn) (stonith_obj, ST_CONF_XML)); + if (meta_param == NULL) { + crm_warn("no list of parameters in %s's metadata.", agent); + meta_param = strdup(no_parameter_info); + } + (*st_del_fn) (stonith_obj); + } else { + return -EINVAL; /* Heartbeat agents not supported */ + } + + xml_meta_longdesc = + (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_longdesc); + xml_meta_shortdesc = + (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_shortdesc); + + buffer = crm_strdup_printf(META_TEMPLATE, agent, xml_meta_longdesc, + xml_meta_shortdesc, meta_param); + + xmlFree(xml_meta_longdesc); + xmlFree(xml_meta_shortdesc); + + free(meta_shortdesc); + free(meta_longdesc); + free(meta_param); } - (*st_del_fn) (stonith_obj); - } else { - return -EINVAL; /* Heartbeat agents not supported */ } - - xml_meta_longdesc = - 
(char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_longdesc); - xml_meta_shortdesc = - (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_shortdesc); - - bufferlen = strlen(META_TEMPLATE) + strlen(agent) - + strlen(xml_meta_longdesc) + strlen(xml_meta_shortdesc) - + strlen(meta_param) + 1; - - buffer = calloc(1, bufferlen); - snprintf(buffer, bufferlen - 1, META_TEMPLATE, - agent, xml_meta_longdesc, xml_meta_shortdesc, meta_param); - - xmlFree(xml_meta_longdesc); - xmlFree(xml_meta_shortdesc); - - free(meta_shortdesc); - free(meta_longdesc); - free(meta_param); - } + break; #endif + default: + // Do not provide meta-data for internal or unknown agents + break; } if (output) { @@ -1641,47 +1697,13 @@ is_redhat_agent(const char *agent) return FALSE; } +/*! + * \brief Deprecated (use stonith_get_namespace() instead) + */ const char * get_stonith_provider(const char *agent, const char *provider) { - /* This function sucks */ - if (is_redhat_agent(agent)) { - return "redhat"; - -#if HAVE_STONITH_STONITH_H - } else { - Stonith *stonith_obj = NULL; - - static gboolean need_init = TRUE; - static Stonith *(*st_new_fn) (const char *) = NULL; - static void (*st_del_fn) (Stonith *) = NULL; - - if (need_init) { - need_init = FALSE; - st_new_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); - st_del_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", - FALSE); - } - - if (lha_agents_lib && st_new_fn && st_del_fn) { - stonith_obj = (*st_new_fn) (agent); - if (stonith_obj) { - (*st_del_fn) (stonith_obj); - return "heartbeat"; - } - } -#endif - } - - if (safe_str_eq(provider, "internal")) { - return provider; - - } else { - crm_err("No such device: %s", agent); - return NULL; - } + return stonith_namespace2text(stonith_get_namespace(agent, provider)); } static gint From 2a922ef4ddb877eaec0ff3dac08743477253fdd8 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 1 Jun 2018 
19:44:26 -0500 Subject: [PATCH 117/812] Refactor: fencing: use enum when creating device registration op This hopefully makes the intent a bit clearer. The previous cib_device_update() would attempt to pass the "provider" from the fence device resource XML to create_device_registration_xml(). However, the schema does not allow "provider" with "stonith"-class resources, so it would always be NULL. Now it doesn't look for provider. --- fencing/admin.c | 3 ++- fencing/main.c | 8 +++++--- include/crm/fencing/internal.h | 29 +++++++++++------------------ lib/fencing/st_client.c | 18 ++++++++++++------ 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/fencing/admin.c b/fencing/admin.c index 3369a416eaa..d053e1c27f0 100644 --- a/fencing/admin.c +++ b/fencing/admin.c @@ -631,7 +631,8 @@ main(int argc, char **argv) } break; case 'R': - rc = st->cmds->register_device(st, st_opts, device, "stonith-ng", agent, params); + rc = st->cmds->register_device(st, st_opts, device, NULL, agent, + params); break; case 'D': rc = st->cmds->remove_device(st, st_opts, device); diff --git a/fencing/main.c b/fencing/main.c index f46be30b820..555714a25a8 100644 --- a/fencing/main.c +++ b/fencing/main.c @@ -676,7 +676,6 @@ static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set) const char *name = NULL; const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); - const char *provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); const char *rsc_provides = NULL; crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); @@ -695,7 +694,8 @@ static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set) } remove = FALSE; - data = create_device_registration_xml(rsc_name(rsc), provider, agent, params, rsc_provides); + data = create_device_registration_xml(rsc_name(rsc), st_namespace_any, + agent, params, rsc_provides); stonith_device_register(data, NULL, TRUE); stonith_key_value_freeall(params, 1, 1); @@ 
-1525,7 +1525,9 @@ main(int argc, char **argv) params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname); - xml = create_device_registration_xml("watchdog", "internal", STONITH_WATCHDOG_AGENT, params, NULL); + xml = create_device_registration_xml("watchdog", st_namespace_internal, + STONITH_WATCHDOG_AGENT, params, + NULL); stonith_device_register(xml, NULL, FALSE); stonith_key_value_freeall(params, 1, 1); diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h index c0b27dbadb9..d8cfe69a371 100644 --- a/include/crm/fencing/internal.h +++ b/include/crm/fencing/internal.h @@ -1,20 +1,10 @@ -/* - * Copyright (C) 2011 Andrew Beekhof - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +/* + * Copyright 2011-2018 Andrew Beekhof + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
*/ + #ifndef STONITH_NG_INTERNAL__H # define STONITH_NG_INTERNAL__H @@ -47,8 +37,11 @@ xmlNode *create_level_registration_xml(const char *node, const char *pattern, int level, stonith_key_value_t *device_list); -xmlNode *create_device_registration_xml(const char *id, const char *namespace, const char *agent, - stonith_key_value_t * params, const char *rsc_provides); +xmlNode *create_device_registration_xml(const char *id, + enum stonith_namespace namespace, + const char *agent, + stonith_key_value_t *params, + const char *rsc_provides); # define ST_LEVEL_MAX 10 diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index aca76feb799..4ec6c613153 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -298,15 +298,18 @@ stonith_connection_destroy(gpointer user_data) } xmlNode * -create_device_registration_xml(const char *id, const char *namespace, const char *agent, - stonith_key_value_t * params, const char *rsc_provides) +create_device_registration_xml(const char *id, enum stonith_namespace namespace, + const char *agent, stonith_key_value_t *params, + const char *rsc_provides) { xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); #if HAVE_STONITH_STONITH_H - if (stonith_get_namespace(agent, namespace) == st_namespace_lha) { - namespace = stonith_text2namespace(st_namespace_lha); + if (namespace == st_namespace_any) { + namespace = stonith_get_namespace(agent, NULL); + } + if (namespace == st_namespace_lha) { hash2field((gpointer) "plugin", (gpointer) agent, args); agent = "fence_legacy"; } @@ -315,7 +318,9 @@ create_device_registration_xml(const char *id, const char *namespace, const char crm_xml_add(data, XML_ATTR_ID, id); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add(data, "agent", agent); - crm_xml_add(data, "namespace", namespace); + if ((namespace != st_namespace_any) && (namespace != st_namespace_invalid)) { + crm_xml_add(data, "namespace", 
stonith_namespace2text(namespace)); + } if (rsc_provides) { crm_xml_add(data, "rsc_provides", rsc_provides); } @@ -335,7 +340,8 @@ stonith_api_register_device(stonith_t * st, int call_options, int rc = 0; xmlNode *data = NULL; - data = create_device_registration_xml(id, namespace, agent, params, NULL); + data = create_device_registration_xml(id, stonith_text2namespace(namespace), + agent, params, NULL); rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0); free_xml(data); From 2b80d31903a98a1090796b3a990b59631541b880 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 1 Jun 2018 20:23:03 -0500 Subject: [PATCH 118/812] Refactor: libstonithd: separate RHCS-specific code into own source file improves readability and maintainability --- include/crm/fencing/internal.h | 7 +- lib/fencing/Makefile.am | 23 ++--- lib/fencing/st_client.c | 122 ++------------------------ lib/fencing/st_rhcs.c | 155 +++++++++++++++++++++++++++++++++ 4 files changed, 174 insertions(+), 133 deletions(-) create mode 100644 lib/fencing/st_rhcs.c diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h index d8cfe69a371..114d8745276 100644 --- a/include/crm/fencing/internal.h +++ b/include/crm/fencing/internal.h @@ -30,8 +30,6 @@ stonith_action_execute_async(stonith_action_t * action, int stonith_action_execute(stonith_action_t * action, int *agent_result, char **output); -gboolean is_redhat_agent(const char *agent); - xmlNode *create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, int level, @@ -129,4 +127,9 @@ xmlNode *create_device_registration_xml(const char *id, # define STONITH_WATCHDOG_AGENT "#watchdog" +// utilities from st_rhcs.c +int stonith__list_rhcs_agents(stonith_key_value_t **devices); +int stonith__rhcs_metadata(const char *agent, int timeout, char **output); +bool stonith__agent_is_rhcs(const char *agent); + #endif diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am 
index da1ec37b8f9..71d4df4381f 100644 --- a/lib/fencing/Makefile.am +++ b/lib/fencing/Makefile.am @@ -1,20 +1,9 @@ -# File: Makefile.am -# Author: Sun Jiang Dong -# Copyright (c) 2004 International Business Machines # -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Copyright 2004-2018 International Business Machines +# Author: Sun Jiang Dong +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. 
# include $(top_srcdir)/Makefile.common @@ -26,4 +15,4 @@ libstonithd_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libstonithd_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libstonithd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la -libstonithd_la_SOURCES = st_client.c +libstonithd_la_SOURCES = st_client.c st_rhcs.c diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 4ec6c613153..267b442c2c6 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -29,8 +29,6 @@ #include #include -#include -#include /* Add it for compiling on OSX */ #include #include @@ -219,7 +217,7 @@ stonith_get_namespace(const char *agent, const char *namespace_s) return st_namespace_internal; } - if (is_redhat_agent(agent)) { + if (stonith__agent_is_rhcs(agent)) { return st_namespace_rhcs; } @@ -1263,36 +1261,9 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names #endif } - /* Include Red Hat agents, basically: ls -1 @sbin_dir@/fence_* */ + // Include Red Hat agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_rhcs)) { - struct dirent **namelist; - int file_num = scandir(RH_STONITH_DIR, &namelist, 0, alphasort); - - if (file_num > 0) { - struct stat prop; - char buffer[FILENAME_MAX + 1]; - - while (file_num--) { - if ('.' 
== namelist[file_num]->d_name[0]) { - free(namelist[file_num]); - continue; - - } else if (!crm_starts_with(namelist[file_num]->d_name, - RH_STONITH_PREFIX)) { - free(namelist[file_num]); - continue; - } - - snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, namelist[file_num]->d_name); - if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { - *devices = stonith_key_value_add(*devices, NULL, namelist[file_num]->d_name); - count++; - } - - free(namelist[file_num]); - } - free(namelist); - } + count += stonith__list_rhcs_agents(devices); } return count; @@ -1336,6 +1307,10 @@ static int stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, const char *namespace, char **output, int timeout) { + /* By executing meta-data directly, we can get it from stonith_admin when + * the cluster is not running, which is important for higher-level tools. + */ + int rc = 0; char *buffer = NULL; enum stonith_namespace ns = stonith_get_namespace(agent, namespace); @@ -1343,75 +1318,9 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a crm_trace("Looking up metadata for %s agent %s", stonith_namespace2text(ns), agent); - /* By having this in a library, we can access it from stonith_admin - * when neither lrmd or stonith-ng are running - * Important for the crm shell's validations... 
- */ - switch (ns) { case st_namespace_rhcs: - { - stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, 5, NULL, NULL); - int exec_rc = stonith_action_execute(action, &rc, &buffer); - xmlNode *xml = NULL; - xmlNode *actions = NULL; - xmlXPathObject *xpathObj = NULL; - - if (exec_rc < 0 || rc != 0 || buffer == NULL) { - crm_warn("Could not obtain metadata for %s", agent); - crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer)); - free(buffer); /* Just in case */ - return -EINVAL; - } - - xml = string2xml(buffer); - if(xml == NULL) { - crm_warn("Metadata for %s is invalid", agent); - free(buffer); - return -EINVAL; - } - - xpathObj = xpath_search(xml, "//actions"); - if (numXpathResults(xpathObj) > 0) { - actions = getXpathResult(xpathObj, 0); - } - - freeXpathObject(xpathObj); - - /* Now fudge the metadata so that the start/stop actions appear */ - xpathObj = xpath_search(xml, "//action[@name='stop']"); - if (numXpathResults(xpathObj) <= 0) { - xmlNode *tmp = NULL; - - tmp = create_xml_node(actions, "action"); - crm_xml_add(tmp, "name", "stop"); - crm_xml_add(tmp, "timeout", CRM_DEFAULT_OP_TIMEOUT_S); - - tmp = create_xml_node(actions, "action"); - crm_xml_add(tmp, "name", "start"); - crm_xml_add(tmp, "timeout", CRM_DEFAULT_OP_TIMEOUT_S); - } - - freeXpathObject(xpathObj); - - /* Now fudge the metadata so that the port isn't required in the configuration */ - xpathObj = xpath_search(xml, "//parameter[@name='port']"); - if (numXpathResults(xpathObj) > 0) { - /* We'll fill this in */ - xmlNode *tmp = getXpathResult(xpathObj, 0); - - crm_xml_add(tmp, "required", "0"); - } - - freeXpathObject(xpathObj); - free(buffer); - buffer = dump_xml_formatted_with_text(xml); - free_xml(xml); - if (!buffer) { - return -EINVAL; - } - } - break; + return stonith__rhcs_metadata(agent, timeout, output); case st_namespace_lha: #if !HAVE_STONITH_STONITH_H @@ -1688,21 +1597,6 @@ stonith_api_history(stonith_t * stonith, int call_options, const char 
*node, return rc; } -gboolean -is_redhat_agent(const char *agent) -{ - int rc = 0; - struct stat prop; - char buffer[FILENAME_MAX + 1]; - - snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, agent); - rc = stat(buffer, &prop); - if (rc >= 0 && S_ISREG(prop.st_mode)) { - return TRUE; - } - return FALSE; -} - /*! * \brief Deprecated (use stonith_get_namespace() instead) */ diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c new file mode 100644 index 00000000000..b1255d2f8bb --- /dev/null +++ b/lib/fencing/st_rhcs.c @@ -0,0 +1,155 @@ +/* + * Copyright 2004-2018 Andrew Beekhof + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +/*! + * \internal + * \brief Add available RHCS-compatible agents to a list + * + * \param[in,out] List to add to + * + * \return Number of agents added + */ +int +stonith__list_rhcs_agents(stonith_key_value_t **devices) +{ + // Essentially: ls -1 @sbin_dir@/fence_* + + int count = 0; + struct dirent **namelist; + int file_num = scandir(RH_STONITH_DIR, &namelist, 0, alphasort); + + if (file_num > 0) { + struct stat prop; + char buffer[FILENAME_MAX + 1]; + + while (file_num--) { + if ('.' == namelist[file_num]->d_name[0]) { + free(namelist[file_num]); + continue; + + } else if (!crm_starts_with(namelist[file_num]->d_name, + RH_STONITH_PREFIX)) { + free(namelist[file_num]); + continue; + } + + snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, + namelist[file_num]->d_name); + if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { + *devices = stonith_key_value_add(*devices, NULL, + namelist[file_num]->d_name); + count++; + } + + free(namelist[file_num]); + } + free(namelist); + } + return count; +} + +/*! 
+ * \brief Execute RHCS-compatible agent's meta-data action + * + * \param[in] agent Agent to execute + * \param[in] timeout Action timeout + * \param[out] output Where to store action output (or NULL to ignore) + * + * \todo timeout is currently ignored; shouldn't we use it? + */ +int +stonith__rhcs_metadata(const char *agent, int timeout, char **output) +{ + int rc = 0; + char *buffer = NULL; + xmlNode *xml = NULL; + xmlNode *actions = NULL; + xmlXPathObject *xpathObj = NULL; + stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, + 5, NULL, NULL); + int exec_rc = stonith_action_execute(action, &rc, &buffer); + + if ((exec_rc < 0) || (rc != 0) || (buffer == NULL)) { + crm_warn("Could not obtain metadata for %s", agent); + crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer)); + free(buffer); + return -EINVAL; + } + + xml = string2xml(buffer); + if (xml == NULL) { + crm_warn("Metadata for %s is invalid", agent); + free(buffer); + return -EINVAL; + } + + xpathObj = xpath_search(xml, "//actions"); + if (numXpathResults(xpathObj) > 0) { + actions = getXpathResult(xpathObj, 0); + } + freeXpathObject(xpathObj); + + // Add start and stop (implemented by pacemaker, not agent) to meta-data + xpathObj = xpath_search(xml, "//action[@name='stop']"); + if (numXpathResults(xpathObj) <= 0) { + xmlNode *tmp = NULL; + + tmp = create_xml_node(actions, "action"); + crm_xml_add(tmp, "name", "stop"); + crm_xml_add(tmp, "timeout", CRM_DEFAULT_OP_TIMEOUT_S); + + tmp = create_xml_node(actions, "action"); + crm_xml_add(tmp, "name", "start"); + crm_xml_add(tmp, "timeout", CRM_DEFAULT_OP_TIMEOUT_S); + } + freeXpathObject(xpathObj); + + // Fudge metadata so port isn't required in config (pacemaker adds it) + xpathObj = xpath_search(xml, "//parameter[@name='port']"); + if (numXpathResults(xpathObj) > 0) { + xmlNode *tmp = getXpathResult(xpathObj, 0); + + crm_xml_add(tmp, "required", "0"); + } + freeXpathObject(xpathObj); + + free(buffer); + buffer = 
dump_xml_formatted_with_text(xml); + free_xml(xml); + if (buffer == NULL) { + return -EINVAL; + } + if (output) { + *output = buffer; + } else { + free(buffer); + } + return pcmk_ok; +} + +bool +stonith__agent_is_rhcs(const char *agent) +{ + struct stat prop; + char *buffer = crm_strdup_printf(RH_STONITH_DIR "/%s", agent); + int rc = stat(buffer, &prop); + + free(buffer); + return (rc >= 0) && S_ISREG(prop.st_mode); +} From 6cd99a9a434b397d1fab702e309a1306654fe996 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 1 Jun 2018 21:10:14 -0500 Subject: [PATCH 119/812] Refactor: libstonithd: separate LHA-specific code into own source file improves readability and maintainability --- configure.ac | 1 + include/crm/fencing/internal.h | 7 + lib/fencing/Makefile.am | 4 + lib/fencing/st_client.c | 234 ++----------------------------- lib/fencing/st_lha.c | 245 +++++++++++++++++++++++++++++++++ 5 files changed, 267 insertions(+), 224 deletions(-) create mode 100644 lib/fencing/st_lha.c diff --git a/configure.ac b/configure.ac index 51bc9717125..9a401aad553 100644 --- a/configure.ac +++ b/configure.ac @@ -1058,6 +1058,7 @@ fi if test "$ac_cv_header_stonith_stonith_h" = "yes"; then PCMK_FEATURES="$PCMK_FEATURES lha-fencing" fi +AM_CONDITIONAL([BUILD_LHA_SUPPORT], [test "$ac_cv_header_stonith_stonith_h" = "yes"]) if test $HAVE_GLUE = 1; then dnl On Debian, AC_CHECK_LIBS fail if a library has any unresolved symbols diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h index 114d8745276..1ff6c1b5fb9 100644 --- a/include/crm/fencing/internal.h +++ b/include/crm/fencing/internal.h @@ -127,6 +127,13 @@ xmlNode *create_device_registration_xml(const char *id, # define STONITH_WATCHDOG_AGENT "#watchdog" +# ifdef HAVE_STONITH_STONITH_H +// utilities from st_lha.c +int stonith__list_lha_agents(stonith_key_value_t **devices); +int stonith__lha_metadata(const char *agent, int timeout, char **output); +bool stonith__agent_is_lha(const char *agent); +# endif + // 
utilities from st_rhcs.c int stonith__list_rhcs_agents(stonith_key_value_t **devices); int stonith__rhcs_metadata(const char *agent, int timeout, char **output); diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am index 71d4df4381f..69d9be0fb84 100644 --- a/lib/fencing/Makefile.am +++ b/lib/fencing/Makefile.am @@ -15,4 +15,8 @@ libstonithd_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libstonithd_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libstonithd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la + libstonithd_la_SOURCES = st_client.c st_rhcs.c +if BUILD_LHA_SUPPORT +libstonithd_la_SOURCES += st_lha.c +endif diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 267b442c2c6..b23ee577c79 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -36,12 +35,6 @@ #include #include -#ifdef HAVE_STONITH_STONITH_H -# include -# define LHA_STONITH_LIBRARY "libstonith.so.1" -static void *lha_agents_lib = NULL; -#endif - #include CRM_TRACE_INIT_DATA(stonith); @@ -121,28 +114,6 @@ struct timer_rec_s { typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); -#if HAVE_STONITH_STONITH_H -static const char META_TEMPLATE[] = - "\n" - "\n" - "\n" - " 1.0\n" - " \n" - "%s\n" - " \n" - " %s\n" - "%s\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " 2.0\n" " \n" "\n"; -#endif - bool stonith_dispatch(stonith_t * st); int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata); void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc); @@ -222,29 +193,8 @@ stonith_get_namespace(const char *agent, const char *namespace_s) } #if HAVE_STONITH_STONITH_H - { - Stonith *stonith_obj = NULL; - - static gboolean need_init = TRUE; - static Stonith *(*st_new_fn) (const char *) = NULL; - static void (*st_del_fn) (Stonith *) = NULL; - - if (need_init) { - 
need_init = FALSE; - st_new_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); - st_del_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", - FALSE); - } - - if (lha_agents_lib && st_new_fn && st_del_fn) { - stonith_obj = (*st_new_fn) (agent); - if (stonith_obj) { - (*st_del_fn) (stonith_obj); - return st_namespace_lha; - } - } + if (stonith__agent_is_lha(agent)) { + return st_namespace_lha; } #endif @@ -1223,43 +1173,12 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names return -EFAULT; } - /* Include Heartbeat agents */ - if ((ns == st_namespace_any) || (ns == st_namespace_lha)) { #if HAVE_STONITH_STONITH_H - static gboolean need_init = TRUE; - - char **entry = NULL; - char **type_list = NULL; - static char **(*type_list_fn) (void) = NULL; - static void (*type_free_fn) (char **) = NULL; - - if (need_init) { - need_init = FALSE; - type_list_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_types", FALSE); - type_free_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_free_hostlist", - FALSE); - } - - if (type_list_fn) { - type_list = (*type_list_fn) (); - } - - for (entry = type_list; entry != NULL && *entry; ++entry) { - crm_trace("Added: %s", *entry); - *devices = stonith_key_value_add(*devices, NULL, *entry); - count++; - } - if (type_list && type_free_fn) { - (*type_free_fn) (type_list); - } -#else - if (namespace != NULL) { - return -EINVAL; /* Heartbeat agents not supported */ - } -#endif + // Include Linux-HA agents if requested + if ((ns == st_namespace_any) || (ns == st_namespace_lha)) { + count += stonith__list_lha_agents(devices); } +#endif // Include Red Hat agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_rhcs)) { @@ -1269,40 +1188,6 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names return count; } -#if HAVE_STONITH_STONITH_H -static inline 
char * -strdup_null(const char *val) -{ - if (val) { - return strdup(val); - } - return NULL; -} - -static void -stonith_plugin(int priority, const char *fmt, ...) __attribute__((__format__ (__printf__, 2, 3))); - -static void -stonith_plugin(int priority, const char *format, ...) -{ - int err = errno; - - va_list ap; - int len = 0; - char *string = NULL; - - va_start(ap, format); - - len = vasprintf (&string, format, ap); - CRM_ASSERT(len > 0); - - do_crm_log_alias(priority, __FILE__, __func__, __LINE__, "%s", string); - - free(string); - errno = err; -} -#endif - static int stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, const char *namespace, char **output, int timeout) @@ -1311,8 +1196,6 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a * the cluster is not running, which is important for higher-level tools. */ - int rc = 0; - char *buffer = NULL; enum stonith_namespace ns = stonith_get_namespace(agent, namespace); crm_trace("Looking up metadata for %s agent %s", @@ -1322,100 +1205,15 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a case st_namespace_rhcs: return stonith__rhcs_metadata(agent, timeout, output); +#if HAVE_STONITH_STONITH_H case st_namespace_lha: -#if !HAVE_STONITH_STONITH_H - return -EINVAL; /* Heartbeat agents not supported */ -#else - { - static const char *no_parameter_info = ""; - - Stonith *stonith_obj = NULL; - - static gboolean need_init = TRUE; - static Stonith *(*st_new_fn) (const char *) = NULL; - static const char *(*st_info_fn) (Stonith *, int) = NULL; - static void (*st_del_fn) (Stonith *) = NULL; - static void (*st_log_fn) (Stonith *, PILLogFun) = NULL; - - if (need_init) { - need_init = FALSE; - st_new_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); - st_del_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", - FALSE); - st_log_fn = - 
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_set_log", - FALSE); - st_info_fn = - find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_get_info", - FALSE); - } - - if (lha_agents_lib && st_new_fn && st_del_fn && st_info_fn && st_log_fn) { - char *xml_meta_longdesc = NULL; - char *xml_meta_shortdesc = NULL; - - char *meta_param = NULL; - char *meta_longdesc = NULL; - char *meta_shortdesc = NULL; - - stonith_obj = (*st_new_fn) (agent); - if (stonith_obj) { - (*st_log_fn) (stonith_obj, (PILLogFun) & stonith_plugin); - meta_longdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEDESCR)); - if (meta_longdesc == NULL) { - crm_warn("no long description in %s's metadata.", agent); - meta_longdesc = strdup(no_parameter_info); - } - - meta_shortdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEID)); - if (meta_shortdesc == NULL) { - crm_warn("no short description in %s's metadata.", agent); - meta_shortdesc = strdup(no_parameter_info); - } - - meta_param = strdup_null((*st_info_fn) (stonith_obj, ST_CONF_XML)); - if (meta_param == NULL) { - crm_warn("no list of parameters in %s's metadata.", agent); - meta_param = strdup(no_parameter_info); - } - (*st_del_fn) (stonith_obj); - } else { - return -EINVAL; /* Heartbeat agents not supported */ - } - - xml_meta_longdesc = - (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_longdesc); - xml_meta_shortdesc = - (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_shortdesc); - - buffer = crm_strdup_printf(META_TEMPLATE, agent, xml_meta_longdesc, - xml_meta_shortdesc, meta_param); - - xmlFree(xml_meta_longdesc); - xmlFree(xml_meta_shortdesc); - - free(meta_shortdesc); - free(meta_longdesc); - free(meta_param); - } - } - break; + return stonith__lha_metadata(agent, timeout, output); #endif + default: - // Do not provide meta-data for internal or unknown agents break; } - - if (output) { - *output = buffer; - - } else { - free(buffer); - } - - return rc; 
+ return -EINVAL; } static int @@ -2633,15 +2431,3 @@ stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) free(name); return when; } - -#if HAVE_STONITH_STONITH_H -# include - -const char *i_hate_pils(int rc); - -const char * -i_hate_pils(int rc) -{ - return PIL_strerror(rc); -} -#endif diff --git a/lib/fencing/st_lha.c b/lib/fencing/st_lha.c new file mode 100644 index 00000000000..ee7a66ee125 --- /dev/null +++ b/lib/fencing/st_lha.c @@ -0,0 +1,245 @@ +/* + * Copyright 2004-2018 Andrew Beekhof + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define LHA_STONITH_LIBRARY "libstonith.so.1" + +static void *lha_agents_lib = NULL; + +static const char META_TEMPLATE[] = + "\n" + "\n" + "\n" + " 1.0\n" + " \n" + "%s\n" + " \n" + " %s\n" + "%s\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " 2.0\n" " \n" "\n"; + +/*! 
+ * \brief Determine namespace of a fence agent + * + * \param[in] agent Fence agent type + * \param[in] namespace_s Name of agent namespace as string, if known + * + * \return Namespace of specified agent, as enum value + */ +bool +stonith__agent_is_lha(const char *agent) +{ + Stonith *stonith_obj = NULL; + + static gboolean need_init = TRUE; + static Stonith *(*st_new_fn) (const char *) = NULL; + static void (*st_del_fn) (Stonith *) = NULL; + + if (need_init) { + need_init = FALSE; + st_new_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, + "stonith_new", FALSE); + st_del_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, + "stonith_delete", FALSE); + } + + if (lha_agents_lib && st_new_fn && st_del_fn) { + stonith_obj = (*st_new_fn) (agent); + if (stonith_obj) { + (*st_del_fn) (stonith_obj); + return TRUE; + } + } + return FALSE; +} + +int +stonith__list_lha_agents(stonith_key_value_t **devices) +{ + static gboolean need_init = TRUE; + + int count = 0; + char **entry = NULL; + char **type_list = NULL; + static char **(*type_list_fn) (void) = NULL; + static void (*type_free_fn) (char **) = NULL; + + if (need_init) { + need_init = FALSE; + type_list_fn = + find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_types", FALSE); + type_free_fn = + find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_free_hostlist", + FALSE); + } + + if (type_list_fn) { + type_list = (*type_list_fn) (); + } + + for (entry = type_list; entry != NULL && *entry; ++entry) { + crm_trace("Added: %s", *entry); + *devices = stonith_key_value_add(*devices, NULL, *entry); + count++; + } + if (type_list && type_free_fn) { + (*type_free_fn) (type_list); + } + return count; +} + +static inline char * +strdup_null(const char *val) +{ + if (val) { + return strdup(val); + } + return NULL; +} + +static void +stonith_plugin(int priority, const char *fmt, ...) 
__attribute__((__format__ (__printf__, 2, 3))); + +static void +stonith_plugin(int priority, const char *format, ...) +{ + int err = errno; + + va_list ap; + int len = 0; + char *string = NULL; + + va_start(ap, format); + + len = vasprintf (&string, format, ap); + va_end(ap); + CRM_ASSERT(len > 0); + + do_crm_log_alias(priority, __FILE__, __func__, __LINE__, "%s", string); + + free(string); + errno = err; +} + +int +stonith__lha_metadata(const char *agent, int timeout, char **output) +{ + int rc = 0; + char *buffer = NULL; + static const char *no_parameter_info = ""; + + Stonith *stonith_obj = NULL; + + static gboolean need_init = TRUE; + static Stonith *(*st_new_fn) (const char *) = NULL; + static const char *(*st_info_fn) (Stonith *, int) = NULL; + static void (*st_del_fn) (Stonith *) = NULL; + static void (*st_log_fn) (Stonith *, PILLogFun) = NULL; + + if (need_init) { + need_init = FALSE; + st_new_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, + "stonith_new", FALSE); + st_del_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, + "stonith_delete", FALSE); + st_log_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, + "stonith_set_log", FALSE); + st_info_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, + "stonith_get_info", FALSE); + } + + if (lha_agents_lib && st_new_fn && st_del_fn && st_info_fn && st_log_fn) { + char *xml_meta_longdesc = NULL; + char *xml_meta_shortdesc = NULL; + + char *meta_param = NULL; + char *meta_longdesc = NULL; + char *meta_shortdesc = NULL; + + stonith_obj = (*st_new_fn) (agent); + if (stonith_obj) { + (*st_log_fn) (stonith_obj, (PILLogFun) & stonith_plugin); + meta_longdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEDESCR)); + if (meta_longdesc == NULL) { + crm_warn("no long description in %s's metadata.", agent); + meta_longdesc = strdup(no_parameter_info); + } + + meta_shortdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEID)); + if (meta_shortdesc == NULL) 
{ + crm_warn("no short description in %s's metadata.", agent); + meta_shortdesc = strdup(no_parameter_info); + } + + meta_param = strdup_null((*st_info_fn) (stonith_obj, ST_CONF_XML)); + if (meta_param == NULL) { + crm_warn("no list of parameters in %s's metadata.", agent); + meta_param = strdup(no_parameter_info); + } + (*st_del_fn) (stonith_obj); + } else { + return -EINVAL; /* Heartbeat agents not supported */ + } + + xml_meta_longdesc = + (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_longdesc); + xml_meta_shortdesc = + (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_shortdesc); + + buffer = crm_strdup_printf(META_TEMPLATE, agent, xml_meta_longdesc, + xml_meta_shortdesc, meta_param); + + xmlFree(xml_meta_longdesc); + xmlFree(xml_meta_shortdesc); + + free(meta_shortdesc); + free(meta_longdesc); + free(meta_param); + } + if (output) { + *output = buffer; + } else { + free(buffer); + } + return rc; +} + +/* Implement a dummy function that uses -lpils so that linkers don't drop the + * reference. + */ + +#include + +const char *i_hate_pils(int rc); + +const char * +i_hate_pils(int rc) +{ + return PIL_strerror(rc); +} From 889c34cf25c0672cd6df39d7f1bdae641e905d23 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 4 Jun 2018 18:42:27 -0500 Subject: [PATCH 120/812] Low: tools: improve stonith meta-data error messages stonith__rhcs_metadata() now gives more detailed error log messages and return codes. This includes some refactoring: stonith_action_destroy() is exposed internally as stonith__destroy_action() so it can be used elsewhere; a new internal function stonith__action_result() is an accessor for an action's result, so the result can be used elsewhere and so standard error output can be obtained; and stonith_action_execute() has been renamed stonith__execute(), lost a couple of arguments, and no longer destroys the action, so the caller can use the other two functions to obtain the result and destroy the action. 
--- include/crm/fencing/internal.h | 6 +- lib/fencing/st_client.c | 105 ++++++++++++++++++++++----------- lib/fencing/st_rhcs.c | 32 ++++++---- 3 files changed, 96 insertions(+), 47 deletions(-) diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h index 1ff6c1b5fb9..b47ee8b56d7 100644 --- a/include/crm/fencing/internal.h +++ b/include/crm/fencing/internal.h @@ -20,6 +20,9 @@ stonith_action_t *stonith_action_create(const char *agent, uint32_t victim_nodeid, int timeout, GHashTable * device_args, GHashTable * port_map); +void stonith__destroy_action(stonith_action_t *action); +void stonith__action_result(stonith_action_t *action, int *rc, char **output, + char **error_output); GPid stonith_action_execute_async(stonith_action_t * action, @@ -27,8 +30,7 @@ stonith_action_execute_async(stonith_action_t * action, void (*done) (GPid pid, int rc, const char *output, gpointer user_data)); -int - stonith_action_execute(stonith_action_t * action, int *agent_result, char **output); +int stonith__execute(stonith_action_t *action); xmlNode *create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index b23ee577c79..4ed81a08618 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -709,15 +709,63 @@ stonith_action_clear_tracking_data(stonith_action_t * action) action->last_timeout_signo = 0; } -static void -stonith_action_destroy(stonith_action_t * action) +/*! 
+ * \internal + * \brief Free all memory used by a stonith action + * + * \param[in,out] action Action to free + */ +void +stonith__destroy_action(stonith_action_t *action) { - stonith_action_clear_tracking_data(action); - free(action->agent); - free(action->args); - free(action->action); - free(action->victim); - free(action); + if (action) { + stonith_action_clear_tracking_data(action); + free(action->agent); + free(action->args); + free(action->action); + free(action->victim); + free(action); + } +} + +/*! + * \internal + * \brief Get the result of an executed stonith action + * + * \param[in,out] action Executed action + * \param[out] rc Where to store result code (or NULL) + * \param[out] output Where to store standard output (or NULL) + * \param[out] error_output Where to store standard error output (or NULL) + * + * \note If output or error_output is not NULL, the caller is responsible for + * freeing the memory. + */ +void +stonith__action_result(stonith_action_t *action, int *rc, char **output, + char **error_output) +{ + if (rc) { + *rc = pcmk_ok; + } + if (output) { + *output = NULL; + } + if (error_output) { + *error_output = NULL; + } + if (action != NULL) { + if (rc) { + *rc = action->rc; + } + if (output && action->output) { + *output = action->output; + action->output = NULL; // hand off memory management to caller + } + if (error_output && action->error) { + *error_output = action->error; + action->error = NULL; // hand off memory management to caller + } + } } #define FAILURE_MAX_RETRIES 2 @@ -871,7 +919,7 @@ stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, action->done_cb(pid, action->rc, action->output, action->userdata); } - stonith_action_destroy(action); + stonith__destroy_action(action); } static int @@ -977,6 +1025,7 @@ internal_stonith_action_execute(stonith_action_t * action) action->agent, pcmk_strerror(rc), rc); } + errno = 0; do { crm_debug("sending args"); ret = write(p_write_fd, action->args + total, 
len - total); @@ -1126,38 +1175,26 @@ stonith_action_execute_async(stonith_action_t * action, return rc < 0 ? rc : action->pid; } +/*! + * \internal + * \brief Execute a stonith action + * + * \param[in,out] action Action to execute + * + * \return pcmk_ok on success, -errno otherwise + */ int -stonith_action_execute(stonith_action_t * action, int *agent_result, char **output) +stonith__execute(stonith_action_t *action) { - int rc = 0; + int rc = pcmk_ok; - if (!action) { - return -1; - } + CRM_CHECK(action != NULL, return -EINVAL); + // Keep trying until success, max retries, or timeout do { rc = internal_stonith_action_execute(action); - if (rc == pcmk_ok) { - /* success! */ - break; - } - /* keep retrying while we have time left */ - } while (update_remaining_timeout(action)); - - if (rc) { - /* error */ - return rc; - } - - if (agent_result) { - *agent_result = action->rc; - } - if (output) { - *output = action->output; - action->output = NULL; /* handed it off, do not free */ - } + } while ((rc != pcmk_ok) && update_remaining_timeout(action)); - stonith_action_destroy(action); return rc; } diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c index b1255d2f8bb..bf529352dab 100644 --- a/lib/fencing/st_rhcs.c +++ b/lib/fencing/st_rhcs.c @@ -76,27 +76,38 @@ stonith__list_rhcs_agents(stonith_key_value_t **devices) int stonith__rhcs_metadata(const char *agent, int timeout, char **output) { - int rc = 0; char *buffer = NULL; xmlNode *xml = NULL; xmlNode *actions = NULL; xmlXPathObject *xpathObj = NULL; stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, 5, NULL, NULL); - int exec_rc = stonith_action_execute(action, &rc, &buffer); + int rc = stonith__execute(action); - if ((exec_rc < 0) || (rc != 0) || (buffer == NULL)) { - crm_warn("Could not obtain metadata for %s", agent); - crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer)); - free(buffer); - return -EINVAL; + if (rc < 0) { + crm_warn("Could not execute metadata 
action for %s: %s " + CRM_XS " rc=%d", agent, pcmk_strerror(rc), rc); + stonith__destroy_action(action); + return rc; + } + + stonith__action_result(action, &rc, &buffer, NULL); + stonith__destroy_action(action); + if (rc < 0) { + crm_warn("Metadata action for %s failed: %s " CRM_XS "rc=%d", + agent, pcmk_strerror(rc), rc); + return rc; + } + + if (buffer == NULL) { + crm_warn("Metadata action for %s returned no data", agent); + return -ENODATA; } xml = string2xml(buffer); if (xml == NULL) { crm_warn("Metadata for %s is invalid", agent); - free(buffer); - return -EINVAL; + return -pcmk_err_schema_validation; } xpathObj = xpath_search(xml, "//actions"); @@ -129,11 +140,10 @@ stonith__rhcs_metadata(const char *agent, int timeout, char **output) } freeXpathObject(xpathObj); - free(buffer); buffer = dump_xml_formatted_with_text(xml); free_xml(xml); if (buffer == NULL) { - return -EINVAL; + return -pcmk_err_schema_validation; } if (output) { *output = buffer; From 67894a67027dd816a623b16bcc372a4b7a648799 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 4 Jun 2018 17:07:43 -0500 Subject: [PATCH 121/812] Low: tools: stonith_admin should print error if stonithd connection fails --- fencing/admin.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fencing/admin.c b/fencing/admin.c index d053e1c27f0..7731428e83b 100644 --- a/fencing/admin.c +++ b/fencing/admin.c @@ -561,6 +561,8 @@ main(int argc, char **argv) if (!no_connect) { rc = st->cmds->connect(st, async_fence_data.name, NULL); if (rc < 0) { + fprintf(stderr, "Could not connect to fencer: %s\n", + pcmk_strerror(rc)); goto done; } } From a9bd039552e4b3eec408561383a3fefc7d3178c0 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 4 Jun 2018 18:02:48 -0500 Subject: [PATCH 122/812] Low: tools: handle stonith_admin exit codes better cleans up memory on usage error --- fencing/admin.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fencing/admin.c b/fencing/admin.c index 
7731428e83b..6228e5752c4 100644 --- a/fencing/admin.c +++ b/fencing/admin.c @@ -422,6 +422,7 @@ main(int argc, char **argv) const char *longname = NULL; char action = 0; + int exit_code = 0; stonith_t *st = NULL; stonith_key_value_t *params = NULL; stonith_key_value_t *devices = NULL; @@ -563,6 +564,7 @@ main(int argc, char **argv) if (rc < 0) { fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc)); + exit_code = 1; goto done; } } @@ -646,7 +648,8 @@ main(int argc, char **argv) case 'M': if (agent == NULL) { printf("Please specify an agent to query using -a,--agent [value]\n"); - return -1; + exit_code = 1; + goto done; } else { char *buffer = NULL; @@ -691,13 +694,13 @@ main(int argc, char **argv) break; } - done: - free(async_fence_data.name); crm_info("Command returned: %s (%d)", pcmk_strerror(rc), rc); + exit_code = (rc < 0)? 1 : 0; + done: + free(async_fence_data.name); stonith_key_value_freeall(params, 1, 1); st->cmds->disconnect(st); stonith_api_delete(st); - - return rc; + return exit_code; } From 40e4ee3f07e286f409da7e485736e09c9785135c Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 5 Jun 2018 11:28:58 -0500 Subject: [PATCH 123/812] Refactor: libcrmcommon: use const char pointer when possible --- include/crm/common/cib_secrets.h | 21 ++++----------------- lib/common/cib_secrets.c | 7 ++++--- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/include/crm/common/cib_secrets.h b/include/crm/common/cib_secrets.h index 566f445bcc8..b17c458bbc3 100644 --- a/include/crm/common/cib_secrets.h +++ b/include/crm/common/cib_secrets.h @@ -1,25 +1,12 @@ /* - * cib_secrets.h - * + * Copyright 2011-2018 SUSE, Attachmate * Author: Dejan Muhamedagic - * Copyright (c) 2011 SUSE, Attachmate * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later 
version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. */ /* * load parameters from an ini file (cib_secrets.c) */ -int replace_secret_params(char * rsc_id, GHashTable * params); +int replace_secret_params(const char *rsc_id, GHashTable *params); diff --git a/lib/common/cib_secrets.c b/lib/common/cib_secrets.c index 9a565bffe83..5afc6639771 100644 --- a/lib/common/cib_secrets.c +++ b/lib/common/cib_secrets.c @@ -23,7 +23,7 @@ #include #include -static int do_replace_secret_params(char *rsc_id, GHashTable *params, gboolean from_legacy_dir); +static int do_replace_secret_params(const char *rsc_id, GHashTable *params, gboolean from_legacy_dir); static int is_magic_value(char *p); static int check_md5_hash(char *hash, char *value); static void add_secret_params(gpointer key, gpointer value, gpointer user_data); @@ -86,7 +86,7 @@ read_local_file(char *local_file) */ int -replace_secret_params(char *rsc_id, GHashTable *params) +replace_secret_params(const char *rsc_id, GHashTable *params) { if (do_replace_secret_params(rsc_id, params, FALSE) < 0 && do_replace_secret_params(rsc_id, params, TRUE) < 0) { @@ -97,7 +97,8 @@ replace_secret_params(char *rsc_id, GHashTable *params) } static int -do_replace_secret_params(char *rsc_id, GHashTable *params, gboolean from_legacy_dir) +do_replace_secret_params(const char *rsc_id, GHashTable *params, + gboolean from_legacy_dir) { char local_file[FILENAME_MAX+1], *start_pname; char 
hash_file[FILENAME_MAX+1], *hash; From aefab13b803c10307a976c57fae737060820f772 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 27 Oct 2017 12:14:20 -0500 Subject: [PATCH 124/812] Feature: libstonithd: add client API for device validation --- include/crm/fencing/internal.h | 6 +++ include/crm/stonith-ng.h | 23 ++++++++++ lib/fencing/st_client.c | 84 +++++++++++++++++++++++++++++++++- lib/fencing/st_lha.c | 11 +++++ lib/fencing/st_rhcs.c | 18 ++++++++ 5 files changed, 141 insertions(+), 1 deletion(-) diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h index b47ee8b56d7..e0f264cc23c 100644 --- a/include/crm/fencing/internal.h +++ b/include/crm/fencing/internal.h @@ -134,11 +134,17 @@ xmlNode *create_device_registration_xml(const char *id, int stonith__list_lha_agents(stonith_key_value_t **devices); int stonith__lha_metadata(const char *agent, int timeout, char **output); bool stonith__agent_is_lha(const char *agent); +int stonith__lha_validate(stonith_t *st, int call_options, const char *target, + const char *agent, GHashTable *params, + int timeout, char **output, char **error_output); # endif // utilities from st_rhcs.c int stonith__list_rhcs_agents(stonith_key_value_t **devices); int stonith__rhcs_metadata(const char *agent, int timeout, char **output); bool stonith__agent_is_rhcs(const char *agent); +int stonith__rhcs_validate(stonith_t *st, int call_options, const char *target, + const char *agent, GHashTable *params, + int timeout, char **output, char **error_output); #endif diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h index 337dcac0e12..046a669d793 100644 --- a/include/crm/stonith-ng.h +++ b/include/crm/stonith-ng.h @@ -375,6 +375,29 @@ typedef struct stonith_api_operations_s const char *attr, const char *value, int level, stonith_key_value_t *device_list); + /*! 
+ * \brief Validate an arbitrary stonith device configuration + * + * \param[in] st Stonithd connection to use + * \param[in] call_options Bitmask of stonith_call_options to use with fencer + * \param[in] rsc_id ID used to replace CIB secrets in params + * \param[in] namespace_s Namespace of fence agent to validate (optional) + * \param[in] agent Fence agent to validate + * \param[in] params Configuration parameters to pass to fence agent + * \param[in] timeout Fail if no response within this many seconds + * \param[out] output If non-NULL, where to store any agent output + * \param[out] error_output If non-NULL, where to store agent error output + * + * \return pcmk_ok if validation succeeds, -errno otherwise + * + * \note If pcmk_ok is returned, the caller is responsible for freeing + * the output (if requested). + */ + int (*validate)(stonith_t *st, int call_options, const char *rsc_id, + const char *namespace_s, const char *agent, + stonith_key_value_t *params, int timeout, char **output, + char **error_output); + } stonith_api_operations_t; struct stonith_s diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 4ed81a08618..068d0c97732 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -37,6 +37,10 @@ #include +#if SUPPORT_CIBSECRETS +# include +#endif + CRM_TRACE_INIT_DATA(stonith); struct stonith_action_s { @@ -779,8 +783,9 @@ stonith_action_create(const char *agent, stonith_action_t *action; action = calloc(1, sizeof(stonith_action_t)); - crm_debug("Initiating action %s for agent %s (target=%s)", _action, agent, victim); action->args = make_args(agent, _action, victim, victim_nodeid, device_args, port_map); + crm_debug("Preparing '%s' action for %s using agent %s", + _action, (victim? 
victim : "no target"), agent); action->agent = strdup(agent); action->action = strdup(_action); if (victim) { @@ -2251,6 +2256,81 @@ stonith_api_delete(stonith_t * stonith) } } +static int +stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, + const char *namespace_s, const char *agent, + stonith_key_value_t *params, int timeout, char **output, + char **error_output) +{ + /* Validation should be done directly via the agent, so we can get it from + * stonith_admin when the cluster is not running, which is important for + * higher-level tools. + */ + + int rc = pcmk_ok; + + /* Use a dummy node name in case the agent requires a target. We assume the + * actual target doesn't matter for validation purposes (if in practice, + * that is incorrect, we will need to allow the caller to pass the target). + */ + const char *target = "node1"; + + GHashTable *params_table = crm_str_table_new(); + + // Convert parameter list to a hash table + for (; params; params = params->next) { + + // Strip out Pacemaker-implemented parameters + if (!crm_starts_with(params->key, "pcmk_") + && strcmp(params->key, "provides") + && strcmp(params->key, "stonith-timeout")) { + g_hash_table_insert(params_table, strdup(params->key), + strdup(params->value)); + } + } + +#if SUPPORT_CIBSECRETS + rc = replace_secret_params(rsc_id, params_table); + if (rc < 0) { + crm_warn("Could not replace secret parameters for validation of %s: %s", + agent, pcmk_strerror(rc)); + } +#endif + + if (output) { + *output = NULL; + } + if (error_output) { + *error_output = NULL; + } + + switch (stonith_get_namespace(agent, namespace_s)) { + case st_namespace_rhcs: + rc = stonith__rhcs_validate(st, call_options, target, agent, + params_table, timeout, output, + error_output); + break; + +#if HAVE_STONITH_STONITH_H + case st_namespace_lha: + rc = stonith__lha_validate(st, call_options, target, agent, + params_table, timeout, output, + error_output); + break; +#endif + + default: + rc = -EINVAL; + 
errno = EINVAL; + crm_perror(LOG_ERR, + "Agent %s not found or does not support validation", + agent); + break; + } + g_hash_table_destroy(params_table); + return rc; +} + stonith_t * stonith_api_new(void) { @@ -2298,6 +2378,8 @@ stonith_api_new(void) new_stonith->cmds->register_callback = stonith_api_add_callback; new_stonith->cmds->remove_notification = stonith_api_del_notification; new_stonith->cmds->register_notification = stonith_api_add_notification; + + new_stonith->cmds->validate = stonith_api_validate; /* *INDENT-ON* */ return new_stonith; diff --git a/lib/fencing/st_lha.c b/lib/fencing/st_lha.c index ee7a66ee125..818ace23748 100644 --- a/lib/fencing/st_lha.c +++ b/lib/fencing/st_lha.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -243,3 +244,13 @@ i_hate_pils(int rc) { return PIL_strerror(rc); } + +int +stonith__lha_validate(stonith_t *st, int call_options, const char *target, + const char *agent, GHashTable *params, int timeout, + char **output, char **error_output) +{ + errno = EOPNOTSUPP; + crm_perror(LOG_ERR, "Cannot validate Linux-HA fence agents"); + return -EOPNOTSUPP; +} diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c index bf529352dab..d453945f8d2 100644 --- a/lib/fencing/st_rhcs.c +++ b/lib/fencing/st_rhcs.c @@ -163,3 +163,21 @@ stonith__agent_is_rhcs(const char *agent) free(buffer); return (rc >= 0) && S_ISREG(prop.st_mode); } + +int +stonith__rhcs_validate(stonith_t *st, int call_options, const char *target, + const char *agent, GHashTable *params, int timeout, + char **output, char **error_output) +{ + int rc = pcmk_ok; + stonith_action_t *action = stonith_action_create(agent, "validate-all", + target, 0, timeout, params, + NULL); + + rc = stonith__execute(action); + if (rc == pcmk_ok) { + stonith__action_result(action, &rc, output, error_output); + } + stonith__destroy_action(action); + return rc; +} From f27cafad33300221cd6dedb05ca025383cffb6cd Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 27 
Oct 2017 11:42:52 -0500 Subject: [PATCH 125/812] Feature: fencing: add stonith_admin --validate option --- fencing/admin.c | 58 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/fencing/admin.c b/fencing/admin.c index 6228e5752c4..8bf17ecd8be 100644 --- a/fencing/admin.c +++ b/fencing/admin.c @@ -62,7 +62,7 @@ static struct crm_option long_options[] = { { "register", required_argument, NULL, 'R', "Register the named stonith device. Requires: --agent.\n" - "\t\t\tOptional: any number of --option and/or --env entries." + "\t\t\tOptional: --option, --env-option." }, { "deregister", required_argument, NULL, 'D', "De-register the named stonith device." @@ -100,6 +100,11 @@ static struct crm_option long_options[] = { { "query", required_argument, NULL, 'Q', "Check the named device's status. Optional: --timeout." }, + { "validate", no_argument, NULL, 'K', + "\tValidate a fence device configuration.\n" + "\t\t\tRequires: --agent. Optional: --option, --env-option,\n" + "\t\t\t--quiet (print no output, only return status).\n" + }, { "-spacer-", no_argument, NULL, '-', "\nFencing Commands:" }, @@ -130,17 +135,19 @@ static struct crm_option long_options[] = { { "agent", required_argument, NULL, 'a', "The agent to use (for example, fence_xvm;\n" - "\t\t\twith --register, --metadata)." + "\t\t\twith --register, --metadata, --validate)." }, { "option", required_argument, NULL, 'o', "Specify a device configuration parameter as NAME=VALUE\n" - "\t\t\t(with --register)." + "\t\t\t(may be specified multiple times; with --register,\n" + "\t\t\t--validate)." }, { "env-option", required_argument, NULL, 'e', "Specify a device configuration parameter with the\n" "\t\t\tspecified name, using the value of the\n" "\t\t\tenvironment variable of the same name prefixed with\n" - "\t\t\tOCF_RESKEY_ (with --register)." + "\t\t\tOCF_RESKEY_ (may be specified multiple times;\n" + "\t\t\twith --register, --validate)." 
}, { "tag", required_argument, NULL, 'T', "Identify fencing operations in logs with the specified\n" @@ -148,8 +155,12 @@ static struct crm_option long_options[] = { "\t\t\tstonith_admin (used with most commands)." }, { "device", required_argument, NULL, 'v', - "A device to associate with a given host and\n" - "\t\t\tstonith level (with --register-level)." + "Device ID (with --register-level, device to associate with\n" + "\t\t\ta given host and level; may be specified multiple times)" +#if SUPPORT_CIBSECRETS + "\n\t\t\t(with --validate, name to use to load CIB secrets)" +#endif + "." }, { "index", required_argument, NULL, 'i', "The stonith level (1-9) (with --register-level,\n" @@ -398,6 +409,31 @@ show_history(stonith_t *st, const char *target, int timeout, int quiet, return rc; } +static int +validate(stonith_t *st, const char *agent, const char *id, + stonith_key_value_t *params, int timeout, int quiet) +{ + int rc = 1; + char *output = NULL; + char *error_output = NULL; + + rc = st->cmds->validate(st, st_opt_sync_call, id, NULL, agent, params, + timeout, &output, &error_output); + + if (!quiet) { + printf("Validation of %s %s\n", agent, (rc? "failed" : "succeeded")); + if (output && *output) { + puts(output); + free(output); + } + if (error_output && *error_output) { + puts(error_output); + free(error_output); + } + } + return rc; +} + int main(int argc, char **argv) { @@ -449,6 +485,7 @@ main(int argc, char **argv) crm_help(flag, EX_OK); break; case 'I': + case 'K': no_connect = 1; /* fall through */ case 'L': @@ -692,6 +729,15 @@ main(int argc, char **argv) case 'H': rc = show_history(st, target, timeout, quiet, verbose); break; + case 'K': + if (agent == NULL) { + printf("Please specify an agent to validate with --agent\n"); + exit_code = 1; + goto done; + } + device = (devices? 
devices->key : NULL); + rc = validate(st, agent, device, params, timeout, quiet); + break; } crm_info("Command returned: %s (%d)", pcmk_strerror(rc), rc); From d797742d3d73a8ba3623d24706e03a549dddc04f Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 6 Jun 2018 09:52:06 -0500 Subject: [PATCH 126/812] Log: libstonithd: show error if agent not found for meta-data call --- lib/fencing/st_client.c | 4 ++++ lib/fencing/st_lha.c | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 068d0c97732..88ca8303627 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1253,6 +1253,10 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a #endif default: + errno = EINVAL; + crm_perror(LOG_ERR, + "Agent %s not found or does not support meta-data", + agent); break; } return -EINVAL; diff --git a/lib/fencing/st_lha.c b/lib/fencing/st_lha.c index 818ace23748..26a8dba1b49 100644 --- a/lib/fencing/st_lha.c +++ b/lib/fencing/st_lha.c @@ -205,7 +205,9 @@ stonith__lha_metadata(const char *agent, int timeout, char **output) } (*st_del_fn) (stonith_obj); } else { - return -EINVAL; /* Heartbeat agents not supported */ + errno = EINVAL; + crm_perror(LOG_ERR, "Agent %s not found", agent); + return -EINVAL; } xml_meta_longdesc = From 86027811a4378114f6121150d514449df9125946 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 8 Jun 2018 14:46:56 -0500 Subject: [PATCH 127/812] Test: fencing: restore original stonith_admin exit codes needed for regression tests --- fencing/admin.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fencing/admin.c b/fencing/admin.c index 8bf17ecd8be..179116a2ac2 100644 --- a/fencing/admin.c +++ b/fencing/admin.c @@ -601,7 +601,7 @@ main(int argc, char **argv) if (rc < 0) { fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc)); - exit_code = 1; + exit_code = rc; goto done; } } @@ -685,7 +685,7 @@ main(int 
argc, char **argv) case 'M': if (agent == NULL) { printf("Please specify an agent to query using -a,--agent [value]\n"); - exit_code = 1; + exit_code = -1; goto done; } else { char *buffer = NULL; @@ -732,7 +732,7 @@ main(int argc, char **argv) case 'K': if (agent == NULL) { printf("Please specify an agent to validate with --agent\n"); - exit_code = 1; + exit_code = -1; goto done; } device = (devices? devices->key : NULL); @@ -741,7 +741,7 @@ main(int argc, char **argv) } crm_info("Command returned: %s (%d)", pcmk_strerror(rc), rc); - exit_code = (rc < 0)? 1 : 0; + exit_code = rc; done: free(async_fence_data.name); From 80a6780d8597df5732bc927248db9be27131bd27 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 8 Jun 2018 17:29:18 -0500 Subject: [PATCH 128/812] Test: CTS: don't muck with iptables if not necessary It interferes with other firewall management software. The iptables commands to break connectivity are only used with experimental tests or the PacketLoss scenario currently, so only clean up iptables in those cases. 
--- cts/CM_lha.py | 3 ++- cts/CTS.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cts/CM_lha.py b/cts/CM_lha.py index 8004594a9d2..0ba4ba1e64d 100755 --- a/cts/CM_lha.py +++ b/cts/CM_lha.py @@ -118,7 +118,8 @@ def prepare(self): self.partitions_expected = 1 for node in self.Env["nodes"]: self.ShouldBeStatus[node] = "" - self.unisolate_node(node) + if self.Env["experimental-tests"]: + self.unisolate_node(node) self.StataCM(node) def test_node_CM(self, node): diff --git a/cts/CTS.py b/cts/CTS.py index 2ba81465a1a..96ca1dd5e4b 100644 --- a/cts/CTS.py +++ b/cts/CTS.py @@ -346,7 +346,8 @@ def prepare(self): else: self.ShouldBeStatus[node] = "down" - self.unisolate_node(node) + if self.Env["experimental-tests"]: + self.unisolate_node(node) def upcount(self): '''How many nodes are up?''' From 6a4c1a05bfae67d6855361f86f08e45f37ae5c7a Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 11 Jun 2018 18:22:27 -0500 Subject: [PATCH 129/812] Fix: libcrmcluster: avoid use-of-NULL when searching for remote node --- lib/cluster/membership.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index d09ab92a9fa..5744621a5fc 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -507,7 +507,7 @@ crm_find_peer_full(unsigned int id, const char *uname, int flags) crm_peer_init(); - if (flags & CRM_GET_PEER_REMOTE) { + if ((uname != NULL) && (flags & CRM_GET_PEER_REMOTE)) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); } From f25c784facc25bf0bd5dc8a972bd3abff0f5aa04 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 12 Jun 2018 14:48:23 -0500 Subject: [PATCH 130/812] Refactor: libcrmservice: eliminate redundant code systemd_unit_extension() filters out unmanageable unit file types, so systemd_unit_listall() doesn't need to do it. 
--- lib/services/systemd.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/lib/services/systemd.c b/lib/services/systemd.c index 46423865383..5fcf6dd9038 100644 --- a/lib/services/systemd.c +++ b/lib/services/systemd.c @@ -146,7 +146,7 @@ systemd_cleanup(void) /*! * \internal - * \brief Check whether a file name represents a systemd unit + * \brief Check whether a file name represents a manageable systemd unit * * \param[in] name File name to check * @@ -419,13 +419,13 @@ systemd_unit_listall(void) match = systemd_unit_extension(value.str); if (match == NULL) { - // Unit files always have an extension, so skip if not present - crm_debug("ListUnitFiles entry '%s' does not have an extension", + // This is not a unit file type we know how to manage + crm_debug("ListUnitFiles entry '%s' is not supported as resource", value.str); continue; } - // ListUnitFiles returns full path names + // ListUnitFiles returns full path names, we just want base name basename = strrchr(value.str, '/'); if (basename) { basename = basename + 1; @@ -433,22 +433,12 @@ systemd_unit_listall(void) basename = value.str; } - /* Unit files will include types (such as .target) that we can't manage, - * so filter the replies here. 
- */ if (!strcmp(match, ".service")) { // Service is the "default" unit type, so strip it unit_name = strndup(basename, match - basename); - - } else if (!strcmp(match, ".mount") - || !strcmp(match, ".socket")) { + } else { unit_name = strdup(basename); } - if (unit_name == NULL) { - crm_trace("ListUnitFiles entry '%s' is not manageable", - value.str); - continue; - } nfiles++; units = g_list_prepend(units, unit_name); From f2f0b6626044715fc9531dbd2b1c4d6b98f3069f Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 12 Jun 2018 14:49:18 -0500 Subject: [PATCH 131/812] Feature: libcrmservice: support mount, path, and timer systemd unit types --- lib/services/systemd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/services/systemd.c b/lib/services/systemd.c index 5fcf6dd9038..e89be494459 100644 --- a/lib/services/systemd.c +++ b/lib/services/systemd.c @@ -158,7 +158,11 @@ systemd_unit_extension(const char *name) if (name) { const char *dot = strrchr(name, '.'); - if (dot && (!strcmp(dot, ".service") || !strcmp(dot, ".socket"))) { + if (dot && (!strcmp(dot, ".service") + || !strcmp(dot, ".socket") + || !strcmp(dot, ".mount") + || !strcmp(dot, ".timer") + || !strcmp(dot, ".path"))) { return dot; } } From 901544b6eeb4da875536b23675a8654182de929e Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Sat, 28 Apr 2018 09:46:24 -0500 Subject: [PATCH 132/812] Test: cts: ignore spurious log message from libvirtd --- cts/patterns.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cts/patterns.py b/cts/patterns.py index d5e15201f13..ccd753d87fd 100644 --- a/cts/patterns.py +++ b/cts/patterns.py @@ -9,6 +9,9 @@ def __init__(self, name): patternvariants[name] = self self.ignore = [ "avoid confusing Valgrind", + + # Logging bug in some versions of libvirtd + r"libvirtd.*: internal error: Failed to parse PCI config address", ] self.BadNews = [] self.components = {} From 8567726db1072f8425b8731896e9a592ede0a7d9 Mon Sep 17 00:00:00 2001 From: Ken Gaillot 
Date: Thu, 14 Jun 2018 11:15:40 -0500 Subject: [PATCH 133/812] Fix: tools: treat INFINITY correctly in crm_failcount Regression in 1.1.17 via 83236161 Problem discovered and original patch provided by Reid Wahl --- tools/crm_failcount | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/crm_failcount b/tools/crm_failcount index 64702c95f50..9c865607b3f 100755 --- a/tools/crm_failcount +++ b/tools/crm_failcount @@ -111,14 +111,25 @@ query_attr_sum() { # Extract the attribute values (one per line) from the output QAS_VALUE=$(echo "$QAS_ALL" | sed -n -e \ - 's/.*.*/\1/p') + 's/.*.*/\1/p') # Sum the values QAS_SUM=0 for i in 0 $QAS_VALUE; do - QAS_SUM=$(($QAS_SUM + $i)) + if [ "$i" = "INFINITY" ]; then + QAS_SUM="INFINITY" + break + else + QAS_SUM=$(($QAS_SUM + $i)) + fi done - echo $QAS_SUM + if [ "$QAS_SUM" = "INFINITY" ]; then + echo $QAS_SUM + elif [ "$QAS_SUM" -ge 1000000 ]; then + echo "INFINITY" + else + echo $QAS_SUM + fi } query_failcount() { From 24643c7930a24eb31a0c6d254b6f9d2f7c471d81 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 14 Jun 2018 12:33:30 -0500 Subject: [PATCH 134/812] Fix: scheduler: print to stdout when appropriate The scheduler libaries need to print certain items to stdout when called via the command line (i.e. crm_simulate). Previously, they used pe_flag_sanitized to detect this situation. However, crm_simulate does not set that flag when called with the live CIB. Instead, use a new flag for the purpose. 
This could be considered a regression in 1.1.18 via commit 349a37f5b Closes CLBZ#5353 --- include/crm/pengine/status.h | 1 + lib/pengine/utils.c | 20 ++++++++++---------- pengine/allocate.c | 2 +- pengine/master.c | 2 +- tools/crm_simulate.c | 3 +++ 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index a8c90e23a72..f64f93b702a 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -84,6 +84,7 @@ enum pe_find { # define pe_flag_quick_location 0x00100000ULL # define pe_flag_sanitized 0x00200000ULL +# define pe_flag_stdout 0x00400000ULL typedef struct pe_working_set_s { xmlNode *input; diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 1edead67e06..0654d1e706c 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -2110,7 +2110,7 @@ fencing_action_digest_cmp(resource_t * rsc, node_t * node, pe_working_set_t * da } else if(digest_secure && data->digest_secure_calc) { if(strstr(digest_secure, search_secure)) { - if (is_set(data_set->flags, pe_flag_sanitized)) { + if (is_set(data_set->flags, pe_flag_stdout)) { printf("Only 'private' parameters to %s for unfencing %s changed\n", rsc->id, node->details->uname); } @@ -2118,13 +2118,14 @@ fencing_action_digest_cmp(resource_t * rsc, node_t * node, pe_working_set_t * da } } - if (data->rc == RSC_DIGEST_ALL && is_set(data_set->flags, pe_flag_sanitized) && data->digest_secure_calc) { - if (is_set(data_set->flags, pe_flag_sanitized)) { - printf("Parameters to %s for unfencing %s changed, try '%s:%s:%s'\n", - rsc->id, node->details->uname, rsc->id, - (const char *) g_hash_table_lookup(rsc->meta, XML_ATTR_TYPE), - data->digest_secure_calc); - } + if (is_set(data_set->flags, pe_flag_sanitized) + && is_set(data_set->flags, pe_flag_stdout) + && (data->rc == RSC_DIGEST_ALL) + && data->digest_secure_calc) { + printf("Parameters to %s for unfencing %s changed, try '%s:%s:%s'\n", + rsc->id, node->details->uname, rsc->id, + 
(const char *) g_hash_table_lookup(rsc->meta, XML_ATTR_TYPE), + data->digest_secure_calc); } free(key); @@ -2236,8 +2237,7 @@ pe_fence_op(node_t * node, const char *op, bool optional, const char *reason, pe if(data->rc == RSC_DIGEST_ALL) { optional = FALSE; crm_notice("Unfencing %s (remote): because the definition of %s changed", node->details->uname, match->id); - if (is_set(data_set->flags, pe_flag_sanitized)) { - /* Extra detail for those running from the commandline */ + if (is_set(data_set->flags, pe_flag_stdout)) { fprintf(stdout, " notice: Unfencing %s (remote): because the definition of %s changed\n", node->details->uname, match->id); } } diff --git a/pengine/allocate.c b/pengine/allocate.c index 427575ba202..dc8017ad614 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -316,7 +316,7 @@ check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op && digest_secure && digest_data->digest_secure_calc && strcmp(digest_data->digest_secure_calc, digest_secure) == 0) { - if (is_set(data_set->flags, pe_flag_sanitized)) { + if (is_set(data_set->flags, pe_flag_stdout)) { printf("Only 'private' parameters to %s_%s_%d on %s changed: %s\n", rsc->id, task, interval, active_node->details->uname, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); diff --git a/pengine/master.c b/pengine/master.c index 67892148a0f..91135672460 100644 --- a/pengine/master.c +++ b/pengine/master.c @@ -766,7 +766,7 @@ master_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if (show_scores) { - if (is_set(data_set->flags, pe_flag_sanitized)) { + if (is_set(data_set->flags, pe_flag_stdout)) { printf("%s promotion score on %s: %s\n", child_rsc->id, (chosen? 
chosen->details->uname : "none"), score); diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c index 0d48315427f..b4d834d257d 100644 --- a/tools/crm_simulate.c +++ b/tools/crm_simulate.c @@ -800,6 +800,7 @@ main(int argc, char **argv) if(xml_file) { set_bit(data_set.flags, pe_flag_sanitized); } + set_bit(data_set.flags, pe_flag_stdout); cluster_status(&data_set); if (quiet == FALSE) { @@ -838,6 +839,7 @@ main(int argc, char **argv) if(xml_file) { set_bit(data_set.flags, pe_flag_sanitized); } + set_bit(data_set.flags, pe_flag_stdout); cluster_status(&data_set); } @@ -894,6 +896,7 @@ main(int argc, char **argv) get_date(&data_set); quiet_log("\nRevised cluster status:\n"); + set_bit(data_set.flags, pe_flag_stdout); cluster_status(&data_set); print_cluster_status(&data_set, 0); } From c1741cffba0ec41762bf5facc88fd1c354ec7e1d Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 11 Jun 2018 14:51:46 -0500 Subject: [PATCH 135/812] Refactor: controller: make comments correct again only one code block is non-DC-only --- crmd/messages.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/crmd/messages.c b/crmd/messages.c index 4307fcad78e..5a1a5374f7d 100644 --- a/crmd/messages.c +++ b/crmd/messages.c @@ -811,25 +811,6 @@ handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause) /*return I_SHUTDOWN; */ return I_NULL; - /*========== (NOT_DC)-Only Actions ==========*/ - } else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) { - - const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); - gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); - - if (dc_match || fsa_our_dc == NULL) { - if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) { - crm_err("We didn't ask to be shut down, yet our DC is telling us to."); - set_bit(fsa_input_register, R_STAYDOWN); - return I_STOP; - } - crm_info("Shutting down"); - return I_STOP; - - } else { - crm_warn("Discarding %s op from %s", op, host_from); 
- } - } else if (strcmp(op, CRM_OP_PING) == 0) { /* eventually do some stuff to figure out * if we /are/ ok @@ -883,6 +864,25 @@ handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause) remote_ra_process_maintenance_nodes(xml); + /*========== (NOT_DC)-Only Actions ==========*/ + } else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) { + + const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); + gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); + + if (dc_match || fsa_our_dc == NULL) { + if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) { + crm_err("We didn't ask to be shut down, yet our DC is telling us to."); + set_bit(fsa_input_register, R_STAYDOWN); + return I_STOP; + } + crm_info("Shutting down"); + return I_STOP; + + } else { + crm_warn("Discarding %s op from %s", op, host_from); + } + } else { crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node"); crm_log_xml_err(stored_msg, "Unexpected"); From bbcf1891564728253845cb4338828ac99a78170e Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 11 Jun 2018 15:05:00 -0500 Subject: [PATCH 136/812] Refactor: controller: functionize handling CRM_OP_PING for readability --- crmd/messages.c | 61 +++++++++++++++++++++++++++++-------------- include/crm/msg_xml.h | 1 + tools/crmadmin.c | 2 +- 3 files changed, 43 insertions(+), 21 deletions(-) diff --git a/crmd/messages.c b/crmd/messages.c index 5a1a5374f7d..22eaa93a8c5 100644 --- a/crmd/messages.c +++ b/crmd/messages.c @@ -684,6 +684,46 @@ handle_remote_state(xmlNode *msg) return I_NULL; } +/*! 
+ * \brief Handle a CRM_OP_PING message + * + * \param[in] msg Message XML + * + * \return Next FSA input + */ +static enum crmd_fsa_input +handle_ping(xmlNode *msg) +{ + const char *value = NULL; + xmlNode *ping = NULL; + + // Build reply + + ping = create_xml_node(NULL, XML_CRM_TAG_PING); + value = crm_element_value(msg, F_CRM_SYS_TO); + crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value); + + // Add controller state + value = fsa_state2string(fsa_state); + crm_xml_add(ping, XML_PING_ATTR_CRMDSTATE, value); + crm_notice("Current ping state: %s", value); // CTS needs this + + // Add controller health + // @TODO maybe do some checks to determine meaningful status + crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); + + // Send reply + msg = create_reply(msg, ping); + free_xml(ping); + if (msg) { + (void) relay_message(msg, TRUE); + free_xml(msg); + } + + // Nothing further to do + return I_NULL; +} + enum crmd_fsa_input handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause) { @@ -812,26 +852,7 @@ handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause) return I_NULL; } else if (strcmp(op, CRM_OP_PING) == 0) { - /* eventually do some stuff to figure out - * if we /are/ ok - */ - const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO); - xmlNode *ping = create_xml_node(NULL, XML_CRM_TAG_PING); - - crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); - crm_xml_add(ping, XML_PING_ATTR_SYSFROM, sys_to); - crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state)); - - /* Ok, so technically not so interesting, but CTS needs to see this */ - crm_notice("Current ping state: %s", fsa_state2string(fsa_state)); - - msg = create_reply(stored_msg, ping); - if (msg) { - (void)relay_message(msg, TRUE); - } - - free_xml(ping); - free_xml(msg); + return handle_ping(stored_msg); } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) { int id = 0; diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h index 9ef22af1a0a..1ac43024936 100644 --- a/include/crm/msg_xml.h +++ 
b/include/crm/msg_xml.h @@ -140,6 +140,7 @@ # define XML_CRM_TAG_PING "ping_response" # define XML_PING_ATTR_STATUS "result" # define XML_PING_ATTR_SYSFROM "crm_subsystem" +# define XML_PING_ATTR_CRMDSTATE "crmd_state" # define XML_TAG_FRAGMENT "cib_fragment" # define XML_ATTR_RESULT "result" diff --git a/tools/crmadmin.c b/tools/crmadmin.c index 497edc2dc38..920c262093e 100644 --- a/tools/crmadmin.c +++ b/tools/crmadmin.c @@ -473,7 +473,7 @@ admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata) } else if (DO_HEALTH) { xmlNode *data = get_message_xml(xml, F_CRM_DATA); - const char *state = crm_element_value(data, "crmd_state"); + const char *state = crm_element_value(data, XML_PING_ATTR_CRMDSTATE); printf("Status of %s@%s: %s (%s)\n", crm_element_value(data, XML_PING_ATTR_SYSFROM), From 5a3dca9f35f2abfea940dd3e14e73a500475b7ad Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 11 Jun 2018 15:06:52 -0500 Subject: [PATCH 137/812] Feature: controller: add CRM_OP_NODE_INFO operation --- crmd/lrm_state.c | 17 +++++++++++ crmd/messages.c | 76 +++++++++++++++++++++++++++++++++++++++++++++-- include/crm/crm.h | 19 ++++-------- 3 files changed, 95 insertions(+), 17 deletions(-) diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c index 23d8ae677df..9968d300478 100644 --- a/crmd/lrm_state.c +++ b/crmd/lrm_state.c @@ -545,6 +545,23 @@ crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote"); crm_acl_get_set_user(request, F_LRMD_IPC_USER, lrm_state->node_name); #endif + + /* Pacemaker Remote nodes don't know their own names (as known to the + * cluster). When getting a node info request with no name or ID, add + * the name, so we don't return info for ourselves instead of the + * Pacemaker Remote node. 
+ */ + if (safe_str_eq(crm_element_value(request, F_CRM_TASK), + CRM_OP_NODE_INFO)) { + int node_id; + + crm_element_value_int(request, XML_ATTR_ID, &node_id); + if ((node_id <= 0) + && (crm_element_value(request, XML_ATTR_UNAME) == NULL)) { + crm_xml_add(request, XML_ATTR_UNAME, lrm_state->node_name); + } + } + crmd_proxy_dispatch(session, request); crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags); diff --git a/crmd/messages.c b/crmd/messages.c index 22eaa93a8c5..24ffac612db 100644 --- a/crmd/messages.c +++ b/crmd/messages.c @@ -370,6 +370,7 @@ relay_message(xmlNode * msg, gboolean originated_locally) const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *type = crm_element_value(msg, F_TYPE); + const char *task = crm_element_value(msg, F_CRM_TASK); const char *msg_error = NULL; crm_trace("Routing message %s", crm_element_value(msg, XML_ATTR_REFERENCE)); @@ -377,7 +378,7 @@ relay_message(xmlNode * msg, gboolean originated_locally) if (msg == NULL) { msg_error = "Cannot route empty message"; - } else if (safe_str_eq(CRM_OP_HELLO, crm_element_value(msg, F_CRM_TASK))) { + } else if (safe_str_eq(task, CRM_OP_HELLO)) { /* quietly ignore */ processing_complete = TRUE; @@ -411,8 +412,17 @@ relay_message(xmlNode * msg, gboolean originated_locally) if (is_for_dc || is_for_te) { is_local = 0; - } else if (is_for_crm && originated_locally) { - is_local = 0; + } else if (is_for_crm) { + if (safe_str_eq(task, CRM_OP_NODE_INFO)) { + /* Node info requests do not specify a host, which is normally + * treated as "all hosts", because the whole point is that the + * client doesn't know the local node name. Always handle these + * requests locally. + */ + is_local = 1; + } else { + is_local = !originated_locally; + } } else { is_local = 1; @@ -724,6 +734,63 @@ handle_ping(xmlNode *msg) return I_NULL; } +/*! 
+ * \brief Handle a CRM_OP_NODE_INFO request + * + * \param[in] msg Message XML + * + * \return Next FSA input + */ +static enum crmd_fsa_input +handle_node_info_request(xmlNode *msg) +{ + const char *value = NULL; + crm_node_t *node = NULL; + int node_id = 0; + xmlNode *reply = NULL; + + // Build reply + + reply = create_xml_node(NULL, XML_CIB_TAG_NODE); + crm_xml_add(reply, XML_PING_ATTR_SYSFROM, CRM_SYSTEM_CRMD); + + // Add whether current partition has quorum + crm_xml_add_boolean(reply, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); + + // Check whether client requested node info by ID and/or name + crm_element_value_int(msg, XML_ATTR_ID, &node_id); + if (node_id < 0) { + node_id = 0; + } + value = crm_element_value(msg, XML_ATTR_UNAME); + + // Default to local node if none given + if ((node_id == 0) && (value == NULL)) { + value = fsa_our_uname; + } + + node = crm_find_peer_full(node_id, value, CRM_GET_PEER_ANY); + if (node) { + crm_xml_add_int(reply, XML_ATTR_ID, node->id); + crm_xml_add(reply, XML_ATTR_UUID, node->uuid); + crm_xml_add(reply, XML_ATTR_UNAME, node->uname); + crm_xml_add(reply, XML_NODE_IS_PEER, node->state); + crm_xml_add_boolean(reply, XML_NODE_IS_REMOTE, + node->flags & crm_remote_node); + } + + // Send reply + msg = create_reply(msg, reply); + free_xml(reply); + if (msg) { + (void) relay_message(msg, TRUE); + free_xml(msg); + } + + // Nothing further to do + return I_NULL; +} + enum crmd_fsa_input handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause) { @@ -854,6 +921,9 @@ handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause) } else if (strcmp(op, CRM_OP_PING) == 0) { return handle_ping(stored_msg); + } else if (strcmp(op, CRM_OP_NODE_INFO) == 0) { + return handle_node_info_request(stored_msg); + } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) { int id = 0; const char *name = NULL; diff --git a/include/crm/crm.h b/include/crm/crm.h index 6e2bcfa0f24..555757c7dae 100644 --- a/include/crm/crm.h +++ b/include/crm/crm.h @@ -1,20 
+1,10 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ + #ifndef CRM__H # define CRM__H @@ -116,6 +106,7 @@ extern char *crm_system_name; # define CRM_OP_JOIN_ACKNAK "join_ack_nack" # define CRM_OP_JOIN_CONFIRM "join_confirm" # define CRM_OP_PING "ping" +# define CRM_OP_NODE_INFO "node-info" # define CRM_OP_THROTTLE "throttle" # define CRM_OP_VOTE "vote" # define CRM_OP_NOVOTE "no-vote" From b951162666ec10c92b57e1b7c374d34a5e046525 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 13 Jun 2018 13:49:53 -0500 Subject: [PATCH 138/812] Refactor: tools: more generic mainloop capability for crm_node also remove dead code and generally clean up (This is a backport from the 2.0 branch. The changes made are not done similarly for legacy cluster stacks, which will work the same as they did before.) 
--- tools/crm_node.c | 187 ++++++++++++++++++++++++++++++----------------- 1 file changed, 118 insertions(+), 69 deletions(-) diff --git a/tools/crm_node.c b/tools/crm_node.c index 5e91e819444..cdeb9aa9fa0 100644 --- a/tools/crm_node.c +++ b/tools/crm_node.c @@ -36,14 +36,11 @@ #include #include -int command = 0; -int ccm_fd = 0; -gboolean do_quiet = FALSE; - -char *target_uuid = NULL; -char *target_uname = NULL; -const char *standby_value = NULL; -const char *standby_scope = NULL; +static int command = 0; +static char *pid_s = NULL; +static const char *target_uname = NULL; +static GMainLoop *mainloop = NULL; +static int exit_code = pcmk_ok; /* *INDENT-OFF* */ static struct crm_option long_options[] = { @@ -80,11 +77,78 @@ static struct crm_option long_options[] = { {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"force", 0, 0, 'f'}, + // @TODO add timeout option for when IPC replies are needed {0, 0, 0, 0} }; /* *INDENT-ON* */ +/*! + * \internal + * \brief Exit crm_node + * Clean up memory, and either quit mainloop (if running) or exit + * + * \param[in] value Exit status + */ +static void +crm_node_exit(int value) +{ + if (pid_s) { + free(pid_s); + pid_s = NULL; + } + + exit_code = value; + + if (mainloop && g_main_loop_is_running(mainloop)) { + g_main_loop_quit(mainloop); + } else { + crm_exit(exit_code); + } +} + +static void +exit_disconnect(gpointer user_data) +{ + fprintf(stderr, "error: Lost connection to cluster\n"); + crm_node_exit(ENOTCONN); +} + +typedef int (*ipc_dispatch_fn) (const char *buffer, ssize_t length, + gpointer userdata); + +static crm_ipc_t * +new_mainloop_for_ipc(const char *system, ipc_dispatch_fn dispatch) +{ + mainloop_io_t *source = NULL; + crm_ipc_t *ipc = NULL; + + struct ipc_client_callbacks ipc_callbacks = { + .dispatch = dispatch, + .destroy = exit_disconnect + }; + + mainloop = g_main_loop_new(NULL, FALSE); + source = mainloop_add_ipc_client(system, G_PRIORITY_DEFAULT, 0, + NULL, &ipc_callbacks); + ipc = 
mainloop_get_ipc_client(source); + if (ipc == NULL) { + fprintf(stderr, + "error: Could not connect to cluster (is it running?)\n"); + crm_node_exit(ENOTCONN); + } + return ipc; +} + +static void +run_mainloop_and_exit() +{ + g_main_loop_run(mainloop); + g_main_loop_unref(mainloop); + mainloop = NULL; + crm_node_exit(exit_code); +} + static int cib_remove_node(uint32_t id, const char *name) { @@ -126,9 +190,8 @@ cib_remove_node(uint32_t id, const char *name) return rc; } -int tools_remove_node_cache(const char *node, const char *target); - -int tools_remove_node_cache(const char *node, const char *target) +static int +tools_remove_node_cache(const char *node, const char *target) { int n = 0; int rc = -1; @@ -239,10 +302,10 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) crm_log_xml_trace(msg, "message"); if (command == 'q' && quorate != NULL) { fprintf(stdout, "%s\n", quorate); - crm_exit(pcmk_ok); + crm_node_exit(pcmk_ok); } else if(command == 'q') { - crm_exit(1); + crm_node_exit(1); } for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { @@ -257,7 +320,8 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) for(iter = nodes; iter; iter = iter->next) { crm_node_t *peer = iter->data; if (command == 'l') { - fprintf(stdout, "%u %s %s\n", peer->id, peer->uname, peer->state?peer->state:""); + fprintf(stdout, "%u %s %s\n", + peer->id, peer->uname, (peer->state? 
peer->state : "")); } else if (command == 'p') { if(safe_str_eq(peer->state, CRM_NODE_MEMBER)) { @@ -278,36 +342,26 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) fprintf(stdout, "\n"); } - crm_exit(pcmk_ok); + crm_node_exit(pcmk_ok); } return 0; } static void -node_mcp_destroy(gpointer user_data) -{ - crm_exit(ENOTCONN); -} - -static gboolean try_pacemaker(int command, enum cluster_type_e stack) { - struct ipc_client_callbacks node_callbacks = { - .dispatch = node_mcp_dispatch, - .destroy = node_mcp_destroy - }; - if (stack == pcmk_cluster_heartbeat) { /* Nothing to do for them */ - return FALSE; + return; } switch (command) { case 'e': /* Age only applies to heartbeat clusters */ fprintf(stdout, "1\n"); - crm_exit(pcmk_ok); + crm_node_exit(pcmk_ok); + break; case 'R': { @@ -322,10 +376,10 @@ try_pacemaker(int command, enum cluster_type_e stack) for(lpc = 0; lpc < DIMOF(daemons); lpc++) { if (tools_remove_node_cache(target_uname, daemons[lpc])) { crm_err("Failed to connect to %s to remove node '%s'", daemons[lpc], target_uname); - crm_exit(pcmk_err_generic); + crm_node_exit(pcmk_err_generic); } } - crm_exit(pcmk_ok); + crm_node_exit(pcmk_ok); } break; @@ -335,21 +389,21 @@ try_pacemaker(int command, enum cluster_type_e stack) case 'p': /* Go to pacemakerd */ { - GMainLoop *amainloop = g_main_loop_new(NULL, FALSE); - mainloop_io_t *ipc = - mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_DEFAULT, 0, NULL, &node_callbacks); - if (ipc != NULL) { - /* Sending anything will get us a list of nodes */ - xmlNode *poke = create_xml_node(NULL, "poke"); - - crm_ipc_send(mainloop_get_ipc_client(ipc), poke, 0, 0, NULL); - free_xml(poke); - g_main_run(amainloop); - } + crm_ipc_t *ipc = NULL; + xmlNode *poke = NULL; + + ipc = new_mainloop_for_ipc(CRM_SYSTEM_MCP, node_mcp_dispatch); + + // Sending anything will get us a list of nodes + poke = create_xml_node(NULL, "poke"); + crm_ipc_send(ipc, poke, 0, 0, NULL); + free_xml(poke); + + // Handle reply 
via node_mcp_dispatch() + run_mainloop_and_exit(); } break; } - return FALSE; } #if SUPPORT_HEARTBEAT @@ -359,6 +413,7 @@ try_pacemaker(int command, enum cluster_type_e stack) # define UUID_LEN 16 +static int ccm_fd = 0; oc_ev_t *ccm_token = NULL; static void *ccm_library = NULL; void oc_ev_special(const oc_ev_t *, oc_ev_class_t, int); @@ -753,7 +808,7 @@ ais_membership_dispatch(cpg_handle_t handle, # include # include -static gboolean +static void try_corosync(int command, enum cluster_type_e stack) { int rc = 0; @@ -769,46 +824,40 @@ try_corosync(int command, enum cluster_type_e stack) rc = quorum_initialize(&q_handle, NULL, &quorum_type); if (rc != CS_OK) { crm_err("Could not connect to the Quorum API: %d", rc); - return FALSE; + return; } rc = quorum_getquorate(q_handle, &quorate); if (rc != CS_OK) { crm_err("Could not obtain the current Quorum API state: %d", rc); - return FALSE; + return; } - if (quorate) { - fprintf(stdout, "1\n"); - } else { - fprintf(stdout, "0\n"); - } + printf("%d\n", quorate); quorum_finalize(q_handle); - crm_exit(pcmk_ok); + crm_node_exit(pcmk_ok); case 'i': /* Go direct to the CPG API */ rc = cpg_initialize(&c_handle, NULL); if (rc != CS_OK) { crm_err("Could not connect to the Cluster Process Group API: %d", rc); - return FALSE; + return; } rc = cpg_local_get(c_handle, &nodeid); if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API"); - return FALSE; + return; } - fprintf(stdout, "%u\n", nodeid); + printf("%u\n", nodeid); cpg_finalize(c_handle); - crm_exit(pcmk_ok); + crm_node_exit(pcmk_ok); default: - try_pacemaker(command, stack); break; } - return FALSE; } #endif @@ -894,7 +943,7 @@ main(int argc, char **argv) crm_help(flag, EX_OK); break; case 'Q': - do_quiet = TRUE; + // currently unused break; case 'H': set_cluster_type(pcmk_cluster_heartbeat); @@ -942,25 +991,24 @@ main(int argc, char **argv) crm_help('?', EX_USAGE); } + if (dangerous_cmd && force_flag == FALSE) { + fprintf(stderr, "The supplied command is 
considered dangerous." + " To prevent accidental destruction of the cluster," + " the --force flag is required in order to proceed.\n"); + crm_node_exit(EINVAL); + } + if (command == 'n') { const char *name = getenv("OCF_RESKEY_" CRM_META "_" XML_LRM_ATTR_TARGET); if(name == NULL) { name = get_local_node_name(); } fprintf(stdout, "%s\n", name); - crm_exit(pcmk_ok); + crm_node_exit(pcmk_ok); } else if (command == 'N') { fprintf(stdout, "%s\n", get_node_name(nodeid)); - crm_exit(pcmk_ok); - } - - if (dangerous_cmd && force_flag == FALSE) { - fprintf(stderr, "The supplied command is considered dangerous." - " To prevent accidental destruction of the cluster," - " the --force flag is required in order to proceed.\n"); - fflush(stderr); - crm_exit(EINVAL); + crm_node_exit(pcmk_ok); } try_stack = get_cluster_type(); @@ -994,5 +1042,6 @@ main(int argc, char **argv) try_pacemaker(command, try_stack); - return (1); + // We only get here if command hasn't been handled + crm_node_exit(1); } From 1e598b7f2f7b613ff4a3f91b199a9585189d0673 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 13 Jun 2018 15:07:47 -0500 Subject: [PATCH 139/812] Refactor: tools: functionize crmd hello in crm_node for future reuse --- tools/crm_node.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/tools/crm_node.c b/tools/crm_node.c index cdeb9aa9fa0..17832870bd7 100644 --- a/tools/crm_node.c +++ b/tools/crm_node.c @@ -149,6 +149,19 @@ run_mainloop_and_exit() crm_node_exit(exit_code); } +static int +send_controller_hello(crm_ipc_t *controller) +{ + xmlNode *hello = NULL; + int rc; + + pid_s = crm_getpid_s(); + hello = create_hello_message(pid_s, crm_system_name, "1", "0"); + rc = crm_ipc_send(controller, hello, 0, 0, NULL); + free_xml(hello); + return (rc < 0)? 
rc : 0; +} + static int cib_remove_node(uint32_t id, const char *name) { @@ -196,10 +209,8 @@ tools_remove_node_cache(const char *node, const char *target) int n = 0; int rc = -1; char *name = NULL; - char *admin_uuid = NULL; crm_ipc_t *conn = crm_ipc_new(target, 0); xmlNode *cmd = NULL; - xmlNode *hello = NULL; char *endptr = NULL; if (!conn) { @@ -213,19 +224,15 @@ tools_remove_node_cache(const char *node, const char *target) } if(safe_str_eq(target, CRM_SYSTEM_CRMD)) { - admin_uuid = crm_getpid_s(); - - hello = create_hello_message(admin_uuid, "crm_node", "0", "1"); - rc = crm_ipc_send(conn, hello, 0, 0, NULL); - - free_xml(hello); + // The controller requires a hello message before sending a request + rc = send_controller_hello(conn); if (rc < 0) { - free(admin_uuid); + fprintf(stderr, "error: Could not register with controller: %s\n", + pcmk_strerror(rc)); return rc; } } - errno = 0; n = strtol(node, &endptr, 10); if (errno != 0 || endptr == node || *endptr != '\0') { @@ -256,7 +263,7 @@ tools_remove_node_cache(const char *node, const char *target) } else { cmd = create_request(CRM_OP_RM_NODE_CACHE, - NULL, NULL, target, crm_system_name, admin_uuid); + NULL, NULL, target, crm_system_name, pid_s); if (n) { crm_xml_set_id(cmd, "%u", n); } @@ -274,7 +281,6 @@ tools_remove_node_cache(const char *node, const char *target) crm_ipc_close(conn); crm_ipc_destroy(conn); } - free(admin_uuid); free_xml(cmd); free(name); return rc > 0 ? 0 : rc; From 45b814f2771f1e879b1616c5642529b7eb560d2f Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 13 Jun 2018 13:50:26 -0500 Subject: [PATCH 140/812] Fix: tools: query controller for crm_node -n This allows crm_node -n to function on Pacemaker Remote nodes, but it now requires that the cluster be running. 
--- tools/crm_node.c | 118 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 6 deletions(-) diff --git a/tools/crm_node.c b/tools/crm_node.c index 17832870bd7..ce20c1ba936 100644 --- a/tools/crm_node.c +++ b/tools/crm_node.c @@ -162,6 +162,117 @@ send_controller_hello(crm_ipc_t *controller) return (rc < 0)? rc : 0; } +static int +send_node_info_request(crm_ipc_t *controller) +{ + xmlNode *ping = NULL; + int rc; + + ping = create_request(CRM_OP_NODE_INFO, NULL, NULL, CRM_SYSTEM_CRMD, + crm_system_name, pid_s); + rc = crm_ipc_send(controller, ping, 0, 0, NULL); + free_xml(ping); + return (rc < 0)? rc : 0; +} + +static int +dispatch_controller(const char *buffer, ssize_t length, gpointer userdata) +{ + xmlNode *message = string2xml(buffer); + xmlNode *data = NULL; + const char *value = NULL; + + if (message == NULL) { + fprintf(stderr, "error: Could not understand reply from controller\n"); + crm_node_exit(1); + return 0; + } + crm_log_xml_trace(message, "controller reply"); + + exit_code = 1; + + // Validate reply + value = crm_element_value(message, F_CRM_MSG_TYPE); + if (safe_str_neq(value, XML_ATTR_RESPONSE)) { + fprintf(stderr, "error: Message from controller was not a reply\n"); + goto done; + } + value = crm_element_value(message, XML_ATTR_REFERENCE); + if (value == NULL) { + fprintf(stderr, "error: Controller reply did not specify original message\n"); + goto done; + } + data = get_message_xml(message, F_CRM_DATA); + if (data == NULL) { + fprintf(stderr, "error: Controller reply did not contain any data\n"); + goto done; + } + + switch (command) { + case 'n': + value = crm_element_value(data, XML_ATTR_UNAME); + if (value == NULL) { + fprintf(stderr, "Node is not known to cluster\n"); + exit_code = 1; + } else { + printf("%s\n", value); + exit_code = pcmk_ok; + } + break; + default: + fprintf(stderr, "internal error: Controller reply not expected\n"); + exit_code = 1; + break; + } + +done: + free_xml(message); + 
crm_node_exit(exit_code); + return 0; +} + +static void +run_controller_mainloop() +{ + crm_ipc_t *controller = NULL; + int rc; + + controller = new_mainloop_for_ipc(CRM_SYSTEM_CRMD, dispatch_controller); + + rc = send_controller_hello(controller); + if (rc < 0) { + fprintf(stderr, "error: Could not register with controller: %s\n", + pcmk_strerror(rc)); + crm_node_exit(1); + } + + rc = send_node_info_request(controller); + if (rc < 0) { + fprintf(stderr, "error: Could not ping controller: %s\n", + pcmk_strerror(rc)); + crm_node_exit(1); + } + + // Run main loop to get controller reply via dispatch_controller() + run_mainloop_and_exit(); +} + +static void +print_node_name() +{ + // Check environment first (i.e. when called by resource agent) + const char *name = getenv("OCF_RESKEY_" CRM_META "_" XML_LRM_ATTR_TARGET); + + if (name != NULL) { + printf("%s\n", name); + crm_node_exit(pcmk_ok); + + } else { + // Otherwise ask the controller + run_controller_mainloop(); + } +} + static int cib_remove_node(uint32_t id, const char *name) { @@ -1005,12 +1116,7 @@ main(int argc, char **argv) } if (command == 'n') { - const char *name = getenv("OCF_RESKEY_" CRM_META "_" XML_LRM_ATTR_TARGET); - if(name == NULL) { - name = get_local_node_name(); - } - fprintf(stdout, "%s\n", name); - crm_node_exit(pcmk_ok); + print_node_name(); } else if (command == 'N') { fprintf(stdout, "%s\n", get_node_name(nodeid)); From fa52e464c2c88cb385c9107e62dcd7f69af1dda0 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 13 Jun 2018 16:47:18 -0500 Subject: [PATCH 141/812] Refactor: tools: ask controller instead of corosync for crm_node -N gets us closer to crm_node not needing libcrmcluster --- tools/crm_node.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/crm_node.c b/tools/crm_node.c index ce20c1ba936..07edaa35b8d 100644 --- a/tools/crm_node.c +++ b/tools/crm_node.c @@ -163,13 +163,16 @@ send_controller_hello(crm_ipc_t *controller) } static int 
-send_node_info_request(crm_ipc_t *controller) +send_node_info_request(crm_ipc_t *controller, uint32_t nodeid) { xmlNode *ping = NULL; int rc; ping = create_request(CRM_OP_NODE_INFO, NULL, NULL, CRM_SYSTEM_CRMD, crm_system_name, pid_s); + if (nodeid > 0) { + crm_xml_add_int(ping, XML_ATTR_ID, nodeid); + } rc = crm_ipc_send(controller, ping, 0, 0, NULL); free_xml(ping); return (rc < 0)? rc : 0; @@ -210,6 +213,7 @@ dispatch_controller(const char *buffer, ssize_t length, gpointer userdata) switch (command) { case 'n': + case 'N': value = crm_element_value(data, XML_ATTR_UNAME); if (value == NULL) { fprintf(stderr, "Node is not known to cluster\n"); @@ -232,7 +236,7 @@ dispatch_controller(const char *buffer, ssize_t length, gpointer userdata) } static void -run_controller_mainloop() +run_controller_mainloop(uint32_t nodeid) { crm_ipc_t *controller = NULL; int rc; @@ -246,7 +250,7 @@ run_controller_mainloop() crm_node_exit(1); } - rc = send_node_info_request(controller); + rc = send_node_info_request(controller, nodeid); if (rc < 0) { fprintf(stderr, "error: Could not ping controller: %s\n", pcmk_strerror(rc)); @@ -269,7 +273,7 @@ print_node_name() } else { // Otherwise ask the controller - run_controller_mainloop(); + run_controller_mainloop(0); } } @@ -1119,8 +1123,7 @@ main(int argc, char **argv) print_node_name(); } else if (command == 'N') { - fprintf(stdout, "%s\n", get_node_name(nodeid)); - crm_node_exit(pcmk_ok); + run_controller_mainloop(nodeid); } try_stack = get_cluster_type(); From c9dd7ac170cc36b367747e12faadc2092b06bfa7 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 18 Jun 2018 10:31:39 -0500 Subject: [PATCH 142/812] Refactor: tools: ask controller instead of corosync for crm_node -i This is a partial backport of commit 4b006a6c from the 2.0 branch. The new behavior is used only when the cluster stack is corosync 2+. 
--- tools/crm_node.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/tools/crm_node.c b/tools/crm_node.c index 07edaa35b8d..ee27628fcb3 100644 --- a/tools/crm_node.c +++ b/tools/crm_node.c @@ -212,6 +212,15 @@ dispatch_controller(const char *buffer, ssize_t length, gpointer userdata) } switch (command) { + case 'i': + value = crm_element_value(data, XML_ATTR_ID); + if (value == NULL) { + fprintf(stderr, "error: Controller reply did not contain node ID\n"); + } else { + printf("%s\n", value); + exit_code = pcmk_ok; + } + break; case 'n': case 'N': value = crm_element_value(data, XML_ATTR_UNAME); @@ -935,8 +944,6 @@ try_corosync(int command, enum cluster_type_e stack) int rc = 0; int quorate = 0; uint32_t quorum_type = 0; - unsigned int nodeid = 0; - cpg_handle_t c_handle = 0; quorum_handle_t q_handle = 0; switch (command) { @@ -957,24 +964,11 @@ try_corosync(int command, enum cluster_type_e stack) printf("%d\n", quorate); quorum_finalize(q_handle); crm_node_exit(pcmk_ok); + break; case 'i': - /* Go direct to the CPG API */ - rc = cpg_initialize(&c_handle, NULL); - if (rc != CS_OK) { - crm_err("Could not connect to the Cluster Process Group API: %d", rc); - return; - } - - rc = cpg_local_get(c_handle, &nodeid); - if (rc != CS_OK) { - crm_err("Could not get local node id from the CPG API"); - return; - } - - printf("%u\n", nodeid); - cpg_finalize(c_handle); - crm_node_exit(pcmk_ok); + run_controller_mainloop(0); + break; default: break; @@ -1140,6 +1134,10 @@ main(int argc, char **argv) if (try_stack == pcmk_cluster_corosync) { try_corosync(command, try_stack); } + if ((try_stack == pcmk_cluster_unknown) && (command == 'i')) { + // possibly a Pacemaker Remote node + run_controller_mainloop(0); + } #endif #if HAVE_CONFDB From 1ca56cd0caea78b7cfe49904fdbd277ced737775 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 19 Jun 2018 16:51:46 -0500 Subject: [PATCH 143/812] Low: libstonithd: avoid memory leak when 
getting meta-data introduced by a69002b4 --- lib/fencing/st_rhcs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c index d453945f8d2..48d234a1c03 100644 --- a/lib/fencing/st_rhcs.c +++ b/lib/fencing/st_rhcs.c @@ -96,6 +96,7 @@ stonith__rhcs_metadata(const char *agent, int timeout, char **output) if (rc < 0) { crm_warn("Metadata action for %s failed: %s " CRM_XS "rc=%d", agent, pcmk_strerror(rc), rc); + free(buffer); return rc; } @@ -105,6 +106,8 @@ stonith__rhcs_metadata(const char *agent, int timeout, char **output) } xml = string2xml(buffer); + free(buffer); + buffer = NULL; if (xml == NULL) { crm_warn("Metadata for %s is invalid", agent); return -pcmk_err_schema_validation; From 6517d47fc306b6d9b054dd55ce705e6f9a492a94 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 4 Apr 2018 15:52:03 -0500 Subject: [PATCH 144/812] Low: libcrmcommon: avoid resource leak CIB secrets didn't close a file pointer --- lib/common/cib_secrets.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/common/cib_secrets.c b/lib/common/cib_secrets.c index 5afc6639771..d5766fe5da2 100644 --- a/lib/common/cib_secrets.c +++ b/lib/common/cib_secrets.c @@ -70,8 +70,10 @@ read_local_file(char *local_file) if (!fgets(buf, MAX_VALUE_LEN, fp)) { crm_perror(LOG_ERR, "cannot read %s", local_file); + fclose(fp); return NULL; } + fclose(fp); /* strip white space */ for (p = buf+strlen(buf)-1; p >= buf && isspace(*p); p--) From 04be10fcd59075857b292a0b06c6560216adb24a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= Date: Thu, 14 Jun 2018 23:54:10 +0200 Subject: [PATCH 145/812] Refactor: xml.c: add new find_entity_by_id_or_just_name generalization More powerful than find_entity, which boils down to it. 
--- lib/common/xml.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/lib/common/xml.c b/lib/common/xml.c index 8fd47789843..5f5d00a14ac 100644 --- a/lib/common/xml.c +++ b/lib/common/xml.c @@ -2277,25 +2277,45 @@ find_xml_node(xmlNode * root, const char *search_path, gboolean must_find) return NULL; } -xmlNode * -find_entity(xmlNode * parent, const char *node_name, const char *id) +/* As the name suggests, the perfect match is required for both node + name and fully specified attribute, otherwise, when attribute not + specified, the outcome is the first node matching on the name. */ +static xmlNode * +find_entity_by_attr_or_just_name(xmlNode *parent, const char *node_name, + const char *attr_n, const char *attr_v) { - xmlNode *a_child = NULL; + xmlNode *child; - for (a_child = __xml_first_child(parent); a_child != NULL; a_child = __xml_next(a_child)) { - /* Uncertain if node_name == NULL check is strictly necessary here */ - if (node_name == NULL || strcmp((const char *)a_child->name, node_name) == 0) { - const char *cid = ID(a_child); - if (id == NULL || (cid != NULL && strcmp(id, cid) == 0)) { - return a_child; + /* ensure attr_v specified when attr_n is */ + CRM_CHECK(attr_n == NULL || attr_v != NULL, return NULL); + + for (child = __xml_first_child(parent); child != NULL; child = __xml_next(child)) { + /* XXX uncertain if the first check is strictly necessary here */ + if (node_name == NULL || !strcmp((const char *) child->name, node_name)) { + if (attr_n == NULL + || crm_str_eq(crm_element_value(child, attr_n), attr_v, TRUE)) { + return child; } } } - crm_trace("node <%s id=%s> not found in %s.", node_name, id, crm_element_name(parent)); + crm_trace("node <%s%s%s%s%s> not found in %s", crm_str(node_name), + attr_n ? " " : "", + attr_n ? attr_n : "", + attr_n ? "=" : "", + attr_n ? 
attr_v : "", + crm_element_name(parent)); + return NULL; } +xmlNode * +find_entity(xmlNode *parent, const char *node_name, const char *id) +{ + return find_entity_by_attr_or_just_name(parent, node_name, + (id == NULL) ? id : XML_ATTR_ID, id); +} + void copy_in_properties(xmlNode * target, xmlNode * src) { From 6eceea90096c33e0d2d11ecf83e5436b67cbedc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= Date: Thu, 14 Jun 2018 23:52:02 +0200 Subject: [PATCH 146/812] Refactor: xml.c: simplify add_xml_object Suggested by Ken. --- lib/common/xml.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/common/xml.c b/lib/common/xml.c index 5f5d00a14ac..5b85fc3f995 100644 --- a/lib/common/xml.c +++ b/lib/common/xml.c @@ -4653,12 +4653,9 @@ add_xml_object(xmlNode * parent, xmlNode * target, xmlNode * update, gboolean as object_id = ID(update); CRM_CHECK(object_name != NULL, return 0); + CRM_CHECK(target != NULL || parent != NULL, return 0); - if (target == NULL && object_id == NULL) { - /* placeholder object */ - target = find_xml_node(parent, object_name, FALSE); - - } else if (target == NULL) { + if (target == NULL) { target = find_entity(parent, object_name, object_id); } From 41973f1934ac35b36d97f42005a40b8d2fe455ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= Date: Fri, 15 Jun 2018 00:13:49 +0200 Subject: [PATCH 147/812] High: CIB: no schema violating oversimplified patching with @id-ref Previously, this input to modification of CIB: > > > > > > > > > > > > > would degenerate into this: > > > > > > > > > > > apparently resulting in an invalid configuration instance (cannot mix @id-ref with anything else incl. @id). Now, we are more careful about @id-ref so this won't happen. 
There's a slight time complexity penalty in case we search the matching element with identical @id-ref specification that we could skip, resulting in multiple identical siblings with these elements, but let's stick with absolute idempotency, as it seems to be quite a hit. --- lib/common/xml.c | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/lib/common/xml.c b/lib/common/xml.c index 5b85fc3f995..8deb02666bf 100644 --- a/lib/common/xml.c +++ b/lib/common/xml.c @@ -4635,8 +4635,9 @@ int add_xml_object(xmlNode * parent, xmlNode * target, xmlNode * update, gboolean as_diff) { xmlNode *a_child = NULL; - const char *object_id = NULL; - const char *object_name = NULL; + const char *object_name = NULL, + *object_href = NULL, + *object_href_val = NULL; #if XML_PARSE_DEBUG crm_log_xml_trace("update:", update); @@ -4650,25 +4651,38 @@ add_xml_object(xmlNode * parent, xmlNode * target, xmlNode * update, gboolean as } object_name = crm_element_name(update); - object_id = ID(update); + object_href_val = ID(update); + if (object_href_val != NULL) { + object_href = XML_ATTR_ID; + } else { + object_href_val = crm_element_value(update, XML_ATTR_IDREF); + object_href = (object_href_val == NULL) ? NULL : XML_ATTR_IDREF; + } CRM_CHECK(object_name != NULL, return 0); CRM_CHECK(target != NULL || parent != NULL, return 0); if (target == NULL) { - target = find_entity(parent, object_name, object_id); + target = find_entity_by_attr_or_just_name(parent, object_name, + object_href, object_href_val); } if (target == NULL) { target = create_xml_node(parent, object_name); CRM_CHECK(target != NULL, return 0); #if XML_PARSER_DEBUG - crm_trace("Added <%s%s%s/>", crm_str(object_name), - object_id ? " id=" : "", object_id ? object_id : ""); + crm_trace("Added <%s%s%s%s%s/>", crm_str(object_name), + object_href ? " " : "", + object_href ? object_href : "", + object_href ? "=" : "", + object_href ? 
object_href_val : ""); } else { - crm_trace("Found node <%s%s%s/> to update", - crm_str(object_name), object_id ? " id=" : "", object_id ? object_id : ""); + crm_trace("Found node <%s%s%s%s%s/> to update", crm_str(object_name), + object_href ? " " : "", + object_href ? object_href : "", + object_href ? "=" : "", + object_href ? object_href_val : ""); #endif } @@ -4694,13 +4708,21 @@ add_xml_object(xmlNode * parent, xmlNode * target, xmlNode * update, gboolean as for (a_child = __xml_first_child(update); a_child != NULL; a_child = __xml_next(a_child)) { #if XML_PARSER_DEBUG - crm_trace("Updating child <%s id=%s>", crm_element_name(a_child), ID(a_child)); + crm_trace("Updating child <%s%s%s%s%s/>", crm_str(object_name), + object_href ? " " : "", + object_href ? object_href : "", + object_href ? "=" : "", + object_href ? object_href_val : ""); #endif add_xml_object(target, NULL, a_child, as_diff); } #if XML_PARSER_DEBUG - crm_trace("Finished with <%s id=%s>", crm_str(object_name), crm_str(object_id)); + crm_trace("Finished with <%s%s%s%s%s/>", crm_str(object_name), + object_href ? " " : "", + object_href ? object_href : "", + object_href ? "=" : "", + object_href ? 
object_href_val : ""); #endif return 0; } From 3a026b67cb33c13202d2118404d9f7f98d1f70d4 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 19 Jun 2018 12:15:04 -0500 Subject: [PATCH 148/812] Build: update shared object versions for 1.1.19 --- lib/cib/Makefile.am | 2 +- lib/cluster/Makefile.am | 2 +- lib/common/Makefile.am | 2 +- lib/fencing/Makefile.am | 2 +- lib/lrmd/Makefile.am | 2 +- lib/pengine/Makefile.am | 4 ++-- lib/services/Makefile.am | 2 +- pengine/Makefile.am | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/cib/Makefile.am b/lib/cib/Makefile.am index 25219dbbd15..cc4f4364b0d 100644 --- a/lib/cib/Makefile.am +++ b/lib/cib/Makefile.am @@ -24,7 +24,7 @@ lib_LTLIBRARIES = libcib.la libcib_la_SOURCES = cib_ops.c cib_utils.c cib_client.c cib_native.c cib_attrs.c libcib_la_SOURCES += cib_file.c cib_remote.c -libcib_la_LDFLAGS = -version-info 5:4:1 +libcib_la_LDFLAGS = -version-info 5:5:1 libcib_la_CPPFLAGS = -I$(top_srcdir) $(AM_CPPFLAGS) libcib_la_CFLAGS = $(CFLAGS_HARDENED_LIB) diff --git a/lib/cluster/Makefile.am b/lib/cluster/Makefile.am index 7865785fc10..a7f9655f2e4 100644 --- a/lib/cluster/Makefile.am +++ b/lib/cluster/Makefile.am @@ -20,7 +20,7 @@ include $(top_srcdir)/Makefile.common ## libraries lib_LTLIBRARIES = libcrmcluster.la -libcrmcluster_la_LDFLAGS = -version-info 7:1:3 +libcrmcluster_la_LDFLAGS = -version-info 7:2:3 libcrmcluster_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libcrmcluster_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am index 30427625eda..9dd202529d8 100644 --- a/lib/common/Makefile.am +++ b/lib/common/Makefile.am @@ -30,7 +30,7 @@ lib_LTLIBRARIES = libcrmcommon.la CFLAGS = $(CFLAGS_COPY:-Wcast-qual=) -fPIC -libcrmcommon_la_LDFLAGS = -version-info 11:0:8 +libcrmcommon_la_LDFLAGS = -version-info 12:0:9 libcrmcommon_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libcrmcommon_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am 
index 69d9be0fb84..a61d143cd4a 100644 --- a/lib/fencing/Makefile.am +++ b/lib/fencing/Makefile.am @@ -9,7 +9,7 @@ include $(top_srcdir)/Makefile.common lib_LTLIBRARIES = libstonithd.la -libstonithd_la_LDFLAGS = -version-info 4:4:2 +libstonithd_la_LDFLAGS = -version-info 5:0:3 libstonithd_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libstonithd_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) diff --git a/lib/lrmd/Makefile.am b/lib/lrmd/Makefile.am index 8e5e4761b4a..7eac51ea678 100644 --- a/lib/lrmd/Makefile.am +++ b/lib/lrmd/Makefile.am @@ -18,7 +18,7 @@ include $(top_srcdir)/Makefile.common lib_LTLIBRARIES = liblrmd.la -liblrmd_la_LDFLAGS = -version-info 6:0:5 +liblrmd_la_LDFLAGS = -version-info 6:1:5 liblrmd_la_CFLAGS = $(CFLAGS_HARDENED_LIB) liblrmd_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) diff --git a/lib/pengine/Makefile.am b/lib/pengine/Makefile.am index 8aceccc7079..5f36c94a38d 100644 --- a/lib/pengine/Makefile.am +++ b/lib/pengine/Makefile.am @@ -23,7 +23,7 @@ lib_LTLIBRARIES = libpe_rules.la libpe_status.la ## SOURCES noinst_HEADERS = unpack.h variant.h -libpe_rules_la_LDFLAGS = -version-info 5:0:3 +libpe_rules_la_LDFLAGS = -version-info 5:1:3 libpe_rules_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libpe_rules_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) @@ -31,7 +31,7 @@ libpe_rules_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libpe_rules_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la libpe_rules_la_SOURCES = rules.c rules_alerts.c common.c -libpe_status_la_LDFLAGS = -version-info 14:0:4 +libpe_status_la_LDFLAGS = -version-info 15:0:5 libpe_status_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libpe_status_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) diff --git a/lib/services/Makefile.am b/lib/services/Makefile.am index a416f1369e9..efded9e3fbe 100644 --- a/lib/services/Makefile.am +++ b/lib/services/Makefile.am @@ -23,7 +23,7 @@ AM_CPPFLAGS = -I$(top_srcdir)/include lib_LTLIBRARIES = libcrmservice.la noinst_HEADERS = pcmk-dbus.h upstart.h systemd.h services_private.h -libcrmservice_la_LDFLAGS = -version-info 
6:0:3 +libcrmservice_la_LDFLAGS = -version-info 6:1:3 libcrmservice_la_CPPFLAGS = -DOCF_ROOT_DIR=\"@OCF_ROOT_DIR@\" $(AM_CPPFLAGS) libcrmservice_la_CFLAGS = $(GIO_CFLAGS) diff --git a/pengine/Makefile.am b/pengine/Makefile.am index e997454faf3..861946c92cf 100644 --- a/pengine/Makefile.am +++ b/pengine/Makefile.am @@ -60,7 +60,7 @@ endif noinst_HEADERS = allocate.h notif.h utils.h pengine.h -libpengine_la_LDFLAGS = -version-info 14:0:4 +libpengine_la_LDFLAGS = -version-info 15:0:5 libpengine_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libpengine_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) From ca6cf190f5dbfb5fd41847ca9b8c8fc5bbf2f978 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 19 Jun 2018 12:16:38 -0500 Subject: [PATCH 149/812] Build: update version information for 1.1.19 --- pacemaker.spec.in | 2 +- version.m4 | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pacemaker.spec.in b/pacemaker.spec.in index ee9fc351439..e205a843fc1 100644 --- a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -13,7 +13,7 @@ ## Upstream pacemaker version, and its package version (specversion ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) -%global pcmkversion 1.1.18 +%global pcmkversion 1.1.19 %global specversion 1 ## Upstream commit (or git tag, such as "Pacemaker-" plus the diff --git a/version.m4 b/version.m4 index 3f75ce3f90a..1adb07cabd1 100644 --- a/version.m4 +++ b/version.m4 @@ -1,2 +1,2 @@ -m4_define([VERSION_NUMBER], [1.1.18]) -m4_define([PCMK_URL], [http://clusterlabs.org/]) +m4_define([VERSION_NUMBER], [1.1.19]) +m4_define([PCMK_URL], [https://clusterlabs.org/]) From 7bcbadf7588e592359c311596a0b26fb92d3b7af Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 19 Jun 2018 12:38:38 -0500 Subject: [PATCH 150/812] Doc: update change log for 1.1.19-rc1 release --- ChangeLog | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/ChangeLog 
b/ChangeLog index 08230063efd..2b7a3ceb6ff 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,52 @@ +* Wed Jun 20 2018 Ken Gaillot Pacemaker-1.1.19-rc1 +- Update source tarball to revision: ca6cf19 +- Changesets: 165 +- Diff: 96 files changed, 3636 insertions(+), 2318 deletions(-) + +- Features added since Pacemaker-1.1.18 + + This is a maintenance release with selected changes backported from 2.0.0 + + fencing: add stonith_admin --validate option + + pengine: deprecate stonith-action=poweroff + + libcrmservice: support mount, path, and timer systemd unit types + +- Changes since Pacemaker-1.1.18 + + Restore systemd unit dependency on DBus (regression in 1.1.17) + + cib: handle mixed-case node names when modifying attributes (regression in 1.1.17) + + attrd: ensure node name is broadcast at start-up (regression in 1.1.18) + + pengine: unfence before probing or starting fence devices (regression in 1.1.18) + + tools: crm_master did not work without explicit --lifetime (regression in 1.1.18) + + all: prefer appropriate node when multiply active + + crmd: always write faked failures to CIB whenever possible + + crmd: avoid double free after ACL rejection of resource delete + + crmd: delete resource from lrmd when appropriate + + crmd: match only executed down events + + lrmd: handle systemd actions correctly when used with "service:" + + lrmd: always use most recent remote proxy + + pengine: find active instances properly according to requires + + pengine: fix precedence of operation meta-attributes + + pengine: handle unique bundle children correctly + + pengine: use correct default timeout for monitors and probes + + pengine: avoid potential use-of-NULL in unpack_simple_rsc_order() + + pengine: ensure stop operations occur after stopped remote connections have been brought up + + pengine: fix swapped warning message arguments leading to segfault + + pengine: only allowed nodes need to be considered when ordering resource startup after _all_ recovery + + pengine: 
ordering bundle child stops/demotes after container fencing causes graph loops + + pengine: passing boolean instead of a pointer + + pengine: remote connection resources are safe to require only quorum + + pengine: correctly observe colocation constraints with bundles in the Master role + + pengine: do not perform notifications for events that can't be executed + + pengine: ensure failures that cause fencing are not removed until after fencing completes + + pengine: ensure orphaned recurring monitors have interval set + + libcrmcluster: avoid use-of-NULL when searching for remote node + + libcrmcommon: don't record pending notify actions as completed + + libcrmservice: find absolute paths when used with "service:" + + tools: crm_simulate -Ls should show promotion scores + + tools: prevent notify actions from causing crm_resource --wait to hang + + tools: ignore attribute placement when crm_diff compares in cib mode + + tools: cibsecret --help/--version doesn't require cluster to be running + + tools: crm_node -n/-N/-i should work on Pacemaker Remote nodes + + tools: treat INFINITY correctly in crm_failcount + * Tue Nov 14 2017 Ken Gaillot Pacemaker-1.1.18-1 - Update source tarball to revision: a9fbd15 - Changesets: 644 From def039dc7e0cbc4f3633e169e58241b40677de73 Mon Sep 17 00:00:00 2001 From: Klaus Wenninger Date: Thu, 21 Jun 2018 16:14:13 +0200 Subject: [PATCH 151/812] Fix: st_client, stonith_admin: cleanup memory of stonith-history --- fencing/admin.c | 4 +++- include/crm/stonith-ng.h | 2 ++ lib/fencing/st_client.c | 17 ++++++++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/fencing/admin.c b/fencing/admin.c index 179116a2ac2..973f470bc45 100644 --- a/fencing/admin.c +++ b/fencing/admin.c @@ -359,7 +359,7 @@ static int show_history(stonith_t *st, const char *target, int timeout, int quiet, int verbose) { - stonith_history_t *history, *hp, *latest = NULL; + stonith_history_t *history = NULL, *hp, *latest = NULL; int rc = 0; rc = 
st->cmds->history(st, st_opts, @@ -406,6 +406,8 @@ show_history(stonith_t *st, const char *target, int timeout, int quiet, print_fence_event(latest); } } + + stonith_history_free(history); return rc; } diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h index 046a669d793..56c1ec7a508 100644 --- a/include/crm/stonith-ng.h +++ b/include/crm/stonith-ng.h @@ -427,6 +427,8 @@ stonith_key_value_t *stonith_key_value_add(stonith_key_value_t * kvp, const char const char *value); void stonith_key_value_freeall(stonith_key_value_t * kvp, int keys, int values); +void stonith_history_free(stonith_history_t *history); + /* Basic helpers that allows nodes to be fenced and the history to be * queried without mainloop or the caller understanding the full API * diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 88ca8303627..9b405325c78 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1441,6 +1441,19 @@ stonith_api_history(stonith_t * stonith, int call_options, const char *node, return rc; } +void stonith_history_free(stonith_history_t *history) +{ + stonith_history_t *hp, *hp_old; + + for (hp = history; hp; hp_old = hp, hp = hp->next, free(hp_old)) { + free(hp->target); + free(hp->action); + free(hp->origin); + free(hp->delegate); + free(hp->client); + } +} + /*! 
* \brief Deprecated (use stonith_get_namespace() instead) */ @@ -2494,7 +2507,7 @@ stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) time_t when = 0; stonith_t *st = NULL; - stonith_history_t *history, *hp = NULL; + stonith_history_t *history = NULL, *hp = NULL; enum stonith_call_options opts = st_opt_sync_call; st = stonith_api_new(); @@ -2536,6 +2549,8 @@ stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) } } + stonith_history_free(history); + if(rc == pcmk_ok) { api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed); } else { From 485ac1f73f541ccd22aed9ee8c51a62b58051c84 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 25 Jun 2018 10:28:05 -0500 Subject: [PATCH 152/812] Doc: tools: stonith_admin -h/-H options are queries --- fencing/admin.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fencing/admin.c b/fencing/admin.c index 973f470bc45..6f35bd9f6a8 100644 --- a/fencing/admin.c +++ b/fencing/admin.c @@ -100,6 +100,16 @@ static struct crm_option long_options[] = { { "query", required_argument, NULL, 'Q', "Check the named device's status. Optional: --timeout." }, + { "history", required_argument, NULL, 'H', + "Show last successful fencing operation for named node\n" + "\t\t\t(or '*' for all nodes). Optional: --timeout, --quiet\n" + "\t\t\t(show only the operation's epoch timestamp),\n" + "\t\t\t--verbose (show all recorded and pending operations)." + }, + { "last", required_argument, NULL, 'h', + "Indicate when the named node was last fenced.\n" + "\t\t\tOptional: --as-node-id." + }, { "validate", no_argument, NULL, 'K', "\tValidate a fence device configuration.\n" "\t\t\tRequires: --agent. Optional: --option, --env-option,\n" @@ -120,16 +130,6 @@ static struct crm_option long_options[] = { { "confirm", required_argument, NULL, 'C', "Tell cluster that named host is now safely down." 
}, - { "history", required_argument, NULL, 'H', - "Show last successful fencing operation for named node\n" - "\t\t\t(or '*' for all nodes). Optional: --timeout, --quiet\n" - "\t\t\t(show only the operation's epoch timestamp),\n" - "\t\t\t--verbose (show all recorded and pending operations)." - }, - { "last", required_argument, NULL, 'h', - "Indicate when the named node was last fenced.\n" - "\t\t\tOptional: --as-node-id." - }, { "-spacer-", no_argument, NULL, '-', "\nAdditional Options:" }, From 0eee17ea3ac11e40e8bb2ae93a25909b18287da0 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 21 Jun 2018 14:45:40 -0500 Subject: [PATCH 153/812] Build: ChangeLog: don't add "source tarball" line hasn't been relevant since changelog moved from spec file to its own file in bde826f7 --- GNUmakefile | 1 - 1 file changed, 1 deletion(-) diff --git a/GNUmakefile b/GNUmakefile index c2bba1fe272..a0841504c05 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -333,7 +333,6 @@ www: manhtml-upload global-upload doxygen-upload summary: @printf "\n* `date +"%a %b %d %Y"` `git config user.name` <`git config user.email`> $(NEXT_RELEASE)-1" - @printf "\n- Update source tarball to revision: `git log --pretty=format:%h -n 1`" @printf "\n- Changesets: `git log --pretty=oneline $(LAST_RELEASE)..HEAD | wc -l`" @printf "\n- Diff: " @git diff -r $(LAST_RELEASE)..HEAD --stat include lib mcp pengine/*.c pengine/*.h cib crmd fencing lrmd tools xml | tail -n 1 From c423406ba62723ac4dc55f8379ee419b3f476aba Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 13 Jun 2018 11:56:59 -0500 Subject: [PATCH 154/812] Fix: executor: cancel recurring monitors if fence device registration is lost If the fence daemon crashes and respawns, any recurring monitors for fence devices fail with "no device". Previously, this would be recorded as a monitor failure, but the monitor would still be rescheduled indefinitely. Now, the monitor will be cancelled in such a situation. 
cancellation would fail because the fence device would no longer be registered. Fixes CLBZ#5352 --- lrmd/lrmd.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index 6e0c252dd03..c717947a5ed 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -1019,8 +1019,19 @@ stonith_action_complete(lrmd_cmd_t * cmd, int rc) /* do nothing */ } else if (rc == -ENODEV && safe_str_eq(cmd->action, "monitor")) { - /* Not registered == inactive */ - cmd->lrmd_op_status = PCMK_LRM_OP_DONE; + // The device is not registered with the fencer + + if (recurring) { + /* If we get here, the fencer somehow lost the registration of a + * previously active device (possibly due to crash and respawn). In + * that case, we need to indicate that the recurring monitor needs + * to be cancelled. + */ + cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; + recurring = FALSE; + } else { + cmd->lrmd_op_status = PCMK_LRM_OP_DONE; + } cmd->exec_rc = PCMK_OCF_NOT_RUNNING; } else if (rc) { From 3b4e5555322c2c85a1bb1be6da9761de3ddf93f0 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 29 Jun 2018 17:16:52 -0500 Subject: [PATCH 155/812] Refactor: controller: remove dead code around recording action timeouts cib_action_update() can no longer be called for pending actions as of 5519c94, so simplify it and rename it controld_record_action_timeout().
--- crmd/te_actions.c | 49 +++++++++++++++++++-------------------------- crmd/te_callbacks.c | 2 +- crmd/tengine.h | 2 +- 3 files changed, 23 insertions(+), 30 deletions(-) diff --git a/crmd/te_actions.c b/crmd/te_actions.c index fde44dbeae3..14097abc160 100644 --- a/crmd/te_actions.c +++ b/crmd/te_actions.c @@ -305,8 +305,8 @@ te_crm_command(crm_graph_t * graph, crm_action_t * action) return TRUE; } -gboolean -cib_action_update(crm_action_t * action, int status, int op_rc) +void +controld_record_action_timeout(crm_action_t *action) { lrmd_event_data_t *op = NULL; xmlNode *state = NULL; @@ -317,7 +317,6 @@ cib_action_update(crm_action_t * action, int status, int op_rc) int rc = pcmk_ok; const char *rsc_id = NULL; - const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); @@ -325,22 +324,17 @@ cib_action_update(crm_action_t * action, int status, int op_rc) int call_options = cib_quorum_override | cib_scope_local; int target_rc = get_target_rc(action); - if (status == PCMK_LRM_OP_PENDING) { - crm_debug("%s %d: Recording pending operation %s on %s", - crm_element_name(action->xml), action->id, task_uuid, target); - } else { - crm_warn("%s %d: %s on %s timed out", - crm_element_name(action->xml), action->id, task_uuid, target); - } + crm_warn("%s %d: %s on %s timed out", + crm_element_name(action->xml), action->id, task_uuid, target); action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if (action_rsc == NULL) { - return FALSE; + return; } rsc_id = ID(action_rsc); - CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); - return FALSE); + CRM_CHECK(rsc_id != NULL, + crm_log_xml_err(action->xml, "Bad:action"); return); /* update the CIB @@ -368,33 +362,32 @@ cib_action_update(crm_action_t * 
action, int status, int op_rc) crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS); crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER); - op = convert_graph_action(NULL, action, status, op_rc); + /* If the executor gets a timeout while waiting for the action to complete, + * that will be reported via the usual callback. This timeout means that we + * didn't hear from the executor or the controller that relayed the action + * to the executor. + * + * @TODO Using PCMK_OCF_UNKNOWN_ERROR instead of PCMK_OCF_TIMEOUT is one way + * to distinguish those situations, but perhaps PCMK_OCF_TIMEOUT would be + * preferable anyway. + */ + op = convert_graph_action(NULL, action, PCMK_LRM_OP_TIMEOUT, + PCMK_OCF_UNKNOWN_ERROR); op->call_id = -1; op->user_data = generate_transition_key(transition_graph->id, action->id, target_rc, te_uuid); xml_op = create_operation_update(rsc, op, CRM_FEATURE_SET, target_rc, target, __FUNCTION__, LOG_INFO); lrmd_free_event(op); - crm_trace("Updating CIB with \"%s\" (%s): %s %s on %s", - status < 0 ? 
"new action" : XML_ATTR_TIMEOUT, - crm_element_name(action->xml), crm_str(task), rsc_id, target); - crm_log_xml_trace(xml_op, "Op"); + crm_log_xml_trace(xml_op, "Action timeout"); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options); - - crm_trace("Updating CIB with %s action %d: %s on %s (call_id=%d)", - services_lrm_status_str(status), action->id, task_uuid, target, rc); - fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated); free_xml(state); + crm_trace("Sent CIB update (call ID %d) for timeout of action %d (%s on %s)", + rc, action->id, task_uuid, target); action->sent_update = TRUE; - - if (rc < pcmk_ok) { - return FALSE; - } - - return TRUE; } static gboolean diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c index d67dabc1897..27c72b6da82 100644 --- a/crmd/te_callbacks.c +++ b/crmd/te_callbacks.c @@ -964,7 +964,7 @@ action_timer_callback(gpointer data) } if (send_update) { - cib_action_update(timer->action, PCMK_LRM_OP_TIMEOUT, PCMK_OCF_UNKNOWN_ERROR); + controld_record_action_timeout(timer->action); } } diff --git a/crmd/tengine.h b/crmd/tengine.h index df49731c244..0d4ef98d368 100644 --- a/crmd/tengine.h +++ b/crmd/tengine.h @@ -36,7 +36,7 @@ void execute_stonith_cleanup(void); extern crm_action_t *match_down_event(const char *target, bool quiet); extern crm_action_t *get_cancel_action(const char *id, const char *node); -extern gboolean cib_action_update(crm_action_t * action, int status, int op_rc); +void controld_record_action_timeout(crm_action_t *action); extern gboolean fail_incompletable_actions(crm_graph_t * graph, const char *down_node); void process_graph_event(xmlNode *event, const char *event_node); From 96b945ab9db1372baa9b5a98a43dbcc57551e9e4 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 29 Jun 2018 16:04:46 -0500 Subject: [PATCH 156/812] Fix: controller: don't record pending notifies in CIB 0fb8547 fixed a bug that recorded pending clone notifications as successfully completed in the 
CIB, by recording them as pending. However, we don't record notification completions in the CIB, so the pending entries stay in the CIB even after the notification completes. Clone notifications should not be recorded in the CIB at all. Fixes CLBZ#5359 --- crmd/lrm.c | 4 +++- lib/common/operations.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index 467bf7fff88..1f5abe86fcb 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -2163,9 +2163,11 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t CRM_CHECK(rsc != NULL, return); CRM_CHECK(op != NULL, return); + // Never record certain operation types as pending if (op->op_type == NULL || safe_str_eq(op->op_type, CRMD_ACTION_CANCEL) - || safe_str_eq(op->op_type, CRMD_ACTION_DELETE)) { + || safe_str_eq(op->op_type, CRMD_ACTION_DELETE) + || safe_str_eq(op->op_type, CRMD_ACTION_NOTIFY)) { return; } diff --git a/lib/common/operations.c b/lib/common/operations.c index 26a8f655e13..01b4f7adffd 100644 --- a/lib/common/operations.c +++ b/lib/common/operations.c @@ -532,8 +532,8 @@ create_operation_update(xmlNode * parent, lrmd_event_data_t * op, const char * c if (op->op_status != PCMK_LRM_OP_PENDING) { /* Ignore notify errors. * - * @TODO We really should keep the actual result here, and ignore it - * when processing the CIB diff. + * @TODO It might be better to keep the correct result here, and + * ignore it in process_graph_event(). 
*/ op->op_status = PCMK_LRM_OP_DONE; op->rc = 0; From e88be2fa576bb79a774aede6cfd90b8b3bb31f76 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 29 Jun 2018 17:37:03 -0500 Subject: [PATCH 157/812] Refactor: controller: functionize checking action for recordability will help reduce errors --- crmd/cib.c | 20 ++++++++++++++++++++ crmd/crmd_utils.h | 1 + crmd/lrm.c | 11 ++++------- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/crmd/cib.c b/crmd/cib.c index d902a32a3b4..a8a097a9d35 100644 --- a/crmd/cib.c +++ b/crmd/cib.c @@ -204,3 +204,23 @@ int crmd_cib_smart_opt() } return call_opt; } + +/*! + * \internal + * \brief Check whether an action type should be recorded in the CIB + * + * \param[in] action Action type + * + * \return TRUE if action should be recorded, FALSE otherwise + */ +bool +controld_action_is_recordable(const char *action) +{ + if (safe_str_eq(action, CRMD_ACTION_CANCEL) + || safe_str_eq(action, CRMD_ACTION_DELETE) + || safe_str_eq(action, CRMD_ACTION_NOTIFY) + || safe_str_eq(action, CRMD_ACTION_METADATA)) { + return FALSE; + } + return TRUE; +} diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h index 3f86fef9e63..0fcdc6ab23d 100644 --- a/crmd/crmd_utils.h +++ b/crmd/crmd_utils.h @@ -107,6 +107,7 @@ void abort_for_stonith_failure(enum transition_action abort_action, const char *target, xmlNode *reason); void crmd_peer_down(crm_node_t *peer, bool full); unsigned int cib_op_timeout(void); +bool controld_action_is_recordable(const char *action); /* Convenience macro for registering a CIB callback * (assumes that data can be freed with free()) diff --git a/crmd/lrm.c b/crmd/lrm.c index 1f5abe86fcb..4c1e679d85b 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -2165,9 +2165,7 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t // Never record certain operation types as pending if (op->op_type == NULL - || safe_str_eq(op->op_type, CRMD_ACTION_CANCEL) - || safe_str_eq(op->op_type, CRMD_ACTION_DELETE) - || 
safe_str_eq(op->op_type, CRMD_ACTION_NOTIFY)) { + || !controld_action_is_recordable(op->op_type)) { return; } @@ -2554,11 +2552,10 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr } if (op->op_status != PCMK_LRM_OP_CANCELLED) { - if (safe_str_eq(op->op_type, RSC_NOTIFY) || safe_str_eq(op->op_type, RSC_METADATA)) { - /* Keep notify and meta-data ops out of the CIB */ - send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); - } else { + if (controld_action_is_recordable(op->op_type)) { update_id = do_update_resource(lrm_state->node_name, rsc, op); + } else { + send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } } else if (op->interval == 0) { /* This will occur when "crm resource cleanup" is called while actions are in-flight */ From efb431fb69a1c456a7a3fa7ece90f528b2b25612 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 29 Jun 2018 17:38:24 -0500 Subject: [PATCH 158/812] Low: controller: filter certain action timeouts from CIB --- crmd/te_callbacks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c index 27c72b6da82..087f3e131d2 100644 --- a/crmd/te_callbacks.c +++ b/crmd/te_callbacks.c @@ -958,7 +958,7 @@ action_timer_callback(gpointer data) if (timer->action->type != action_type_rsc) { send_update = FALSE; - } else if (safe_str_eq(task, RSC_CANCEL)) { + } else if (!controld_action_is_recordable(task)) { /* we don't need to update the CIB with these */ send_update = FALSE; } From df54cfd4b4062b985dec2e90e112e64a5174d202 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 29 Jun 2018 17:55:31 -0500 Subject: [PATCH 159/812] Low: controller: filter certain synthesized failures from CIB The types of actions that are filtered shouldn't really get synthesized failures, but this is a fail-safe just in case. 
--- crmd/lrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index 4c1e679d85b..db8ed7e4b3b 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1474,7 +1474,7 @@ synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) lrmd_free_rsc_info(rsc_info); process_lrm_event(lrm_state, op, NULL); - } else { + } else if (controld_action_is_recordable(op->op_type)) { /* If we can't process the result normally, at least write it to the CIB * if possible, so the PE can act on it. */ From ee501646914e201eef01d0fbbe46e4610eb2fd7e Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 4 Jul 2018 16:49:37 -0500 Subject: [PATCH 160/812] Refactor: executor: add API function for meta-data call with parameters This adds a get_metadata_params() function to the lrmd API for calling the meta-data action with parameters passed to the environment. The parameters will currently only be used with OCF agents, not any other standards, including fence agents. --- include/crm/lrmd.h | 22 ++++++++++++++++++ lib/lrmd/lrmd_client.c | 52 ++++++++++++++++++++++++++++++------------ 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h index 5c1947efc4b..ecfc98469e7 100644 --- a/include/crm/lrmd.h +++ b/include/crm/lrmd.h @@ -495,6 +495,28 @@ typedef struct lrmd_api_operations_s { const char *alert_path, int timeout, /* ms */ lrmd_key_value_t *params); /* ownership of params is given up to api here */ + /*! 
+ * \brief Get resource metadata for a resource agent, passing parameters + * + * \param[in] lrmd Executor connection (unused) + * \param[in] standard Resource agent class + * \param[in] provider Resource agent provider + * \param[in] agent Resource agent type + * \param[out] output Metadata will be stored here (must not be NULL) + * \param[in] options Options to use with any executor API calls (unused) + * \param[in] params Parameters to pass to agent via environment + * + * \note This is identical to the get_metadata() API call, except parameters + * will be passed to the resource agent via environment variables. + * \note The API will handle freeing params. + * + * \return lrmd_ok on success, negative error code on failure + */ + int (*get_metadata_params) (lrmd_t *lrmd, const char *standard, + const char *provider, const char *agent, + char **output, enum lrmd_call_options options, + lrmd_key_value_t *params); + } lrmd_api_operations_t; struct lrmd_s { diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index e3fb20e8552..28d6c6d174c 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1600,38 +1600,59 @@ stonith_get_metadata(const char *provider, const char *type, char **output) } static int -lrmd_api_get_metadata(lrmd_t * lrmd, - const char *class, - const char *provider, - const char *type, char **output, enum lrmd_call_options options) +lrmd_api_get_metadata(lrmd_t *lrmd, const char *standard, const char *provider, + const char *type, char **output, + enum lrmd_call_options options) { - svc_action_t *action; + return lrmd->cmds->get_metadata_params(lrmd, standard, provider, type, + output, options, NULL); +} - if (!class || !type) { +static int +lrmd_api_get_metadata_params(lrmd_t *lrmd, const char *standard, + const char *provider, const char *type, + char **output, enum lrmd_call_options options, + lrmd_key_value_t *params) +{ + svc_action_t *action = NULL; + GHashTable *params_table = NULL; + + if (!standard || !type) { + 
lrmd_key_value_freeall(params); return -EINVAL; } - if (safe_str_eq(class, PCMK_RESOURCE_CLASS_STONITH)) { + if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_STONITH)) { + lrmd_key_value_freeall(params); return stonith_get_metadata(provider, type, output); } - action = resources_action_create(type, class, provider, type, - "meta-data", 0, - CRMD_METADATA_CALL_TIMEOUT, NULL, 0); + params_table = crm_str_table_new(); + for (const lrmd_key_value_t *param = params; param; param = param->next) { + g_hash_table_insert(params_table, strdup(param->key), strdup(param->value)); + } + action = resources_action_create(type, standard, provider, type, + CRMD_ACTION_METADATA, 0, + CRMD_METADATA_CALL_TIMEOUT, params_table, + 0); + lrmd_key_value_freeall(params); + if (action == NULL) { - crm_err("Unable to retrieve meta-data for %s:%s:%s", class, provider, type); - services_action_free(action); + crm_err("Unable to retrieve meta-data for %s:%s:%s", + standard, provider, type); return -EINVAL; } - if (!(services_action_sync(action))) { - crm_err("Failed to retrieve meta-data for %s:%s:%s", class, provider, type); + if (!services_action_sync(action)) { + crm_err("Failed to retrieve meta-data for %s:%s:%s", + standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { - crm_err("Failed to receive meta-data for %s:%s:%s", class, provider, type); + crm_err("Failed to receive meta-data for %s:%s:%s", + standard, provider, type); services_action_free(action); return -EIO; } @@ -1863,6 +1884,7 @@ lrmd_api_new(void) new_lrmd->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; new_lrmd->cmds->list_standards = lrmd_api_list_standards; new_lrmd->cmds->exec_alert = lrmd_api_exec_alert; + new_lrmd->cmds->get_metadata_params = lrmd_api_get_metadata_params; return new_lrmd; } From a15aa9f51ad5ccd9cd3cbde8eca439cc0aa70652 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 4 Jul 2018 17:27:54 -0500 Subject: [PATCH 161/812] Fix: controller: add node name 
to meta-data call environment Resource agent invocations get all the environment variables defined for them by the scheduler -- with the exception of meta-data calls, which do not go through the scheduler but are executed directly by the controller. This means that if a resource agent's meta-data action calls "crm_node -n" (e.g. via the ocf_local_nodename function), it would (since b4606fb) try to contact the controller to get the name -- but the controller would be blocking on the synchronous meta-data call, and thus the meta-data call would eventually time out. Now, the controller adds the node name to the meta-data call's environment, so "crm_node -n" does not need to contact the controller to get it. This is a quick fix for the immediate issue; a full solution will be part of a larger project to make the controller execute meta-data calls asynchronously via the executor. Fixes CLBZ#5357 --- crmd/lrm_state.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c index 9968d300478..497afe11715 100644 --- a/crmd/lrm_state.c +++ b/crmd/lrm_state.c @@ -619,11 +619,31 @@ lrm_state_get_metadata(lrm_state_t * lrm_state, const char *provider, const char *agent, char **output, enum lrmd_call_options options) { + lrmd_key_value_t *params = NULL; + if (!lrm_state->conn) { return -ENOTCONN; } - return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata(lrm_state->conn, class, provider, agent, - output, options); + + /* Add the node name to the environment, as is done with normal resource + * action calls. Meta-data calls shouldn't need it, but some agents are + * written with an ocf_local_nodename call at the beginning regardless of + * action. Without the environment variable, the agent would try to contact + * the controller to get the node name -- but the controller would be + * blocking on the synchronous meta-data call. 
+ * + * At this point, we have to assume that agents are unlikely to make other + * calls that require the controller, such as crm_node --quorum or + * --cluster-id. + * + * @TODO Make meta-data calls asynchronous. (This will be part of a larger + * project to make meta-data calls via the executor rather than directly.) + */ + params = lrmd_key_value_add(params, CRM_META "_" XML_LRM_ATTR_TARGET, + lrm_state->node_name); + + return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata_params(lrm_state->conn, + class, provider, agent, output, options, params); } int From e4c002bb4ac5e0f2f1e6a483c7f4efd71fe36439 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 5 Jul 2018 14:55:19 -0500 Subject: [PATCH 162/812] Low: scheduler: avoid use-after-free when on-fail=block It was a problem only when trace logs were enabled. --- lib/pengine/utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 0654d1e706c..a9ca86f825f 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1080,6 +1080,7 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, } else if (safe_str_eq(value, "block")) { action->on_fail = action_fail_block; g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block")); + value = "block"; // The above could destroy the original string } else if (safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; From 353262bf0abab0acb2183a4575d9991c47617578 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 24 May 2018 13:44:13 -0500 Subject: [PATCH 163/812] Refactor: libpe_status: make clone unpack more efficient Previously, when unpacking clone resource history, we would loop through all possible clone instances until we found an instance active on the node, and then if not found, we would loop through all possible instances again until we found an inactive instance we could assign to the node. 
Now, we only loop through the instances once, remembering the first inactive instance. If we end up not finding an active instance, we return the remembered inactive instance. --- lib/pengine/unpack.c | 92 ++++++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 73bbe274f72..61778d8bc69 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1691,30 +1691,60 @@ create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * extern resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set); +/*! + * \internal + * \brief Create orphan instance for anonymous clone resource history + */ +static pe_resource_t * +create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id, + pe_node_t *node, pe_working_set_t *data_set) +{ + pe_resource_t *top = create_child_clone(parent, -1, data_set); + + // find_rsc() because we might be a cloned group + pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); + + pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", + top->id, parent->id, rsc_id, node->details->uname); + return orphan; +} + +/*! + * \internal + * \brief Check a node for an instance of an anonymous clone + * + * Return a child instance of the specified anonymous clone, in order of + * preference: (1) the instance running on the specified node, if any; + * (2) an inactive instance (i.e. within the total of clone-max instances); + * (3) a newly created orphan (i.e. clone-max instances are already active). 
+ * + * \param[in] data_set Cluster information + * \param[in] node Node on which to check for instance + * \param[in] parent Clone to check + * \param[in] rsc_id ID of (clone or cloned) resource being searched for + */ static resource_t * find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent, const char *rsc_id) { GListPtr rIter = NULL; - resource_t *rsc = NULL; + pe_resource_t *rsc = NULL; + pe_resource_t *inactive_instance = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(pe_rsc_is_clone(parent)); CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique)); - /* Find an instance active (or partially active for grouped clones) on the specified node */ + // Check for active (or partially active, for cloned groups) instance pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr nIter = NULL; GListPtr locations = NULL; resource_t *child = rIter->data; + // Find node(s) where we already know this instance is active child->fns->location(child, &locations, TRUE); - if (locations == NULL) { - pe_rsc_trace(child, "Resource %s, skip inactive", child->id); - continue; - } for (nIter = locations; nIter && rsc == NULL; nIter = nIter->next) { node_t *childnode = nIter->data; @@ -1723,7 +1753,7 @@ find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * pa /* ->find_rsc() because we might be a cloned group */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); if(rsc) { - pe_rsc_trace(rsc, "Resource %s, active", rsc->id); + pe_rsc_trace(parent, "Resource %s, active", rsc->id); } } @@ -1738,41 +1768,27 @@ find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * pa } } - g_list_free(locations); - } - - /* Find an inactive instance */ - if (skip_inactive == FALSE) { - pe_rsc_trace(parent, "Looking for %s anywhere", rsc_id); - for (rIter = 
parent->children; rsc == NULL && rIter; rIter = rIter->next) { - GListPtr locations = NULL; - resource_t *child = rIter->data; - - if (is_set(child->flags, pe_rsc_block)) { - pe_rsc_trace(child, "Skip: blocked in stopped state"); - continue; - } - - child->fns->location(child, &locations, TRUE); - if (locations == NULL) { - /* ->find_rsc() because we might be a cloned group */ - rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); - pe_rsc_trace(parent, "Resource %s, empty slot", rsc->id); - } + if (locations != NULL) { g_list_free(locations); + } else { + pe_rsc_trace(parent, "Resource %s, skip inactive", child->id); + if (!skip_inactive && !inactive_instance + && is_not_set(child->flags, pe_rsc_block)) { + // Remember one inactive instance in case we don't find active + inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL, + pe_find_clone); + } } } - if (rsc == NULL) { - /* Create an extra orphan */ - resource_t *top = create_child_clone(parent, -1, data_set); - - /* ->find_rsc() because we might be a cloned group */ - rsc = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); - CRM_ASSERT(rsc != NULL); + if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) { + pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id); + rsc = inactive_instance; + } - pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, - node->details->uname); + if (rsc == NULL) { + rsc = create_anonymous_orphan(parent, rsc_id, node, data_set); + pe_rsc_trace(parent, "Resource %s, orphan", rsc->id); } return rsc; } @@ -1820,7 +1836,7 @@ unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc if (pe_rsc_is_anon_clone(parent)) { - if (parent && parent->parent) { + if (pe_rsc_is_bundled(parent)) { rsc = find_container_child(parent->parent, node); } else { char *base = clone_strip(rsc_id); From 4cce5489cbff86df19bfb3e0d7132918b980910d Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 5 
Jul 2018 14:15:50 -0500 Subject: [PATCH 164/812] Refactor: scheduler: assume only one anonymous instance location find_anonymous_clone() checks for all locations where a particular clone instance is active. Previously, we would loop through this list. However, a particular instance can only ever be active on one node (regardless of what the history looks like, because we ignore any instance numbers in history of anonymous clones). So now, we log an assertion if the list has more than one member. --- lib/pengine/unpack.c | 48 +++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 61778d8bc69..7b827398adc 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1739,37 +1739,43 @@ find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * pa // Check for active (or partially active, for cloned groups) instance pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { - GListPtr nIter = NULL; GListPtr locations = NULL; resource_t *child = rIter->data; // Find node(s) where we already know this instance is active child->fns->location(child, &locations, TRUE); - - for (nIter = locations; nIter && rsc == NULL; nIter = nIter->next) { - node_t *childnode = nIter->data; - - if (childnode->details == node->details) { - /* ->find_rsc() because we might be a cloned group */ + if (locations) { + /* We should never associate the same numbered anonymous clone + * instance with multiple nodes, and clone instances can't migrate, + * so there must be only one location, regardless of history. + */ + CRM_LOG_ASSERT(locations->next == NULL); + + if (((pe_node_t *)locations->data)->details == node->details) { + /* This instance is active on the requested node, so check for + * a corresponding configured resource. 
We use find_rsc() + * because child may be a cloned group, and we need the + * particular member corresponding to rsc_id. + * + * If the history entry is orphaned, rsc will be NULL. + */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); - if(rsc) { + if (rsc) { pe_rsc_trace(parent, "Resource %s, active", rsc->id); - } - } - /* Keep this block, it means we'll do the right thing if - * anyone toggles the unique flag to 'off' - */ - if (rsc && rsc->running_on) { - crm_notice("/Anonymous/ clone %s is already running on %s", - parent->id, node->details->uname); - skip_inactive = TRUE; - rsc = NULL; + /* Keep this block, it means we'll do the right thing if + * anyone toggles the unique flag to 'off' + */ + if (rsc->running_on) { + crm_notice("/Anonymous/ clone %s is already running on %s", + parent->id, node->details->uname); + skip_inactive = TRUE; + rsc = NULL; + } + } } - } - - if (locations != NULL) { g_list_free(locations); + } else { pe_rsc_trace(parent, "Resource %s, skip inactive", child->id); if (!skip_inactive && !inactive_instance From 7d818b0e40acc0b18068f833d3e11f71478745bb Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 5 Jul 2018 15:56:42 -0500 Subject: [PATCH 165/812] Log: scheduler: clarify multiple-anonymous-instances message and the comments related to it --- lib/pengine/unpack.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 7b827398adc..019e22ed79d 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1742,7 +1742,22 @@ find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * pa GListPtr locations = NULL; resource_t *child = rIter->data; - // Find node(s) where we already know this instance is active + /* Check whether this instance is already known to be active anywhere. + * + * "Active" in this case means known to be active at this stage of + * unpacking. 
Because this function is called for a resource before the + * resource's individual operation history entries are unpacked, + * locations will generally be NULL. + * + * However, there are three exceptions: + * (1) when child is a cloned group and we have already unpacked the + * history of another member of the group; + * (2) when we've already unpacked the history of another numbered + * instance on the same node (which can happen if globally-unique + * was flipped from true to false); and + * (3) when we re-run calculations on the same data set as part of a + * simulation. + */ child->fns->location(child, &locations, TRUE); if (locations) { /* We should never associate the same numbered anonymous clone @@ -1763,11 +1778,13 @@ find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * pa if (rsc) { pe_rsc_trace(parent, "Resource %s, active", rsc->id); - /* Keep this block, it means we'll do the right thing if - * anyone toggles the unique flag to 'off' + /* If there are multiple active instances of an anonymous + * clone in a single node's history (which can happen if + * globally-unique is switched from true to false), we want + * to consider the instances beyond the first as orphans. 
*/ if (rsc->running_on) { - crm_notice("/Anonymous/ clone %s is already running on %s", + crm_notice("Now-anonymous clone %s has multiple instances active on %s", parent->id, node->details->uname); skip_inactive = TRUE; rsc = NULL; From c3c624ea3d98a74a8a287671a156db126c99a7bb Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 11 Jul 2018 11:26:57 -0500 Subject: [PATCH 166/812] Doc: update change log for 1.1.19 release --- ChangeLog | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2b7a3ceb6ff..65e98ac4c89 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,6 @@ -* Wed Jun 20 2018 Ken Gaillot Pacemaker-1.1.19-rc1 -- Update source tarball to revision: ca6cf19 -- Changesets: 165 -- Diff: 96 files changed, 3636 insertions(+), 2318 deletions(-) +* Wed Jul 11 2018 Ken Gaillot Pacemaker-1.1.19-1 +- Changesets: 184 +- Diff: 99 files changed, 3900 insertions(+), 2435 deletions(-) - Features added since Pacemaker-1.1.18 + This is a maintenance release with selected changes backported from 2.0.0 @@ -19,6 +18,7 @@ + crmd: always write faked failures to CIB whenever possible + crmd: avoid double free after ACL rejection of resource delete + crmd: delete resource from lrmd when appropriate + + crmd: don't record pending clone notifications in CIB + crmd: match only executed down events + lrmd: handle systemd actions correctly when used with "service:" + lrmd: always use most recent remote proxy From 28076181217313c7e33bf88cb70eb16c2f8e737f Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 19 Jul 2018 12:38:08 -0500 Subject: [PATCH 167/812] Low: tools: enable file consolidation in crm_report Correct a variable misspelling that resulted in file de-duplication being skipped. Also, remove an unused variable, and avoid the unreliable "-a". Found by static analysis. 
--- tools/crm_report.in | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tools/crm_report.in b/tools/crm_report.in index 43ed6466072..695c1f1f06f 100755 --- a/tools/crm_report.in +++ b/tools/crm_report.in @@ -27,7 +27,6 @@ eval set -- "$TEMP" progname=$(basename "$0") rsh="ssh -T" -times="" tests="" nodes="" compress=1 @@ -243,10 +242,18 @@ EOF # check if files have same content in the cluster # cibdiff() { - d1=`dirname $1` - d2=`dirname $2` - if [ -f $d1/RUNNING -a -f $d2/RUNNING ] || - [ -f $d1/STOPPED -a -f $d2/STOPPED ]; then + d1=$(dirname $1) + d2=$(dirname $2) + + if [ -f "$d1/RUNNING" ] && [ ! -f "$d2/RUNNING" ]; then + DIFF_OK=0 + elif [ -f "$d1/STOPPED" ] && [ ! -f "$d2/STOPPED" ]; then + DIFF_OK=0 + else + DIFF_OK=1 + fi + + if [ $DIFF_OK -eq 1 ]; then if which crm_diff > /dev/null 2>&1; then crm_diff -c -n $1 -o $2 else @@ -277,7 +284,7 @@ esac # remove duplicates if files are same, make links instead # consolidate() { - for n in $NODES; do + for n in $nodes; do if [ -f $1/$2 ]; then rm $1/$n/$2 else @@ -290,7 +297,7 @@ consolidate() { analyze_one() { rc=0 node0="" - for n in $NODES; do + for n in $nodes; do if [ "$node0" ]; then diffcheck $1/$node0/$2 $1/$n/$2 rc=$(($rc+$?)) From 2db3895359beb0f577c142c03ac2c8e6f44c67cf Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 4 Apr 2018 15:47:18 -0500 Subject: [PATCH 168/812] Low: tools: get sensor lun in ipmiservicelogd before using it --- tools/ipmiservicelogd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/ipmiservicelogd.c b/tools/ipmiservicelogd.c index 47ff220f20b..1047c9c3a11 100644 --- a/tools/ipmiservicelogd.c +++ b/tools/ipmiservicelogd.c @@ -434,14 +434,14 @@ sensor_discrete_event_handler(ipmi_sensor_t * sensor, instance = ipmi_entity_get_entity_instance(ent); ipmi_sensor_get_id(sensor, name, sizeof(name)); + ipmi_sensor_get_num(sensor, &sensor_lun, &sensor_number); + sel_id = ipmi_entity_get_entity_id(ent); sel_type = 
ipmi_entity_get_type(ent); generator = ipmi_entity_get_slave_address(ent) | (sensor_lun << 5); /* LUN (2 bits) | SLAVE ADDRESS (5 bits) */ version = 0x04; sensor_type = ipmi_sensor_get_sensor_type(sensor); - ipmi_sensor_get_num(sensor, &sensor_lun, &sensor_number); - event_class = 0; /* @TBD - where does this come from? */ event_type = ipmi_event_get_type(event); direction = dir; From 7a79e4ef8315842d4d1078475dab287d8f3327de Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 25 Jul 2018 15:15:38 -0500 Subject: [PATCH 169/812] Low: tools: notifyServicelogEvent FTBFS on ppc64le --- tools/notifyServicelogEvent.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/tools/notifyServicelogEvent.c b/tools/notifyServicelogEvent.c index b7f672cc99b..700f0685e00 100644 --- a/tools/notifyServicelogEvent.c +++ b/tools/notifyServicelogEvent.c @@ -1,24 +1,15 @@ /* - * Copyright (C) 2009 International Business Machines, IBM, Mark Hamzy + * Copyright 2009-2018 International Business Machines, IBM, Mark Hamzy * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ /* gcc -o notifyServicelogEvent `pkg-config --cflags servicelog-1` `pkg-config --libs servicelog-1` notifyServicelogEvent.c */ +#include + #include #include #include @@ -27,9 +18,10 @@ #include #include #include + #include #include -#include +#include typedef enum { STATUS_GREEN = 1, STATUS_YELLOW, STATUS_RED } STATUS; @@ -91,7 +83,7 @@ main(int argc, char *argv[]) struct sl_event *event = NULL; uint64_t event_id = 0; - crm_log_init_quiet("notifyServicelogEvent", LOG_INFO, FALSE, TRUE, argc, argv); + crm_log_cli_init("notifyServicelogEvent"); crm_set_options(NULL, "event_id ", long_options, "Gets called upon events written to servicelog database"); From b408a3ead462c8f02b68a164f24ba1b05bb3cad1 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 25 Jul 2018 16:07:42 -0500 Subject: [PATCH 170/812] Low: tools: ipmiservicelogd FTBFS on ppc64le --- tools/ipmiservicelogd.c | 53 +++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/tools/ipmiservicelogd.c b/tools/ipmiservicelogd.c index 1047c9c3a11..4f528656435 100644 --- a/tools/ipmiservicelogd.c +++ b/tools/ipmiservicelogd.c @@ -9,13 +9,10 @@ * Author: Intel Corporation * Jeff Zheng * - * Copyright 2009 International Business Machines, IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. + * Copyright 2009-2018 International Business Machines, IBM * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
* * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF @@ -27,10 +24,6 @@ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* gcc -o ipmiservicelogd -g `pkg-config --cflags --libs OpenIPMI OpenIPMIposix servicelog-1` ipmiservicelogd.c @@ -38,6 +31,12 @@ /* ./ipmiservicelogd smi 0 */ +#include + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + #include #include #include @@ -68,7 +67,7 @@ static os_handler_t *os_hnd; -char *getStringExecOutput(char *args[]); +char *getStringExecOutput(const char *const args[]); char *getSerialNumber(void); char *getProductName(void); static void con_usage(const char *name, const char *help, void *cb_data); @@ -91,7 +90,7 @@ void setup_done(ipmi_domain_t * domain, int err, unsigned int conn_num, unsigned int still_connected, void *user_data); char * -getStringExecOutput(char *args[]) +getStringExecOutput(const char *const args[]) { int rc; pid_t pid; @@ -201,7 +200,11 @@ getStringExecOutput(char *args[]) crm_err("Error: child close (pipefd[1]) = %d", errno); } - rc = execvp(args[0], args); + /* execvp() takes (char *const *) for backward compatibility, + * but POSIX guarantees that it will not modify the strings, + * so the cast is safe + */ + rc = execvp(args[0], (char *const *) args); if (rc == -1) { crm_err("Error: child execvp = %d", errno); @@ -224,7 +227,7 @@ getStringExecOutput(char *args[]) char * getSerialNumber(void) { - char *dmiArgs[] = { + const char *const dmiArgs[] = { "dmidecode", "--string", "system-serial-number", @@ -237,7 +240,7 @@ getSerialNumber(void) 
char * getProductName(void) { - char *dmiArgs[] = { + const char *dmiArgs[] = { "dmidecode", "--string", "system-product-name", @@ -313,8 +316,8 @@ ipmi2servicelog(struct sl_data_bmc *bmc_data) sl_event.machine_serial = serial_number; sl_event.machine_model = product_name; /* it may not have the serial # within the first 20 chars */ sl_event.nodename = name.nodename; - sl_event.refcode = "ipmi"; - sl_event.description = "ipmi event"; + sl_event.refcode = strdup("ipmi"); + sl_event.description = strdup("ipmi event"); sl_event.serviceable = 1; /* 1 or 0 */ sl_event.predictive = 0; /* 1 or 0 */ sl_event.disposition = SL_DISP_RECOVERABLE; /* one of SL_DISP_* */ @@ -336,6 +339,8 @@ ipmi2servicelog(struct sl_data_bmc *bmc_data) crm_debug("Sending to servicelog database"); } + free(sl_event.refcode); + free(sl_event.description); free(serial_number); free(product_name); @@ -352,7 +357,6 @@ sensor_threshold_event_handler(ipmi_sensor_t * sensor, double value, void *cb_data, ipmi_event_t * event) { ipmi_entity_t *ent = ipmi_sensor_get_entity(sensor); - int id, instance; char name[IPMI_ENTITY_NAME_LEN]; struct sl_data_bmc bmc_data; uint32_t sel_id; @@ -366,8 +370,6 @@ sensor_threshold_event_handler(ipmi_sensor_t * sensor, uint8_t event_type; int direction; - id = ipmi_entity_get_entity_id(ent); - instance = ipmi_entity_get_entity_instance(ent); ipmi_sensor_get_id(sensor, name, sizeof(name)); ipmi_sensor_get_num(sensor, &sensor_lun, &sensor_number); @@ -416,7 +418,6 @@ sensor_discrete_event_handler(ipmi_sensor_t * sensor, int severity, int prev_severity, void *cb_data, ipmi_event_t * event) { ipmi_entity_t *ent = ipmi_sensor_get_entity(sensor); - int id, instance; char name[IPMI_ENTITY_NAME_LEN]; struct sl_data_bmc bmc_data; uint32_t sel_id; @@ -430,8 +431,6 @@ sensor_discrete_event_handler(ipmi_sensor_t * sensor, uint8_t event_type; int direction; - id = ipmi_entity_get_entity_id(ent); - instance = ipmi_entity_get_entity_instance(ent); ipmi_sensor_get_id(sensor, name, 
sizeof(name)); ipmi_sensor_get_num(sensor, &sensor_lun, &sensor_number); @@ -501,10 +500,7 @@ static void entity_change(enum ipmi_update_e op, ipmi_domain_t * domain, ipmi_entity_t * entity, void *cb_data) { int rv; - int id, instance; - id = ipmi_entity_get_entity_id(entity); - instance = ipmi_entity_get_entity_instance(entity); if (op == IPMI_ADDED) { /* Register callback so that when the status of a sensor changes, sensor_change is called */ @@ -564,8 +560,9 @@ main(int argc, char *argv[]) #endif crm_make_daemon("ipmiservicelogd", TRUE, "/var/run/ipmiservicelogd.pid0"); - - crm_log_init("ipmiservicelogd", LOG_INFO, FALSE, TRUE, argc, argv); + crm_log_cli_init("ipmiservicelogd"); + // Maybe this should log like a daemon instead? + // crm_log_init("ipmiservicelogd", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); #ifdef COMPLEX rv = ipmi_args_setup_con(args, os_hnd, NULL, &con); From 46201f029e4a5ac3ba0aaf05cb6df80341729566 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 30 Jul 2018 14:17:49 -0500 Subject: [PATCH 171/812] Doc: tools: add --help/--version options to ipmiservicelogd allows ppc64le build when relevant dependencies are installed --- tools/ipmiservicelogd.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/ipmiservicelogd.c b/tools/ipmiservicelogd.c index 4f528656435..865eae07dfc 100644 --- a/tools/ipmiservicelogd.c +++ b/tools/ipmiservicelogd.c @@ -253,7 +253,7 @@ getProductName(void) static void con_usage(const char *name, const char *help, void *cb_data) { - printf("\n%s%s", name, help); + printf("%s\n", help); } static void @@ -261,7 +261,7 @@ usage(const char *progname) { printf("Usage:\n"); printf(" %s \n", progname); - printf(" Where is one of:"); + printf(" Where is one of:\n"); ipmi_parse_args_iter_help(con_usage, NULL); } @@ -550,6 +550,18 @@ main(int argc, char *argv[]) /* Initialize the OpenIPMI library. 
*/ ipmi_init(os_hnd); + // Check for pacemaker-standard help and version options + if (argc > 1) { + for (char **arg = &argv[1]; *arg != NULL; ++arg) { + if (!strcmp(*arg, "--help") || !strcmp(*arg, "-?")) { + usage(argv[0]); + return 0; + } else if (!strcmp(*arg, "--version") || !strcmp(*arg, "-$")) { + crm_help('$', 0); + } + } + } + #ifdef COMPLEX rv = ipmi_parse_args2(&curr_arg, argc, argv, &args); if (rv) { From 98ce833a093e1493343d52459ae2c3d7d5998150 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 31 Jul 2018 08:56:36 -0500 Subject: [PATCH 172/812] Refactor: scheduler: add header for library use only This adds a new header for functions internal to libpe_status, so they can use G_GNUC_INTERNAL for efficiency. --- lib/pengine/Makefile.am | 2 +- lib/pengine/pe_status_private.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 lib/pengine/pe_status_private.h diff --git a/lib/pengine/Makefile.am b/lib/pengine/Makefile.am index 5f36c94a38d..d023b337ef5 100644 --- a/lib/pengine/Makefile.am +++ b/lib/pengine/Makefile.am @@ -21,7 +21,7 @@ include $(top_srcdir)/Makefile.common lib_LTLIBRARIES = libpe_rules.la libpe_status.la ## SOURCES -noinst_HEADERS = unpack.h variant.h +noinst_HEADERS = unpack.h variant.h pe_status_private.h libpe_rules_la_LDFLAGS = -version-info 5:1:3 diff --git a/lib/pengine/pe_status_private.h b/lib/pengine/pe_status_private.h new file mode 100644 index 00000000000..712639caa31 --- /dev/null +++ b/lib/pengine/pe_status_private.h @@ -0,0 +1,15 @@ +/* + * Copyright 2018 Andrew Beekhof + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef PE_STATUS_PRIVATE__H +# define PE_STATUS_PRIVATE__H + +/* This header is for the sole use of libpe_status, so that functions can be + * declared with G_GNUC_INTERNAL for efficiency. 
+ */ + +#endif // PE_STATUS_PRIVATE__H From 9750c1d2bc3640cb0ed6ffc6519f371301e79024 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 31 Jul 2018 08:57:29 -0500 Subject: [PATCH 173/812] Refactor: scheduler: simplify clone child creation create_child_clone() is renamed, simplified, and declared in pe_status_private.h --- lib/pengine/clone.c | 34 ++++++++++++++++++--------------- lib/pengine/pe_status_private.h | 4 ++++ lib/pengine/unpack.c | 5 ++--- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index b6473d267a8..375989075ab 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -11,13 +11,13 @@ #include #include #include +#include #include #define VARIANT_CLONE 1 #include "./variant.h" void force_non_unique_clone(resource_t * rsc, const char *rid, pe_working_set_t * data_set); -resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set); static void mark_as_orphan(resource_t * rsc) @@ -69,8 +69,8 @@ find_clone_instance(resource_t * rsc, const char *sub_id, pe_working_set_t * dat return child; } -resource_t * -create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set) +pe_resource_t * +pe__create_clone_child(pe_resource_t *rsc, pe_working_set_t *data_set) { gboolean as_orphan = FALSE; char *inc_num = NULL; @@ -83,11 +83,13 @@ create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set) CRM_CHECK(clone_data->xml_obj_child != NULL, return FALSE); - if (sub_id < 0) { + if (clone_data->total_clones >= clone_data->clone_max) { + // If we've already used all available instances, this is an orphan as_orphan = TRUE; - sub_id = clone_data->total_clones; } - inc_num = crm_itoa(sub_id); + + // Allocate instance numbers in numerical order (starting at 0) + inc_num = crm_itoa(clone_data->total_clones); inc_max = crm_itoa(clone_data->clone_max); child_copy = copy_xml(clone_data->xml_obj_child); @@ -216,18 +218,20 @@ clone_unpack(resource_t * 
rsc, pe_working_set_t * data_set) add_hash_param(rsc->meta, XML_RSC_ATTR_UNIQUE, is_set(rsc->flags, pe_rsc_unique) ? XML_BOOLEAN_TRUE : XML_BOOLEAN_FALSE); - for (lpc = 0; lpc < clone_data->clone_max; lpc++) { - if (create_child_clone(rsc, lpc, data_set) == NULL) { + if (clone_data->clone_max <= 0) { + /* Create one child instance so that unpack_find_resource() will hook up + * any orphans up to the parent correctly. + */ + if (pe__create_clone_child(rsc, data_set) == NULL) { return FALSE; } - } - if (clone_data->clone_max == 0) { - /* create one so that unpack_find_resource() will hook up - * any orphans up to the parent correctly - */ - if (create_child_clone(rsc, -1, data_set) == NULL) { - return FALSE; + } else { + // Create a child instance for each available instance number + for (lpc = 0; lpc < clone_data->clone_max; lpc++) { + if (pe__create_clone_child(rsc, data_set) == NULL) { + return FALSE; + } } } diff --git a/lib/pengine/pe_status_private.h b/lib/pengine/pe_status_private.h index 712639caa31..b29198c55c1 100644 --- a/lib/pengine/pe_status_private.h +++ b/lib/pengine/pe_status_private.h @@ -12,4 +12,8 @@ * declared with G_GNUC_INTERNAL for efficiency. */ +G_GNUC_INTERNAL +pe_resource_t *pe__create_clone_child(pe_resource_t *rsc, + pe_working_set_t *data_set); + #endif // PE_STATUS_PRIVATE__H diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 019e22ed79d..824fc8b2ef4 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -28,6 +28,7 @@ #include #include #include +#include CRM_TRACE_INIT_DATA(pe_status); @@ -1689,8 +1690,6 @@ create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * return rsc; } -extern resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set); - /*! 
* \internal * \brief Create orphan instance for anonymous clone resource history @@ -1699,7 +1698,7 @@ static pe_resource_t * create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id, pe_node_t *node, pe_working_set_t *data_set) { - pe_resource_t *top = create_child_clone(parent, -1, data_set); + pe_resource_t *top = pe__create_clone_child(parent, data_set); // find_rsc() because we might be a cloned group pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); From a029282fe2be86dce6ddd32f02b234bb8e984763 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 17 Jul 2018 17:10:38 -0500 Subject: [PATCH 174/812] Refactor: libcrmcommon: separate RA-related functions into own source file Before: 1386 utils.c After: 1296 utils.c 110 agents.c --- include/crm/common/util.h | 12 +++-- lib/common/Makefile.am | 2 +- lib/common/agents.c | 110 ++++++++++++++++++++++++++++++++++++++ lib/common/utils.c | 92 ------------------------------- 4 files changed, 118 insertions(+), 98 deletions(-) create mode 100644 lib/common/agents.c diff --git a/include/crm/common/util.h b/include/crm/common/util.h index 1361abf9af1..994ea153c10 100644 --- a/include/crm/common/util.h +++ b/include/crm/common/util.h @@ -134,6 +134,13 @@ xmlNode *crm_create_op_xml(xmlNode *parent, const char *prefix, const char *timeout); #define CRM_DEFAULT_OP_TIMEOUT_S "20s" +// Public resource agent functions (from agents.c) +char *crm_generate_ra_key(const char *standard, const char *provider, + const char *type); +bool crm_provider_required(const char *standard); +int crm_parse_agent_spec(const char *spec, char **standard, char **provider, + char **type); + int compare_version(const char *version1, const char *version2); /* coverity[+kill] */ @@ -184,9 +191,4 @@ void crm_gnutls_global_init(void); int crm_exit(int rc); bool pcmk_acl_required(const char *user); -char *crm_generate_ra_key(const char *class, const char *provider, const char *type); -bool crm_provider_required(const char 
*standard); -int crm_parse_agent_spec(const char *spec, char **standard, char **provider, - char **type); - #endif diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am index 9dd202529d8..adc8e43f8ca 100644 --- a/lib/common/Makefile.am +++ b/lib/common/Makefile.am @@ -40,7 +40,7 @@ libcrmcommon_la_LIBADD = @LIBADD_DL@ $(GNUTLSLIBS) libcrmcommon_la_SOURCES = compat.c digest.c ipc.c io.c procfs.c utils.c xml.c \ iso8601.c remote.c mainloop.c logging.c watchdog.c \ schemas.c strings.c xpath.c attrd_client.c alerts.c \ - operations.c + operations.c agents.c if BUILD_CIBSECRETS libcrmcommon_la_SOURCES += cib_secrets.c endif diff --git a/lib/common/agents.c b/lib/common/agents.c new file mode 100644 index 00000000000..e6158c58473 --- /dev/null +++ b/lib/common/agents.c @@ -0,0 +1,110 @@ +/* + * Copyright 2004-2018 Andrew Beekhof + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + +#include +#include +#include + +#include +#include + +char * +crm_generate_ra_key(const char *standard, const char *provider, + const char *type) +{ + if (!standard && !provider && !type) { + return NULL; + } + + return crm_strdup_printf("%s%s%s:%s", + (standard? standard : ""), + (provider? ":" : ""), (provider? provider : ""), + (type? type : "")); +} + +/*! + * \brief Check whether a resource standard requires a provider to be specified + * + * \param[in] standard Standard name + * + * \return TRUE if standard requires a provider, FALSE otherwise + */ +bool +crm_provider_required(const char *standard) +{ + CRM_CHECK(standard != NULL, return FALSE); + + /* @TODO + * - this should probably be case-sensitive, but isn't, + * for backward compatibility + * - it might be nice to keep standards' capabilities (supports provider, + * can be promotable, etc.) 
as structured data somewhere + */ + if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF)) { + return TRUE; + } + return FALSE; +} + +/*! + * \brief Parse a "standard[:provider]:type" agent specification + * + * \param[in] spec Agent specification + * \param[out] standard Newly allocated memory containing agent standard (or NULL) + * \param[out] provider Newly allocated memory containing agent provider (or NULL) + * \param[put] type Newly allocated memory containing agent type (or NULL) + * + * \return pcmk_ok if the string could be parsed, -EINVAL otherwise + * + * \note It is acceptable for the type to contain a ':' if the standard supports + * that. For example, systemd supports the form "systemd:UNIT@A:B". + * \note It is the caller's responsibility to free the returned values. + */ +int +crm_parse_agent_spec(const char *spec, char **standard, char **provider, + char **type) +{ + char *colon; + + CRM_CHECK(spec && standard && provider && type, return -EINVAL); + *standard = NULL; + *provider = NULL; + *type = NULL; + + colon = strchr(spec, ':'); + if ((colon == NULL) || (colon == spec)) { + return -EINVAL; + } + + *standard = strndup(spec, colon - spec); + spec = colon + 1; + + if (crm_provider_required(*standard)) { + colon = strchr(spec, ':'); + if ((colon == NULL) || (colon == spec)) { + free(*standard); + return -EINVAL; + } + *provider = strndup(spec, colon - spec); + spec = colon + 1; + } + + if (*spec == '\0') { + free(*standard); + free(*provider); + return -EINVAL; + } + + *type = strdup(spec); + return pcmk_ok; +} diff --git a/lib/common/utils.c b/lib/common/utils.c index 2bf65b8ba77..01a7fc54368 100644 --- a/lib/common/utils.c +++ b/lib/common/utils.c @@ -1456,95 +1456,3 @@ crm_gnutls_global_init(void) gnutls_global_init(); } #endif - -char * -crm_generate_ra_key(const char *class, const char *provider, const char *type) -{ - if (!class && !provider && !type) { - return NULL; - } - - return crm_strdup_printf("%s%s%s:%s", - (class? 
class : ""), - (provider? ":" : ""), (provider? provider : ""), - (type? type : "")); -} - -/*! - * \brief Check whether a resource standard requires a provider to be specified - * - * \param[in] standard Standard name - * - * \return TRUE if standard requires a provider, FALSE otherwise - */ -bool -crm_provider_required(const char *standard) -{ - CRM_CHECK(standard != NULL, return FALSE); - - /* @TODO - * - this should probably be case-sensitive, but isn't, - * for backward compatibility - * - it might be nice to keep standards' capabilities (supports provider, - * master/slave, etc.) as structured data somewhere - */ - if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF)) { - return TRUE; - } - return FALSE; -} - -/*! - * \brief Parse a "standard[:provider]:type" agent specification - * - * \param[in] spec Agent specification - * \param[out] standard Newly allocated memory containing agent standard (or NULL) - * \param[out] provider Newly allocated memory containing agent provider (or NULL) - * \param[put] type Newly allocated memory containing agent type (or NULL) - * - * \return pcmk_ok if the string could be parsed, -EINVAL otherwise - * - * \note It is acceptable for the type to contain a ':' if the standard supports - * that. For example, systemd supports the form "systemd:UNIT@A:B". - * \note It is the caller's responsibility to free the returned values. 
- */ -int -crm_parse_agent_spec(const char *spec, char **standard, char **provider, - char **type) -{ - char *colon; - - CRM_CHECK(spec && standard && provider && type, return -EINVAL); - *standard = NULL; - *provider = NULL; - *type = NULL; - - colon = strchr(spec, ':'); - if ((colon == NULL) || (colon == spec)) { - return -EINVAL; - } - - *standard = calloc(colon - spec + 1, sizeof(char)); - strncpy(*standard, spec, colon - spec); - spec = colon + 1; - - if (crm_provider_required(*standard)) { - colon = strchr(spec, ':'); - if ((colon == NULL) || (colon == spec)) { - free(*standard); - return -EINVAL; - } - *provider = calloc(colon - spec + 1, sizeof(char)); - strncpy(*provider, spec, colon - spec); - spec = colon + 1; - } - - if (*spec == '\0') { - free(*standard); - free(*provider); - return -EINVAL; - } - - *type = strdup(spec); - return pcmk_ok; -} From 92cfcc154e29544a8f503b3e1aa474e1ca03dfb1 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 17 Jul 2018 17:31:25 -0500 Subject: [PATCH 175/812] Refactor: libcrmcommon,etc.: generic support for agent standard capabilities This adds an enum for resource agent standard capabilities (such as requiring a provider, or using status instead of monitor). A generic pcmk_get_ra_caps() function replaces the now-deprecated crm_provider_required(). 
--- include/crm/common/util.h | 15 +++++++++- lib/common/agents.c | 62 +++++++++++++++++++++++++++++++-------- lib/common/operations.c | 5 +--- lib/lrmd/lrmd_client.c | 5 ++-- lib/pengine/native.c | 4 +-- lib/services/services.c | 53 ++++++++++++++++++--------------- lrmd/lrmd.c | 4 +-- tools/fake_transition.c | 3 +- 8 files changed, 101 insertions(+), 50 deletions(-) diff --git a/include/crm/common/util.h b/include/crm/common/util.h index 994ea153c10..440ae0ee0b1 100644 --- a/include/crm/common/util.h +++ b/include/crm/common/util.h @@ -135,11 +135,24 @@ xmlNode *crm_create_op_xml(xmlNode *parent, const char *prefix, #define CRM_DEFAULT_OP_TIMEOUT_S "20s" // Public resource agent functions (from agents.c) + +// Capabilities supported by a resource agent standard +enum pcmk_ra_caps { + pcmk_ra_cap_none = 0x000, + pcmk_ra_cap_provider = 0x001, // Requires provider + pcmk_ra_cap_status = 0x002, // Supports status instead of monitor + pcmk_ra_cap_params = 0x004, // Supports parameters + pcmk_ra_cap_unique = 0x008, // Supports unique clones + pcmk_ra_cap_promotable = 0x010, // Supports promotable clones +}; + +uint32_t pcmk_get_ra_caps(const char *standard); char *crm_generate_ra_key(const char *standard, const char *provider, const char *type); -bool crm_provider_required(const char *standard); int crm_parse_agent_spec(const char *spec, char **standard, char **provider, char **type); +bool crm_provider_required(const char *standard); // deprecated + int compare_version(const char *version1, const char *version2); diff --git a/lib/common/agents.c b/lib/common/agents.c index e6158c58473..23c364f8471 100644 --- a/lib/common/agents.c +++ b/lib/common/agents.c @@ -18,6 +18,52 @@ #include #include +/*! 
+ * \brief Get capabilities of a resource agent standard + * + * \param[in] standard Standard name + * + * \return Bitmask of enum pcmk_ra_caps values + */ +uint32_t +pcmk_get_ra_caps(const char *standard) +{ + /* @COMPAT This should probably be case-sensitive, but isn't, + * for backward compatibility. + */ + if (standard == NULL) { + return pcmk_ra_cap_none; + + } else if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF)) { + return pcmk_ra_cap_provider | pcmk_ra_cap_params + | pcmk_ra_cap_unique | pcmk_ra_cap_promotable; + + } else if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_STONITH)) { + /* @COMPAT Stonith resources can't really be unique clones, but we've + * allowed it in the past and have it in some scheduler regression tests + * (which were likely never used as real configurations). + * + * @TODO Remove pcmk_ra_cap_unique at the next major schema version + * bump, with a transform to remove globally-unique from the config. + */ + return pcmk_ra_cap_params | pcmk_ra_cap_unique; + + } else if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_SYSTEMD) + || !strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) + || !strcasecmp(standard, PCMK_RESOURCE_CLASS_LSB) + || !strcasecmp(standard, PCMK_RESOURCE_CLASS_UPSTART)) { + + /* Since service can map to LSB, systemd, or upstart, these should + * have identical capabilities + */ + return pcmk_ra_cap_status; + + } else if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_NAGIOS)) { + return pcmk_ra_cap_params; + } + return pcmk_ra_cap_none; +} + char * crm_generate_ra_key(const char *standard, const char *provider, const char *type) @@ -33,6 +79,7 @@ crm_generate_ra_key(const char *standard, const char *provider, } /*! 
+ * \deprecated * \brief Check whether a resource standard requires a provider to be specified * * \param[in] standard Standard name @@ -42,18 +89,7 @@ crm_generate_ra_key(const char *standard, const char *provider, bool crm_provider_required(const char *standard) { - CRM_CHECK(standard != NULL, return FALSE); - - /* @TODO - * - this should probably be case-sensitive, but isn't, - * for backward compatibility - * - it might be nice to keep standards' capabilities (supports provider, - * can be promotable, etc.) as structured data somewhere - */ - if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF)) { - return TRUE; - } - return FALSE; + return is_set(pcmk_get_ra_caps(standard), pcmk_ra_cap_provider); } /*! @@ -89,7 +125,7 @@ crm_parse_agent_spec(const char *spec, char **standard, char **provider, *standard = strndup(spec, colon - spec); spec = colon + 1; - if (crm_provider_required(*standard)) { + if (is_set(pcmk_get_ra_caps(*standard), pcmk_ra_cap_provider)) { colon = strchr(spec, ':'); if ((colon == NULL) || (colon == spec)) { free(*standard); diff --git a/lib/common/operations.c b/lib/common/operations.c index 01b4f7adffd..42fe216203f 100644 --- a/lib/common/operations.c +++ b/lib/common/operations.c @@ -663,10 +663,7 @@ crm_op_needs_metadata(const char *rsc_class, const char *op) CRM_CHECK(rsc_class || op, return FALSE); - if (rsc_class - && strcmp(rsc_class, PCMK_RESOURCE_CLASS_OCF) - && strcmp(rsc_class, PCMK_RESOURCE_CLASS_STONITH)) { - + if (is_set(pcmk_get_ra_caps(rsc_class), pcmk_ra_cap_params)) { /* Meta-data is only needed for resource classes that use parameters */ return FALSE; } diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index 28d6c6d174c..51912ec2e78 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1430,7 +1430,7 @@ lrmd_api_register_rsc(lrmd_t * lrmd, if (!class || !type || !rsc_id) { return -EINVAL; } - if (crm_provider_required(class) && !provider) { + if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) 
&& !provider) { return -EINVAL; } @@ -1533,7 +1533,8 @@ lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options if (!class || !type) { free_xml(output); return NULL; - } else if (crm_provider_required(class) && !provider) { + } else if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) + && !provider) { free_xml(output); return NULL; } diff --git a/lib/pengine/native.c b/lib/pengine/native.c index eda0355e2d3..78b0b719791 100644 --- a/lib/pengine/native.c +++ b/lib/pengine/native.c @@ -548,7 +548,7 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n } offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", name); offset += snprintf(buffer + offset, LINE_MAX - offset, "\t(%s", class); - if (crm_provider_required(class)) { + if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) { const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); offset += snprintf(buffer + offset, LINE_MAX - offset, "::%s", prov); } @@ -829,7 +829,7 @@ get_rscs_brief(GListPtr rsc_list, GHashTable * rsc_table, GHashTable * active_ta } offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", class); - if (crm_provider_required(class)) { + if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) { const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); offset += snprintf(buffer + offset, LINE_MAX - offset, "::%s", prov); } diff --git a/lib/services/services.c b/lib/services/services.c index 4250f09f32a..5a0baa0cb4e 100644 --- a/lib/services/services.c +++ b/lib/services/services.c @@ -177,6 +177,7 @@ resources_action_create(const char *name, const char *standard, const char *prov GHashTable * params, enum svc_action_flags flags) { svc_action_t *op = NULL; + uint32_t ra_caps = 0; /* * Do some up front sanity checks before we go off and @@ -192,9 +193,10 @@ resources_action_create(const char *name, const char *standard, const char *prov crm_err("Cannot create operation for %s without 
resource class", name); goto return_error; } + ra_caps = pcmk_get_ra_caps(standard); - if (crm_provider_required(standard) && crm_strlen_zero(provider)) { - crm_err("Cannot create OCF operation for %s without provider", name); + if (is_set(ra_caps, pcmk_ra_cap_provider) && crm_strlen_zero(provider)) { + crm_err("Cannot create operation for %s without provider", name); goto return_error; } @@ -223,32 +225,34 @@ resources_action_create(const char *name, const char *standard, const char *prov op->flags = flags; op->id = generate_op_key(name, action, interval); - if (safe_str_eq(action, "monitor") && ( -#if SUPPORT_HEARTBEAT - safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_HB) || -#endif - safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB))) { - action = "status"; + if (is_set(ra_caps, pcmk_ra_cap_status) && safe_str_eq(action, "monitor")) { + op->action = strdup("status"); + } else { + op->action = strdup(action); } - op->action = strdup(action); - if (crm_provider_required(op->standard)) { + if (is_set(ra_caps, pcmk_ra_cap_provider)) { op->provider = strdup(provider); + } + + if (is_set(ra_caps, pcmk_ra_cap_params)) { op->params = params; - params = NULL; + params = NULL; // so we don't free them in this function + } + if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_OCF) == 0) { if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); - op->opaque->args[1] = strdup(action); + op->opaque->args[1] = strdup(op->action); } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_LSB) == 0) { op->opaque->exec = services__lsb_agent_path(op->agent); op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(op->action); - op->opaque->args[2] = NULL; + #if SUPPORT_HEARTBEAT } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_HB) == 0) { int index; @@ -297,8 +301,6 @@ resources_action_create(const 
char *name, const char *standard, const char *prov #endif #if SUPPORT_NAGIOS } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) { - int index = 0; - if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the NAGIOS_PLUGIN_DIR path to the front */ @@ -310,20 +312,19 @@ resources_action_create(const char *name, const char *standard, const char *prov } op->opaque->args[0] = strdup(op->opaque->exec); - index = 1; if (safe_str_eq(op->action, "monitor") && op->interval == 0) { /* Invoke --version for a nagios probe */ - op->opaque->args[index] = strdup("--version"); - index++; + op->opaque->args[1] = strdup("--version"); - } else if (params) { + } else if (op->params) { GHashTableIter iter; char *key = NULL; char *value = NULL; + int index = 1; static int args_size = sizeof(op->opaque->args) / sizeof(char *); - g_hash_table_iter_init(&iter, params); + g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value) && index <= args_size - 3) { @@ -344,12 +345,16 @@ resources_action_create(const char *name, const char *standard, const char *prov index += 2; } } - op->opaque->args[index] = NULL; + + // Nagios actions don't need to keep the parameters + if (op->params != NULL) { + g_hash_table_destroy(op->params); + op->params = NULL; + } #endif } else { crm_err("Unknown resource standard: %s", op->standard); - services_action_free(op); - op = NULL; + goto return_error; } if(params) { @@ -1417,7 +1422,7 @@ resources_list_standards(void) GList * resources_list_providers(const char *standard) { - if (crm_provider_required(standard)) { + if (is_set(pcmk_get_ra_caps(standard), pcmk_ra_cap_provider)) { return resources_os_list_ocf_providers(); } diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index c717947a5ed..5feab098f96 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -135,9 +135,7 @@ static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if 
(safe_str_eq(action, "monitor") && - (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_LSB) || - safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE) - || safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SYSTEMD))) { + is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) { return "status"; } return action; diff --git a/tools/fake_transition.c b/tools/fake_transition.c index 74778505182..fa0e5370b60 100644 --- a/tools/fake_transition.c +++ b/tools/fake_transition.c @@ -300,7 +300,8 @@ inject_resource(xmlNode * cib_node, const char *resource, const char *rclass, co fprintf(stderr, "Invalid class for %s: %s\n", resource, rclass); return NULL; - } else if (crm_provider_required(rclass) && (rprovider == NULL)) { + } else if (is_set(pcmk_get_ra_caps(rclass), pcmk_ra_cap_provider) + && (rprovider == NULL)) { fprintf(stderr, "Please specify the provider for resource %s\n", resource); return NULL; } From 746e787fa9f1b350896c6dc94a3331800f819d13 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 6 Aug 2018 10:26:21 -0500 Subject: [PATCH 176/812] Low: libcrmcommon,libcrmservice: update for heartbeat class support --- lib/common/agents.c | 6 ++++++ lib/services/services.c | 8 ++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/common/agents.c b/lib/common/agents.c index 23c364f8471..c5605d4ce2b 100644 --- a/lib/common/agents.c +++ b/lib/common/agents.c @@ -58,6 +58,12 @@ pcmk_get_ra_caps(const char *standard) */ return pcmk_ra_cap_status; + } else if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_HB)) { + /* @COMPAT Heartbeat resources likely can't really be unique clones, but + * we've allowed it in the past and have it in some PE regression tests. 
+ */ + return pcmk_ra_cap_params | pcmk_ra_cap_status | pcmk_ra_cap_unique; + } else if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_NAGIOS)) { return pcmk_ra_cap_params; } diff --git a/lib/services/services.c b/lib/services/services.c index 5a0baa0cb4e..90188fcbfcd 100644 --- a/lib/services/services.c +++ b/lib/services/services.c @@ -273,10 +273,10 @@ resources_action_create(const char *name, const char *standard, const char *prov /* The "heartbeat" agent class only has positional arguments, * which we keyed by their decimal position number. */ param_num = 1; - if (params) { + if (op->params) { for (index = 1; index <= MAX_ARGC - 3; index++ ) { snprintf(buf_tmp, sizeof(buf_tmp), "%d", index); - value_tmp = g_hash_table_lookup(params, buf_tmp); + value_tmp = g_hash_table_lookup(op->params, buf_tmp); if (value_tmp == NULL) { /* maybe: strdup("") ?? * But the old lrmd did simply continue as well. */ @@ -284,6 +284,10 @@ resources_action_create(const char *name, const char *standard, const char *prov } op->opaque->args[param_num++] = strdup(value_tmp); } + + // Heartbeat actions don't need to keep the parameters + g_hash_table_destroy(op->params); + op->params = NULL; } /* Add operation code as the last argument, */ From 3248826a025dd73db2df6110105d657b94df25aa Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 6 Aug 2018 10:44:04 -0500 Subject: [PATCH 177/812] Fix: scheduler: only some agent standards support unique clones If LSB resources are cloned with globally-unique set to true, we automatically convert it to false (with a warning). This commit broadens that to all classes that don't support unique clones. 
--- lib/pengine/native.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/lib/pengine/native.c b/lib/pengine/native.c index 78b0b719791..f105fb67604 100644 --- a/lib/pengine/native.c +++ b/lib/pengine/native.c @@ -134,33 +134,30 @@ native_unpack(resource_t * rsc, pe_working_set_t * data_set) { resource_t *parent = uber_parent(rsc); native_variant_data_t *native_data = NULL; - const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + const char *standard = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + uint32_t ra_caps = pcmk_get_ra_caps(standard); pe_rsc_trace(rsc, "Processing resource %s...", rsc->id); native_data = calloc(1, sizeof(native_variant_data_t)); rsc->variant_opaque = native_data; - if (is_set(rsc->flags, pe_rsc_unique) && rsc->parent) { - - if (safe_str_eq(class, PCMK_RESOURCE_CLASS_LSB)) { - resource_t *top = uber_parent(rsc); + // Only some agent standards support unique and promotable clones + if (is_not_set(ra_caps, pcmk_ra_cap_unique) + && is_set(rsc->flags, pe_rsc_unique) && rsc->parent) { - force_non_unique_clone(top, rsc->id, data_set); - } + force_non_unique_clone(parent, rsc->id, data_set); } - - if (safe_str_eq(class, PCMK_RESOURCE_CLASS_OCF) == FALSE) { + if (is_not_set(ra_caps, pcmk_ra_cap_promotable)) { const char *stateful = g_hash_table_lookup(parent->meta, "stateful"); if (safe_str_eq(stateful, XML_BOOLEAN_TRUE)) { pe_err ("Resource %s is of type %s and therefore cannot be used as a master/slave resource", - rsc->id, class); + rsc->id, standard); return FALSE; } } - return TRUE; } From 486650aa71d50585af1e12b6379e6fd8b139bd2b Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 27 Jul 2018 15:58:30 -0500 Subject: [PATCH 178/812] Log: scheduler: avoid duplicate trace message when unpacking clones --- lib/pengine/clone.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index 375989075ab..10f76333b27 100644 --- 
a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -212,9 +212,10 @@ clone_unpack(resource_t * rsc, pe_working_set_t * data_set) add_hash_param(rsc->meta, XML_RSC_ATTR_STICKINESS, "1"); } - pe_rsc_trace(rsc, "\tClone is unique (fixed): %s", - is_set(rsc->flags, pe_rsc_unique) ? "true" : "false"); clone_data->notify_confirm = is_set(rsc->flags, pe_rsc_notify); + /* This ensures that the globally-unique value always exists for children to + * inherit when being unpacked, as well as in resource agents' environment. + */ add_hash_param(rsc->meta, XML_RSC_ATTR_UNIQUE, is_set(rsc->flags, pe_rsc_unique) ? XML_BOOLEAN_TRUE : XML_BOOLEAN_FALSE); From 45b21a4ad7dbaaf4c4a061964cce2e7efffa61fc Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 27 Jul 2018 16:22:10 -0500 Subject: [PATCH 179/812] Refactor: scheduler: mark force-anonymous function as internal --- lib/pengine/clone.c | 4 +--- lib/pengine/native.c | 10 +++++++--- lib/pengine/pe_status_private.h | 4 ++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index 10f76333b27..307f6b8ac43 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -17,8 +17,6 @@ #define VARIANT_CLONE 1 #include "./variant.h" -void force_non_unique_clone(resource_t * rsc, const char *rid, pe_working_set_t * data_set); - static void mark_as_orphan(resource_t * rsc) { @@ -34,7 +32,7 @@ mark_as_orphan(resource_t * rsc) } void -force_non_unique_clone(resource_t * rsc, const char *rid, pe_working_set_t * data_set) +pe__force_anon(pe_resource_t *rsc, const char *rid, pe_working_set_t *data_set) { if (pe_rsc_is_clone(rsc)) { clone_variant_data_t *clone_data = NULL; diff --git a/lib/pengine/native.c b/lib/pengine/native.c index f105fb67604..42ade049784 100644 --- a/lib/pengine/native.c +++ b/lib/pengine/native.c @@ -13,6 +13,7 @@ #include #include #include +#include #define VARIANT_NATIVE 1 #include "./variant.h" @@ -127,8 +128,6 @@ native_add_running(resource_t * rsc, node_t * node, 
pe_working_set_t * data_set) } } -extern void force_non_unique_clone(resource_t * rsc, const char *rid, pe_working_set_t * data_set); - gboolean native_unpack(resource_t * rsc, pe_working_set_t * data_set) { @@ -146,7 +145,12 @@ native_unpack(resource_t * rsc, pe_working_set_t * data_set) if (is_not_set(ra_caps, pcmk_ra_cap_unique) && is_set(rsc->flags, pe_rsc_unique) && rsc->parent) { - force_non_unique_clone(parent, rsc->id, data_set); + /* @COMPAT We should probably reject this situation as an error (as we + * do for promotable below) rather than warn and convert, but that would + * be a backward-incompatible change that we should probably do with a + * transform at a schema major version bump. + */ + pe__force_anon(parent, rsc->id, data_set); } if (is_not_set(ra_caps, pcmk_ra_cap_promotable)) { const char *stateful = g_hash_table_lookup(parent->meta, "stateful"); diff --git a/lib/pengine/pe_status_private.h b/lib/pengine/pe_status_private.h index b29198c55c1..0cfd5faf933 100644 --- a/lib/pengine/pe_status_private.h +++ b/lib/pengine/pe_status_private.h @@ -16,4 +16,8 @@ G_GNUC_INTERNAL pe_resource_t *pe__create_clone_child(pe_resource_t *rsc, pe_working_set_t *data_set); +G_GNUC_INTERNAL +void pe__force_anon(pe_resource_t *rsc, const char *rid, + pe_working_set_t *data_set); + #endif // PE_STATUS_PRIVATE__H From e197c73ef91f0185ac28d7d2dfee3dc07111e019 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 27 Jul 2018 16:23:45 -0500 Subject: [PATCH 180/812] Log: scheduler: reword force-anonymous message This rewords the log warning when forcing a clone to be anonymous to include the standard, and changes it from crm_config_warn() to pe_warn(). 
--- lib/pengine/clone.c | 9 +++++---- lib/pengine/native.c | 2 +- lib/pengine/pe_status_private.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index 307f6b8ac43..3a9b241b39c 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -32,16 +32,17 @@ mark_as_orphan(resource_t * rsc) } void -pe__force_anon(pe_resource_t *rsc, const char *rid, pe_working_set_t *data_set) +pe__force_anon(const char *standard, pe_resource_t *rsc, const char *rid, + pe_working_set_t *data_set) { if (pe_rsc_is_clone(rsc)) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); - crm_config_warn("Clones %s contains non-OCF resource %s and so " - "can only be used as an anonymous clone. " - "Set the " XML_RSC_ATTR_UNIQUE " meta attribute to false", rsc->id, rid); + pe_warn("Ignoring " XML_RSC_ATTR_UNIQUE " for %s because %s resources " + "such as %s can be used only as anonymous clones", + rsc->id, standard, rid); clone_data->clone_node_max = 1; clone_data->clone_max = g_list_length(data_set->nodes); diff --git a/lib/pengine/native.c b/lib/pengine/native.c index 42ade049784..bcd49fa2588 100644 --- a/lib/pengine/native.c +++ b/lib/pengine/native.c @@ -150,7 +150,7 @@ native_unpack(resource_t * rsc, pe_working_set_t * data_set) * be a backward-incompatible change that we should probably do with a * transform at a schema major version bump. 
*/ - pe__force_anon(parent, rsc->id, data_set); + pe__force_anon(standard, parent, rsc->id, data_set); } if (is_not_set(ra_caps, pcmk_ra_cap_promotable)) { const char *stateful = g_hash_table_lookup(parent->meta, "stateful"); diff --git a/lib/pengine/pe_status_private.h b/lib/pengine/pe_status_private.h index 0cfd5faf933..325bce67918 100644 --- a/lib/pengine/pe_status_private.h +++ b/lib/pengine/pe_status_private.h @@ -17,7 +17,7 @@ pe_resource_t *pe__create_clone_child(pe_resource_t *rsc, pe_working_set_t *data_set); G_GNUC_INTERNAL -void pe__force_anon(pe_resource_t *rsc, const char *rid, +void pe__force_anon(const char *standard, pe_resource_t *rsc, const char *rid, pe_working_set_t *data_set); #endif // PE_STATUS_PRIVATE__H From efe028151cabaeb752ff1bb01171cfc369ff9b3c Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 27 Jul 2018 16:24:33 -0500 Subject: [PATCH 181/812] Low: scheduler: respect lower clone-max for forced anonymous clones Previously, when a clone marked as globally unique was automatically converted to an anonymous clone, clone-max would be reset to the number of nodes in the cluster. While this makes sense when the original clone-max was greater, we should preserve a lower value. 
--- lib/pengine/clone.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index 3a9b241b39c..e78dcc8bf40 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -45,7 +45,8 @@ pe__force_anon(const char *standard, pe_resource_t *rsc, const char *rid, rsc->id, standard, rid); clone_data->clone_node_max = 1; - clone_data->clone_max = g_list_length(data_set->nodes); + clone_data->clone_max = QB_MIN(clone_data->clone_max, + g_list_length(data_set->nodes)); clear_bit_recursive(rsc, pe_rsc_unique); } } From 366a53ab9121f9da2ec7f32f9d625864dbc5ce9a Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 27 Jul 2018 16:34:57 -0500 Subject: [PATCH 182/812] Low: scheduler: clear globally-unique correctly when forcing anonymous Previously, when a globally-unique clone was forced to be anonymous, the scheduler would recursively clear the parent's pe_rsc_unique flag. That had two issues: the child being unpacked wasn't added to the parent's children yet, so it wasn't cleared; and the globally-unique meta-attribute was not cleared. Those issues are now fixed. 
--- lib/pengine/clone.c | 1 - lib/pengine/native.c | 21 ++++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index e78dcc8bf40..3b1856e1b1f 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -47,7 +47,6 @@ pe__force_anon(const char *standard, pe_resource_t *rsc, const char *rid, clone_data->clone_node_max = 1; clone_data->clone_max = QB_MIN(clone_data->clone_max, g_list_length(data_set->nodes)); - clear_bit_recursive(rsc, pe_rsc_unique); } } diff --git a/lib/pengine/native.c b/lib/pengine/native.c index bcd49fa2588..7d0b6740009 100644 --- a/lib/pengine/native.c +++ b/lib/pengine/native.c @@ -128,6 +128,17 @@ native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) } } +static void +recursive_clear_unique(pe_resource_t *rsc) +{ + clear_bit(rsc->flags, pe_rsc_unique); + add_hash_param(rsc->meta, XML_RSC_ATTR_UNIQUE, XML_BOOLEAN_FALSE); + + for (GList *child = rsc->children; child != NULL; child = child->next) { + recursive_clear_unique((pe_resource_t *) child->data); + } +} + gboolean native_unpack(resource_t * rsc, pe_working_set_t * data_set) { @@ -143,7 +154,7 @@ native_unpack(resource_t * rsc, pe_working_set_t * data_set) // Only some agent standards support unique and promotable clones if (is_not_set(ra_caps, pcmk_ra_cap_unique) - && is_set(rsc->flags, pe_rsc_unique) && rsc->parent) { + && is_set(rsc->flags, pe_rsc_unique) && pe_rsc_is_clone(parent)) { /* @COMPAT We should probably reject this situation as an error (as we * do for promotable below) rather than warn and convert, but that would @@ -151,6 +162,14 @@ native_unpack(resource_t * rsc, pe_working_set_t * data_set) * transform at a schema major version bump. */ pe__force_anon(standard, parent, rsc->id, data_set); + + /* Clear globally-unique on the parent and all its descendents unpacked + * so far (clearing the parent should make any future children unpacking + * correct). 
We have to clear this resource explicitly because it isn't + * hooked into the parent's children yet. + */ + recursive_clear_unique(parent); + recursive_clear_unique(rsc); } if (is_not_set(ra_caps, pcmk_ra_cap_promotable)) { const char *stateful = g_hash_table_lookup(parent->meta, "stateful"); From 091592008e99dc57ab1debd36fd429aaa12b329d Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 1 Aug 2018 17:07:58 -0500 Subject: [PATCH 183/812] Refactor: scheduler: avoid code duplication when marking orphan --- lib/pengine/clone.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index 3b1856e1b1f..d577e0a00de 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -17,20 +17,6 @@ #define VARIANT_CLONE 1 #include "./variant.h" -static void -mark_as_orphan(resource_t * rsc) -{ - GListPtr gIter = rsc->children; - - set_bit(rsc->flags, pe_rsc_orphan); - - for (; gIter != NULL; gIter = gIter->next) { - resource_t *child = (resource_t *) gIter->data; - - mark_as_orphan(child); - } -} - void pe__force_anon(const char *standard, pe_resource_t *rsc, const char *rid, pe_working_set_t *data_set) @@ -107,7 +93,7 @@ pe__create_clone_child(pe_resource_t *rsc, pe_working_set_t *data_set) pe_rsc_trace(child_rsc, "Setting clone attributes for: %s", child_rsc->id); rsc->children = g_list_append(rsc->children, child_rsc); if (as_orphan) { - mark_as_orphan(child_rsc); + set_bit_recursive(child_rsc, pe_rsc_orphan); } add_hash_param(child_rsc->meta, XML_RSC_ATTR_INCARNATION_MAX, inc_max); From e241c2bbc1cf6f18728a20db845db1a921f45fb4 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 6 Aug 2018 08:55:08 -0500 Subject: [PATCH 184/812] Build: RPM: don't package servicelog-related binaries avoids complaints when RPM is rebuilt on suitable systems --- pacemaker.spec.in | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pacemaker.spec.in b/pacemaker.spec.in index e205a843fc1..93b09a19e04 100644 --- 
a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -449,6 +449,12 @@ find %{buildroot} -name '*.xml' -type f -print0 | xargs -0 chmod a-x find %{buildroot} -name '*.a' -type f -print0 | xargs -0 rm -f find %{buildroot} -name '*.la' -type f -print0 | xargs -0 rm -f +# For now, don't package the servicelog-related binaries built only for +# ppc64le when certain dependencies are installed. If they get more exercise by +# advanced users, we can reconsider. +rm -f %{buildroot}/%{_sbindir}/notifyServicelogEvent +rm -f %{buildroot}/%{_sbindir}/ipmiservicelogd + # Do not package these either rm -f %{buildroot}/%{_libdir}/service_crm.so From 1307b6f238fb7f4cada95f6af02c1a4caae3eb63 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 3 Aug 2018 18:30:47 -0500 Subject: [PATCH 185/812] Refactor: scheduler: remove unused variable setting makes static analysis happy --- lib/pengine/container.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/pengine/container.c b/lib/pengine/container.c index d82948a409e..1526f377848 100644 --- a/lib/pengine/container.c +++ b/lib/pengine/container.c @@ -780,7 +780,6 @@ container_fix_remote_addr(resource_t *rsc) } for (int lpc = 0; lpc < DIMOF(attr_list); lpc++) { - name = attr_list[lpc]; value = crm_element_value(rsc->xml, attr_list[lpc]); if (safe_str_eq(value, value_list[lpc]) == FALSE) { return FALSE; From 1a95cbae653df8835906314d77e74091f55ab319 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 3 Aug 2018 18:32:08 -0500 Subject: [PATCH 186/812] Refactor: libcrmcommon: remove dead code makes static analysis happy --- lib/common/iso8601.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/common/iso8601.c b/lib/common/iso8601.c index c95fa1350d0..b661fce7cca 100644 --- a/lib/common/iso8601.c +++ b/lib/common/iso8601.c @@ -1384,7 +1384,7 @@ crm_time_format_hr(const char *format, crm_time_hr_t * hr_dt) { const char *mark_s; int max = 128, scanned_pos = 0, printed_pos = 0, fmt_pos = 0, - date_len = 0, nano_digits = 
0, fmt_len; + date_len = 0, nano_digits = 0; char nano_s[10], date_s[max+1], nanofmt_s[5] = "%", *tmp_fmt_s; struct tm tm; crm_time_t dt; @@ -1397,11 +1397,11 @@ crm_time_format_hr(const char *format, crm_time_hr_t * hr_dt) sprintf(nano_s, "%06d000", hr_dt->useconds); while ((format[scanned_pos]) != '\0') { - fmt_len = 0; mark_s = strchr(&format[scanned_pos], '%'); if (mark_s) { + int fmt_len = 1; + fmt_pos = mark_s - format; - fmt_len = 1; while ((format[fmt_pos+fmt_len] != '\0') && (format[fmt_pos+fmt_len] >= '0') && (format[fmt_pos+fmt_len] <= '9')) { From cdbc6bfc8995ecc86da3fe81a463d8fc4ce1f8fd Mon Sep 17 00:00:00 2001 From: Hideo Yamauchi Date: Mon, 30 Jul 2018 07:33:09 +0900 Subject: [PATCH 187/812] Mid: controld: No update by already confirmed events. --- crmd/te_events.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crmd/te_events.c b/crmd/te_events.c index 7b5ca2a4900..1f7a34c701c 100644 --- a/crmd/te_events.c +++ b/crmd/te_events.c @@ -510,6 +510,14 @@ process_graph_event(xmlNode *event, const char *event_node) abort_transition(INFINITY, tg_restart, "Unknown event", event); } else { + + /* Actions already confirmed skip matching. */ + /* ex. Ignoring xxx_last_0 or xxx_last_failure_0 generated by create_operation_update() in order to prevent duplicate fail-count from increasing. 
*/ + if (action->confirmed == TRUE) { + crm_log_xml_debug(event, "No update by already confirmed events :"); + goto bail; + } + ignore_failures = safe_str_eq( crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore"); match_graph_event(action, event, status, rc, target_rc, ignore_failures); From 039b778b07f256dd564171430c5427dfb9489a58 Mon Sep 17 00:00:00 2001 From: "Gao,Yan" Date: Fri, 8 Dec 2017 14:47:40 +0100 Subject: [PATCH 188/812] Refactor: tools: crm_resource - Functionize cleaning up resource failures --- tools/crm_resource.c | 26 ++------------------------ tools/crm_resource.h | 3 +++ tools/crm_resource_runtime.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/tools/crm_resource.c b/tools/crm_resource.c index 0557892c0e6..331adf672bd 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -1103,31 +1103,9 @@ main(int argc, char **argv) } else if (rsc_cmd == 'C' && just_errors) { crmd_replies_needed = 0; - for (xmlNode *xml_op = __xml_first_child(data_set.failed); xml_op != NULL; - xml_op = __xml_next(xml_op)) { - - const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); - const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); - const char *task_interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); - const char *resource_name = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); - - if(resource_name == NULL) { - continue; - } else if(host_uname && safe_str_neq(host_uname, node)) { - continue; - } else if(rsc_id && safe_str_neq(rsc_id, resource_name)) { - continue; - } else if(operation && safe_str_neq(operation, task)) { - continue; - } else if(interval && safe_str_neq(interval, task_interval)) { - continue; - } - crm_debug("Erasing %s failure for %s (%s detected) on %s", - task, rsc->id, resource_name, node); - rc = cli_resource_delete(crmd_channel, node, rsc, task, - task_interval, &data_set); - } + rc = cli_resource_delete_failures(crmd_channel, host_uname, rsc, 
operation, + interval, &data_set); if(rsc && (rc == pcmk_ok) && (BE_QUIET == FALSE)) { /* Now check XML_RSC_ATTR_TARGET_ROLE and XML_RSC_ATTR_MANAGED */ diff --git a/tools/crm_resource.h b/tools/crm_resource.h index 0b8dd2a56b6..e28c9ef4255 100644 --- a/tools/crm_resource.h +++ b/tools/crm_resource.h @@ -76,6 +76,9 @@ int cli_resource_search(resource_t *rsc, const char *requested_name, int cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, resource_t *rsc, const char *operation, const char *interval, pe_working_set_t *data_set); +int cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname, + resource_t *rsc, const char *operation, + const char *interval, pe_working_set_t *data_set); int cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * cib); int cli_resource_move(resource_t *rsc, const char *rsc_id, const char *host_name, cib_t *cib, diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 5004935384b..9aa7b7e0182 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -681,6 +681,42 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, return rc; } +int +cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname, + resource_t *rsc, const char *operation, + const char *interval, pe_working_set_t *data_set) +{ + int rc = pcmk_ok; + + for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL; + xml_op = __xml_next(xml_op)) { + + const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); + const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); + const char *task_interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); + const char *resource_name = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); + + if(resource_name == NULL) { + continue; + } else if(host_uname && safe_str_neq(host_uname, node)) { + continue; + } else if(rsc->id && safe_str_neq(rsc->id, resource_name)) { + continue; + } 
else if(operation && safe_str_neq(operation, task)) { + continue; + } else if(interval && safe_str_neq(interval, task_interval)) { + continue; + } + + crm_debug("Erasing %s failure for %s (%s detected) on %s", + task, rsc->id, resource_name, node); + rc = cli_resource_delete(crmd_channel, node, rsc, task, + task_interval, data_set); + } + + return rc; +} + void cli_resource_check(cib_t * cib_conn, resource_t *rsc) { From 4ae40b495305b87f59e439de3298910c243c171d Mon Sep 17 00:00:00 2001 From: "Gao,Yan" Date: Fri, 8 Dec 2017 16:22:54 +0100 Subject: [PATCH 189/812] Fix: tools: crm_resource --cleanup for non-primitive resources --- tools/crm_resource_runtime.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 9aa7b7e0182..98cd27f1566 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -688,6 +688,24 @@ cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname, { int rc = pcmk_ok; + if (rsc == NULL) { + return -ENXIO; + + } else if (rsc->children) { + GListPtr lpc = NULL; + + for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { + resource_t *child = (resource_t *) lpc->data; + + rc = cli_resource_delete_failures(crmd_channel, host_uname, child, operation, + interval, data_set); + if(rc != pcmk_ok) { + return rc; + } + } + return pcmk_ok; + } + for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL; xml_op = __xml_next(xml_op)) { From 6ce88cdbcbe15b7e81a4234eb92a93663243a7ff Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 11 Dec 2017 12:23:06 -0600 Subject: [PATCH 190/812] Fix: tools: crm_resource --cleanup The new "failures only" mode of crm_resource --cleanup had multiple issues, including not working without --resource specified, comparing a user-provided interval string against a milliseconds interval, and considering no interval specified as all intervals rather than 0 but only when clearing LRM history 
entries. --- tools/crm_resource.c | 35 ++--- tools/crm_resource.h | 9 +- tools/crm_resource_runtime.c | 258 +++++++++++++++++++++++++---------- 3 files changed, 202 insertions(+), 100 deletions(-) diff --git a/tools/crm_resource.c b/tools/crm_resource.c index 331adf672bd..e3f8f8616e4 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -1101,14 +1101,20 @@ main(int argc, char **argv) rc = cli_resource_delete_attribute(rsc, rsc_id, prop_set, prop_id, prop_name, cib_conn, &data_set); - } else if (rsc_cmd == 'C' && just_errors) { + } else if ((rsc_cmd == 'C') && rsc) { + if (do_force == FALSE) { + rsc = uber_parent(rsc); + } crmd_replies_needed = 0; - rc = cli_resource_delete_failures(crmd_channel, host_uname, rsc, operation, - interval, &data_set); + crm_debug("%s of %s (%s requested) on %s", + (just_errors? "Clearing failures" : "Re-checking the state"), + rsc->id, rsc_id, (host_uname? host_uname : "all hosts")); + rc = cli_resource_delete(crmd_channel, host_uname, rsc, operation, + interval, just_errors, &data_set); - if(rsc && (rc == pcmk_ok) && (BE_QUIET == FALSE)) { - /* Now check XML_RSC_ATTR_TARGET_ROLE and XML_RSC_ATTR_MANAGED */ + if ((rc == pcmk_ok) && !BE_QUIET) { + // Show any reasons why resource might stay stopped cli_resource_check(cib_conn, rsc); } @@ -1116,22 +1122,9 @@ main(int argc, char **argv) start_mainloop(); } - } else if ((rsc_cmd == 'C') && rsc) { - if(do_force == FALSE) { - rsc = uber_parent(rsc); - } - - crm_debug("Re-checking the state of %s (%s requested) on %s", - rsc->id, rsc_id, host_uname); - crmd_replies_needed = 0; - rc = cli_resource_delete(crmd_channel, host_uname, rsc, operation, - interval, &data_set); - - if(rc == pcmk_ok && BE_QUIET == FALSE) { - /* Now check XML_RSC_ATTR_TARGET_ROLE and XML_RSC_ATTR_MANAGED */ - cli_resource_check(cib_conn, rsc); - } - + } else if (rsc_cmd == 'C' && just_errors) { + rc = cli_cleanup_all(crmd_channel, host_uname, operation, interval, + &data_set); if (rc == pcmk_ok) { 
start_mainloop(); } diff --git a/tools/crm_resource.h b/tools/crm_resource.h index e28c9ef4255..0ac51f26f96 100644 --- a/tools/crm_resource.h +++ b/tools/crm_resource.h @@ -75,10 +75,11 @@ int cli_resource_search(resource_t *rsc, const char *requested_name, pe_working_set_t *data_set); int cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, resource_t *rsc, const char *operation, - const char *interval, pe_working_set_t *data_set); -int cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname, - resource_t *rsc, const char *operation, - const char *interval, pe_working_set_t *data_set); + const char *interval, bool just_failures, + pe_working_set_t *data_set); +int cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, + const char *operation, const char *interval, + pe_working_set_t *data_set); int cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * cib); int cli_resource_move(resource_t *rsc, const char *rsc_id, const char *host_name, cib_t *cib, diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 98cd27f1566..2cc2bec30a5 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -558,15 +558,129 @@ rsc_fail_name(resource_t *rsc) return is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name); } +static int +clear_rsc_history(crm_ipc_t *crmd_channel, const char *host_uname, + const char *rsc_id, pe_working_set_t *data_set) +{ + int rc = pcmk_ok; + + /* Erase the resource's entire LRM history in the CIB, even if we're only + * clearing a single operation's fail count. If we erased only entries for a + * single operation, we might wind up with a wrong idea of the current + * resource state, and we might not re-probe the resource. 
+ */ + rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc_id, + TRUE, data_set); + if (rc != pcmk_ok) { + return rc; + } + crmd_replies_needed++; + + crm_trace("Processing %d mainloop inputs", crmd_replies_needed); + while (g_main_context_iteration(NULL, FALSE)) { + crm_trace("Processed mainloop input, %d still remaining", + crmd_replies_needed); + } + + if (crmd_replies_needed < 0) { + crmd_replies_needed = 0; + } + return rc; +} + +static int +clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, + const char *rsc_id, const char *operation, + const char *interval, pe_working_set_t *data_set) +{ + int rc = pcmk_ok; + const char *failed_value = NULL; + const char *interval_ms_str = NULL; + GHashTable *rscs = NULL; + GHashTableIter iter; + + /* Create a hash table to use as a set of resources to clean. This lets us + * clean each resource only once (per node) regardless of how many failed + * operations it has. + */ + rscs = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); + + // Normalize interval to milliseconds for comparison to history entry + if (operation) { + interval_ms_str = crm_strdup_printf("%llu", crm_get_interval(interval)); + } + + for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL; + xml_op = __xml_next(xml_op)) { + + // No resource specified means all resources match + failed_value = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); + if (rsc_id == NULL) { + rsc_id = failed_value; + } else if (safe_str_neq(rsc_id, failed_value)) { + continue; + } + + // Host name should always have been provided by this point + failed_value = crm_element_value(xml_op, XML_ATTR_UNAME); + if (safe_str_neq(node_name, failed_value)) { + continue; + } + + // No operation specified means all operations match + if (operation) { + failed_value = crm_element_value(xml_op, XML_LRM_ATTR_TASK); + if (safe_str_neq(operation, failed_value)) { + continue; + } + + // Interval (if operation was specified) defaults to 0 
(not all) + failed_value = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); + if (safe_str_neq(interval_ms_str, failed_value)) { + continue; + } + } + + g_hash_table_add(rscs, (gpointer) rsc_id); + } + + g_hash_table_iter_init(&iter, rscs); + while (g_hash_table_iter_next(&iter, (gpointer *) &rsc_id, NULL)) { + crm_debug("Erasing failures of %s on %s", rsc_id, node_name); + rc = clear_rsc_history(crmd_channel, node_name, rsc_id, data_set); + if (rc != pcmk_ok) { + return rc; + } + } + g_hash_table_destroy(rscs); + return rc; +} + +static int +clear_rsc_fail_attrs(resource_t *rsc, const char *operation, + const char *interval, node_t *node) +{ + int rc = pcmk_ok; + int attr_options = attrd_opt_none; + char *rsc_name = rsc_fail_name(rsc); + + if (is_remote_node(node)) { + attr_options |= attrd_opt_remote; + } + rc = attrd_clear_delegate(NULL, node->details->uname, rsc_name, operation, + interval, NULL, attr_options); + free(rsc_name); + return rc; +} + int cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, resource_t *rsc, const char *operation, - const char *interval, pe_working_set_t *data_set) + const char *interval, bool just_failures, + pe_working_set_t *data_set) { int rc = pcmk_ok; node_t *node = NULL; - char *rsc_name = NULL; - int attr_options = attrd_opt_none; if (rsc == NULL) { return -ENXIO; @@ -578,8 +692,8 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, resource_t *child = (resource_t *) lpc->data; rc = cli_resource_delete(crmd_channel, host_uname, child, operation, - interval, data_set); - if(rc != pcmk_ok) { + interval, just_failures, data_set); + if (rc != pcmk_ok) { return rc; } } @@ -611,8 +725,13 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, node = (node_t *) lpc->data; if (node->details->online) { - cli_resource_delete(crmd_channel, node->details->uname, rsc, - operation, interval, data_set); + rc = cli_resource_delete(crmd_channel, node->details->uname, + rsc, operation, 
interval, + just_failures, data_set); + } + if (rc != pcmk_ok) { + g_list_free(nodes); + return rc; } } @@ -637,102 +756,91 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, if (crmd_channel == NULL) { printf("Dry run: skipping clean-up of %s on %s due to CIB_file\n", rsc->id, host_uname); - return rc; - } + return pcmk_ok; + } - /* Erase the resource's entire LRM history in the CIB, even if we're only - * clearing a single operation's fail count. If we erased only entries for a - * single operation, we might wind up with a wrong idea of the current - * resource state, and we might not re-probe the resource. - */ - rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc->id, - TRUE, data_set); + rc = clear_rsc_fail_attrs(rsc, operation, interval, node); if (rc != pcmk_ok) { - printf("Unable to clean up %s history on %s: %s\n", - rsc->id, host_uname, pcmk_strerror(rc)); + printf("Unable to clean up %s failures on %s: %s\n", + rsc->id, host_uname, pcmk_strerror(rc)); return rc; } - crmd_replies_needed++; - crm_trace("Processing %d mainloop inputs", crmd_replies_needed); - while(g_main_context_iteration(NULL, FALSE)) { - crm_trace("Processed mainloop input, %d still remaining", - crmd_replies_needed); - } - - if(crmd_replies_needed < 0) { - crmd_replies_needed = 0; - } - - rsc_name = rsc_fail_name(rsc); - if (is_remote_node(node)) { - attr_options |= attrd_opt_remote; + if (just_failures) { + rc = clear_rsc_failures(crmd_channel, host_uname, rsc->id, operation, + interval, data_set); + } else { + rc = clear_rsc_history(crmd_channel, host_uname, rsc->id, data_set); } - rc = attrd_clear_delegate(NULL, host_uname, rsc_name, operation, interval, - NULL, attr_options); if (rc != pcmk_ok) { - printf("Cleaned %s history on %s, but unable to clear failures: %s\n", + printf("Cleaned %s failures on %s, but unable to clean history: %s\n", rsc->id, host_uname, pcmk_strerror(rc)); } else { printf("Cleaned up %s on %s\n", rsc->id, host_uname); } 
- free(rsc_name); - return rc; } int -cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname, - resource_t *rsc, const char *operation, - const char *interval, pe_working_set_t *data_set) +cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, + const char *operation, const char *interval, + pe_working_set_t *data_set) { + int attr_options = attrd_opt_none; int rc = pcmk_ok; + const char *display_name = node_name? node_name : "all nodes"; - if (rsc == NULL) { - return -ENXIO; - - } else if (rsc->children) { - GListPtr lpc = NULL; + if (crmd_channel == NULL) { + printf("Dry run: skipping clean-up of %s due to CIB_file\n", + display_name); + return pcmk_ok; + } + crmd_replies_needed = 0; - for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { - resource_t *child = (resource_t *) lpc->data; + if (node_name) { + node_t *node = pe_find_node(data_set->nodes, node_name); - rc = cli_resource_delete_failures(crmd_channel, host_uname, child, operation, - interval, data_set); - if(rc != pcmk_ok) { - return rc; - } + if (node == NULL) { + CMD_ERR("Unknown node: %s", node_name); + return -ENXIO; + } + if (is_remote_node(node)) { + attr_options |= attrd_opt_remote; } - return pcmk_ok; } - for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL; - xml_op = __xml_next(xml_op)) { - - const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); - const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); - const char *task_interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); - const char *resource_name = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); + rc = attrd_clear_delegate(NULL, node_name, NULL, operation, interval, + NULL, attr_options); + if (rc != pcmk_ok) { + printf("Unable to clean up all failures on %s: %s\n", + display_name, pcmk_strerror(rc)); + return rc; + } - if(resource_name == NULL) { - continue; - } else if(host_uname && safe_str_neq(host_uname, node)) { - continue; - } else if(rsc->id && 
safe_str_neq(rsc->id, resource_name)) { - continue; - } else if(operation && safe_str_neq(operation, task)) { - continue; - } else if(interval && safe_str_neq(interval, task_interval)) { - continue; + if (node_name) { + rc = clear_rsc_failures(crmd_channel, node_name, NULL, + operation, interval, data_set); + if (rc != pcmk_ok) { + printf("Cleaned all resource failures on %s, but unable to clean history: %s\n", + node_name, pcmk_strerror(rc)); + return rc; } + } else { + for (GList *iter = data_set->nodes; iter; iter = iter->next) { + pe_node_t *node = (pe_node_t *) iter->data; - crm_debug("Erasing %s failure for %s (%s detected) on %s", - task, rsc->id, resource_name, node); - rc = cli_resource_delete(crmd_channel, node, rsc, task, - task_interval, data_set); + rc = clear_rsc_failures(crmd_channel, node->details->uname, NULL, + operation, interval, data_set); + if (rc != pcmk_ok) { + printf("Cleaned all resource failures on all nodes, but unable to clean history on %s: %s\n", + node->details->uname, pcmk_strerror(rc)); + return rc; + } + } } - return rc; + printf("Cleaned up all resources on %s\n", display_name); + return pcmk_ok; } void From 0b6c3b3064401c8f0ebb48ccfd11f43dc2dc2b1b Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 12 Dec 2017 10:02:22 -0600 Subject: [PATCH 191/812] Fix: tools: crm_resource --cleanup with no resource specified 7a813755 failed to completely fix --cleanup without --resource --- tools/crm_resource_runtime.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 2cc2bec30a5..ce86a49e6c4 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -595,6 +595,7 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, { int rc = pcmk_ok; const char *failed_value = NULL; + const char *failed_id = NULL; const char *interval_ms_str = NULL; GHashTable *rscs = NULL; GHashTableIter iter; @@ -613,11 +614,14 @@ 
clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL; xml_op = __xml_next(xml_op)) { + failed_id = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); + if (failed_id == NULL) { + // Malformed history entry, should never happen + continue; + } + // No resource specified means all resources match - failed_value = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); - if (rsc_id == NULL) { - rsc_id = failed_value; - } else if (safe_str_neq(rsc_id, failed_value)) { + if (rsc_id && safe_str_neq(rsc_id, failed_id)) { continue; } @@ -641,13 +645,13 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, } } - g_hash_table_add(rscs, (gpointer) rsc_id); + g_hash_table_add(rscs, (gpointer) failed_id); } g_hash_table_iter_init(&iter, rscs); - while (g_hash_table_iter_next(&iter, (gpointer *) &rsc_id, NULL)) { - crm_debug("Erasing failures of %s on %s", rsc_id, node_name); - rc = clear_rsc_history(crmd_channel, node_name, rsc_id, data_set); + while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) { + crm_debug("Erasing failures of %s on %s", failed_id, node_name); + rc = clear_rsc_history(crmd_channel, node_name, failed_id, data_set); if (rc != pcmk_ok) { return rc; } From 9d5a1dae23a44db190782560d8dbdf50343b3692 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 24 Jan 2018 10:51:34 -0600 Subject: [PATCH 192/812] Low: tools: crm_resource --refresh should ignore --operation and --interval It already did when a resource was not specified. Also update help text to clarify cleanup vs refresh. 
--- tools/crm_resource.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/tools/crm_resource.c b/tools/crm_resource.c index e3f8f8616e4..d00c8f2b6a9 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -214,15 +214,17 @@ static struct crm_option long_options[] = { "cleanup", no_argument, NULL, 'C', #if 0 // new behavior disabled until 2.0.0 - "\t\tDelete failed operations from a resource's history allowing its current state to be rechecked.\n" + "\t\tIf resource has any past failures, clear its history and fail count.\n" "\t\t\t\tOptionally filtered by --resource, --node, --operation, and --interval (otherwise all).\n" + "\t\t\t\t--operation and --interval apply to fail counts, but entire history is always cleared,\n" + "\t\t\t\tto allow current state to be rechecked.\n" }, { "refresh", no_argument, NULL, 'R', #endif "\t\tDelete resource's history (including failures) so its current state is rechecked.\n" - "\t\t\t\tOptionally filtered by --resource, --node, --operation, and --interval (otherwise all).\n" - "\t\t\t\tUnless --force is specified, resource's group or clone (if any) will also be cleaned" + "\t\t\t\tOptionally filtered by --resource and --node (otherwise all).\n" + "\t\t\t\tUnless --force is specified, resource's group or clone (if any) will also be refreshed." 
}, { "set-parameter", required_argument, NULL, 'p', @@ -442,7 +444,6 @@ main(int argc, char **argv) bool require_resource = TRUE; /* whether command requires that resource be specified */ bool require_dataset = TRUE; /* whether command requires populated dataset instance */ bool require_crmd = FALSE; /* whether command requires connection to CRMd */ - bool just_errors = TRUE; /* whether cleanup command deletes all history or just errors */ int rc = pcmk_ok; int is_ocf_rc = 0; @@ -634,8 +635,7 @@ main(int argc, char **argv) if (cib_file == NULL) { require_crmd = TRUE; } - just_errors = FALSE; - rsc_cmd = 'C'; + rsc_cmd = 'R'; find_flags = pe_find_renamed|pe_find_anon; break; @@ -645,7 +645,6 @@ main(int argc, char **argv) if (cib_file == NULL) { require_crmd = TRUE; } - just_errors = FALSE; // disable until 2.0.0 rsc_cmd = 'C'; find_flags = pe_find_renamed|pe_find_anon; break; @@ -1101,7 +1100,7 @@ main(int argc, char **argv) rc = cli_resource_delete_attribute(rsc, rsc_id, prop_set, prop_id, prop_name, cib_conn, &data_set); - } else if ((rsc_cmd == 'C') && rsc) { + } else if ((rsc_cmd == 'R') && rsc) { if (do_force == FALSE) { rsc = uber_parent(rsc); } @@ -1110,8 +1109,8 @@ main(int argc, char **argv) crm_debug("%s of %s (%s requested) on %s", (just_errors? "Clearing failures" : "Re-checking the state"), rsc->id, rsc_id, (host_uname? 
host_uname : "all hosts")); - rc = cli_resource_delete(crmd_channel, host_uname, rsc, operation, - interval, just_errors, &data_set); + rc = cli_resource_delete(crmd_channel, host_uname, rsc, NULL, 0, + &data_set); if ((rc == pcmk_ok) && !BE_QUIET) { // Show any reasons why resource might stay stopped @@ -1122,14 +1121,14 @@ main(int argc, char **argv) start_mainloop(); } - } else if (rsc_cmd == 'C' && just_errors) { + } else if (rsc_cmd == 'C') { rc = cli_cleanup_all(crmd_channel, host_uname, operation, interval, &data_set); if (rc == pcmk_ok) { start_mainloop(); } - } else if (rsc_cmd == 'C') { + } else if (rsc_cmd == 'R') { #if HAVE_ATOMIC_ATTRD const char *router_node = host_uname; xmlNode *msg_data = NULL; From 035bebd78c1936b0749ae64fe949deb5d77effe9 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 13 Feb 2018 12:43:48 -0600 Subject: [PATCH 193/812] Fix: tools: auto-merge was insufficient The master and 2.0 branches had taken different approaches to crm_resource clean-up refactoring in response to different issues. It was necessary to combine the code more carefully. 
--- tools/crm_resource.c | 13 ++----------- tools/crm_resource_runtime.c | 16 +++++++++++----- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/tools/crm_resource.c b/tools/crm_resource.c index d00c8f2b6a9..fc46cc04028 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -628,6 +628,7 @@ main(int argc, char **argv) timeout_ms = crm_get_msec(optarg); break; + case 'C': case 'R': case 'P': crm_log_args(argc, argv); @@ -635,17 +636,7 @@ main(int argc, char **argv) if (cib_file == NULL) { require_crmd = TRUE; } - rsc_cmd = 'R'; - find_flags = pe_find_renamed|pe_find_anon; - break; - - case 'C': - crm_log_args(argc, argv); - require_resource = FALSE; - if (cib_file == NULL) { - require_crmd = TRUE; - } - rsc_cmd = 'C'; + rsc_cmd = 'R'; // disable new behavior until 2.0 find_flags = pe_find_renamed|pe_find_anon; break; diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index ce86a49e6c4..e02cc440bf3 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -621,8 +621,14 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, } // No resource specified means all resources match - if (rsc_id && safe_str_neq(rsc_id, failed_id)) { - continue; + if (rsc_id) { + resource_t *fail_rsc = pe_find_resource_with_flags(data_set->resources, + failed_id, + pe_find_renamed|pe_find_anon); + + if (!fail_rsc || safe_str_neq(rsc_id, fail_rsc->id)) { + continue; + } } // Host name should always have been provided by this point @@ -790,8 +796,8 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, const char *operation, const char *interval, pe_working_set_t *data_set) { - int attr_options = attrd_opt_none; int rc = pcmk_ok; + int attr_options = attrd_opt_none; const char *display_name = node_name? 
node_name : "all nodes"; if (crmd_channel == NULL) { @@ -836,8 +842,8 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, rc = clear_rsc_failures(crmd_channel, node->details->uname, NULL, operation, interval, data_set); if (rc != pcmk_ok) { - printf("Cleaned all resource failures on all nodes, but unable to clean history on %s: %s\n", - node->details->uname, pcmk_strerror(rc)); + printf("Cleaned all resource failures on all nodes, but unable to clean history: %s\n", + pcmk_strerror(rc)); return rc; } } From 5fa351ec714de6b67c456fb1a85a8ebdb658f604 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 7 Aug 2018 10:42:59 -0500 Subject: [PATCH 194/812] Low: tools: update crm_resource for 1.1 vs 2.0 differences --- tools/crm_resource.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/tools/crm_resource.c b/tools/crm_resource.c index fc46cc04028..128d0758a3f 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -352,11 +352,13 @@ static struct crm_option long_options[] = { }, { "operation", required_argument, NULL, 'n', - "\tOperation to clear instead of all (with -C -r)" + "\tOperation to clear instead of all (with -C -r)", + pcmk_option_hidden // only used with 2.0 -C behavior }, { "interval", required_argument, NULL, 'I', - "\tInterval of operation to clear (default 0) (with -C -r -n)" + "\tInterval of operation to clear (default 0) (with -C -r -n)", + pcmk_option_hidden // only used with 2.0 -C behavior }, { "set-name", required_argument, NULL, 's', @@ -1091,17 +1093,16 @@ main(int argc, char **argv) rc = cli_resource_delete_attribute(rsc, rsc_id, prop_set, prop_id, prop_name, cib_conn, &data_set); - } else if ((rsc_cmd == 'R') && rsc) { + } else if ((rsc_cmd == 'C') && rsc) { if (do_force == FALSE) { rsc = uber_parent(rsc); } crmd_replies_needed = 0; - crm_debug("%s of %s (%s requested) on %s", - (just_errors? 
"Clearing failures" : "Re-checking the state"), - rsc->id, rsc_id, (host_uname? host_uname : "all hosts")); - rc = cli_resource_delete(crmd_channel, host_uname, rsc, NULL, 0, - &data_set); + crm_debug("Erasing failures of %s (%s requested) on %s", + rsc->id, rsc_id, (host_uname? host_uname: "all nodes")); + rc = cli_resource_delete(crmd_channel, host_uname, rsc, + operation, interval, TRUE, &data_set); if ((rc == pcmk_ok) && !BE_QUIET) { // Show any reasons why resource might stay stopped @@ -1119,6 +1120,22 @@ main(int argc, char **argv) start_mainloop(); } + } else if ((rsc_cmd == 'R') && rsc) { + if (do_force == FALSE) { + rsc = uber_parent(rsc); + } + crmd_replies_needed = 0; + + crm_debug("Re-checking the state of %s (%s requested) on %s", + rsc->id, rsc_id, (host_uname? host_uname: "all nodes")); + rc = cli_resource_delete(crmd_channel, host_uname, rsc, + NULL, 0, FALSE, &data_set); + + if ((rc == pcmk_ok) && !BE_QUIET) { + // Show any reasons why resource might stay stopped + cli_resource_check(cib_conn, rsc); + } + } else if (rsc_cmd == 'R') { #if HAVE_ATOMIC_ATTRD const char *router_node = host_uname; @@ -1174,8 +1191,8 @@ main(int argc, char **argv) crmd_replies_needed = 0; for (rIter = data_set.resources; rIter; rIter = rIter->next) { rsc = rIter->data; - cli_resource_delete(crmd_channel, host_uname, rsc, NULL, NULL, - &data_set); + cli_resource_delete(crmd_channel, host_uname, rsc, NULL, 0, + FALSE, &data_set); } start_mainloop(); From 555bdce4ceaf9a406059150c9dee047151fb3d94 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 7 Aug 2018 14:11:50 -0500 Subject: [PATCH 195/812] Low: tools: avoid function not available until glib 2.32.0 --- tools/crm_resource_runtime.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index e02cc440bf3..41cc742ade6 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -651,7 +651,10 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, 
const char *node_name, } } + /* not available until glib 2.32 g_hash_table_add(rscs, (gpointer) failed_id); + */ + g_hash_table_insert(rscs, (gpointer) failed_id, (gpointer) failed_id); } g_hash_table_iter_init(&iter, rscs); From 1f645cbabb17b6d2fb08e5b60afed3ce6c9d7dcb Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 7 Aug 2018 08:23:35 -0500 Subject: [PATCH 196/812] Low: controller: set exit-reason when faking failure --- crmd/lrm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crmd/lrm.c b/crmd/lrm.c index db8ed7e4b3b..d18665c7055 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1554,6 +1554,7 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); + op->exit_reason = strdup("Simulated failure"); process_lrm_event(lrm_state, op, NULL); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; From 5bc746c812f936dd5e8234be58a363f8eec0e508 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 8 Aug 2018 15:26:26 -0500 Subject: [PATCH 197/812] Fix: pacemaker-based: inform originator of CIB upgrade failure "cibadmin --upgrade" sends an upgrade request to its local CIB manager, which forwards the request to the DC. If the DC successfully upgrades the CIB, it broadcasts a message causing all peers to perform the upgrade, which also results in local clients such as cibadmin being notified. However, if the upgrade is either not necessary or fails, the DC would simply ignore the request, and local clients would time out waiting for a reply that would never come. Now, the DC will send the error result to the originating peer, which will notify its clients. 
--- cib/callbacks.c | 17 ++++++++++++---- cib/messages.c | 40 +++++++++++++++++++++++++++++++++++--- include/crm/cib/internal.h | 19 +++++------------- 3 files changed, 55 insertions(+), 21 deletions(-) diff --git a/cib/callbacks.c b/cib/callbacks.c index 47397a4f5d0..be41c918c1c 100644 --- a/cib/callbacks.c +++ b/cib/callbacks.c @@ -752,10 +752,18 @@ parse_peer_options_v2(int call_type, xmlNode * request, * limit on how far newer nodes will go */ const char *max = crm_element_value(request, F_CIB_SCHEMA_MAX); + const char *upgrade_rc = crm_element_value(request, F_CIB_UPGRADE_RC); - crm_trace("Parsing %s operation%s for %s with max=%s", - op, is_reply?" reply":"", cib_is_master?"master":"slave", max); - if(max == NULL && cib_is_master) { + crm_trace("Parsing %s operation%s for %s with max=%s and upgrade_rc=%s", + op, (is_reply? " reply" : ""), + (cib_is_master? "master" : "slave"), + (max? max : "none"), (upgrade_rc? upgrade_rc : "none")); + + if (upgrade_rc != NULL) { + // Our upgrade request was rejected by DC, notify clients of result + crm_xml_add(request, F_CIB_RC, upgrade_rc); + + } else if ((max == NULL) && cib_is_master) { /* We are the DC, check if this upgrade is allowed */ goto skip_is_reply; @@ -764,7 +772,8 @@ parse_peer_options_v2(int call_type, xmlNode * request, goto skip_is_reply; } else { - return FALSE; /* Ignore */ + // Ignore broadcast client requests when we're not DC + return FALSE; } } else if (crm_is_true(update)) { diff --git a/cib/messages.c b/cib/messages.c index ff9755294a1..86f8a2faad9 100644 --- a/cib/messages.c +++ b/cib/messages.c @@ -246,6 +246,11 @@ cib_process_upgrade_server(const char *op, int options, const char *section, xml *answer = NULL; if(crm_element_value(req, F_CIB_SCHEMA_MAX)) { + /* The originator of an upgrade request sends it to the DC, without + * F_CIB_SCHEMA_MAX. 
If an upgrade is needed, the DC re-broadcasts the + * request with F_CIB_SCHEMA_MAX, and each node performs the upgrade + * (and notifies its local clients) here. + */ return cib_process_upgrade( op, options, section, req, input, existing_cib, result_cib, answer); @@ -255,6 +260,9 @@ cib_process_upgrade_server(const char *op, int options, const char *section, xml xmlNode *scratch = copy_xml(existing_cib); const char *host = crm_element_value(req, F_ORIG); const char *value = crm_element_value(existing_cib, XML_ATTR_VALIDATION); + const char *client_id = crm_element_value(req, F_CIB_CLIENTID); + const char *call_opts = crm_element_value(req, F_CIB_CALLOPTS); + const char *call_id = crm_element_value(req, F_CIB_CALLID); crm_trace("Processing \"%s\" event", op); if (value != NULL) { @@ -272,9 +280,9 @@ cib_process_upgrade_server(const char *op, int options, const char *section, xml crm_xml_add(up, F_CIB_OPERATION, CIB_OP_UPGRADE); crm_xml_add(up, F_CIB_SCHEMA_MAX, get_schema_name(new_version)); crm_xml_add(up, F_CIB_DELEGATED, host); - crm_xml_add(up, F_CIB_CLIENTID, crm_element_value(req, F_CIB_CLIENTID)); - crm_xml_add(up, F_CIB_CALLOPTS, crm_element_value(req, F_CIB_CALLOPTS)); - crm_xml_add(up, F_CIB_CALLID, crm_element_value(req, F_CIB_CALLID)); + crm_xml_add(up, F_CIB_CLIENTID, client_id); + crm_xml_add(up, F_CIB_CALLOPTS, call_opts); + crm_xml_add(up, F_CIB_CALLID, call_id); if (cib_legacy_mode() && cib_is_master) { rc = cib_process_upgrade( @@ -290,6 +298,32 @@ cib_process_upgrade_server(const char *op, int options, const char *section, xml rc = -pcmk_err_schema_unchanged; } + if (rc != pcmk_ok) { + // Notify originating peer so it can notify its local clients + crm_node_t *origin = crm_find_peer(0, host); + + crm_info("Rejecting upgrade request from %s: %s " + CRM_XS " rc=%d peer=%s", host, pcmk_strerror(rc), rc, + (origin? 
origin->uname : "lost")); + + if (origin) { + xmlNode *up = create_xml_node(NULL, __FUNCTION__); + + crm_xml_add(up, F_TYPE, "cib"); + crm_xml_add(up, F_CIB_OPERATION, CIB_OP_UPGRADE); + crm_xml_add(up, F_CIB_DELEGATED, host); + crm_xml_add(up, F_CIB_ISREPLY, host); + crm_xml_add(up, F_CIB_CLIENTID, client_id); + crm_xml_add(up, F_CIB_CALLOPTS, call_opts); + crm_xml_add(up, F_CIB_CALLID, call_id); + crm_xml_add_int(up, F_CIB_UPGRADE_RC, rc); + if (send_cluster_message(origin, crm_msg_cib, up, TRUE) + == FALSE) { + crm_warn("Could not send CIB upgrade result to %s", host); + } + free_xml(up); + } + } free_xml(scratch); } return rc; diff --git a/include/crm/cib/internal.h b/include/crm/cib/internal.h index 319440eb0e1..18d065ce505 100644 --- a/include/crm/cib/internal.h +++ b/include/crm/cib/internal.h @@ -1,20 +1,10 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
*/ + #ifndef CIB_INTERNAL__H # define CIB_INTERNAL__H # include @@ -49,6 +39,7 @@ # define F_CIB_SECTION "cib_section" # define F_CIB_HOST "cib_host" # define F_CIB_RC "cib_rc" +# define F_CIB_UPGRADE_RC "cib_upgrade_rc" # define F_CIB_DELEGATED "cib_delegated_from" # define F_CIB_OBJID "cib_object" # define F_CIB_OBJTYPE "cib_object_type" From 6314f317090213ded03de1efc5c8ea2c3eeecb21 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 19 Feb 2018 10:26:27 -0600 Subject: [PATCH 198/812] Low: tools: already latest schema is not failure for cibadmin --upgrade --- tools/cibadmin.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/cibadmin.c b/tools/cibadmin.c index 8425044702e..c8cea0f93ed 100644 --- a/tools/cibadmin.c +++ b/tools/cibadmin.c @@ -470,6 +470,14 @@ main(int argc, char **argv) crm_info("Starting mainloop"); g_main_run(mainloop); + } else if ((exit_code == -pcmk_err_schema_unchanged) + && crm_str_eq(cib_action, CIB_OP_UPGRADE, TRUE)) { + + // Already at latest schema + crm_info("Upgrade unnecessary: %s\n", pcmk_strerror(exit_code)); + printf("Upgrade unnecessary: %s\n", pcmk_strerror(exit_code)); + exit_code = 0; + } else if (exit_code < 0) { crm_err("Call failed: %s", pcmk_strerror(exit_code)); fprintf(stderr, "Call failed: %s\n", pcmk_strerror(exit_code)); From c61cf5594b876596b17d0a14574de55df1e4f82e Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 8 Aug 2018 16:02:05 -0500 Subject: [PATCH 199/812] Low: tools: already latest schema is not failure for cibadmin --upgrade a2950209 handled sync results only (cibadmin -s) --- tools/cibadmin.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/cibadmin.c b/tools/cibadmin.c index c8cea0f93ed..66f7661eb35 100644 --- a/tools/cibadmin.c +++ b/tools/cibadmin.c @@ -197,6 +197,17 @@ print_xml_output(xmlNode * xml) } } +// Upgrade requested but already at latest schema +static void +report_schema_unchanged() +{ + const char *err = 
pcmk_strerror(pcmk_err_schema_unchanged); + + crm_info("Upgrade unnecessary: %s\n", err); + printf("Upgrade unnecessary: %s\n", err); + exit_code = 0; +} + int main(int argc, char **argv) { @@ -472,10 +483,7 @@ main(int argc, char **argv) } else if ((exit_code == -pcmk_err_schema_unchanged) && crm_str_eq(cib_action, CIB_OP_UPGRADE, TRUE)) { - - // Already at latest schema - crm_info("Upgrade unnecessary: %s\n", pcmk_strerror(exit_code)); - printf("Upgrade unnecessary: %s\n", pcmk_strerror(exit_code)); + report_schema_unchanged(); exit_code = 0; } else if (exit_code < 0) { @@ -569,7 +577,10 @@ cibadmin_op_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void { exit_code = rc; - if (rc != 0) { + if (rc == -pcmk_err_schema_unchanged) { + report_schema_unchanged(); + + } else if (rc != pcmk_ok) { crm_warn("Call %s failed (%d): %s", cib_action, rc, pcmk_strerror(rc)); fprintf(stderr, "Call %s failed (%d): %s\n", cib_action, rc, pcmk_strerror(rc)); print_xml_output(output); From 041026835bce4d4ae2390daccade0e74c4fa3c1c Mon Sep 17 00:00:00 2001 From: Christine Caulfield Date: Thu, 26 Jul 2018 08:06:45 +0100 Subject: [PATCH 200/812] Shutdown corosync after a fatal error If pacemaker shuts down due to being fenced by a non-power (eg fabric) fence agent then it should also take down corosync so that full cluster service on that node is lost, rather than just resource management. 
https://bugzilla.redhat.com/show_bug.cgi?id=1448221 Signed-off-by: Christine Caulfield --- mcp/pacemaker.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c index f57fc258962..40a2de2a8aa 100644 --- a/mcp/pacemaker.c +++ b/mcp/pacemaker.c @@ -21,6 +21,9 @@ #include #include #include +#ifdef SUPPORT_COROSYNC +#include +#endif #include #include @@ -142,6 +145,28 @@ pcmk_process_exit(pcmk_child_t * child) } } +static void pcmk_exit_with_cluster(int exitcode) +{ +#ifdef SUPPORT_COROSYNC + corosync_cfg_handle_t cfg_handle; + cs_error_t err; + + if (exitcode == DAEMON_RESPAWN_STOP) { + crm_info("Asking Corosync to shut down"); + err = corosync_cfg_initialize(&cfg_handle, NULL); + if (err != CS_OK) { + crm_warn("Unable to open handle to corosync to close it down. err=%d", err); + } + err = corosync_cfg_try_shutdown(cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE); + if (err != CS_OK) { + crm_warn("Corosync shutdown failed. 
err=%d", err); + } + corosync_cfg_finalize(cfg_handle); + } +#endif + crm_exit(exitcode); +} + static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { @@ -423,7 +448,7 @@ pcmk_shutdown_worker(gpointer user_data) if (fatal_error) { crm_notice("Attempting to inhibit respawning after fatal error"); - crm_exit(DAEMON_RESPAWN_STOP); + pcmk_exit_with_cluster(DAEMON_RESPAWN_STOP); } return TRUE; From 54f0d731af6e66373862cf1b13b6087cccd61b6d Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 26 Jul 2018 18:19:53 -0500 Subject: [PATCH 201/812] Test: cts-scheduler: add test for forced anonymous clone --- pengine/test10/force-anon-clone-max.dot | 81 ++++ pengine/test10/force-anon-clone-max.exp | 456 ++++++++++++++++++++ pengine/test10/force-anon-clone-max.scores | 109 +++++ pengine/test10/force-anon-clone-max.summary | 72 ++++ pengine/test10/force-anon-clone-max.xml | 136 ++++++ 5 files changed, 854 insertions(+) create mode 100644 pengine/test10/force-anon-clone-max.dot create mode 100644 pengine/test10/force-anon-clone-max.exp create mode 100644 pengine/test10/force-anon-clone-max.scores create mode 100644 pengine/test10/force-anon-clone-max.summary create mode 100644 pengine/test10/force-anon-clone-max.xml diff --git a/pengine/test10/force-anon-clone-max.dot b/pengine/test10/force-anon-clone-max.dot new file mode 100644 index 00000000000..83f68bfae2e --- /dev/null +++ b/pengine/test10/force-anon-clone-max.dot @@ -0,0 +1,81 @@ +digraph "g" { +"Fencing_start_0 node1" [ style=bold color="green" fontcolor="black"] +"clone1_running_0" [ style=bold color="green" fontcolor="orange"] +"clone1_start_0" -> "clone1_running_0" [ style = bold] +"clone1_start_0" -> "lsb1:0_start_0 node2" [ style = bold] +"clone1_start_0" -> "lsb1:1_start_0 node3" [ style = bold] +"clone1_start_0" [ style=bold color="green" fontcolor="orange"] +"clone2_running_0" [ style=bold color="green" fontcolor="orange"] +"clone2_start_0" -> "clone2_running_0" [ style = 
bold] +"clone2_start_0" -> "lsb2:0_start_0 node1" [ style = bold] +"clone2_start_0" -> "lsb2:1_start_0 node2" [ style = bold] +"clone2_start_0" -> "lsb2:2_start_0 node3" [ style = bold] +"clone2_start_0" [ style=bold color="green" fontcolor="orange"] +"clone3_running_0" [ style=bold color="green" fontcolor="orange"] +"clone3_start_0" -> "clone3_running_0" [ style = bold] +"clone3_start_0" -> "group1:0_start_0" [ style = bold] +"clone3_start_0" -> "group1:1_start_0" [ style = bold] +"clone3_start_0" [ style=bold color="green" fontcolor="orange"] +"dummy1:0_monitor_5000 node1" [ style=bold color="green" fontcolor="black"] +"dummy1:0_start_0 node1" -> "dummy1:0_monitor_5000 node1" [ style = bold] +"dummy1:0_start_0 node1" -> "dummy2:0_start_0 node1" [ style = bold] +"dummy1:0_start_0 node1" -> "group1:0_running_0" [ style = bold] +"dummy1:0_start_0 node1" [ style=bold color="green" fontcolor="black"] +"dummy1:1_monitor_5000 node2" [ style=bold color="green" fontcolor="black"] +"dummy1:1_start_0 node2" -> "dummy1:1_monitor_5000 node2" [ style = bold] +"dummy1:1_start_0 node2" -> "dummy2:1_start_0 node2" [ style = bold] +"dummy1:1_start_0 node2" -> "group1:1_running_0" [ style = bold] +"dummy1:1_start_0 node2" [ style=bold color="green" fontcolor="black"] +"dummy2:0_monitor_5000 node1" [ style=bold color="green" fontcolor="black"] +"dummy2:0_start_0 node1" -> "dummy2:0_monitor_5000 node1" [ style = bold] +"dummy2:0_start_0 node1" -> "group1:0_running_0" [ style = bold] +"dummy2:0_start_0 node1" -> "lsb3:0_start_0 node1" [ style = bold] +"dummy2:0_start_0 node1" [ style=bold color="green" fontcolor="black"] +"dummy2:1_monitor_5000 node2" [ style=bold color="green" fontcolor="black"] +"dummy2:1_start_0 node2" -> "dummy2:1_monitor_5000 node2" [ style = bold] +"dummy2:1_start_0 node2" -> "group1:1_running_0" [ style = bold] +"dummy2:1_start_0 node2" -> "lsb3:1_start_0 node2" [ style = bold] +"dummy2:1_start_0 node2" [ style=bold color="green" fontcolor="black"] 
+"group1:0_running_0" -> "clone3_running_0" [ style = bold] +"group1:0_running_0" [ style=bold color="green" fontcolor="orange"] +"group1:0_start_0" -> "dummy1:0_start_0 node1" [ style = bold] +"group1:0_start_0" -> "dummy2:0_start_0 node1" [ style = bold] +"group1:0_start_0" -> "group1:0_running_0" [ style = bold] +"group1:0_start_0" -> "lsb3:0_start_0 node1" [ style = bold] +"group1:0_start_0" [ style=bold color="green" fontcolor="orange"] +"group1:1_running_0" -> "clone3_running_0" [ style = bold] +"group1:1_running_0" [ style=bold color="green" fontcolor="orange"] +"group1:1_start_0" -> "dummy1:1_start_0 node2" [ style = bold] +"group1:1_start_0" -> "dummy2:1_start_0 node2" [ style = bold] +"group1:1_start_0" -> "group1:1_running_0" [ style = bold] +"group1:1_start_0" -> "lsb3:1_start_0 node2" [ style = bold] +"group1:1_start_0" [ style=bold color="green" fontcolor="orange"] +"lsb1:0_monitor_5000 node2" [ style=bold color="green" fontcolor="black"] +"lsb1:0_start_0 node2" -> "clone1_running_0" [ style = bold] +"lsb1:0_start_0 node2" -> "lsb1:0_monitor_5000 node2" [ style = bold] +"lsb1:0_start_0 node2" [ style=bold color="green" fontcolor="black"] +"lsb1:1_monitor_5000 node3" [ style=bold color="green" fontcolor="black"] +"lsb1:1_start_0 node3" -> "clone1_running_0" [ style = bold] +"lsb1:1_start_0 node3" -> "lsb1:1_monitor_5000 node3" [ style = bold] +"lsb1:1_start_0 node3" [ style=bold color="green" fontcolor="black"] +"lsb2:0_monitor_5000 node1" [ style=bold color="green" fontcolor="black"] +"lsb2:0_start_0 node1" -> "clone2_running_0" [ style = bold] +"lsb2:0_start_0 node1" -> "lsb2:0_monitor_5000 node1" [ style = bold] +"lsb2:0_start_0 node1" [ style=bold color="green" fontcolor="black"] +"lsb2:1_monitor_5000 node2" [ style=bold color="green" fontcolor="black"] +"lsb2:1_start_0 node2" -> "clone2_running_0" [ style = bold] +"lsb2:1_start_0 node2" -> "lsb2:1_monitor_5000 node2" [ style = bold] +"lsb2:1_start_0 node2" [ style=bold color="green" 
fontcolor="black"] +"lsb2:2_monitor_5000 node3" [ style=bold color="green" fontcolor="black"] +"lsb2:2_start_0 node3" -> "clone2_running_0" [ style = bold] +"lsb2:2_start_0 node3" -> "lsb2:2_monitor_5000 node3" [ style = bold] +"lsb2:2_start_0 node3" [ style=bold color="green" fontcolor="black"] +"lsb3:0_monitor_5000 node1" [ style=bold color="green" fontcolor="black"] +"lsb3:0_start_0 node1" -> "group1:0_running_0" [ style = bold] +"lsb3:0_start_0 node1" -> "lsb3:0_monitor_5000 node1" [ style = bold] +"lsb3:0_start_0 node1" [ style=bold color="green" fontcolor="black"] +"lsb3:1_monitor_5000 node2" [ style=bold color="green" fontcolor="black"] +"lsb3:1_start_0 node2" -> "group1:1_running_0" [ style = bold] +"lsb3:1_start_0 node2" -> "lsb3:1_monitor_5000 node2" [ style = bold] +"lsb3:1_start_0 node2" [ style=bold color="green" fontcolor="black"] +} diff --git a/pengine/test10/force-anon-clone-max.exp b/pengine/test10/force-anon-clone-max.exp new file mode 100644 index 00000000000..492de388aa7 --- /dev/null +++ b/pengine/test10/force-anon-clone-max.exp @@ -0,0 +1,456 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
diff --git a/pengine/test10/force-anon-clone-max.scores b/pengine/test10/force-anon-clone-max.scores new file mode 100644 index 00000000000..afcc09ae05e --- /dev/null +++ b/pengine/test10/force-anon-clone-max.scores @@ -0,0 +1,109 @@ +Allocation scores: +clone_color: clone1 allocation score on node1: 0 +clone_color: clone1 allocation score on node2: 0 +clone_color: clone1 allocation score on node3: 0 +clone_color: clone2 allocation score on node1: 0 +clone_color: clone2 allocation score on node2: 0 +clone_color: clone2 allocation score on node3: 0 +clone_color: clone3 allocation score on node1: 0 +clone_color: clone3 allocation score on node2: 0 +clone_color: clone3 allocation score on node3: 0 +clone_color: dummy1:0 allocation score on node1: 0 +clone_color: dummy1:0 allocation score on node2: 0 +clone_color: dummy1:0 allocation score on node3: 0 +clone_color: dummy1:1 allocation score on node1: 0 +clone_color: dummy1:1 allocation score on node2: 0 +clone_color: dummy1:1 allocation score on node3: 0 +clone_color: dummy2:0 allocation score on node1: 0 +clone_color: dummy2:0 allocation score on node2: 0 +clone_color: dummy2:0 allocation score on node3: 0 +clone_color: dummy2:1 allocation score on node1: 0 +clone_color: dummy2:1 allocation score on node2: 0 +clone_color: dummy2:1 allocation score on node3: 0 +clone_color: group1:0 allocation score on node1: 0 +clone_color: group1:0 allocation score on node2: 0 +clone_color: group1:0 allocation score on node3: 0 +clone_color: group1:1 allocation score on node1: 0 +clone_color: group1:1 allocation score on node2: 0 +clone_color: group1:1 allocation score on node3: 0 +clone_color: lsb1:0 allocation score on node1: 0 +clone_color: lsb1:0 allocation score on node2: 0 +clone_color: lsb1:0 allocation score on node3: 0 +clone_color: lsb1:1 allocation score on node1: 0 +clone_color: lsb1:1 allocation score on node2: 0 +clone_color: lsb1:1 allocation score on node3: 0 +clone_color: lsb2:0 allocation score on node1: 0 
+clone_color: lsb2:0 allocation score on node2: 0 +clone_color: lsb2:0 allocation score on node3: 0 +clone_color: lsb2:1 allocation score on node1: 0 +clone_color: lsb2:1 allocation score on node2: 0 +clone_color: lsb2:1 allocation score on node3: 0 +clone_color: lsb2:2 allocation score on node1: 0 +clone_color: lsb2:2 allocation score on node2: 0 +clone_color: lsb2:2 allocation score on node3: 0 +clone_color: lsb3:0 allocation score on node1: 0 +clone_color: lsb3:0 allocation score on node2: 0 +clone_color: lsb3:0 allocation score on node3: 0 +clone_color: lsb3:1 allocation score on node1: 0 +clone_color: lsb3:1 allocation score on node2: 0 +clone_color: lsb3:1 allocation score on node3: 0 +group_color: dummy1:0 allocation score on node1: 0 +group_color: dummy1:0 allocation score on node2: 0 +group_color: dummy1:0 allocation score on node3: 0 +group_color: dummy1:1 allocation score on node1: -INFINITY +group_color: dummy1:1 allocation score on node2: 0 +group_color: dummy1:1 allocation score on node3: 0 +group_color: dummy2:0 allocation score on node1: 0 +group_color: dummy2:0 allocation score on node2: 0 +group_color: dummy2:0 allocation score on node3: 0 +group_color: dummy2:1 allocation score on node1: -INFINITY +group_color: dummy2:1 allocation score on node2: 0 +group_color: dummy2:1 allocation score on node3: 0 +group_color: group1:0 allocation score on node1: 0 +group_color: group1:0 allocation score on node2: 0 +group_color: group1:0 allocation score on node3: 0 +group_color: group1:1 allocation score on node1: -INFINITY +group_color: group1:1 allocation score on node2: 0 +group_color: group1:1 allocation score on node3: 0 +group_color: lsb3:0 allocation score on node1: 0 +group_color: lsb3:0 allocation score on node2: 0 +group_color: lsb3:0 allocation score on node3: 0 +group_color: lsb3:1 allocation score on node1: -INFINITY +group_color: lsb3:1 allocation score on node2: 0 +group_color: lsb3:1 allocation score on node3: 0 +native_color: Fencing 
allocation score on node1: 0 +native_color: Fencing allocation score on node2: 0 +native_color: Fencing allocation score on node3: 0 +native_color: dummy1:0 allocation score on node1: 0 +native_color: dummy1:0 allocation score on node2: 0 +native_color: dummy1:0 allocation score on node3: 0 +native_color: dummy1:1 allocation score on node1: -INFINITY +native_color: dummy1:1 allocation score on node2: 0 +native_color: dummy1:1 allocation score on node3: 0 +native_color: dummy2:0 allocation score on node1: 0 +native_color: dummy2:0 allocation score on node2: -INFINITY +native_color: dummy2:0 allocation score on node3: -INFINITY +native_color: dummy2:1 allocation score on node1: -INFINITY +native_color: dummy2:1 allocation score on node2: 0 +native_color: dummy2:1 allocation score on node3: -INFINITY +native_color: lsb1:0 allocation score on node1: 0 +native_color: lsb1:0 allocation score on node2: 0 +native_color: lsb1:0 allocation score on node3: 0 +native_color: lsb1:1 allocation score on node1: 0 +native_color: lsb1:1 allocation score on node2: -INFINITY +native_color: lsb1:1 allocation score on node3: 0 +native_color: lsb2:0 allocation score on node1: 0 +native_color: lsb2:0 allocation score on node2: 0 +native_color: lsb2:0 allocation score on node3: 0 +native_color: lsb2:1 allocation score on node1: -INFINITY +native_color: lsb2:1 allocation score on node2: 0 +native_color: lsb2:1 allocation score on node3: 0 +native_color: lsb2:2 allocation score on node1: -INFINITY +native_color: lsb2:2 allocation score on node2: -INFINITY +native_color: lsb2:2 allocation score on node3: 0 +native_color: lsb3:0 allocation score on node1: 0 +native_color: lsb3:0 allocation score on node2: -INFINITY +native_color: lsb3:0 allocation score on node3: -INFINITY +native_color: lsb3:1 allocation score on node1: -INFINITY +native_color: lsb3:1 allocation score on node2: 0 +native_color: lsb3:1 allocation score on node3: -INFINITY diff --git 
a/pengine/test10/force-anon-clone-max.summary b/pengine/test10/force-anon-clone-max.summary new file mode 100644 index 00000000000..1f5fc7580a9 --- /dev/null +++ b/pengine/test10/force-anon-clone-max.summary @@ -0,0 +1,72 @@ + +Current cluster status: +Online: [ node1 node2 node3 ] + + Fencing (stonith:fence_imaginary): Stopped + Clone Set: clone1 [lsb1] + Stopped: [ node1 node2 node3 ] + Clone Set: clone2 [lsb2] + Stopped: [ node1 node2 node3 ] + Clone Set: clone3 [group1] + Stopped: [ node1 node2 node3 ] + +Transition Summary: + * Start Fencing ( node1 ) + * Start lsb1:0 ( node2 ) + * Start lsb1:1 ( node3 ) + * Start lsb2:0 ( node1 ) + * Start lsb2:1 ( node2 ) + * Start lsb2:2 ( node3 ) + * Start dummy1:0 ( node1 ) + * Start dummy2:0 ( node1 ) + * Start lsb3:0 ( node1 ) + * Start dummy1:1 ( node2 ) + * Start dummy2:1 ( node2 ) + * Start lsb3:1 ( node2 ) + +Executing cluster transition: + * Resource action: Fencing start on node1 + * Pseudo action: clone1_start_0 + * Pseudo action: clone2_start_0 + * Pseudo action: clone3_start_0 + * Resource action: lsb1:0 start on node2 + * Resource action: lsb1:1 start on node3 + * Pseudo action: clone1_running_0 + * Resource action: lsb2:0 start on node1 + * Resource action: lsb2:1 start on node2 + * Resource action: lsb2:2 start on node3 + * Pseudo action: clone2_running_0 + * Pseudo action: group1:0_start_0 + * Resource action: dummy1:0 start on node1 + * Resource action: dummy2:0 start on node1 + * Resource action: lsb3:0 start on node1 + * Pseudo action: group1:1_start_0 + * Resource action: dummy1:1 start on node2 + * Resource action: dummy2:1 start on node2 + * Resource action: lsb3:1 start on node2 + * Resource action: lsb1:0 monitor=5000 on node2 + * Resource action: lsb1:1 monitor=5000 on node3 + * Resource action: lsb2:0 monitor=5000 on node1 + * Resource action: lsb2:1 monitor=5000 on node2 + * Resource action: lsb2:2 monitor=5000 on node3 + * Pseudo action: group1:0_running_0 + * Resource action: dummy1:0 
monitor=5000 on node1 + * Resource action: dummy2:0 monitor=5000 on node1 + * Resource action: lsb3:0 monitor=5000 on node1 + * Pseudo action: group1:1_running_0 + * Resource action: dummy1:1 monitor=5000 on node2 + * Resource action: dummy2:1 monitor=5000 on node2 + * Resource action: lsb3:1 monitor=5000 on node2 + * Pseudo action: clone3_running_0 + +Revised cluster status: +Online: [ node1 node2 node3 ] + + Fencing (stonith:fence_imaginary): Started node1 + Clone Set: clone1 [lsb1] + Started: [ node2 node3 ] + Clone Set: clone2 [lsb2] + Started: [ node1 node2 node3 ] + Clone Set: clone3 [group1] + Started: [ node1 node2 ] + diff --git a/pengine/test10/force-anon-clone-max.xml b/pengine/test10/force-anon-clone-max.xml new file mode 100644 index 00000000000..84dee6f72ad --- /dev/null +++ b/pengine/test10/force-anon-clone-max.xml @@ -0,0 +1,136 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From b62a9c10c458ba63d1d4c2f3c39e8d29ccc811d9 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 9 Aug 2018 16:57:29 -0500 Subject: [PATCH 202/812] Test: cts: run force-anon-clone-max test forgot to do with 7cf80601 --- pengine/regression.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/pengine/regression.sh b/pengine/regression.sh index 5ba64a1a5e8..793972fb33a 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -334,6 +334,7 @@ echo "" do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node" do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones" do_test clone-anon-failcount "Merge failcounts for anonymous clones" +do_test force-anon-clone-max "Update clone-max properly when forcing a clone to be anonymous" do_test inc0 "Incarnation start" do_test inc1 "Incarnation start order" 
do_test inc2 "Incarnation silent restart, stop, move" From 5d7ec9d21787bbf2190ff0732c7d33134f1d9398 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 30 Jul 2018 15:12:05 -0500 Subject: [PATCH 203/812] Refactor: scheduler: remove redundant code when probing clones --- pengine/clone.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pengine/clone.c b/pengine/clone.c index 1de2661533a..015b9525ca8 100644 --- a/pengine/clone.c +++ b/pengine/clone.c @@ -1382,7 +1382,6 @@ clone_create_probe(resource_t * rsc, node_t * node, action_t * complete, } /* Fall back to the first clone instance */ - CRM_ASSERT(rsc->children); child = rsc->children->data; return child->cmds->create_probe(child, node, complete, force, data_set); } @@ -1394,12 +1393,6 @@ clone_create_probe(resource_t * rsc, node_t * node, action_t * complete, if (child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set)) { any_created = TRUE; } - - if (any_created && is_not_set(rsc->flags, pe_rsc_unique) - && clone_data->clone_node_max == 1) { - /* only look for one copy (clone :0) */ - break; - } } return any_created; From 6cb51c48485cfcbcd39e9a194bfc82b9163f0439 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 30 Jul 2018 15:22:11 -0500 Subject: [PATCH 204/812] Low: scheduler: never probe more than a single instance for anonymous clones A clone_node_max of 0 is legal, if questionable, so remove otherwise redundant test (clone_node_max can never be greater than 1 for anonymous clones) --- pengine/clone.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pengine/clone.c b/pengine/clone.c index 015b9525ca8..962cee79813 100644 --- a/pengine/clone.c +++ b/pengine/clone.c @@ -1325,10 +1325,8 @@ clone_create_probe(resource_t * rsc, node_t * node, action_t * complete, { GListPtr gIter = NULL; gboolean any_created = FALSE; - clone_variant_data_t *clone_data = NULL; CRM_ASSERT(rsc); - get_clone_variant_data(clone_data, rsc); rsc->children = g_list_sort(rsc->children, 
sort_rsc_id); if (rsc->children == NULL) { @@ -1353,8 +1351,7 @@ clone_create_probe(resource_t * rsc, node_t * node, action_t * complete, } } - if (is_not_set(rsc->flags, pe_rsc_unique) - && clone_data->clone_node_max == 1) { + if (is_not_set(rsc->flags, pe_rsc_unique)) { /* only look for one copy */ resource_t *child = NULL; From 4cf2a01afcb39c7eda51c7171325d6dde7a02e1d Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 30 Jul 2018 15:44:01 -0500 Subject: [PATCH 205/812] Refactor: scheduler: functionize clone probes by uniqueness hopefully easier to follow --- pengine/clone.c | 95 ++++++++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 41 deletions(-) diff --git a/pengine/clone.c b/pengine/clone.c index 962cee79813..fb3452e5a83 100644 --- a/pengine/clone.c +++ b/pengine/clone.c @@ -1319,11 +1319,59 @@ find_instance_on(resource_t * rsc, node_t * node) return NULL; } +// For unique clones, probe each instance separately +static gboolean +probe_unique_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, + gboolean force, pe_working_set_t *data_set) +{ + gboolean any_created = FALSE; + + for (GList *child_iter = rsc->children; child_iter != NULL; + child_iter = child_iter->next) { + + resource_t *child = (resource_t *) child_iter->data; + + any_created |= child->cmds->create_probe(child, node, complete, force, + data_set); + } + return any_created; +} + +// For anonymous clones, only a single instance needs to be probed +static gboolean +probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node, + pe_action_t *complete, gboolean force, + pe_working_set_t *data_set) +{ + // First, check if we probed an instance on this node last time + resource_t *child = find_instance_on(rsc, node); + + // Otherwise, check if we plan to start an instance on this node + if (child == NULL) { + for (GList *child_iter = rsc->children; child_iter && !child; + child_iter = child_iter->next) { + + node_t *local_node = NULL; + resource_t *child_rsc 
= (resource_t *) child_iter->data; + + local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); + if (local_node && (local_node->details == node->details)) { + child = child_rsc; + } + } + } + + // Otherwise, use the first clone instance + if (child == NULL) { + child = rsc->children->data; + } + return child->cmds->create_probe(child, node, complete, force, data_set); +} + gboolean clone_create_probe(resource_t * rsc, node_t * node, action_t * complete, gboolean force, pe_working_set_t * data_set) { - GListPtr gIter = NULL; gboolean any_created = FALSE; CRM_ASSERT(rsc); @@ -1351,47 +1399,12 @@ clone_create_probe(resource_t * rsc, node_t * node, action_t * complete, } } - if (is_not_set(rsc->flags, pe_rsc_unique)) { - /* only look for one copy */ - resource_t *child = NULL; - - /* Try whoever we probed last time */ - child = find_instance_on(rsc, node); - if (child) { - return child->cmds->create_probe(child, node, complete, force, data_set); - } - - /* Try whoever we plan on starting there */ - gIter = rsc->children; - for (; gIter != NULL; gIter = gIter->next) { - node_t *local_node = NULL; - resource_t *child_rsc = (resource_t *) gIter->data; - - CRM_ASSERT(child_rsc); - local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); - if (local_node == NULL) { - continue; - } - - if (local_node->details == node->details) { - return child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set); - } - } - - /* Fall back to the first clone instance */ - child = rsc->children->data; - return child->cmds->create_probe(child, node, complete, force, data_set); - } - - gIter = rsc->children; - for (; gIter != NULL; gIter = gIter->next) { - resource_t *child_rsc = (resource_t *) gIter->data; - - if (child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set)) { - any_created = TRUE; - } + if (is_set(rsc->flags, pe_rsc_unique)) { + any_created = probe_unique_clone(rsc, node, complete, force, data_set); + } else { + any_created = 
probe_anonymous_clone(rsc, node, complete, force, + data_set); } - return any_created; } From 8d1b0e4438e7c332dad655cf31691f3ca128e01a Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 30 Jul 2018 18:09:55 -0500 Subject: [PATCH 206/812] Refactor: scheduler: improve efficiency of checking for clone instance on node --- include/crm/pengine/internal.h | 1 - pengine/clone.c | 72 +++++++++++----------------------- 2 files changed, 23 insertions(+), 50 deletions(-) diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h index fe8f6a112a0..0a82a9fdcbd 100644 --- a/include/crm/pengine/internal.h +++ b/include/crm/pengine/internal.h @@ -59,7 +59,6 @@ void verify_pe_options(GHashTable * options); void common_update_score(resource_t * rsc, const char *id, int score); void native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set); -node_t *rsc_known_on(resource_t * rsc, GListPtr * list); gboolean native_unpack(resource_t * rsc, pe_working_set_t * data_set); gboolean group_unpack(resource_t * rsc, pe_working_set_t * data_set); diff --git a/pengine/clone.c b/pengine/clone.c index fb3452e5a83..f74d0f1146b 100644 --- a/pengine/clone.c +++ b/pengine/clone.c @@ -1250,72 +1250,46 @@ clone_expand(resource_t * rsc, pe_working_set_t * data_set) clone_data->promote_notify = NULL; } -node_t * -rsc_known_on(resource_t * rsc, GListPtr * list) +// Check whether a resource or any of its children is known on node +static bool +rsc_known_on(pe_resource_t *rsc, pe_node_t *node) { - GListPtr gIter = NULL; - node_t *one = NULL; - GListPtr result = NULL; - if (rsc->children) { + for (GList *child_iter = rsc->children; child_iter != NULL; + child_iter = child_iter->next) { - gIter = rsc->children; - for (; gIter != NULL; gIter = gIter->next) { - resource_t *child = (resource_t *) gIter->data; + resource_t *child = (resource_t *) child_iter->data; - rsc_known_on(child, &result); + if (rsc_known_on(child, node)) { + return TRUE; + } } } else if 
(rsc->known_on) { - result = g_hash_table_get_values(rsc->known_on); - } - - if (result && g_list_length(result) == 1) { - one = g_list_nth_data(result, 0); - } - - if (list) { - GListPtr gIter = NULL; - - gIter = result; - for (; gIter != NULL; gIter = gIter->next) { - node_t *node = (node_t *) gIter->data; + GHashTableIter iter; + node_t *known_node = NULL; - if (*list == NULL || pe_find_node_id(*list, node->details->id) == NULL) { - *list = g_list_prepend(*list, node); + g_hash_table_iter_init(&iter, rsc->known_on); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) { + if (node->details == known_node->details) { + return TRUE; } } } - - g_list_free(result); - return one; + return FALSE; } -static resource_t * -find_instance_on(resource_t * rsc, node_t * node) +// Look for an instance of clone that is known on node +static pe_resource_t * +find_instance_on(pe_resource_t *clone, pe_node_t *node) { - GListPtr gIter = NULL; - - gIter = rsc->children; - for (; gIter != NULL; gIter = gIter->next) { - GListPtr gIter2 = NULL; - GListPtr known_list = NULL; + for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; - rsc_known_on(child, &known_list); - - gIter2 = known_list; - for (; gIter2 != NULL; gIter2 = gIter2->next) { - node_t *known = (node_t *) gIter2->data; - - if (node->details == known->details) { - g_list_free(known_list); - return child; - } + if (rsc_known_on(child, node)) { + return child; } - g_list_free(known_list); } - return NULL; } @@ -1344,7 +1318,7 @@ probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { // First, check if we probed an instance on this node last time - resource_t *child = find_instance_on(rsc, node); + pe_resource_t *child = find_instance_on(rsc, node); // Otherwise, check if we plan to start an instance on this node if (child == NULL) { From 377ffe0de39fa724091d11e47552298a9bbe7b89 Mon Sep 17 00:00:00 2001 From: 
Ken Gaillot Date: Thu, 12 Jul 2018 10:01:06 -0500 Subject: [PATCH 207/812] Refactor: scheduler: avoid unnecessary probe loop for anonymous clones For anonymous clones, clone_create_probe() first uses find_instance_on() to check for any clone instance (or any of its children, for cloned groups) already known on a node. Only if none is found does is look for an instance allocated to the node, or as a fallback, use the first clone instance. In any case, by the time native_create_probe() is called for a clone instance, we are already sure that no other clone instance is known on this node. Therefore, we do not need to re-check all other clone instances. As a side effect, this resolves two bugs in the removed code. First, probe_anon_group_member() needlessly searched all of data_set->resources when checking for other clone instances, when rsc->children was the right space. Second, an increment_clone() bug introduced in 2.0.0 via commit 6caa3a64 returned rscname:0:10 when given rscname:9. The effect was to short-circuit the check for other instances, which ironically was beneficial. 
--- pengine/native.c | 133 +---------------------------------------------- 1 file changed, 2 insertions(+), 131 deletions(-) diff --git a/pengine/native.c b/pengine/native.c index 1c26642dd86..fe4393fcacb 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -2708,123 +2708,6 @@ DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * return TRUE; } -#include <../lib/pengine/unpack.h> -#define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; -static char * -increment_clone(char *last_rsc_id) -{ - int lpc = 0; - int len = 0; - char *tmp = NULL; - gboolean complete = FALSE; - - CRM_CHECK(last_rsc_id != NULL, return NULL); - if (last_rsc_id != NULL) { - len = strlen(last_rsc_id); - } - - lpc = len - 1; - while (complete == FALSE && lpc > 0) { - switch (last_rsc_id[lpc]) { - case 0: - lpc--; - break; - case '0': - set_char('1'); - break; - case '1': - set_char('2'); - break; - case '2': - set_char('3'); - break; - case '3': - set_char('4'); - break; - case '4': - set_char('5'); - break; - case '5': - set_char('6'); - break; - case '6': - set_char('7'); - break; - case '7': - set_char('8'); - break; - case '8': - set_char('9'); - break; - case '9': - last_rsc_id[lpc] = '0'; - lpc--; - break; - case ':': - tmp = last_rsc_id; - last_rsc_id = calloc(1, len + 2); - memcpy(last_rsc_id, tmp, len); - last_rsc_id[++lpc] = '1'; - last_rsc_id[len] = '0'; - last_rsc_id[len + 1] = 0; - complete = TRUE; - free(tmp); - break; - default: - crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); - return NULL; - break; - } - } - return last_rsc_id; -} - -static node_t * -probe_anon_group_member(resource_t *rsc, node_t *node, - pe_working_set_t *data_set) -{ - resource_t *top = uber_parent(rsc); - - if (is_not_set(top->flags, pe_rsc_unique)) { - /* Annoyingly we also need to check any other clone instances - * Clumsy, but it will work. 
- * - * An alternative would be to update known_on for every peer - * during process_rsc_state() - * - * This code desperately needs optimization - * ptest -x with 100 nodes, 100 clones and clone-max=10: - * No probes O(25s) - * Detection without clone loop O(3m) - * Detection with clone loop O(8m) - - ptest[32211]: 2010/02/18_14:27:55 CRIT: stage5: Probing for unknown resources - ptest[32211]: 2010/02/18_14:33:39 CRIT: stage5: Done - ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Updating action states - ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Done - - */ - char *clone_id = clone_zero(rsc->id); - resource_t *peer = pe_find_resource(top->children, clone_id); - node_t *running = NULL; - - while (peer) { - running = pe_hash_table_lookup(peer->known_on, node->details->id); - if (running != NULL) { - /* we already know the status of the resource on this node */ - pe_rsc_trace(rsc, "Skipping active clone: %s", rsc->id); - free(clone_id); - return running; - } - clone_id = increment_clone(clone_id); - peer = pe_find_resource(data_set->resources, clone_id); - } - - free(clone_id); - } - return NULL; -} - gboolean native_create_probe(resource_t * rsc, node_t * node, action_t * complete, gboolean force, pe_working_set_t * data_set) @@ -2894,20 +2777,8 @@ native_create_probe(resource_t * rsc, node_t * node, action_t * complete, return FALSE; } - running = g_hash_table_lookup(rsc->known_on, node->details->id); - if (running == NULL && is_set(rsc->flags, pe_rsc_unique) == FALSE) { - /* Anonymous clones */ - if (rsc->parent == top) { - running = g_hash_table_lookup(rsc->parent->known_on, node->details->id); - - } else { - // Members of anonymous-cloned groups need special handling - running = probe_anon_group_member(rsc, node, data_set); - } - } - - if (force == FALSE && running != NULL) { - /* we already know the status of the resource on this node */ + // Check whether resource is already known on node + if (!force && g_hash_table_lookup(rsc->known_on, 
node->details->id)) { pe_rsc_trace(rsc, "Skipping known: %s on %s", rsc->id, node->details->uname); return FALSE; } From 498b1acb59d4ade2e8bd3e27d8eef6b0888253ae Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 2 Aug 2018 15:50:35 -0500 Subject: [PATCH 208/812] Fix: tools: simulation should use same history name as live cluster Previously, crm_simulate would always use a resource's XML ID when creating its simulated post-transition LRM history. However, the live cluster sometimes uses the ID without the instance number. This makes the simulation do the same, unless the input LRM history used the number. --- tools/fake_transition.c | 59 +++++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/tools/fake_transition.c b/tools/fake_transition.c index fa0e5370b60..6c865ba3400 100644 --- a/tools/fake_transition.c +++ b/tools/fake_transition.c @@ -270,8 +270,8 @@ find_resource_xml(xmlNode * cib_node, const char *resource) static xmlNode * -inject_resource(xmlNode * cib_node, const char *resource, const char *rclass, const char *rtype, - const char *rprovider) +inject_resource(xmlNode * cib_node, const char *resource, const char *lrm_name, + const char *rclass, const char *rtype, const char *rprovider) { xmlNode *lrm = NULL; xmlNode *container = NULL; @@ -280,9 +280,20 @@ inject_resource(xmlNode * cib_node, const char *resource, const char *rclass, co cib_resource = find_resource_xml(cib_node, resource); if (cib_resource != NULL) { + /* If an existing LRM history entry uses the resource name, + * continue using it, even if lrm_name is different. 
+ */ return cib_resource; } + // Check for history entry under preferred name + if (strcmp(resource, lrm_name)) { + cib_resource = find_resource_xml(cib_node, lrm_name); + if (cib_resource != NULL) { + return cib_resource; + } + } + /* One day, add query for class, provider, type */ if (rclass == NULL || rtype == NULL) { @@ -307,7 +318,7 @@ inject_resource(xmlNode * cib_node, const char *resource, const char *rclass, co } xpath = (char *)xmlGetNodePath(cib_node); - crm_info("Injecting new resource %s into %s '%s'", resource, xpath, ID(cib_node)); + crm_info("Injecting new resource %s into %s '%s'", lrm_name, xpath, ID(cib_node)); free(xpath); lrm = first_named_child(cib_node, XML_CIB_TAG_LRM); @@ -324,7 +335,9 @@ inject_resource(xmlNode * cib_node, const char *resource, const char *rclass, co } cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE); - crm_xml_add(cib_resource, XML_ATTR_ID, resource); + + // If we're creating a new entry, use the preferred name + crm_xml_add(cib_resource, XML_ATTR_ID, lrm_name); crm_xml_add(cib_resource, XML_AGENT_ATTR_CLASS, rclass); crm_xml_add(cib_resource, XML_AGENT_ATTR_PROVIDER, rprovider); @@ -585,7 +598,8 @@ modify_configuration(pe_working_set_t * data_set, cib_t *cib, update_failcounts(cib_node, resource, task, interval, outcome); - cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider); + cib_resource = inject_resource(cib_node, resource, resource, + rclass, rtype, rprovider); CRM_ASSERT(cib_resource != NULL); op = create_op(cib_resource, task, interval, outcome); @@ -631,6 +645,7 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) const char *rclass = NULL; const char *resource = NULL; const char *rprovider = NULL; + const char *lrm_name = NULL; const char *operation = crm_element_value(action->xml, "operation"); const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); @@ -659,6 +674,7 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) * If not 
found use the preferred name anyway */ resource = crm_element_value(action_rsc, XML_ATTR_ID); + lrm_name = resource; // Preferred name when writing history if (pe_find_resource(fake_resource_list, resource) == NULL) { const char *longname = crm_element_value(action_rsc, XML_ATTR_ID_LONG); @@ -690,7 +706,8 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) cib_node = inject_node_state(fake_cib, node, uname_is_uuid ? node : uuid); CRM_ASSERT(cib_node != NULL); - cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider); + cib_resource = inject_resource(cib_node, resource, lrm_name, + rclass, rtype, rprovider); CRM_ASSERT(cib_resource != NULL); op = convert_graph_action(cib_resource, action, 0, target_outcome); @@ -704,29 +721,43 @@ exec_rsc_action(crm_graph_t * graph, crm_action_t * action) for (gIter = fake_op_fail_list; gIter != NULL; gIter = gIter->next) { char *spec = (char *)gIter->data; char *key = NULL; + const char *match_name = NULL; - key = calloc(1, 1 + strlen(spec)); - snprintf(key, strlen(spec), "%s_%s_%d@%s=", resource, op->op_type, op->interval, node); - + // Allow user to specify anonymous clone with or without instance number + key = crm_strdup_printf("%s_%s_%d@%s=", resource, op->op_type, + op->interval, node); if (strncasecmp(key, spec, strlen(key)) == 0) { + match_name = resource; + } + free(key); + + if ((match_name == NULL) && strcmp(resource, lrm_name)) { + key = crm_strdup_printf("%s_%s_%d@%s=", lrm_name, op->op_type, + op->interval, node); + if (strncasecmp(key, spec, strlen(key)) == 0) { + match_name = lrm_name; + } + free(key); + } + + if (match_name != NULL) { + rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc); - // ${resource}_${task}_${interval}@${node}=${rc} + // ${match_name}_${task}_${interval_in_ms}@${node}=${rc} if (rc != 1) { fprintf(stderr, "Invalid failed operation spec: %s. 
Result code must be integer\n", spec); - free(key); continue; } action->failed = TRUE; graph->abort_priority = INFINITY; printf("\tPretending action %d failed with rc=%d\n", action->id, op->rc); - update_failcounts(cib_node, resource, op->op_type, op->interval, op->rc); - free(key); + update_failcounts(cib_node, match_name, op->op_type, + op->interval, op->rc); break; } - free(key); } inject_op(cib_resource, op, target_outcome); From c129c1b4bc0748541e4d3ee1a44dede7deafb779 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 3 Aug 2018 12:12:06 -0500 Subject: [PATCH 209/812] Test: scheduler: make inc6 regression test input more sane The crm_simulate clone numbering change made the inc6 summary change. However, inc6 has an impossible status section; one of the weird things was that there were two separate lrm_resource history entries for child_rsc6:1 on node2, one with a successful start, and one with a probe not-running result. Since the live cluster would not handle this well either and should never have to, remove the probe result, which makes the test output the same again. 
--- pengine/test10/inc6.xml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pengine/test10/inc6.xml b/pengine/test10/inc6.xml index edd085d8df0..dbe13a51a7c 100644 --- a/pengine/test10/inc6.xml +++ b/pengine/test10/inc6.xml @@ -269,9 +269,6 @@ - - - From 36d294061d9e0ba82b7b79edaa47d516d1fd4320 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 9 Aug 2018 15:50:29 -0500 Subject: [PATCH 210/812] Log: scheduler: reword messages when finding anonymous clone instances --- lib/pengine/unpack.c | 45 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 824fc8b2ef4..7e867e0bce6 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1720,7 +1720,7 @@ create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id, * \param[in] data_set Cluster information * \param[in] node Node on which to check for instance * \param[in] parent Clone to check - * \param[in] rsc_id ID of (clone or cloned) resource being searched for + * \param[in] rsc_id Name of cloned resource in history (without instance) */ static resource_t * find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent, @@ -1746,11 +1746,11 @@ find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * pa * "Active" in this case means known to be active at this stage of * unpacking. Because this function is called for a resource before the * resource's individual operation history entries are unpacked, - * locations will generally be NULL. + * locations will generally not contain the desired node. 
* * However, there are three exceptions: * (1) when child is a cloned group and we have already unpacked the - * history of another member of the group; + * history of another member of the group on the same node; * (2) when we've already unpacked the history of another numbered * instance on the same node (which can happen if globally-unique * was flipped from true to false); and @@ -1766,27 +1766,30 @@ find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * pa CRM_LOG_ASSERT(locations->next == NULL); if (((pe_node_t *)locations->data)->details == node->details) { - /* This instance is active on the requested node, so check for - * a corresponding configured resource. We use find_rsc() - * because child may be a cloned group, and we need the - * particular member corresponding to rsc_id. + /* This child instance is active on the requested node, so check + * for a corresponding configured resource. We use find_rsc() + * instead of child because child may be a cloned group, and we + * need the particular member corresponding to rsc_id. * * If the history entry is orphaned, rsc will be NULL. */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); if (rsc) { - pe_rsc_trace(parent, "Resource %s, active", rsc->id); - - /* If there are multiple active instances of an anonymous - * clone in a single node's history (which can happen if - * globally-unique is switched from true to false), we want - * to consider the instances beyond the first as orphans. + /* If there are multiple instance history entries for an + * anonymous clone in a single node's history (which can + * happen if globally-unique is switched from true to + * false), we want to consider the instances beyond the + * first as orphans, even if there are inactive instance + * numbers available. 
*/ if (rsc->running_on) { - crm_notice("Now-anonymous clone %s has multiple instances active on %s", + crm_notice("Active (now-)anonymous clone %s has " + "multiple (orphan) instance histories on %s", parent->id, node->details->uname); skip_inactive = TRUE; rsc = NULL; + } else { + pe_rsc_trace(parent, "Resource %s, active", rsc->id); } } } @@ -1835,20 +1838,16 @@ unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; + parent = uber_parent(clone0); + crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id); } else { - crm_trace("%s is not known as %s either", rsc_id, clone0_id); + crm_trace("%s is not known as %s either (orphan)", + rsc_id, clone0_id); } - - /* Grab the parent clone even if this a different unique instance, - * so we can remember the clone name, which will be the same. - */ - parent = uber_parent(clone0); free(clone0_id); - crm_trace("%s not found: %s", rsc_id, parent ? 
parent->id : "orphan"); - } else if (rsc->variant > pe_native) { - crm_trace("%s is no longer a primitive resource, the lrm_resource entry is obsolete", + crm_trace("Resource history for %s is orphaned because it is no longer primitive", rsc_id); return NULL; From fb996252977ecaddb8a642bbde580063a07469e7 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 13 Aug 2018 11:08:44 -0500 Subject: [PATCH 211/812] Refactor: controller: simplify anonymous CIB update function and make inline for better type checking --- crmd/callbacks.c | 3 +-- crmd/crmd_utils.h | 19 +++++++++++-------- crmd/election.c | 3 +-- crmd/heartbeat.c | 6 ++---- crmd/join_dc.c | 3 +-- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/crmd/callbacks.c b/crmd/callbacks.c index 96791ed71a1..41fe78dff94 100644 --- a/crmd/callbacks.c +++ b/crmd/callbacks.c @@ -278,8 +278,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d /* Update the CIB node state */ update = create_node_state_update(node, flags, NULL, __FUNCTION__); - fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, - cib_scope_local | cib_quorum_override | cib_can_create); + fsa_cib_anon_update(XML_CIB_TAG_STATUS, update); free_xml(update); } diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h index 0fcdc6ab23d..d49642f0fbe 100644 --- a/crmd/crmd_utils.h +++ b/crmd/crmd_utils.h @@ -22,6 +22,7 @@ # include # include # include /* For CIB_OP_MODIFY */ +# include "crmd_fsa.h" // For fsa_cib_conn # include "crmd_alerts.h" # define CLIENT_EXIT_WAIT 30 @@ -48,14 +49,16 @@ crm_err("No CIB connection available"); \ } -# define fsa_cib_anon_update(section, data, options) \ - if(fsa_cib_conn != NULL) { \ - fsa_cib_conn->cmds->modify( \ - fsa_cib_conn, section, data, options); \ - \ - } else { \ - crm_err("No CIB connection available"); \ - } +static inline void +fsa_cib_anon_update(const char *section, xmlNode *data) { + if (fsa_cib_conn == NULL) { + crm_err("No CIB connection available"); + } else { + int opts = 
cib_scope_local | cib_quorum_override | cib_can_create; + + fsa_cib_conn->cmds->modify(fsa_cib_conn, section, data, opts); + } +} extern gboolean fsa_has_quorum; extern int last_peer_update; diff --git a/crmd/election.c b/crmd/election.c index 246b401baa5..9833c6990c9 100644 --- a/crmd/election.c +++ b/crmd/election.c @@ -248,8 +248,7 @@ do_dc_release(long long action, crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN); update = create_node_state_update(node, node_update_expected, NULL, __FUNCTION__); - fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, - cib_scope_local | cib_quorum_override | cib_can_create); + fsa_cib_anon_update(XML_CIB_TAG_STATUS, update); free_xml(update); } register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL); diff --git a/crmd/heartbeat.c b/crmd/heartbeat.c index 3794a3c258c..d47b9696373 100644 --- a/crmd/heartbeat.c +++ b/crmd/heartbeat.c @@ -405,8 +405,7 @@ crmd_ha_status_callback(const char *node, const char *status, void *private) if (AM_I_DC) { update = create_node_state_update(peer, node_update_cluster, NULL, __FUNCTION__); - fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, - cib_scope_local | cib_quorum_override | cib_can_create); + fsa_cib_anon_update(XML_CIB_TAG_STATUS, update); free_xml(update); } } @@ -459,8 +458,7 @@ crmd_client_status_callback(const char *node, const char *client, const char *st crm_trace("Got client status callback"); update = create_node_state_update(peer, node_update_peer, NULL, __FUNCTION__); - fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, - cib_scope_local | cib_quorum_override | cib_can_create); + fsa_cib_anon_update(XML_CIB_TAG_STATUS, update); free_xml(update); } } diff --git a/crmd/join_dc.c b/crmd/join_dc.c index 242377b5471..857e7602176 100644 --- a/crmd/join_dc.c +++ b/crmd/join_dc.c @@ -590,8 +590,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) set_uuid(tmp1, XML_ATTR_UUID, join_node); crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); - 
fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1, - cib_scope_local | cib_quorum_override | cib_can_create); + fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1); free_xml(tmp1); join_node = crm_get_peer(0, join_to); From f95a5fe8b227636b0f0ca5996cae4686a376c03e Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 13 Aug 2018 11:11:35 -0500 Subject: [PATCH 212/812] Log: controller: avoid error message if node state not known in peer callback shouldn't happen, but handles situation better --- crmd/callbacks.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crmd/callbacks.c b/crmd/callbacks.c index 41fe78dff94..b5eac17cf8e 100644 --- a/crmd/callbacks.c +++ b/crmd/callbacks.c @@ -278,7 +278,11 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d /* Update the CIB node state */ update = create_node_state_update(node, flags, NULL, __FUNCTION__); - fsa_cib_anon_update(XML_CIB_TAG_STATUS, update); + if (update == NULL) { + crm_debug("Node state update not yet possible for %s", node->uname); + } else { + fsa_cib_anon_update(XML_CIB_TAG_STATUS, update); + } free_xml(update); } From 39b2cc3d97cff7574556da1ddb41fa7f3cb68683 Mon Sep 17 00:00:00 2001 From: Klaus Wenninger Date: Fri, 22 Jun 2018 17:29:33 +0200 Subject: [PATCH 213/812] Feature: crm_mon: rhbz#1461964 - add fence history features --- tools/crm_mon.c | 720 +++++++++++++++++++++++++++++++++++++++++++----- xml/crm_mon.rng | 50 ++++ 2 files changed, 698 insertions(+), 72 deletions(-) diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 7c638039a1b..3dc6a96b936 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -55,29 +55,36 @@ void clean_up(int rc); void crm_diff_update(const char *event, xmlNode * msg); gboolean mon_refresh_display(gpointer user_data); int cib_connect(gboolean full); -void mon_st_callback(stonith_t * st, stonith_event_t * e); +void mon_st_callback_event(stonith_t * st, stonith_event_t * e); +void mon_st_callback_display(stonith_t * st, stonith_event_t * 
e); +void kick_refresh(gboolean data_updated); static char *get_node_display_name(node_t *node); /* * Definitions indicating which items to print */ -#define mon_show_times (0x0001U) -#define mon_show_stack (0x0002U) -#define mon_show_dc (0x0004U) -#define mon_show_count (0x0008U) -#define mon_show_nodes (0x0010U) -#define mon_show_resources (0x0020U) -#define mon_show_attributes (0x0040U) -#define mon_show_failcounts (0x0080U) -#define mon_show_operations (0x0100U) -#define mon_show_tickets (0x0200U) -#define mon_show_bans (0x0400U) - -#define mon_show_headers (mon_show_times | mon_show_stack | mon_show_dc | mon_show_count) -#define mon_show_default (mon_show_headers | mon_show_nodes | mon_show_resources) -#define mon_show_all (mon_show_default | mon_show_attributes | mon_show_failcounts \ - | mon_show_operations | mon_show_tickets | mon_show_bans) +#define mon_show_times (0x0001U) +#define mon_show_stack (0x0002U) +#define mon_show_dc (0x0004U) +#define mon_show_count (0x0008U) +#define mon_show_nodes (0x0010U) +#define mon_show_resources (0x0020U) +#define mon_show_attributes (0x0040U) +#define mon_show_failcounts (0x0080U) +#define mon_show_operations (0x0100U) +#define mon_show_tickets (0x0200U) +#define mon_show_bans (0x0400U) +#define mon_show_fence_history (0x0800U) + +#define mon_show_headers (mon_show_times | mon_show_stack | mon_show_dc \ + | mon_show_count) +#define mon_show_default (mon_show_headers | mon_show_nodes \ + | mon_show_resources) +#define mon_show_all (mon_show_default | mon_show_attributes \ + | mon_show_failcounts | mon_show_operations \ + | mon_show_tickets | mon_show_bans \ + | mon_show_fence_history) unsigned int show = mon_show_default; @@ -109,6 +116,7 @@ int reconnect_msec = 5000; gboolean daemonize = FALSE; GMainLoop *mainloop = NULL; guint timer_id = 0; +mainloop_timer_t *refresh_timer = NULL; GList *attr_list = NULL; const char *crm_mail_host = NULL; @@ -126,6 +134,10 @@ gboolean one_shot = FALSE; gboolean has_warnings = FALSE; 
gboolean print_timing = FALSE; gboolean watch_fencing = FALSE; +gboolean fence_history = FALSE; +gboolean fence_full_history = FALSE; +gboolean fence_connect = FALSE; +int fence_history_level = 0; gboolean print_brief = FALSE; gboolean print_pending = TRUE; gboolean print_clone_detail = FALSE; @@ -217,6 +229,7 @@ mon_timer_popped(gpointer data) if (timer_id > 0) { g_source_remove(timer_id); + timer_id = 0; } print_as("Reconnecting...\n"); @@ -231,7 +244,26 @@ mon_timer_popped(gpointer data) static void mon_cib_connection_destroy(gpointer user_data) { - print_as("Connection to the CIB terminated\n"); + print_as("Connection to the cluster-daemons terminated\n"); + if (refresh_timer != NULL) { + /* we'll trigger a refresh after reconnect */ + mainloop_timer_stop(refresh_timer); + } + if (timer_id) { + /* we'll trigger a new reconnect-timeout at the end */ + g_source_remove(timer_id); + timer_id = 0; + } + if (st) { + /* the client API won't properly reconnect notifications + * if they are still in the table - so remove them + */ + st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT); + st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE); + if (st->state != stonith_disconnected) { + st->cmds->disconnect(st); + } + } if (cib) { cib->cmds->signoff(cib); timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL); @@ -289,16 +321,24 @@ cib_connect(gboolean full) need_pass = FALSE; } - if (watch_fencing && st == NULL) { + if ((fence_connect) && (st == NULL)) { st = stonith_api_new(); } - if (watch_fencing && st->state == stonith_disconnected) { + if ((fence_connect) && (st->state == stonith_disconnected)) { crm_trace("Connecting to stonith"); rc = st->cmds->connect(st, crm_system_name, NULL); if (rc == pcmk_ok) { crm_trace("Setting up stonith callbacks"); - st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback); + if (watch_fencing) { + st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, + mon_st_callback_event); + 
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event); + } else { + st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, + mon_st_callback_display); + st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_display); + } } } @@ -376,6 +416,9 @@ static struct crm_option long_options[] = { {"timing-details", 0, 0, 't', "\tDisplay resource operation history with timing details" }, {"tickets", 0, 0, 'c', "\t\tDisplay cluster tickets"}, {"watch-fencing", 0, 0, 'W', "\tListen for fencing events. For use with --external-agent, --mail-to and/or --snmp-traps where supported"}, + {"fence-history", 2, 0, 'm', "Show fence history\n" + "\t\t\t\t\t0=off, 1=failures, 2=add successes and pending (default without value),\n" + "\t\t\t\t\t3=show full history without reduction to most recent of each flavor"}, {"neg-locations", 2, 0, 'L', "Display negative location constraints [optionally filtered by id prefix]"}, {"show-node-attributes", 0, 0, 'A', "Display node attributes" }, {"hide-headers", 0, 0, 'D', "\tHide all headers" }, @@ -430,9 +473,26 @@ get_option_desc(char c) continue; if (long_options[lpc].val == c) { - const char * tab = NULL; - tab = strrchr(long_options[lpc].desc, '\t'); - return tab ? ++tab : long_options[lpc].desc; + static char *buf = NULL; + const char *rv; + char *nl; + + /* chop off tabs and cut at newline */ + free(buf); /* free string from last usage */ + buf = strdup(long_options[lpc].desc); + rv = buf; /* make a copy to keep buf pointer unaltered + for freeing when we come by next time. + Like this the result stays valid until + the next call. 
+ */ + while(isspace(rv[0])) { + rv++; + } + nl = strchr(rv, '\n'); + if (nl) { + *nl = '\0'; + } + return rv; } } @@ -454,6 +514,16 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer unused) c = getchar(); switch (c) { + case 'm': + if (!fence_history_level) { + fence_history = TRUE; + fence_connect = TRUE; + if (st == NULL) { + mon_cib_connection_destroy(NULL); + } + } + show ^= mon_show_fence_history; + break; case 'c': show ^= mon_show_tickets; break; @@ -527,6 +597,7 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer unused) print_option_help('R', print_clone_detail); print_option_help('b', print_brief); print_option_help('j', print_pending); + print_option_help('m', (show & mon_show_fence_history)); print_as("\n"); print_as("Toggle fields via field letter, type any other key to return"); } @@ -561,6 +632,12 @@ main(int argc, char **argv) one_shot = TRUE; } + /* to enable stonith-connection when called via some application like pcs + * set environment-variable FENCE_HISTORY to desired level + * so you don't have to modify this application + */ + /* fence_history_level = crm_atoi(getenv("FENCE_HISTORY"), "0"); */ + while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) @@ -584,6 +661,10 @@ main(int argc, char **argv) break; case 'W': watch_fencing = TRUE; + fence_connect = TRUE; + break; + case 'm': + fence_history_level = crm_atoi(optarg, "2"); break; case 'd': daemonize = TRUE; @@ -702,6 +783,21 @@ main(int argc, char **argv) } } + switch (fence_history_level) { + case 3: + fence_full_history = TRUE; + /* fall through to next lower level */ + case 2: + show |= mon_show_fence_history; + /* fall through to next lower level */ + case 1: + fence_history = TRUE; + fence_connect = TRUE; + break; + default: + break; + } + /* Extra sanity checks when in CGI mode */ if (output_format == mon_output_cgi) { argerr += (optind < argc); @@ -876,12 +972,14 @@ count_resources(pe_working_set_t * data_set, 
resource_t * rsc) * \brief Print one-line status suitable for use with monitoring software * * \param[in] data_set Working set of CIB state + * \param[in] history List of stonith actions * * \note This function's output (and the return code when the program exits) * should conform to https://www.monitoring-plugins.org/doc/guidelines.html */ static void -print_simple_status(pe_working_set_t * data_set) +print_simple_status(pe_working_set_t * data_set, + stonith_history_t *history) { GListPtr gIter = NULL; int nodes_online = 0; @@ -2855,12 +2953,13 @@ print_failed_actions(FILE *stream, pe_working_set_t *data_set) switch (output_format) { case mon_output_plain: case mon_output_console: - print_as("\nFailed Actions:\n"); + print_as("\nFailed Resource Actions:\n"); break; case mon_output_html: case mon_output_cgi: - fprintf(stream, "
\n

Failed Actions

\n