mjtrangoni / pacemaker_exporter

Prometheus exporter for Pacemaker high availability resource manager
Apache License 2.0
9 stars 7 forks source link

Error when scraping metrics #2

Closed jenningsloy318 closed 6 years ago

jenningsloy318 commented 6 years ago

Host operating system: output of uname -r

SLES 12 SP1 3.12.74-60.64.82-default

pacemaker_exporter version: output of pacemaker_exporter --version

version=0.0.1, branch=master, revision=7400c41ee027ecd73bc7dddc9105cd5114901fdd

pacemaker_exporter command line flags

no flags specified

What did you do that produced an error?

 ./pacemaker_exporter
INFO[0000] Starting pacemaker_exporter (version=0.0.1, branch=master, revision=7400c41ee027ecd73bc7dddc9105cd5114901fdd)  source="pacemaker_exporter.go:89"
INFO[0000] Build context (go=go1.10, user=root@CTUN50947963A, date=20180820-01:56:57)  source="pacemaker_exporter.go:90"
INFO[0000] Enabled collectors:                           source="pacemaker_exporter.go:97"
INFO[0000]  - crm_mon                                    source="pacemaker_exporter.go:99"
INFO[0000] Listening on :9356                            source="pacemaker_exporter.go:120"
ERRO[0002] strconv.ParseFloat: parsing "PROMOTED": invalid syntax  source="crm_mon_linux.go:48"
ERRO[0002] strconv.ParseFloat: parsing "PROMOTED": invalid syntax  source="crm_mon_linux.go:64"
ERRO[0002] ERROR: crm_mon collector failed after 0.011445s: couldn't get crm_mon information: strconv.ParseFloat: parsing "PROMOTED": invalid syntax  source="collector.go:133"

output of crm_mon -X

<?xml version="1.0"?>
<crm_mon version="1.1.13">
    <summary>
        <last_update time="Mon Aug 20 02:13:39 2018" />
        <last_change time="Mon Aug 20 02:13:24 2018" user="root" client="crm_attribute" origin="yhbmhdbprd01" />
        <stack type="corosync" />
        <current_dc present="true" version="1.1.13-10.4-6f22ad7" name="yhbmhdbprd01" id="170144267" with_quorum="true" />
        <nodes_configured number="2" expected_votes="unknown" />
        <resources_configured number="6" />
        <cluster_options stonith-enabled="true" symmetric-cluster="true" no-quorum-policy="ignore" />
    </summary>
    <nodes>
        <node name="yhbmhdbprd01" id="170144267" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="true" is_dc="true" resources_running="3" type="member" />
        <node name="yhbmhdbprd02" id="170144268" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="true" is_dc="false" resources_running="3" type="member" />
    </nodes>
    <resources>
        <resource id="stonith-sbd" resource_agent="stonith:external/sbd" role="Started" active="true" orphaned="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
            <node name="yhbmhdbprd02" id="170144268" cached="false"/>
        </resource>
        <resource id="rsc_ip_PRD_HDB00" resource_agent="ocf::heartbeat:IPaddr2" role="Started" active="true" orphaned="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
            <node name="yhbmhdbprd01" id="170144267" cached="false"/>
        </resource>
        <clone id="msl_SAPhana_PRD_HDB00" multi_state="true" unique="false" managed="true" failed="false" failure_ignored="false" >
            <resource id="rsc_SAPhana_PRD_HDB00" resource_agent="ocf::suse:SAPHana" role="Master" active="true" orphaned="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
                <node name="yhbmhdbprd01" id="170144267" cached="false"/>
            </resource>
            <resource id="rsc_SAPhana_PRD_HDB00" resource_agent="ocf::suse:SAPHana" role="Slave" active="true" orphaned="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
                <node name="yhbmhdbprd02" id="170144268" cached="false"/>
            </resource>
        </clone>
        <clone id="cln_SAPHanaTopology_PRD_HDB00" multi_state="false" unique="false" managed="true" failed="false" failure_ignored="false" >
            <resource id="rsc_SAPHanaTopology_PRD_HDB00" resource_agent="ocf::suse:SAPHanaTopology" role="Started" active="true" orphaned="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
                <node name="yhbmhdbprd01" id="170144267" cached="false"/>
            </resource>
            <resource id="rsc_SAPHanaTopology_PRD_HDB00" resource_agent="ocf::suse:SAPHanaTopology" role="Started" active="true" orphaned="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
                <node name="yhbmhdbprd02" id="170144268" cached="false"/>
            </resource>
        </clone>
    </resources>
    <node_attributes>
        <node name="yhbmhdbprd01">
            <attribute name="hana_prd_clone_state" value="PROMOTED" />
            <attribute name="hana_prd_op_mode" value="logreplay" />
            <attribute name="hana_prd_remoteHost" value="yhbmhdbprd02" />
            <attribute name="hana_prd_roles" value="4:P:master1:master:worker:master" />
            <attribute name="hana_prd_site" value="SiteA" />
            <attribute name="hana_prd_srmode" value="sync" />
            <attribute name="hana_prd_sync_state" value="PRIM" />
            <attribute name="hana_prd_version" value="1.00.122.16.1520578817" />
            <attribute name="hana_prd_vhost" value="yhbmhdbprd01" />
            <attribute name="lpa_prd_lpt" value="1534731204" />
            <attribute name="maintenance" value="off" />
            <attribute name="master-rsc_SAPhana_PRD_HDB00" value="150" />
        </node>
        <node name="yhbmhdbprd02">
            <attribute name="hana_prd_clone_state" value="DEMOTED" />
            <attribute name="hana_prd_op_mode" value="logreplay" />
            <attribute name="hana_prd_remoteHost" value="yhbmhdbprd01" />
            <attribute name="hana_prd_roles" value="4:S:master1:master:worker:master" />
            <attribute name="hana_prd_site" value="SiteB" />
            <attribute name="hana_prd_srmode" value="sync" />
            <attribute name="hana_prd_sync_state" value="SOK" />
            <attribute name="hana_prd_version" value="1.00.122.16.1520578817" />
            <attribute name="hana_prd_vhost" value="yhbmhdbprd02" />
            <attribute name="lpa_prd_lpt" value="30" />
            <attribute name="maintenance" value="off" />
            <attribute name="master-rsc_SAPhana_PRD_HDB00" value="100" />
        </node>
    </node_attributes>
    <node_history>
        <node name="yhbmhdbprd01">
            <resource_history id="rsc_ip_PRD_HDB00" orphan="false" migration-threshold="5000">
                <operation_history call="26" task="start" last-rc-change="Sun Aug 12 15:41:01 2018" last-run="Sun Aug 12 15:41:01 2018" exec-time="56ms" queue-time="0ms" rc="0" rc_text="ok" />
                <operation_history call="27" task="monitor" interval="10000ms" last-rc-change="Sun Aug 12 15:41:01 2018" exec-time="36ms" queue-time="0ms" rc="0" rc_text="ok" />
            </resource_history>
            <resource_history id="stonith-sbd" orphan="false" migration-threshold="5000">
                <operation_history call="34" task="monitor" interval="10000ms" last-rc-change="Sun Aug 12 15:53:53 2018" exec-time="1268ms" queue-time="0ms" rc="0" rc_text="ok" />
                <operation_history call="36" task="stop" last-rc-change="Sun Aug 12 15:54:12 2018" last-run="Sun Aug 12 15:54:12 2018" exec-time="1ms" queue-time="0ms" rc="0" rc_text="ok" />
            </resource_history>
            <resource_history id="rsc_SAPhana_PRD_HDB00" orphan="false" migration-threshold="5000">
                <operation_history call="29" task="promote" last-rc-change="Sun Aug 12 15:41:19 2018" last-run="Sun Aug 12 15:41:19 2018" exec-time="27689ms" queue-time="0ms" rc="0" rc_text="ok" />
                <operation_history call="30" task="monitor" interval="60000ms" last-rc-change="Sun Aug 12 15:41:54 2018" exec-time="7376ms" queue-time="0ms" rc="8" rc_text="master" />
            </resource_history>
            <resource_history id="rsc_SAPHanaTopology_PRD_HDB00" orphan="false" migration-threshold="5000">
                <operation_history call="20" task="start" last-rc-change="Sun Aug 12 15:38:27 2018" last-run="Sun Aug 12 15:38:27 2018" exec-time="3696ms" queue-time="0ms" rc="0" rc_text="ok" />
                <operation_history call="21" task="monitor" interval="10000ms" last-rc-change="Sun Aug 12 15:38:31 2018" exec-time="5507ms" queue-time="0ms" rc="0" rc_text="ok" />
            </resource_history>
        </node>
        <node name="yhbmhdbprd02">
            <resource_history id="stonith-sbd" orphan="false" migration-threshold="5000">
                <operation_history call="60" task="start" last-rc-change="Sun Aug 12 16:01:37 2018" last-run="Sun Aug 12 16:01:37 2018" exec-time="1226ms" queue-time="0ms" rc="0" rc_text="ok" />
                <operation_history call="61" task="monitor" interval="10000ms" last-rc-change="Sun Aug 12 16:01:38 2018" exec-time="1249ms" queue-time="0ms" rc="0" rc_text="ok" />
            </resource_history>
            <resource_history id="rsc_SAPhana_PRD_HDB00" orphan="false" migration-threshold="5000">
                <operation_history call="30" task="monitor" interval="60000ms" last-rc-change="Sun Aug 12 15:37:49 2018" exec-time="4176ms" queue-time="0ms" rc="8" rc_text="master" />
                <operation_history call="42" task="start" last-rc-change="Sun Aug 12 15:42:06 2018" last-run="Sun Aug 12 15:42:06 2018" exec-time="35751ms" queue-time="0ms" rc="0" rc_text="ok" />
                <operation_history call="43" task="monitor" interval="61000ms" last-rc-change="Sun Aug 12 15:42:42 2018" exec-time="4261ms" queue-time="0ms" rc="0" rc_text="ok" />
            </resource_history>
            <resource_history id="rsc_SAPHanaTopology_PRD_HDB00" orphan="false" migration-threshold="5000">
                <operation_history call="40" task="start" last-rc-change="Sun Aug 12 15:42:02 2018" last-run="Sun Aug 12 15:42:02 2018" exec-time="3754ms" queue-time="0ms" rc="0" rc_text="ok" />
                <operation_history call="41" task="monitor" interval="10000ms" last-rc-change="Sun Aug 12 15:42:06 2018" exec-time="5455ms" queue-time="0ms" rc="0" rc_text="ok" />
            </resource_history>
            <resource_history id="rsc_ip_PRD_HDB00" orphan="false" migration-threshold="5000">
                <operation_history call="24" task="monitor" interval="10000ms" last-rc-change="Sun Aug 12 15:37:29 2018" exec-time="36ms" queue-time="0ms" rc="0" rc_text="ok" />
                <operation_history call="34" task="stop" last-rc-change="Sun Aug 12 15:41:01 2018" last-run="Sun Aug 12 15:41:01 2018" exec-time="44ms" queue-time="0ms" rc="0" rc_text="ok" />
            </resource_history>
        </node>
    </node_history>
    <tickets>
    </tickets>
    <bans>
    </bans>
</crm_mon>
mjtrangoni commented 6 years ago

Hi @jenningsloy318, thanks for reporting! Could you please check whether this is fixed on master now? Please do not hesitate to report any issues you find, or to let me know if any information is missing that I should include.

jenningsloy318 commented 6 years ago

@mjtrangoni

I compiled it again, and now it works.

Could you also update the README to specify that pacemaker_exporter needs superuser privileges to invoke "crm_mon -X"?

wat101 commented 6 years ago

Hi, I'm getting the same error after compiling it again. Please see the error messages below.

./pacemaker_exporter
INFO[0000] Starting pacemaker_exporter (version=0.0.1, branch=master, revision=893a693b43da9b4cf87a86111388f0bc6411a199) source="pacemaker_exporter.go:89"
INFO[0000] Build context (go=go1.9.1, user=cloud@test, date=20180824-03:30:39) source="pacemaker_exporter.go:90"
INFO[0000] Enabled collectors: source="pacemaker_exporter.go:97"
INFO[0000] - crm_mon source="pacemaker_exporter.go:99"
INFO[0000] Listening on :9356 source="pacemaker_exporter.go:120"
ERRO[0003] strconv.ParseFloat: parsing "galera-bundle-0": invalid syntax source="crm_mon_linux.go:48"
ERRO[0003] strconv.ParseFloat: parsing "galera-bundle-0": invalid syntax source="crm_mon_linux.go:64"
ERRO[0003] ERROR: crm_mon collector failed after 0.059715s: couldn't get crm_mon information: strconv.ParseFloat: parsing "galera-bundle-0": invalid syntax source="collector.go:133"

wat101 commented 6 years ago

Additional info:

crm_mon -X | grep galera-bundle-0

    <node name="galera-bundle-0" id="galera-bundle-0" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="false" is_dc="false" resources_running="1" type="remote" id_as_resource="galera-bundle-docker-0" />
                <node name="galera-bundle-0" id="galera-bundle-0" cached="false"/>
            <resource id="galera-bundle-0" resource_agent="ocf::pacemaker:remote" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
    <node name="galera-bundle-0">
        <resource_history id="galera-bundle-0" orphan="false" migration-threshold="1000000">
    <node name="galera-bundle-0">
mjtrangoni commented 6 years ago

Hi @wat101, could you please open a separate issue for this? Please also provide the complete crm_mon -X output (anonymized, or sent privately by e-mail if you prefer), as I need to see the complete XML structure. In addition, please let me know which Pacemaker version you are running. Thanks!

mjtrangoni commented 6 years ago

Hi @jenningsloy318, I added a note that this has to be run as the root user. Thanks for pointing this out!