Open barrettc opened 1 year ago
Hi,
yes, the quorum should be based on the number of nodes. Can you check how many nodes are listed in the distributed-config.json
that is inside the database folder?
Bye
@tglman This is the content of my distributed-config.json
:
{"@type":"d","@rid":"#0:3","@version":29,"version":55,"autoDeploy":true,"readQuorum":1,"writeQuorum":"majority","readYourWrites":true,"newNodeStrategy":"dynamic","servers":{"@type":"d","@version":0,"*":"master"},"clusters":{"@type"
:"d","@version":0,"internal":{"@type":"d","@version":0},"*":{"@type":"d","@version":0,"servers":["node1694781302274","node1694781327402","node1695048481292","node1695048478158","node1695050121611","node1695245312502","node169524531
2166","node1695245315577","<NEW_NODE>"]},"movie_1":{"@type":"d","@version":0,"servers":["node1695245312502","node1694781302274","node1695048478158","node1694781327402","node1695048481292","node1695050121611","node1695245312166","no
de1695245315577","<NEW_NODE>"]},"movie_2":{"@type":"d","@version":0,"servers":["node1694781302274","node1694781327402","node1695048481292","node1695048478158","node1695050121611","node1695245312502","node1695245312166","node1695245
315577","<NEW_NODE>"]},"character_1":{"@type":"d","@version":0,"servers":["node1694781302274","node1694781327402","node1695048481292","node1695048478158","node1695050121611","node1695245312502","node1695245312166","node169524531557
7","<NEW_NODE>"]},"character_2":{"@type":"d","@version":0,"servers":["node1695245315577","node1695048481292","node1695050121611","node1694781302274","node1694781327402","node1695048478158","node1695245312502","node1695245312166","<
NEW_NODE>"]},"group_1":{"@type":"d","@version":0,"servers":["node1695048481292","node1694781327402","node1694781302274","node1695048478158","node1695050121611","node1695245312502","node1695245312166","node1695245315577","<NEW_NODE>
"]},"group_2":{"@type":"d","@version":0,"servers":["node1695245315577","node1694781302274","node1695050121611","node1694781327402","node1695048481292","node1695048478158","node1695245312502","node1695245312166","<NEW_NODE>"]},"appe
ars_in_1":{"@type":"d","@version":0,"servers":["node1695048481292","node1694781327402","node1694781302274","node1695048478158","node1695050121611","node1695245312502","node1695245312166","node1695245315577","<NEW_NODE>"]},"appears_
in_2":{"@type":"d","@version":0,"servers":["node1695245315577","node1694781302274","node1695050121611","node1694781327402","node1695048481292","node1695048478158","node1695245312502","node1695245312166","<NEW_NODE>"]},"has_characte
r_1":{"@type":"d","@version":0,"servers":["node1695245312502","node1694781302274","node1695048478158","node1694781327402","node1695048481292","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"has_character
_2":{"@type":"d","@version":0,"servers":["node1695245315577","node1695048481292","node1695050121611","node1694781302274","node1694781327402","node1695048478158","node1695245312502","node1695245312166","<NEW_NODE>"]},"killed_by_1":{
"@type":"d","@version":0,"servers":["node1695245315577","node1694781302274","node1695050121611","node1694781327402","node1695048481292","node1695048478158","node1695245312502","node1695245312166","<NEW_NODE>"]},"killed_by_2":{"@typ
e":"d","@version":0,"servers":["node1695245312502","node1695048481292","node1695048478158","node1694781302274","node1694781327402","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"killed_1":{"@type":"d","
@version":0,"servers":["node1694781302274","node1694781327402","node1695048481292","node1695048478158","node1695050121611","node1695245312502","node1695245312166","node1695245315577","<NEW_NODE>"]},"killed_2":{"@type":"d","@version
":0,"servers":["node1695245312502","node1694781302274","node1695048478158","node1694781327402","node1695048481292","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"member_of_1":{"@type":"d","@version":0,"
servers":["node1694781302274","node1694781327402","node1695048481292","node1695048478158","node1695050121611","node1695245312502","node1695245312166","node1695245315577","<NEW_NODE>"]},"member_of_2":{"@type":"d","@version":0,"serve
rs":["node1695245312502","node1694781302274","node1695048478158","node1694781327402","node1695048481292","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"has_member_1":{"@type":"d","@version":0,"servers":
["node1695048481292","node1694781327402","node1694781302274","node1695048478158","node1695050121611","node1695245312502","node1695245312166","node1695245315577","<NEW_NODE>"]},"has_member_2":{"@type":"d","@version":0,"servers":["no
de1695245315577","node1694781302274","node1695050121611","node1694781327402","node1695048481292","node1695048478158","node1695245312502","node1695245312166","<NEW_NODE>"]},"has_member":{"@type":"d","@version":0,"servers":["node1695
245312502","node1695048481292","node1695048478158","node1694781302274","node1694781327402","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"e":{"@type":"d","@version":0,"servers":["node1695245312502","nod
e1694781327402","node1694781302274","node1695048481292","node1695048478158","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"has_character":{"@type":"d","@version":0,"servers":["node1694781302274","node16
94781327402","node1695048481292","node1695048478158","node1695050121611","node1695245312502","node1695245312166","node1695245315577","<NEW_NODE>"]},"ofunction":{"@type":"d","@version":0,"servers":["node1695245312502","node169478132
7402","node1694781302274","node1695048481292","node1695048478158","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"character":{"@type":"d","@version":0,"servers":["node1695245312502","node1694781302274","
node1695048478158","node1695048481292","node1694781327402","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"v":{"@type":"d","@version":0,"servers":["node1695245312502","node1694781327402","node16947813022
74","node1695048481292","node1695048478158","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"oschedule":{"@type":"d","@version":0,"servers":["node1695245312502","node1694781327402","node1694781302274","no
de1695048481292","node1695048478158","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"appears_in":{"@type":"d","@version":0,"servers":["node1695245312502","node1695048481292","node1695048478158","node1694
781302274","node1694781327402","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"osequence":{"@type":"d","@version":0,"servers":["node1695245312502","node1694781327402","node1694781302274","node16950484812
92","node1695048478158","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"group":{"@type":"d","@version":0,"servers":["node1695245312502","node1695048481292","node1695048478158","node1694781302274","node16
94781327402","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"orole":{"@type":"d","@version":0,"servers":["node1695245312502","node1694781327402","node1694781302274","node1695048481292","node1695048478158
","node1695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"killed_by":{"@type":"d","@version":0,"servers":["node1695048481292","node1694781327402","node1694781302274","node1695048478158","node1695050121611","node
1695245312502","node1695245312166","node1695245315577","<NEW_NODE>"]},"osecuritypolicy":{"@type":"d","@version":0,"servers":["node1695245312502","node1694781327402","node1694781302274","node1695048481292","node1695048478158","node1
695050121611","node1695245312166","node1695245315577","<NEW_NODE>"]},"ouser":{"@type":"d","@version":0,"servers":["node1695245312502","node1694781327402","node1694781302274","node1695048481292","node1695048478158","node169505012161
1","node1695245312166","node1695245315577","<NEW_NODE>"]},"movie":{"@type":"d","@version":0,"servers":["node1695245315577","node1695048481292","node1695050121611","node1694781302274","node1694781327402","node1695048478158","node169
5245312502","node1695245312166","<NEW_NODE>"]},"member_of":{"@type":"d","@version":0,"servers":["node1695245315577","node1695048481292","node1695050121611","node1694781302274","node1694781327402","node1695048478158","node1695245312
502","node1695245312166","<NEW_NODE>"]},"killed":{"@type":"d","@version":0,"servers":["node1695245315577","node1695048481292","node1695050121611","node1694781302274","node1694781327402","node1695048478158","node1695245312502","node
1695245312166","<NEW_NODE>"]},"pagerankresult":{"@type":"d","@version":0,"servers":["node1695245315577","node1695048481292","node1694781327402","node1694781302274","node1695048478158","node1695050121611","node1695245312502","node16
95245312166","<NEW_NODE>"]},"pagerankresult_1":{"@type":"d","@version":0,"servers":["node1695245312502","node1694781302274","node1694781327402","node1695048481292","node1695048478158","node1695050121611","node1695245312166","node16
95245315577","<NEW_NODE>"]},"pagerankresult_2":{"@type":"d","@version":0,"servers":["node1695245312166","node1695245312502","node1695050121611","node1694781302274","node1694781327402","node1695048481292","node1695048478158","node16
95245315577","<NEW_NODE>"]},"pagerankresult_3":{"@type":"d","@version":0,"servers":["node1695050121611","node1694781302274","node1694781327402","node1695048481292","node1695048478158","node1695245312502","node1695245312166","node16
95245315577","<NEW_NODE>"]}}}
I'm not sure how to interpret this but it seems like a lot more than 3 nodes are listed here. Should I try uninstalling/reinstalling the Helm chart?
Hi,
yes the node names participating in the cluster are "node1695048481292","node1694781327402","node1694781302274","node1695048478158","node1695050121611","node1695245312502","node1695245312166","node1695245315577"
if you are aware what are the name of the node currently running you can remove them with the command: https://orientdb.com/docs/3.2.x/sql/SQL-HA-Remove-Server.html?highlight=HA%20REMOVE#sql---ha-remove-server
Also, I see that the node names appear to be generated at runtime, so consider configuring the name as specified here https://orientdb.com/docs/3.2.x/distributed/Distributed-Configuration.html or setting the ORIENTDB_NODE_NAME environment variable for each instance.
I have the feeling that this problem arose from server restarts generating new node names, which kept adding entries to the node list.
Thanks - I'll clean this up and report back
@tglman One question - I'm using the OrientDB Helm chart to manage the installation on my k8s cluster. Do you know a good way to specify multiple node names using this method of installation?
Hi,
I'm not an expert on Helm charts, and looking at it right now, there doesn't seem to be a way to set this, so the chart probably needs some evolution to map the OrientDB node name to a constant name provided by the cluster. From OrientDB's point of view, the node name can be provided via the environment variable ORIENTDB_NODE_NAME, or by injecting the name into the configuration, which in the Helm chart is here: https://github.com/orientechnologies/orientdb-helm/blob/master/templates/configmap.yaml#L89
Another way could be to attach the HA remove node
command as a pre-step of the controlled shutdown of the node, so the node is not left in the configuration. However, this cannot be made automatic on disconnection, because otherwise it would introduce split-brain issues.
OrientDB Version: 3.2.21
Java Version: 8
OS: Linux
Expected behavior
I'm running OrientDB in a 3 node k3s cluster. I have installed with the OrientDB helm chart and
distributedDB.writeQuorum=majority
. I'd expect a simple write to succeed in this case as long as 2 nodes are online.
Actual behavior
My write fails and I believe the relevant part of the stack trace is:
I don't understand why I would need a quorum of 5 when I have only 3 nodes and all of them are online.