Open drago72 opened 3 days ago
Thanks for the report, loads of great detail there.
I don't know how many MySQL admins we have here, but I used to run a couple of instances long ago, so I'm gonna try and reproduce this. Could you help by describing the "intensive test" in more detail? Alternatively, do you know if there's any convenient load generator or similar for MySQL that might trip it? Also, your full MySQL config would be useful - I remember there's some other tuning often needed to make it work well on ZFS, and I wouldn't want anything to mask the issue.
Thanks!
@bwatkinson anything in that backtrace look obvious to you?
@bwatkinson anything in that backtrace look obvious to you?
Nothing immediately obvious to me stands out looking at the backtrace from the kernel. It is odd that we are getting a bad page state when freeing up the pages.
Sorry for the delayed response.
I've conducted further investigations and observed that when MySQL uses direct I/O (innodb_flush_method = O_DIRECT) on a dataset with sync = standard (changed from none), the errors reported by MySQL change from:
(sync = None)
2024-10-13T08:14:36.225187+02:00 16253 [ERROR] [MY-012639] [InnoDB] Write to file ./mysql_test_BMOTLWLI/#sql-ib17083-2763134737.ibd failed at offset 0, 114688 bytes should have been written, only 0 were written. Operating system error number 12. Check that your OS and file system support files of this size. Check also that the disk is not full or a disk quota exceeded.
2024-10-13T08:14:36.225245+02:00 16253 [ERROR] [MY-012640] [InnoDB] Error number 12 means 'Cannot allocate memory'
2024-10-13T08:14:36.225758+02:00 16253 [ERROR] [MY-012129] [InnoDB] Could not write the first page to tablespace './mysql_test_BMOTLWLI/#sql-ib17083-2763134737.ibd'
to:
(sync = standard)
2024-10-16T09:32:28.139769+02:00 105 [ERROR] [MY-012639] [InnoDB] Write to file ./mysql_test_PWZFDTWD/table_1.ibd failed at offset 0, 114688 bytes should have been written, only 0 were written. Operating system error number 12. Check that your OS and file system support files of this size. Check also that the disk is not full or a disk quota exceeded.
2024-10-16T09:32:28.139805+02:00 105 [ERROR] [MY-012640] [InnoDB] Error number 12 means 'Cannot allocate memory'
2024-10-16T09:32:28.139846+02:00 105 [ERROR] [MY-012129] [InnoDB] Could not write the first page to tablespace './mysql_test_PWZFDTWD/table_1.ibd'
2024-10-16T09:32:28.141953+02:00 105 [ERROR] [MY-013133] [Server] Create table/tablespace 'table_1' failed, as disk is full.
This might help in understanding the issue.
I've quickly rewritten the "intensive test" script to make it public. This script reproduces the problem.
#!/usr/bin/env python3.12
from typing import Any
from random import choice as rChoice, randint as rRandint, seed as rSeed, sample as rSample
from datetime import datetime
from time import time as tTime, sleep as tSleep
from string import ascii_uppercase
from json import loads as jLoads
from numpy import mean as npMean
from threading import Thread
# python3.12 -m pip install -U mysql-connector-python
from mysql.connector import Error as mSQLError, Warning as mSQLWarning, __version_info__ as mSQLversion
from mysql.connector import MySQLConnection, CMySQLConnection, HAVE_CEXT
class statistiche:
    """Per-phase timing accumulator for one benchmark run.

    Each ``t*`` attribute holds the elapsed seconds of one benchmark phase;
    ``tempi`` maps a short phase label to its averaged time once a run ends.
    """

    def __init__(self):
        # label -> averaged phase time in seconds, filled in at end of run
        self.tempi = {}
        # Every phase timer starts at zero seconds.
        for campo in ("tCreate", "tInsert", "tOpt",
                      "tSelect", "tUpdate", "tInfo", "tTot"):
            setattr(self, campo, 0.0)
class benchmark(Thread, statistiche):
    """One benchmark worker thread.

    Each thread creates its own throw-away schema ("mysql_test_XXXXXXXX"),
    then repeatedly runs a cycle of CREATE TABLE / INSERT / SELECT / UPDATE /
    information_schema queries against it, recording per-phase timings
    (inherited from `statistiche`).
    """

    # Connection settings shared by every worker thread (class attribute).
    # NOTE(review): connetti() writes 'use_pure' into this shared dict, so all
    # threads mutate the same object — benign here since they all write the
    # same value, but worth confirming.
    __cfgConn: dict = {'user': 'root',
                       'passwd': 'password_root',
                       'unix_socket': '/var/run/mysqld/mysqld.sock',
                       'connect_timeout': 600,
                       'autocommit': True,
                       'charset': 'utf8mb4'}

    def __init__(self, tId, nrRip=3, nrTab=50, nrRowTab=100, nrSelUpd=4000) -> None:
        """Set up thread identity, workload sizes and a pool of random strings.

        tId      -- thread index; also used to stagger thread start-up sleeps.
        nrRip    -- repetitions of the full benchmark cycle.
        nrTab    -- tables created per repetition.
        nrRowTab -- rows inserted into each table.
        nrSelUpd -- iterations of each SELECT/UPDATE phase.
        """
        Thread.__init__(self)
        statistiche.__init__(self)
        self.threadID = tId
        self.ritorno = ""  # human-readable timing summary, filled by run()
        self.__colConvJson = []  # column names whose values get json-decoded
        # Unique per-thread schema name, e.g. "mysql_test_ABCDEFGH".
        self.__nomeSk = "mysql_test_" + "".join(rChoice(ascii_uppercase) for _ in range(8))
        self.__nrRip = nrRip
        self.__nrTab = nrTab
        self.__nrRowTab = nrRowTab
        self.__nrSelUpd = nrSelUpd
        # Pre-generate 100 random 16-char uppercase strings used as payloads.
        self.__text_rnd = []
        for _ in range(100):
            self.__text_rnd.append("".join(rChoice(ascii_uppercase) for _ in range(16)))

    def __byteToString(self, valore: Any, tipo: str = "utf-8", corr: str | None = None,
                       returnErr: str | None = None) -> str | None:
        """Decode bytes with encoding `tipo` (error mode `corr` if given);
        stringify any other value. Returns the stripped string, or
        `returnErr` if decoding/stringifying raises."""
        try:
            if isinstance(valore, bytes):
                if corr is None:
                    rit = valore.decode(tipo)
                else:
                    rit = valore.decode(tipo, errors=corr)
            else:
                rit = str(valore)
            return rit.strip()
        except Exception:
            return returnErr

    def __convertiTipo(self, valore: Any, tipo: str) -> Any:
        """Coerce a raw column value to the Python type named by `tipo`,
        falling back to a neutral default (0 / 0.0 / None) on failure."""
        if valore is None:
            return None
        if tipo in ('tinyint', 'mediumint', 'int', 'long'):
            try:
                return int(valore)
            except Exception:
                return 0
        elif tipo in ('float', 'double'):
            try:
                return float(valore)
            except Exception:
                return 0.0
        elif tipo == 'date':
            valore = self.__byteToString(valore, returnErr='')
            try:
                return datetime.strptime(valore[0:10], '%Y-%m-%d').date()
            except Exception:
                return None
        elif tipo == 'datetime':
            valore = self.__byteToString(valore, returnErr='')
            try:
                if len(valore) == 10:
                    # NOTE(review): a 10-char value cannot match this 19-char
                    # format string, so this branch always raises and returns
                    # None via the except below — confirm intent.
                    return datetime.strptime(valore, '%Y-%m-%d 00:00:00')
                else:
                    return datetime.strptime(valore, '%Y-%m-%d %H:%M:%S')
            except Exception:
                return None
        elif tipo == 'Decimal':
            try:
                return float(valore)
            except Exception:
                return 0.0
        else:
            return str(valore)

    def __convertRit(self, ritorno: Any, ritTipo: str) -> Any:
        """Post-process a fetched result set according to `ritTipo`.

        Scalar names ('int', 'str', ...) take row 0 / column 0 through
        __convertiTipo; 'list'/'dict' convert every row ('list1'/'dict1'
        only the first, then unwrap it); 'tuple1' unwraps the first tuple.
        Columns listed in __colConvJson are json-decoded along the way.
        """
        if ritorno in (None, ""):
            return None
        if isinstance(ritorno, tuple) and len(ritorno) == 0:
            return None
        elif ritTipo in ('int', 'long', 'str', 'float', 'double', 'date', 'datetime'):
            return self.__convertiTipo(ritorno[0][0], ritTipo)
        elif ritTipo in ('list', 'list1'):
            rit = list(ritorno)
            for i in range(rit.__len__()):
                rit[i] = list(rit[i])
                # Cursor description is only captured when JSON columns are
                # configured; use it to locate columns to json-decode.
                if self.__descriptRit:
                    for c in range(rit[i].__len__()):
                        if self.__descriptRit[c][0] in self.__colConvJson:
                            rit[i][c] = jLoads(self.__byteToString(rit[i][c], corr="ignore"))
                if ritTipo == 'list1':
                    break
            if ritTipo == 'list1' and len(rit) > 0:
                rit = rit[0]
            return rit
        elif ritTipo in ('dict', 'dict1'):
            rit = list(ritorno)
            for row in rit:
                if self.__descriptRit:
                    for col in self.__colConvJson:
                        if col in list(row.keys()):
                            row[col] = jLoads(self.__byteToString(row[col], corr="ignore"))  # cls=json_encoder
                if ritTipo == 'dict1':
                    break
            if ritTipo == 'dict1' and len(rit) > 0:
                rit = rit[0]
            return rit
        elif ritTipo == 'tuple1':
            rit = tuple(ritorno)
            if len(rit) > 0:
                rit = rit[0]
            return rit
        else:
            return ritorno

    def connetti(self) -> None:
        """Open the MySQL connection, preferring the C extension connector
        when the installed connector version supports it.

        NOTE(review): all exceptions are swallowed; a failed connect only
        surfaces later when query() touches the missing __conn attribute.
        """
        try:
            if mSQLversion > (2, 1) and HAVE_CEXT:
                self.__cfgConn["use_pure"] = False
                self.__conn = CMySQLConnection(**self.__cfgConn)
            else:
                self.__conn = MySQLConnection(**self.__cfgConn)
            self.__conn.get_warnings = False
        except Exception:
            ...

    def chiudi(self) -> None:
        """Close the connection if one was successfully opened."""
        if self.__conn and hasattr(self.__conn, "close") and callable(getattr(self.__conn, "close")):
            self.__conn.close()

    def query(self, ritTipo: str, query: str) -> Any:
        """Execute `query` on a buffered cursor and return the result shaped
        per `ritTipo` (see __convertRit); '' means execute-only.

        On connector errors/warnings the message is stored in self.query_err
        and None is returned.
        """
        rit = None
        self.query_err = ""
        self.__descriptRit = None
        # dict-shaped results need a dictionary cursor.
        if ritTipo[:4] == "dict":
            cur = self.__conn.cursor(buffered=True, dictionary=True)
        else:
            cur = self.__conn.cursor(buffered=True)
        try:
            cur.execute(query)
            nrRow: int = cur.rowcount if cur.with_rows else 0
            if nrRow > 0:
                if ritTipo != "":
                    rit = cur.fetchall()
                if len(self.__colConvJson) > 0:
                    self.__descriptRit = cur.description
        except (mSQLError, mSQLWarning) as err:
            self.query_err = err.msg if hasattr(err, "msg") else ""
            rit = None
        finally:
            # NOTE(review): _executed is a private connector attribute —
            # guards against closing a cursor that never ran; confirm this
            # is stable across connector versions.
            if cur._executed:
                cur.close()
        if rit is not None:
            rit = self.__convertRit(rit, ritTipo)
        return rit

    def __name_tab(self, i) -> str:
        """Bare table name for index i, e.g. 'table_3'."""
        return "table_" + str(i)

    def __name_tab_compl(self, i) -> str:
        """Schema-qualified table name for index i."""
        return self.__nomeSk + "." + self.__name_tab(i)

    def __tab_rnd(self) -> str:
        """Random bare table name within this thread's table range."""
        return self.__name_tab(rRandint(0, self.__nrTab - 1))

    def __tab_rnd_compl(self) -> str:
        """Random schema-qualified table name."""
        return self.__nomeSk + "." + self.__tab_rnd()

    def run(self) -> None:
        """Thread body: run __nrRip full benchmark cycles, then store the
        averaged per-phase timings in self.tempi and a summary in ritorno."""
        txt_L: int = len(self.__text_rnd) - 1
        lista_id: list[int] = [i for i in range(1, self.__nrRowTab + 1)]
        # One timing sample per repetition, averaged after the loop.
        listaCrk = []
        listaIns = []
        listaUpd = []
        listaSel = []
        listaInf = []
        listaTot = []
        self.connetti()
        for _ in range(self.__nrRip):
            # create the test database (dropping any leftover one first)
            if self.query('int', "SELECT COUNT(*) FROM INFORMATION_SCHEMA.SCHEMATA WHERE "
                                 "SCHEMA_NAME = '" + self.__nomeSk + "'") > 0:
                self.query('', "DROP DATABASE " + self.__nomeSk)
            self.query('', "CREATE DATABASE " + self.__nomeSk)
            # Stagger thread start-up so workers don't all hit the server at once.
            tSleep(3.3 * self.threadID)
            oraIniT = oraIni = tTime()
            # create the test tables, alternating MyISAM and InnoDB engines
            for i in range(self.__nrTab):
                if i % 2 == 0:
                    tipoEn: str = "MyISAM"
                else:
                    tipoEn: str = "InnoDB"
                self.query('', "CREATE TABLE " + self.__name_tab_compl(i) + " ("
                               "id INT UNSIGNED NOT NULL AUTO_INCREMENT,"
                               "valInt INT NULL,"
                               "valChar VARCHAR(16000) NULL,"
                               "valJSON JSON NULL,"
                               "PRIMARY KEY (id)) ENGINE = " + tipoEn)
            self.tCreate = tTime() - oraIni
            # insert data, using both INSERT and REPLACE
            doc_json: str = '{"pippo": 2, "pluto": 3.5, "paperino": [1,2,3]}'
            oraIni: float = tTime()
            for i in range(self.__nrTab):
                nomeTab: str = self.__name_tab_compl(i)
                for v in lista_id:
                    testo: str = self.__text_rnd[rRandint(0, txt_L)]
                    if i % 2 == 0:
                        self.query('', f"INSERT INTO {nomeTab} (valInt, valChar, valJSON) VALUES "
                                       f"({v}, '{testo}', '{doc_json}')")
                    else:
                        self.query('', f"REPLACE {nomeTab} (id, valInt, valChar, valJSON) VALUES "
                                       f"({v}, {v}, '{testo}', '{doc_json}')")
            self.tInsert = tTime() - oraIni
            # SELECTs on the tables
            oraIni = tTime()
            for _ in range(self.__nrSelUpd):
                v: int = rRandint(1, self.__nrRowTab)
                nomeTab = self.__tab_rnd_compl()
                rit_int = self.query('int', f"SELECT valInt FROM {nomeTab} WHERE id = {v}")
                assert isinstance(rit_int, int)
                rit_str = self.query('str', f"SELECT valChar FROM {nomeTab} WHERE valInt = {v}")
                assert isinstance(rit_str, str)
                rit_dict = self.query('dict1', f"SELECT valJSON FROM {nomeTab} WHERE id ={v}")
                assert isinstance(rit_dict, dict)
                rit_dict = jLoads(rit_dict["valJSON"])
                assert isinstance(rit_dict, dict)
                ids: str = "(" + ",".join([str(id) for id in rSample(lista_id, 5)]) + ")"
                rit_list = self.query('list', f"SELECT * FROM {nomeTab} WHERE id IN {ids}")
                assert isinstance(rit_list, list)
            self.tSelect = tTime() - oraIni
            # update data
            oraIni = tTime()
            for _ in range(self.__nrSelUpd):
                v: int = rRandint(1, self.__nrRowTab)
                self.query('', f"UPDATE {self.__tab_rnd_compl()} SET valChar = '' WHERE id = {v}")
            self.tUpdate = tTime() - oraIni
            # SELECTs on information_schema
            oraIni = tTime()
            for _ in range(self.__nrSelUpd):
                nomeTab: str = self.__tab_rnd()
                nT: int = rRandint(1, 3)
                if nT == 1:
                    rit_str = self.query('str', "SELECT ENGINE FROM information_schema.TABLES WHERE "
                                                f"TABLE_SCHEMA = '{self.__nomeSk}' AND TABLE_NAME = '{nomeTab}'")
                    assert isinstance(rit_str, str)
                elif nT == 2:
                    rit_int = self.query('int', "SELECT TABLE_ROWS FROM information_schema.TABLES WHERE "
                                                f"TABLE_SCHEMA = '{self.__nomeSk}' AND TABLE_NAME = '{nomeTab}'")
                    assert isinstance(rit_int, int)
                else:
                    rit_datetime = self.query('datetime', "SELECT UPDATE_TIME FROM information_schema.TABLES WHERE "
                                                          f"TABLE_SCHEMA = '{self.__nomeSk}' AND TABLE_NAME = '{nomeTab}'")
                    assert isinstance(rit_datetime, datetime | None)
            self.tInfo = tTime() - oraIni
            # drop the whole test database
            self.query('', f"DROP DATABASE {self.__nomeSk}")
            self.tTot = tTime() - oraIniT
            listaCrk.append(self.tCreate)
            listaIns.append(self.tInsert)
            listaUpd.append(self.tUpdate)
            listaSel.append(self.tSelect)
            listaInf.append(self.tInfo)
            listaTot.append(self.tTot)
        self.chiudi()
        # Average the samples across repetitions.
        self.tempi['crk'] = round(npMean(listaCrk), 3)
        self.tempi['ins'] = round(npMean(listaIns), 3)
        self.tempi['upd'] = round(npMean(listaUpd), 3)
        self.tempi['sel'] = round(npMean(listaSel), 3)
        self.tempi['inf'] = round(npMean(listaInf), 3)
        self.tempi['tot'] = round(npMean(listaTot), 3)
        self.ritorno = f"Tempi [{self.threadID}]: {self.tempi}"
def main() -> None:
    """Launch the benchmark worker threads, wait for them to finish and
    print each thread's timing summary plus the summed total time."""
    nrRip: int = 3
    nrTab: int = 50
    nrRowTab: int = 100
    nrSelUpd: int = 4000
    nrTr: int = 8
    print(f"Configurazione: nrRip={nrRip}, nrTab={nrTab}, nrRowTab={nrRowTab}, nrSelUpd={nrSelUpd}")
    print("")
    print(f"Avvio sessioni di test a {nrTr} thread ...")
    print(f"Ora avvio: {datetime.now()}")
    # Build every worker first, then start them all.
    threads = [benchmark(tId, nrRip=nrRip, nrTab=nrTab, nrRowTab=nrRowTab, nrSelUpd=nrSelUpd)
               for tId in range(nrTr)]
    for worker in threads:
        worker.start()
    # Join in creation order, printing each summary as its thread completes.
    for worker in threads:
        worker.join()
        print(worker.ritorno)
    # Sum of per-thread averaged total times.
    tempo: float = sum(worker.tempi['tot'] for worker in threads)
    print(f"Ora fine: {datetime.now()}")
    print(f"Terminate sessioni di test a {nrTr} thread. Somma dei tempi: {round(tempo, 2)} sec.")
if __name__ == '__main__':
    # Seed the shared random module (from system entropy) before the
    # benchmark threads start drawing random names/values.
    rSeed()
    main()
MySQL configuration files are attached below.
/etc/mysql/mysql.conf.d/mysqld.cnf
[mysqld]
pid-file = /var/run/mysqld/mysqld.pid
socket = /var/run/mysqld/mysqld.sock
#datadir = /zfs1/database_8.4
datadir = /zfs1/database_8.4_copy
log-error = /opt/mysql-8.4.2/log/error.log
# default = /usr/
basedir = /opt/mysql-8.4.2/
# default = /usr/share/mysql-8.0/
lc_messages_dir = /opt/mysql-8.4.2/share/
# default = /usr/share/mysql-8.0/charsets/
character_sets_dir = /opt/mysql-8.4.2/share/charsets/
# default = /usr/lib/mysql/plugin/
plugin_dir = /opt/mysql-8.4.2/lib/plugin/
/etc/mysql/mysql.conf.d/drago_mod.cnf
[mysqld]
# in caso di emergenza
# https://dev.mysql.com/doc/refman/8.0/en/forcing-innodb-recovery.html
# da 1 a 6 (ma 4,5,6 sconsigliati)
#innodb_force_recovery = 6
# non server perche' non uso la replicazione
skip-log-bin
log_timestamps = SYSTEM
secure-file-priv = /var/lib/mysql-files/
# default = 8161
# [Warning] [MY-010140] [Server] Could not increase number of max_open_files to more than 10000 (request: 30000)
#open_files_limit = 30000
# default = 30
net_read_timeout = 60
# default = ON
event-scheduler = ON
# default = InnoDB
default_storage_engine = MyISAM
# default = InnoDB
#default_tmp_storage_engine = MyISAM
# default = 151
max_connections = 101
# log e query lente per capire dove ottimizzare
log_output = TABLE
general_log = OFF
slow_query_log = ON
long_query_time = 15
# grandezza massima tabelle MEMORY
max_heap_table_size = 512M
### MyISAM variables
# default = OFF
myisam_use_mmap = ON
key_buffer_size = 128M
myisam-block-size = 16384
### InnoDB variables
# default = 134217728
innodb_buffer_pool_size = 1G
# default = fsync, consigliato O_DIRECT
#innodb_flush_method = fsync
innodb_flush_method = O_DIRECT
# default = ON (dovrebbe evitare la scansione di tutti tablespace) -> OFF
innodb-validate-tablespace-paths= OFF
# default = 1
innodb_flush_log_at_trx_commit = 2
# default = ON possibile ON, OFF, DETECT_AND_RECOVER (uguale ON), DETECT_ONLY (solo metadata)
innodb_doublewrite = OFF
# default = crc32
innodb_checksum_algorithm = NONE
# default = ON
innodb_log_checksums = OFF
# default = 1
innodb_page_cleaners = 4
# default = 1
innodb_buffer_pool_instances = 4
# default =
innodb_tmpdir = /var/tmp/
# default = ibtmp1:12M:autoextend
innodb_temp_data_file_path = /var/tmp/ib_tmp1:32M;/var/tmp/ib_tmp2:32M:autoextend
# default = ./#innodb_temp/
innodb_temp_tablespaces_dir = /var/tmp/
Thanks for your interest and effort on this.
System information
Describe the problem you're observing
Using MySQL with
innodb_flush_method = fsync
doesn't cause any issues. Switching the parameter to O_DIRECT
and running an intensive test resulted in a kernel panic. (I don't have extensive expertise in C or English, but I'd like to contribute to verifying filesystem stability before the final release of version 2.3. Thanks to all project contributors for their work.)
Describe how to reproduce the problem
Switched MySQL configuration
innodb_flush_method
from fsync
to O_DIRECT
and set the dataset parameter direct = standard
Include any warning/errors/backtraces from the system logs
-->