Open simonw opened 1 week ago
Here's the change:
diff --git a/datasette/database.py b/datasette/database.py
index ffe94ea7..e56dcf03 100644
--- a/datasette/database.py
+++ b/datasette/database.py
@@ -85,12 +85,17 @@ class Database:
return "db"
def connect(self, write=False):
+ kwargs = {
+ "uri": True,
+ "check_same_thread": False,
+ }
+ if write:
+ kwargs["isolation_level"] = "IMMEDIATE"
if self.memory_name:
uri = "file:{}?mode=memory&cache=shared".format(self.memory_name)
conn = sqlite3.connect(
uri,
- uri=True,
- check_same_thread=False,
+ **kwargs,
)
if not write:
conn.execute("PRAGMA query_only=1")
@@ -110,9 +115,7 @@ class Database:
qs = ""
if self.mode is not None:
qs = f"?mode={self.mode}"
- conn = sqlite3.connect(
- f"file:{self.path}{qs}", uri=True, check_same_thread=False
- )
+ conn = sqlite3.connect(f"file:{self.path}{qs}", **kwargs)
self._all_file_connections.append(conn)
return conn
I don't want to commit this until I can run some kind of load test that demonstrates a problem with the default connection mode that is solved by switching to IMMEDIATE.
I tried running this locustfile.py
using python -m locust -f locustfile.py
and then simulating 100 users.
I ran sqlite-utils enable-wal data.db
and then datasette data.db --root
and used the /-/create-token
page to create a token which I set as the DS_API_KEY
environment variable.
from locust import HttpUser, task, between
import random
import json
import os
class APIUser(HttpUser):
    """Locust user that mixes Datasette read traffic with JSON write-API posts.

    Reads hit the database index, a sorted table page and a filtered row;
    writes upsert rows into ``mytable`` via ``/data/-/create``. Authentication
    uses a bearer token taken from the ``DS_API_KEY`` environment variable.
    """

    host = "http://localhost:8001"
    # wait_time = between(0.05, 0.1)  # in seconds

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Monotonically increasing id for the rows this user inserts.
        self.id_counter = 0
        self.api_key = os.environ.get('DS_API_KEY')
        if not self.api_key:
            # Fail fast at spawn time rather than sending unauthenticated requests.
            raise ValueError("DS_API_KEY environment variable is not set")
        self.headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}',
        }

    @task(3)
    def get_data(self):
        # Read: database index page.
        self.client.get("/data", headers=self.headers)

    @task(3)
    def get_mytable(self):
        # Read: table page sorted descending by id.
        self.client.get("/data/mytable?_sort_desc=id", headers=self.headers)

    @task(2)
    def get_mytable_with_id(self):
        # Read: single-row filter.
        self.client.get("/data/mytable?id=5", headers=self.headers)

    @task(4)
    def post_create_data(self):
        # Write: upsert one row through the JSON write API (replace=True
        # means an existing row with the same pk is overwritten).
        self.id_counter += 1
        spider = random.choice(["Tarantula", "Black Widow", "Huntsman", "Wolf Spider", "Jumping Spider"])
        payload = {
            "table": "mytable",
            "rows": [
                {
                    "id": self.id_counter,
                    "name": spider,
                }
            ],
            "pk": "id",
            "replace": True,
        }
        self.client.post("/data/-/create", data=json.dumps(payload), headers=self.headers)
On both my Mac and GitHub Codespaces I was unable to get it to trigger any database lock errors. Mac results:
I tried on Codespaces as well because I thought maybe my M2 performed too well and didn't trigger errors.
The currently open challenge here is this: create a locustfile.py
which reliably triggers database locked errors when exercising a Datasette instance calling the JSON write API (and reading other endpoints at the same time).
Here's the Codespaces Locust run with WAL enabled (the macOS run above didn't have WAL enabled):
I used https://github.com/datasette/studio to run this
We currently use the default isolation level for both read and write connections.
https://kerkour.com/sqlite-for-servers#use-immediate-transactions says:
Hardest part is proving this issue one way or the other.