Closed — simonw closed this issue 1 year ago.
Interesting catch with this particular URL: it's stored in Git LFS, which means Datasette Lite can't currently figure out where the file itself is hosted.
Here the CORS-enabled file to load is https://media.githubusercontent.com/media/openai/evals/main/evals/registry/data/reverse_string/reverse_string.jsonl
Got this working with a new, undocumented `?jsonl=` option:
diff --git a/index.html b/index.html
index 29d0ce7..a81563b 100644
--- a/index.html
+++ b/index.html
@@ -113,9 +113,10 @@ const initialUrl = fixUrl(urlParams.get('url'));
const csvUrls = urlParams.getAll('csv').map(fixUrl);
const sqlUrls = urlParams.getAll('sql').map(fixUrl);
const jsonUrls = urlParams.getAll('json').map(fixUrl);
+const jsonlUrls = urlParams.getAll('jsonl').map(fixUrl);
const installUrls = urlParams.getAll('install');
-datasetteWorker.postMessage({type: 'startup', initialUrl, csvUrls, sqlUrls, jsonUrls, installUrls});
+datasetteWorker.postMessage({type: 'startup', initialUrl, csvUrls, sqlUrls, jsonUrls, jsonlUrls, installUrls});
let loadingLogs = ["Loading..."];
diff --git a/webworker.js b/webworker.js
index 7ab440d..de09b1b 100644
--- a/webworker.js
+++ b/webworker.js
@@ -10,6 +10,7 @@ async function startDatasette(settings) {
let csvs = [];
let sqls = [];
let jsons = [];
+ let jsonls = [];
let needsDataDb = false;
let shouldLoadDefaults = true;
if (settings.initialUrl) {
@@ -32,6 +33,11 @@ async function startDatasette(settings) {
needsDataDb = true;
shouldLoadDefaults = false;
}
+ if (settings.jsonlUrls && settings.jsonlUrls.length) {
+ jsonls = settings.jsonlUrls;
+ needsDataDb = true;
+ shouldLoadDefaults = false;
+ }
if (needsDataDb) {
toLoad.push(["data.db", 0]);
}
@@ -81,7 +87,8 @@ async function startDatasette(settings) {
# Import data from ?csv=URL CSV files/?json=URL JSON files
csvs = ${JSON.stringify(csvs)}
jsons = ${JSON.stringify(jsons)}
- if csvs or jsons:
+ jsonls = ${JSON.stringify(jsonls)}
+ if csvs or jsons or jsonls:
await micropip.install("sqlite-utils==3.28")
import sqlite_utils, json
from sqlite_utils.utils import rows_from_file, TypeTracker, Format
@@ -109,7 +116,12 @@ async function startDatasette(settings) {
db[bit].transform(
types=tracker.types
)
- for json_url in jsons:
+ json_to_do = [
+ ('json', url) for url in jsons
+ ] + [
+ ('jsonl', url) for url in jsonls
+ ]
+ for jtype, json_url in json_to_do:
bit = json_url.split("/")[-1].split(".")[0].split("?")[0]
bit = bit.strip()
if not bit:
@@ -122,7 +134,10 @@ async function startDatasette(settings) {
table_names.add(bit)
response = await pyfetch(json_url)
with open("json.json", "wb") as fp:
- json_data = json.loads(await response.bytes())
+ if jtype == "json":
+ json_data = json.loads(await response.bytes())
+ else:
+ json_data = [json.loads(line) for line in (await response.bytes()).split(b"\\n")]
# If it's an object, try to find first key that's a list of objects
if isinstance(json_data, dict):
for key, value in json_data.items():
Or should I try to teach the `?json=` option to also transparently spot JSONL files and load those instead?
That way I wouldn't need to add a "Load JSONL" button - the existing "Load JSON" button would work too.
The OpenAI Evals repo uses JSONL, not JSON. Would be useful to be able to load from there.
e.g. https://github.com/openai/evals/blob/main/evals/registry/data/reverse_string/reverse_string.jsonl