Closed candlecao closed 4 months ago
Supplemental: `LoaderScript.ld` is the name of the text file with the `.ld` suffix.
That ChatGPT-generated output is incorrect (a hallucination), as the Virtuoso RDF Bulk Loader is used for loading dataset files in the RDF formats indicated as supported.
To bulk load multiple CSV files, Virtuoso has an equivalent CSV Bulk Loader process, as detailed in the linked documentation.
Note: Our Virtuoso Personal Assistant, an OpenLink Personal Assistant (OPAL) module, will provide you with better guidance about CSV bulk loading into Virtuoso. Just perform the following steps:
You should get a different response from this OPAL enclosed (and guardrailed) variant of ChatGPT, scoped to the Virtuoso Support Assistant module and its underlying Knowledge Graph.
@HughWilliams Thank you. @kidehen Thank you.
However, although I did as the tutorial and OPAL instructed, I always got stuck on errors such as "Undefined procedure DB.DBA.csv_register" or "Undefined procedure DB.DBA.LOADE_RDF", etc.
Have you loaded the CSV Bulk Loader scripts which need to be loaded first to create the required CSV bulk loader functions, as detailed in the documentation?
Hi, HughWilliams. I hadn't until you told me. But when I pasted the whole content under the heading "CSV Bulk Loader scripts" into the ISQL shell and executed it, it prompted:
Here's the complete script.
-- DROP TABLE IF EXISTS csv_load_list;

-- Bookkeeping table for the CSV bulk loader, keyed by (cl_file, cl_file_in_zip).
-- NOTE(review): the per-column remarks below are inferred from the column
-- names only -- confirm against the loader procedures that read/write them.
CREATE TABLE csv_load_list
(
  cl_file         VARCHAR,        -- presumably the path of the registered file
  cl_file_in_zip  VARCHAR,        -- presumably the member name when the file is an archive
  cl_state        INT DEFAULT 0,  -- load state; new rows start at 0
  cl_error        LONG VARCHAR,   -- presumably the error text of a failed load
  cl_table        VARCHAR,        -- presumably the target table name
  cl_options      ANY,            -- presumably the CSV parsing options
  cl_started      DATETIME,
  cl_done         DATETIME,
  PRIMARY KEY (cl_file, cl_file_in_zip)
)
-- NOTE(review): the original has no ';' between CREATE TABLE and CREATE INDEX;
-- both are deliberately kept in one statement here.
CREATE INDEX cl_state ON csv_load_list (cl_state)
;
-- Row callback handed to csv_parse by name ('DB.DBA.csv_cols_cb'): appends
-- every row it receives to an accumulator vector.
--
-- Parameters:
--   r    the row delivered by csv_parse
--   inx  presumably the index of that row; not used here
--   cbd  accumulator; NULL on the first call, a vector of rows afterwards
CREATE PROCEDURE csv_cols_cb (
  INOUT r ANY,
  IN inx INT,
  INOUT cbd ANY)
{
  -- The caller starts with a NULL accumulator, so create the vector lazily.
  IF (cbd IS NULL)
    cbd := VECTOR ();
  cbd := vector_concat (cbd, VECTOR (r));
}
;
-- Inspects the first rows of a CSV source and proposes a column list for it.
--
-- Parameters:
--   ss    CSV source, handed straight to csv_parse
--   hr    index of the header row among the parsed rows; a negative value
--         means "no header" and forces generated names COL0, COL1, ...
--   offs  index of the first data row; negative values are treated as 0
--   opts  options passed through to csv_parse unchanged
--
-- Returns a vector with one entry per column, each VECTOR (name, type_title).
-- type_title is 'VARCHAR' when the sampled values do not agree on one type or
-- when no non-empty value was seen; otherwise it is dv_type_title of the tag.
-- An empty vector is returned when fewer rows were parsed than hr/offs need.
CREATE PROCEDURE csv_get_cols_array (
  INOUT ss ANY,
  IN hr INT,
  IN offs INT,
  IN opts ANY)
{
  DECLARE h, res ANY;
  DECLARE inx, j, ncols, no_head INT;

  h := NULL;
  no_head := 0;
  IF (hr < 0)
    {
      no_head := 1;
      hr := 0;
    }
  IF (offs < 0)
    offs := 0;
  res := VECTOR ();

  -- Collect the sample rows into h via the csv_cols_cb callback.
  -- NOTE(review): 0 and offs + 10 look like from/to row limits -- confirm
  -- against the csv_parse documentation.
  csv_parse (ss, 'DB.DBA.csv_cols_cb', h, 0, offs + 10, opts);

  -- Both the header row and the first data row must exist; the extra hr check
  -- avoids indexing h out of range when fewer than hr + 1 rows were parsed.
  IF (h IS NOT NULL AND LENGTH (h) > offs AND LENGTH (h) > hr)
    {
      DECLARE _row ANY;

      -- Seed one (name, NULL) pair per header cell.
      _row := h[hr];
      FOR (j := 0; j < LENGTH (_row); j := j + 1)
        {
          res := vector_concat (res, VECTOR (VECTOR (SYS_ALFANUM_NAME (CAST (_row[j] AS VARCHAR)), NULL)));
        }
      ncols := LENGTH (res);

      -- Infer a type tag per column from the data rows.
      -- NOTE(review): 189 / 190 / 191 / 219 are taken to be the Virtuoso tags
      -- for integer, single float, double float and numeric -- confirm against
      -- the Virtuoso type-tag list.
      FOR (inx := offs; inx < LENGTH (h); inx := inx + 1)
        {
          _row := h[inx];
          -- Cells beyond the header width have no entry in res; skip them
          -- instead of indexing res out of range on ragged rows.
          FOR (j := 0; j < LENGTH (_row) AND j < ncols; j := j + 1)
            {
              -- First non-empty, non-NULL value decides the initial tag.
              IF (res[j][1] IS NULL AND NOT (ISSTRING (_row[j]) AND _row[j] = '') AND _row[j] IS NOT NULL)
                res[j][1] := __tag (_row[j]);
              -- An integer column widens to the floating type it meets.
              ELSE IF (__tag (_row[j]) <> res[j][1] AND 189 = res[j][1] AND (ISDOUBLE (_row[j]) OR isfloat (_row[j])))
                res[j][1] := __tag (_row[j]);
              -- An integer fits an already numeric/float/double column; 191 is
              -- included so that an integer following a double does not demote
              -- the column to VARCHAR.
              ELSE IF (__tag (_row[j]) <> res[j][1] AND ISINTEGER (_row[j]) AND (res[j][1] = 219 OR 190 = res[j][1] OR 191 = res[j][1]))
                ;
              -- Any other disagreement: mark the column as untyped (-1).
              ELSE IF (__tag (_row[j]) <> res[j][1])
                res[j][1] := -1;
            }
        }
    }

  -- A non-string header name means there is no usable header at all; blank or
  -- NULL names get a positional name.
  FOR (inx := 0; inx < LENGTH (res); inx := inx + 1)
    {
      IF (NOT ISSTRING (res[inx][0]) AND NOT ISNULL (res[inx][0]))
        no_head := 1;
      ELSE IF (trim (res[inx][0]) = '' OR ISNULL (res[inx][0]))
        res[inx][0] := sprintf ('COL%d', inx);
    }

  -- Turn the collected tags into type names.
  FOR (inx := 0; inx < LENGTH (res); inx := inx + 1)
    {
      IF (res[inx][1] = -1 OR res[inx][1] IS NULL)
        res[inx][1] := 'VARCHAR';
      ELSE
        res[inx][1] := dv_type_title (res[inx][1]);
    }

  -- Without a header every column gets a positional name.
  IF (no_head)
    {
      FOR (inx := 0; inx < LENGTH (res); inx := inx + 1)
        {
          res[inx][0] := sprintf ('COL%d', inx);
        }
    }
  RETURN res;
}
;
Hi, HughWilliams. I did it! (I just divided the scripts into segments at each ";" and executed them one by one.) Thank you. But after executing:
-- NOTE(review): csv_register and csv_loader_run are not defined in the script
-- shown in this thread; presumably they come from the remaining CSV Bulk
-- Loader scripts -- confirm.
-- Presumably registers the files matching '*.gz' under './CSV' for loading.
csv_register ('./CSV', '*.gz');
-- Presumably loads the registered files; per the thread, the result is SQL tables.
csv_loader_run ();
I only saw the data imported as CSV format stored in Database>SQL Database Objects. How can I further directly convert it into RDF using Bulk Loader?
Here's the complete script.
-- DROP TABLE IF EXISTS csv_load_list;

-- Bookkeeping table for the CSV bulk loader, keyed by (cl_file, cl_file_in_zip).
-- NOTE(review): no ';' separates CREATE TABLE from CREATE INDEX in the original
-- text; both are kept in one statement.
CREATE TABLE csv_load_list (
  cl_file         VARCHAR,
  cl_file_in_zip  VARCHAR,
  cl_state        INT DEFAULT 0,
  cl_error        LONG VARCHAR,
  cl_table        VARCHAR,
  cl_options      ANY,
  cl_started      DATETIME,
  cl_done         DATETIME,
  PRIMARY KEY (cl_file, cl_file_in_zip))
CREATE INDEX cl_state ON csv_load_list (cl_state)
;

-- Row callback handed to csv_parse by name: appends each row it receives to
-- the accumulator cbd (created on first use, when cbd is still NULL).
CREATE PROCEDURE csv_cols_cb (
  INOUT r ANY,
  IN inx INT,
  INOUT cbd ANY)
{
  IF (cbd IS NULL)
    cbd := VECTOR ();
  cbd := vector_concat (cbd, VECTOR (r));
}
;

-- Samples the first rows of a CSV source and returns a vector with one
-- VECTOR (name, type_title) per column. A negative hr means "no header row"
-- and yields generated names COL0, COL1, ...; a negative offs is treated as 0.
CREATE PROCEDURE csv_get_cols_array (
  INOUT ss ANY,
  IN hr INT,
  IN offs INT,
  IN opts ANY)
{
  DECLARE h, res ANY;
  DECLARE inx, j, ncols, no_head INT;

  h := NULL;
  no_head := 0;
  IF (hr < 0)
    {
      no_head := 1;
      hr := 0;
    }
  IF (offs < 0)
    offs := 0;
  res := VECTOR ();
  -- Collect the sample rows into h through the csv_cols_cb callback.
  csv_parse (ss, 'DB.DBA.csv_cols_cb', h, 0, offs + 10, opts);
  IF (h IS NOT NULL AND LENGTH (h) > offs)
    {
      DECLARE _row ANY;
      -- Seed one (name, NULL) pair per header cell.
      _row := h[hr];
      FOR (j := 0; j < LENGTH (_row); j := j + 1)
        {
          res := vector_concat (res, VECTOR (VECTOR (SYS_ALFANUM_NAME (CAST (_row[j] AS VARCHAR)), NULL)));
        }
      -- Infer a type tag per column from the data rows; -1 marks a conflict.
      FOR (inx := offs; inx < LENGTH (h); inx := inx + 1)
        {
          _row := h[inx];
          FOR (j := 0; j < LENGTH (_row); j := j + 1)
            {
              IF (res[j][1] IS NULL AND NOT (ISSTRING (_row[j]) AND _row[j] = '') AND _row[j] IS NOT NULL)
                res[j][1] := __tag (_row[j]);
              ELSE IF (__tag (_row[j]) <> res[j][1] AND 189 = res[j][1] AND (ISDOUBLE (_row[j]) OR isfloat (_row[j])))
                res[j][1] := __tag (_row[j]);
              ELSE IF (__tag (_row[j]) <> res[j][1] AND ISINTEGER (_row[j]) AND (res[j][1] = 219 OR 190 = res[j][1]))
                ;
              ELSE IF (__tag (_row[j]) <> res[j][1])
                res[j][1] := -1;
            }
        }
    }
  -- Blank or NULL names get a positional name; a non-string name disables the header.
  FOR (inx := 0; inx < LENGTH (res); inx := inx + 1)
    {
      IF (NOT ISSTRING (res[inx][0]) AND NOT ISNULL (res[inx][0]))
        no_head := 1;
      ELSE IF (trim (res[inx][0]) = '' OR ISNULL (res[inx][0]))
        res[inx][0] := sprintf ('COL%d', inx);
    }
  -- Turn the collected tags into type names.
  FOR (inx := 0; inx < LENGTH (res); inx := inx + 1)
    {
      IF (res[inx][1] = -1 OR res[inx][1] IS NULL)
        res[inx][1] := 'VARCHAR';
      ELSE
        res[inx][1] := dv_type_title (res[inx][1]);
    }
  IF (no_head)
    {
      FOR (inx := 0; inx < LENGTH (res); inx := inx + 1)
        {
          res[inx][0] := sprintf ('COL%d', inx);
        }
    }
  RETURN res;
}
;
Thank you! I did it successfully.
Now that you have successfully bulk loaded the CSV data into Virtuoso as SQL relational tables, you have to convert those tables to RDF Linked Data Views, as detailed in this Generation of Linked Data Views over Relational Data Sources with Virtuoso document.
Thank you very much!
I asked ChatGPT 4 a similar question. It gave me the following guide:
1. Prepare the CSV file in a path that can be accessed by Virtuoso
For example, the snippets are (1 header and 3 rows of entries):
-- This file is put in a folder called "my_virdb" (where the Virtuoso server process was started; files such as `virtuoso.ini` and `virtuoso.db` are located there).
2. Prepare a text file with the suffix `.ld`
-- Given that the CSV file has the name "sampleData.csv" and is put in the "my_virdb" folder, and (I suppose) '.' means the `sampleData.csv` is put in the same directory with `sampleData.csv`.
-- This associates the mapping from CSV to URI (please see below)
3. Define `mappings.ttl`, which appeared in the `.ld` file as above
The content in the file, e.g.:
-- Place the `mappings.ttl` file in the same directory as your `sampleData.csv`.
4. Execute the scripts from the terminal
isql -U dba -P mysecret -S 1111 /path/to/my_virtdb/LoaderScript.ld
Everything above seemed to execute smoothly. However, after running the procedures above, I executed
select * from "DB.DBA.LOAD_LIST";
from the ISQL shell, or executed
SPARQL SELECT * FROM <http://example.com/mygraph/UseBulkLoaderForCSV> WHERE { ?s ?p ?o } LIMIT 10;
and no actual data was found! What's wrong? Could somebody point me in the right direction?