Closed craigsapp closed 4 years ago
Implemented and used with commit https://github.com/musedata/musedata.github.io/commit/54fe47a1b9d7be89efbd7447a9dfc4a54f873262
Here is the Google Apps Script used to extract data from the spreadsheet:
//
// CGI parameters that the script understands:
//
// format :: output format for the spreadsheet. Values can be:
// json :: output as JSON data (the default format).
// raw :: plain text TSV output of the header
// tsv :: similar to raw, but remove any text above the header line.
// aton :: output spreadsheet in the ATON format (https://aton.sapp.org)
// humdrum :: output spreadsheet in the Humdrum format.
// heading :: Text of an expected heading value for data columns. This is used
// to skip over extra text before the data column headings.
// label :: This is the name of the row object in ATON data output. The default is "ENTRY".
//
//////////////////////////////
//
// doGet -- this is the function that returns a GET request for the web app URL.
//
function doGet(event) {
// The targetHeaderField is a column heading that the spreadsheet table starts
// at. This allows for explanatory text above the table to be ignored. The
// heading text is a sample heading that is on the header line. Any field
// can be used as long as it is not on a line by itself or in a cell by itself
// in the free-form text before the heading line.
let targetHeaderField = "LEVEL_1";
if (event && event.paramameter && event.parameter.heading) {
targetHeaderField = event.parameter.heading;
}
try {
let sid = "1U4Z-NeU2FGk_qNq6BwXeSVN-p8AnZsWbRHGo5fk3f3A";
let gid = "0";
let url = "https://docs.google.com/spreadsheets/d/"+sid+"/export?gid="+gid+"&format=tsv";
const value = UrlFetchApp.fetch(url);
let raw = value.getContentText();
let format = "json";
if (event && event.parameter && event.parameter.format) {
let f = event.parameter.format;
if (f.match(/aton/i)) {
format = "aton";
} else if (f.match(/tsv/i)) {
format = "tsv";
} else if (f.match(/hum/i)) {
format = "humdrum";
} else if (f.match(/raw/i)) {
format = "raw";
}
}
if (format === "raw") {
return ContentService.createTextOutput(raw);
} else if (format === "tsv") {
return ContentService.createTextOutput(extractTsv(raw, targetHeaderField));
} else if (format === "humdrum") {
return ContentService.createTextOutput(tsvToHumdrum(raw, targetHeaderField));
}
let obj = spreadsheetToObject(raw, -1, "LEVEL_1", true);
if (format === "aton") {
var label = "ENTRY";
if (event && event.parameter && event.parameter.label) {
label = event.parameter.label;
}
var aton = spreadsheetArrayToAton(obj, label);
return ContentService.createTextOutput(aton);
} else {
let json = JSON.stringify(obj, null, "\t");
return ContentService.createTextOutput(json)
.setMimeType(ContentService.MimeType.JSON);
}
} catch (err) {
let output = JSON.stringify({
message: "Failed to extract speadsheet data: " + err.message,
stack: err.stack,
error: err}, null, "\t");
return ContentService.createTextOutput(output);
}
}
//////////////////////////////
//
// getHeaderIndex -- Given a target column header text, return the first
// line that has that text in a TSV line.
//
function getHeaderIndex(lines, targetField) {
for (let i=0; i<lines.length; i++) {
let data = lines[i].split(/\t/);
for (let j=0; j<data.length; j++) {
let value = data[j].trim();
if (value === targetField) {
return i;
}
}
}
return 0;
}
//////////////////////////////
//
// extractTsv -- return the raw TSV data, but remove any header material
//
function extractTsv(text, headerField) {
let lines = text.match(/[^\r\n]+/g);
let headerIndex = getHeaderIndex(lines, headerField);
let output = "";
for (let i=headerIndex; i<lines.length; i++) {
output += lines[i] + "\n";
}
return output;
}
//////////////////////////////
//
// spreadsheetToObject -- Convert TSV form of spreadsheet into Javascript object.
//
function spreadsheetToObject(text, headerIndex, headerField, suppressEmpty) {
if (!text) {
return [];
}
if (!headerIndex) {
headerIndex = 0;
}
suppressEmpty = !!suppressEmpty;
let lines = text.match(/[^\r\n]+/g);
if (headerField) {
headerIndex = getHeaderIndex(lines, headerField);
}
let mapping = {};
let data = lines[headerIndex].split(/\t/);
for (let i=0; i<data.length; i++) {
if (data[i].match(/^\s*$/)) {
continue;
}
mapping[i] = data[i].trim();
}
var output = [];
for (let i=headerIndex+1; i<lines.length; i++) {
if (lines[i].match(/^\s*$/)) {
continue;
}
data = lines[i].split(/\t/);
var item = {};
for (let j=0; j<data.length; j++) {
if (suppressEmpty && data[j].match(/^\s*$/)) {
continue;
}
if (!mapping[j] || mapping[j].match(/^\s*$/)) {
continue;
}
item[mapping[j]] = data[j].trim();
}
output.push(item);
}
return output;
}
///////////////////////////////
//
// spreadsheetArrayToAton -- convert a spreadsheet in JSON format to ATON format.
// Input has to be an array.
//
function spreadsheetArrayToAton(obj, label) {
if (!label) {
label = "ENTRY";
}
let output = "";
for (let i=0; i<obj.length; i++) {
output += "@@BEGIN: " + label + "\n";
for (property in obj[i]) {
output += "@" + property + ": " + obj[i][property] + "\n";
}
output += "@@END: " + label + "\n\n";
}
return output;
}
//////////////////////////////
//
// getHeadings -- return an array of the text on the heading row.
// It is assumed that there are no empty columns. Spaces are not
// allowed in the headings and most non-alphanumeric characters.
//
function getHeadings(text) {
let fields = text.split(/\t+/);
let output = [];
for (let i=0; i<fields.length; i++) {
let value = fields[i].trim().replace(/\s+/g, "_").replace(/[^A-Za-z0-9_#-]/g, "");
output.push(value);
}
return output;
}
//////////////////////////////
//
// tsvToHumdrum -- convert a spreadsheet into the humdrum format, with optional
// text on lines before the table.
//
function tsvToHumdrum(text, headerField) {
if (!text) {
return "";
}
let lines = text.match(/[^\r\n]+/g);
let headerIndex = getHeaderIndex(lines, headerField);
let output = "";
for (let i=0; i<headerIndex; i++) {
if (lines[i].match(/^!!/)) {
output += lines[i].replace(/\s+$/, "") + "\n";
} else {
output += "!! " + lines[i].replace(/\s+$/, "") + "\n";
}
}
let headings = getHeadings(lines[headerIndex]);
// print exclusive interpretation line:
for (let i=0; i<headings.length; i++) {
if (headings[i].match(/^\*\*/)) {
output += headings[i];
} else if (headings[i].match(/^\*/)) {
output += "*" + headings[i];
} else {
output += "**" + headings[i];
}
if (i < headings.length - 1) {
output += "\t";
}
}
output += "\n";
// Print individual cells of table as Humdrum tokens
// (not checking for leading ! or *). It is assumed that
// the row fields are either constant or that they are
// controlled with spine manipulators embedded in the data.
for (let i=headerIndex + 1; i<lines.length; i++) {
if (lines[i].match(/^\s*$/)) {
continue;
}
let fields = lines[i].split(/\t/);
for (let j=0; j<fields.length; j++) {
if (j >= fields.length) {
output += ".";
}
if (fields[j].match(/^\s*$/)) {
output += ".";
} else {
output += fields[j];
}
if (j < fields.length - 1) {
output += "\t";
}
}
output += "\n";
}
// print terminator line:
for (let i=0; i<headings.length; i++) {
output += "*-";
if (i < headings.length - 1) {
output += "\t";
}
}
output += "\n";
return output;
}
The script extracts the text of the URL of the spreadsheet rather than access the spreadsheet app directly, which would also be possible.
In June, Google spreadsheets became difficult to download directly to a web browser due to CORS restrictions.
A solution is to use Google Apps Scripts to create a URL that is not limited by CORS. To do this for a Google spreadsheet, got to Tools → Script editor...
After creating and saving the following script, you have to "publish" the script: Go to the menu item Publish → Deploy as web app...:
Then a window appears:
Every time you update the script, you must create a new "Project version". Also allow for "Anyone, even anonymous" to access the script. Then press
Update
, and a new window appears:Make note of the URL (which will remain the same between different versions) and then click on
OK
.The URL to the data is:
https://script.google.com/macros/s/AKfycbyIBWnniSSUHEDr10yRdMVx72sQDXXntyQ6bd0az2JpumBulp4/exec
This links to a JSON version of the spreadsheet, with each entry in an array being a data line in the spreadsheet table, such as the first two lines generate these two entries in the JSON data:
There are other formats available for different environments and applications, which are accessed by giving an additional parameter at the end of the URL (
&format=xxx
):Raw TSV version:
https://script.google.com/macros/s/AKfycbyIBWnniSSUHEDr10yRdMVx72sQDXXntyQ6bd0az2JpumBulp4/exec?format=raw
Cleaned TSV version (no explanatory text at start of file):
https://script.google.com/macros/s/AKfycbyIBWnniSSUHEDr10yRdMVx72sQDXXntyQ6bd0az2JpumBulp4/exec?format=tsv
ATON version: https://script.google.com/macros/s/AKfycbyIBWnniSSUHEDr10yRdMVx72sQDXXntyQ6bd0az2JpumBulp4/exec?format=aton This is a lower-complexity syntax that can be converted into JSON.
Humdrum version: https://script.google.com/macros/s/AKfycbyIBWnniSSUHEDr10yRdMVx72sQDXXntyQ6bd0az2JpumBulp4/exec?format=humdrum This is a spreadsheet like format that can be manipulated with tools from the Humdrum Toolkit.
Using this system avoids dependence on server-side infrastructure, and allows non-technical persons to control the website index directly from the spreadsheet.