digitalutsc / arks-service

This standalone application provides a user interface to mint, (bulk) bind ARK Identifiers, and resolver for Ark URLs.
BSD 2-Clause "Simplified" License
8 stars 3 forks source link

Implement pagination on Ajax loading #21

Closed kylehuynh205 closed 7 months ago

kylehuynh205 commented 11 months ago

How to setup the env:

Issue:

When the amount of bound objects in the tables grows to more than 30k records. It takes so long to load objects. Need to implement pagination for datatables: https://datatables.net/examples/server_side/pipeline.html

amym-li commented 10 months ago

Summary of Ticket Progress

Tasks completed:

To do:

Examples for implementing server-side processing can be found here. (The server side script references a SSP class which can be found here).

Here are two patches that resolve some namespacing issues within the module: 0001-Fix-MysqlArkConf.php-namespacing-issues.patch 0002-Fix-class-not-found-error.patch

Here is a patch containing the changes detailed below (applies on top of the two patches above): 0003-pipeline-wip.patch

Initializing the pipeline

In /admin/admin.php, add the following lines before $(document).ready:

//
// Pipelining function for DataTables. To be used to the `ajax` option of DataTables
//
$.fn.dataTable.pipeline = function ( opts ) {
    // Configuration options
    var conf = $.extend( {
        pages: 5,     // number of pages to cache
        url: '',      // script url
        data: null,   // function or object with parameters to send to the server
                      // matching how `ajax.data` works in DataTables
        method: 'GET' // Ajax HTTP method
    }, opts );

    // Private variables for storing the cache
    var cacheLower = -1;
    var cacheUpper = null;
    var cacheLastRequest = null;
    var cacheLastJson = null;

    return function ( request, drawCallback, settings ) {
        var ajax          = false;
        var requestStart  = request.start;
        var drawStart     = request.start;
        var requestLength = request.length;
        var requestEnd    = requestStart + requestLength;

        if ( settings.clearCache ) {
            // API requested that the cache be cleared
            ajax = true;
            settings.clearCache = false;
        }
        else if ( cacheLower < 0 || requestStart < cacheLower || requestEnd > cacheUpper ) {
            // outside cached data - need to make a request
            ajax = true;
        }
        else if ( JSON.stringify( request.order )   !== JSON.stringify( cacheLastRequest.order ) ||
            JSON.stringify( request.columns ) !== JSON.stringify( cacheLastRequest.columns ) ||
            JSON.stringify( request.search )  !== JSON.stringify( cacheLastRequest.search )
        ) {
            // properties changed (ordering, columns, searching)
            ajax = true;
        }

        // Store the request for checking next time around
        cacheLastRequest = $.extend( true, {}, request );

        if ( ajax ) {
            // Need data from the server
            if ( requestStart < cacheLower ) {
                requestStart = requestStart - (requestLength*(conf.pages-1));

                if ( requestStart < 0 ) {
                    requestStart = 0;
                }
            }

            cacheLower = requestStart;
            cacheUpper = requestStart + (requestLength * conf.pages);

            request.start = requestStart;
            request.length = requestLength*conf.pages;

            // Provide the same `data` options as DataTables.
            if ( typeof conf.data === 'function' ) {
                // As a function it is executed with the data object as an arg
                // for manipulation. If an object is returned, it is used as the
                // data object to submit
                var d = conf.data( request );
                if ( d ) {
                    $.extend( request, d );
                }
            }
            else if ( $.isPlainObject( conf.data ) ) {
                // As an object, the data given extends the default
                $.extend( request, conf.data );
            }

            return $.ajax( {
                "type":     conf.method,
                "url":      conf.url,
                "data":     request,
                "dataType": "json",
                "cache":    false,
                "success":  function ( json ) {
                    cacheLastJson = $.extend(true, {}, json);

                    if ( cacheLower != drawStart ) {
                        json.data.splice( 0, drawStart-cacheLower );
                    }
                    if ( requestLength >= -1 ) {
                        json.data.splice( requestLength, json.data.length );
                    }

                    drawCallback( json );
                }
            } );
        }
        else {
            json = $.extend( true, {}, cacheLastJson );
            json.draw = request.draw; // Update the echo for each response
            json.data.splice( 0, requestStart-cacheLower );
            json.data.splice( requestLength, json.data.length );

            drawCallback(json);
        }
    }
};

// Register an API method that will empty the pipelined data, forcing an Ajax
// fetch on the next draw (i.e. `table.clearPipeline().draw()`)
$.fn.dataTable.Api.register( 'clearPipeline()', function () {
    return this.iterator( 'table', function ( settings ) {
        settings.clearCache = true;
    } );
} );

In /admin/admin.php, modify the initialization for the minted and bound arks datatables:

let mintedTable = jQuery('#minted_table').DataTable({
    dom: 'lBfrtip',
+    "ajax": $.fn.dataTable.pipeline( {
+        "url": "rest.php?db=<?php echo $_GET['db'] . "&op=minted" ?>",
+        "pages": 5 // number of pages to cache
+    }),
+    processing: true,
+    serverSide: true,
    ...

let boundTable = jQuery('#bound_table').DataTable({
    dom: 'lBfrtip',
+    "ajax": $.fn.dataTable.pipeline( {
+        "url": "rest.php?db=<?php echo $_GET['db'] . "&op=bound" ?>",
+        "pages": 5 // number of pages to cache
+    }),
+    processing: true,
+    serverSide: true,
    ...

Server-side processing

This function was used to make the sql queries in the subsequent functions (added in /admin/NoidLib/Custom/MysqlArkDB.php):

/**
 * Query
 * @param $query
 * @return false|string
 */
public function query($query) {
    if (!($this->handle instanceof mysqli)) {
        return FALSE;
    }
    $query = str_replace('<table-name>', $this->db_name, $query);
    if ($res = $this->handle->query($query)) {
        return $res->fetch_all(MYSQLI_ASSOC);
    }
    return FALSE;
}

Bounded Ark Table Processing

In /admin/rest.php:

/**
 * Return bound objects in database
 * @return false|string
 */
function selectBound() {
  GlobalsArk::$db_type = 'ark_mysql';
  if (!Database::exist($_GET['db'])) {
    die(json_encode('Database not found'));
  }

  $noid = Database::dbopen($_GET["db"], getcwd() . "/db/", DatabaseInterface::DB_WRITE);
  $firstpart = Database::$engine->get(Globals::_RR . "/firstpart");

  $columnIdx = $_GET['order'][0]['column'];
  $sortCol = $_GET['columns'][$columnIdx];
  $sortDir = $_GET['order'][0]['dir'] === 'asc' ? 'ASC' : 'DESC';
  $offset = $_GET['start'] ?? 0;
  $limit = $_GET['length'] ?? 50;
  $search = $_GET['search']['value'];

  if ($sortCol['data'] === 'redirect') {
    // Works for sorting on 'Number of Redirects' but does not work with searching
    $sql = "SELECT arks.* 
      FROM `<table-name>`
      AS arks 
      JOIN ( 
        SELECT bound.id, 
        COALESCE(redirected._value, 0) 
        AS _value 
        FROM ( 
          SELECT DISTINCT REGEXP_SUBSTR(_key, '^([^\\\\s]+)') AS id 
          FROM <table-name> 
          WHERE _key LIKE '$firstpart%' AND _key NOT REGEXP '(\\\\s:\\/c|\\\\sREDIRECT|\\\\sPID|\\\\sLOCAL_ID|\\\\sCOLLECTION)$'
        ) AS bound 
        LEFT JOIN ( 
          SELECT REGEXP_SUBSTR(_key, '^([^\\\\s]+)') AS id, _value 
          FROM <table-name> 
          WHERE _key LIKE '$firstpart%' AND _key REGEXP '\\\\sREDIRECT$'
        ) AS redirected ON bound.id = redirected.id 
        ORDER BY _value $sortDir 
        LIMIT $limit 
        OFFSET $offset 
      ) AS subquery 
      ON arks._key LIKE CONCAT(subquery.id, '%')
      AND arks._key NOT LIKE '%:\\/c'
      ORDER BY arks._key ASC;
    ";
    $sql_count = "SELECT COUNT(*) as num_filtered
      FROM (
        SELECT bound.id, 
        COALESCE(redirected._value, 0) 
        AS _value 
        FROM ( 
          SELECT DISTINCT REGEXP_SUBSTR(_key, '^([^\\\\s]+)') AS id 
          FROM <table-name> 
          WHERE _key LIKE '$firstpart%' AND _key NOT REGEXP '(\\\\s:\\/c|\\\\sREDIRECT|\\\\sPID|\\\\sLOCAL_ID|\\\\sCOLLECTION)$'
        ) AS bound 
        LEFT JOIN ( 
          SELECT REGEXP_SUBSTR(_key, '^([^\\\\s]+)') AS id, _value 
          FROM <table-name> 
          WHERE _key LIKE '$firstpart%' AND _key REGEXP '\\\\sREDIRECT$'
        ) AS redirected ON bound.id = redirected.id 
      ) AS filtered_ids;
    ";
  }
  else { // Sort on Ark IDs
    // Works for sorting on Ark IDs and searching (but searching is somewhat slow)
    $sql = "SELECT arks.* 
      FROM `<table-name>`
      AS arks 
      JOIN ( 
        SELECT * FROM (
          SELECT DISTINCT REGEXP_SUBSTR(_key, '^([^\\\\s]+)') AS id
          FROM `<table-name>`
          WHERE _key LIKE '$firstpart%' AND _key NOT REGEXP '(\\\\s:\\/c|\\\\sREDIRECT|\\\\sPID|\\\\sLOCAL_ID|\\\\sCOLLECTION)$' 
          INTERSECT
          SELECT DISTINCT REGEXP_SUBSTR(_key, '^([^\\\\s]+)') AS id
          FROM `<table-name>`
          WHERE _key LIKE '$firstpart%' AND _key NOT REGEXP '\\\\s:\\/c' AND (_key LIKE '%$search%' OR _value LIKE '%$search%')
        ) AS target
        ORDER BY id $sortDir 
        LIMIT $limit
        OFFSET $offset
      ) AS subquery 
      ON arks._key LIKE CONCAT(subquery.id, '%') 
      AND arks._key NOT LIKE '%:\\/c' 
      ORDER BY arks._key $sortDir;
    ";
    $sql_count = "SELECT COUNT(*) as num_filtered
      FROM (
        SELECT DISTINCT REGEXP_SUBSTR(_key, '^([^\\\\s]+)') AS id
        FROM `<table-name>`
        WHERE _key LIKE '$firstpart%' AND _key NOT REGEXP '(\\\\s:\\/c|\\\\sREDIRECT|\\\\sPID|\\\\sLOCAL_ID|\\\\sCOLLECTION)$' 
        INTERSECT
        SELECT DISTINCT REGEXP_SUBSTR(_key, '^([^\\\\s]+)') AS id
        FROM `<table-name>`
        WHERE _key LIKE '$firstpart%' AND _key NOT REGEXP '\\\\s:\\/c' AND (_key LIKE '%$search%' OR _value LIKE '%$search%')
      ) AS filtered_ids;
    ";
  }

  $rows = Database::$engine->query($sql);
  $num_filtered = Database::$engine->query($sql_count)[0]['num_filtered'] ?? 0;
  Database::dbclose($noid);

  $currentID = null;
  $result = array();
  $r = [];

  foreach ($rows as $row) {
    $row = (array)$row;

    if (isset($row['_key'])) {
      $key_data = preg_split('/\s+/', $row['_key']);
      if (!isset($currentID) || ($currentID !== $key_data[0])) {
        $currentID = $key_data[0];
        if (is_array($r) && count($r) > 0) {
          array_push($result, $r);
        }

        $r = [
          'select' => ' ',
          'id' => $currentID,
          'PID' => ' ',
          'LOCAL_ID' => ' ',
          'redirect' => 0,
        ];
      }

      if ($key_data[1] == 'PID')
        $r['PID'] = (!empty($row['_value'])) ? $row['_value'] : ' ';
      if ($key_data[1] == "LOCAL_ID")
        $r['LOCAL_ID'] = (!empty($row['_value'])) ? $row['_value'] : ' ';
      if ($key_data[1] == "REDIRECT")
        $r['redirect'] = (!empty($row['_value'])) ? $row['_value'] : ' ';
      $r['metadata'] = (!empty($r['metadata']) ? $r['metadata'] . "|" : "") . $key_data[1] .':' .$row['_value'];

      // check if server have https://, if not, go with http://
      if (empty($_SERVER['HTTPS'])) {
        $protocol = strtolower(substr($_SERVER["SERVER_PROTOCOL"], 0, strpos($_SERVER["SERVER_PROTOCOL"], '/'))) . '://';
      }
      else {
        $protocol = "https://";
      }

      $arkURL = $protocol . $_SERVER['HTTP_HOST'];
      // establish Ark URL
      // old format
      //$ark_url = rtrim($arkURL,"/") . "/ark:/" . $currentID;
      // new format
      $ark_url = rtrim($arkURL,"/") . "/ark:" . $currentID;
      $r['ark_url'] = (array_key_exists("ark_url", $r) && is_array($r['ark_url']) && count($r['ark_url']) > 1) ? $r['ark_url'] : [$ark_url];

      // if there is qualifier bound to an Ark ID, establish the link the link
      if ($key_data[1] !== "URL" && filter_var($row['_value'], FILTER_VALIDATE_URL)) {
        array_push($r['ark_url'], strtolower($ark_url . "/" . $key_data[1]));
      }
    }
  }

  if (!empty($r)) {
    array_push($result, $r);
  }

  // Have to do sorting here since the sql query does not always allow us to sort on some columns
  // e.g. when sorting on number of redirects, can't sort on the _value column since it contains
  //      values other than the number of redirects such as the PID or quantifier values
  if ($sortCol['data'] === 'redirect') {
    $redirect = array_column($result, "redirect");
    array_multisort($redirect, $sortDir === 'ASC' ? SORT_ASC : SORT_DESC, $result);
  }
  else {
    $id = array_column($result, "id");
    array_multisort($id, $sortDir === 'ASC' ? SORT_ASC : SORT_DESC, $result);
  }

  return json_encode(array(
    "data" => $result,
    "draw" => isset ( $_GET['draw'] ) ? intval( $_GET['draw'] ) : 0,
    "recordsTotal" => countBoundedArks(),
    "recordsFiltered" => $num_filtered,
  ));
}

function countBoundedArks() {
  GlobalsArk::$db_type = 'ark_mysql';
  if (!Database::exist($_GET['db'])) {
      die(json_encode('Database not found'));
  }
  $noid = Database::dbopen($_GET["db"], getcwd() . "/db/", DatabaseInterface::DB_WRITE);
  $firstpart = Database::$engine->get(Globals::_RR . "/firstpart");
  $result = Database::$engine->query("SELECT COUNT(
    DISTINCT REGEXP_SUBSTR(_key, '^([^\\\\s]+)')) AS total 
    FROM `<table-name>` 
    WHERE _key LIKE '$firstpart%' AND _key NOT REGEXP '(\\\\s:\\/c|\\\\sREDIRECT|\\\\sPID|\\\\sLOCAL_ID|\\\\sCOLLECTION)$';
  ");
  Database::dbclose($noid);
  return $result[0]['total'] ?? 0;
}

An alternative SQL query for sorting by number of redirects (no support for searching)

$sql = "SELECT arks.* 
  FROM `<table-name>`
  AS arks 
    JOIN ( 
    SELECT bound.id, 
    COALESCE(redirected._value, 0) 
    AS _value 
    FROM ( 
      SELECT DISTINCT REGEXP_SUBSTR(_key, '^([^\\\\s]+)') AS id 
      FROM <table-name> 
      WHERE _key LIKE '$firstpart%' AND _key NOT REGEXP '(\\\\s:\\/c|\\\\sREDIRECT|\\\\sPID|\\\\sLOCAL_ID|\\\\sCOLLECTION)$'
    ) AS bound 
    LEFT JOIN ( 
      SELECT REGEXP_SUBSTR(_key, '^([^\\\\s]+)') AS id, _value 
      FROM <table-name> 
      WHERE _key LIKE '$firstpart%' AND _key REGEXP '\\\\sREDIRECT$'
    ) AS redirected ON bound.id = redirected.id 
    ORDER BY _value $sortDir 
    LIMIT $limit 
    OFFSET $offset 
  ) AS subquery 
  ON arks._key LIKE CONCAT(subquery.id, '%')
  AND arks._key NOT LIKE '%:\\/c'
  ORDER BY arks._key ASC;
  ";

Minted Arks Table Processing

In /admin/rest.php:

function getMinted() {
  GlobalsArk::$db_type = 'ark_mysql';
  if (!Database::exist($_GET['db'])) {
    die(json_encode('Database not found'));
  }

  $noid = Database::dbopen($_GET["db"], getcwd() . "/db/", DatabaseInterface::DB_WRITE);
  $firstpart = Database::$engine->get(Globals::_RR . "/firstpart");

  if (isset($_GET['order'][0]['dir'])) {
    $sortDir = $_GET['order'][0]['dir'] === 'asc' ? 'ASC' : 'DESC';
  } else {
    $sortDir = 'ASC';
  }
  $offset = $_GET['start'] ?? 0;
  $limit = $_GET['length'] ?? 50;

  $sql = "SELECT REGEXP_SUBSTR(_key, '^([^\\\\s]+)') AS id, _value
    FROM `<table-name>`
    WHERE _key LIKE '$firstpart%' AND _key REGEXP '\\\\s:\/c$' 
    ORDER BY _key $sortDir
    LIMIT $limit
    OFFSET $offset;
  ";

  $result = Database::$engine->query($sql);
  Database::dbclose($noid);

  $json = array();
  foreach ($result as $row) {
    $urow = array();
    $urow['select'] = ' ';
    $urow['_key'] = $row['id'];

    $metadata = explode('|', $row['_value']);
    $urow['_value'] = date("F j, Y", $metadata[2]);
    array_push($json, (object)$urow);
  }

  $totalArks = countTotalArks();
  return json_encode(array(
    "data" => $json,
    "draw" => isset ( $_GET['draw'] ) ? intval( $_GET['draw'] ) : 0,
    "recordsTotal" => $totalArks,
    "recordsFiltered" => $totalArks,
  ));
}

function countTotalArks() {
  GlobalsArk::$db_type = 'ark_mysql';
  if (!Database::exist($_GET['db'])) {
      die(json_encode('Database not found'));
  }
  $noid = Database::dbopen($_GET["db"], getcwd() . "/db/", DatabaseInterface::DB_WRITE);
  $firstpart = Database::$engine->get(Globals::_RR . "/firstpart");
  $result = Database::$engine->query("SELECT COUNT(
    DISTINCT REGEXP_SUBSTR(_key, '^([^\\\\s]+)')) AS total 
    FROM `<table-name>` 
    WHERE _key LIKE '$firstpart%' and _key REGEXP '\\\\s:\\/c$';
  ");
  Database::dbclose($noid);
  return $result[0]['total'] ?? 0;
}