digitalutsc / islandora_lite_docs

Contains a Wiki with documentation for the UTSC Library's Islandora Lite System
GNU General Public License v3.0
2 stars 0 forks source link

Create reports for repository #85

Open kylehuynh205 opened 1 year ago

kylehuynh205 commented 1 year ago

This ticket is meant for the starter site: build https://github.com/Islandora-Devops/isle-dc with make starter_dev. It also requires content to be ingested with islandora_workbench.

TODO:

Use Views (potentially its aggregation feature, which may need another module to assist) to generate the reports below for objects in the repository:

Scalability:

Hint:

Started view (started by Nat):

uuid: 071cbac4-3daa-4092-b240-4d8110ee3277
langcode: en
status: true
dependencies:
  config:
    - field.storage.media.field_file_size
    - field.storage.media.field_mime_type
  module:
    - media
    - node
    - user
id: reports
label: Reports
module: views
description: ''
tag: ''
base_table: node_field_data
base_field: nid
display:
  default:
    id: default
    display_title: Default
    display_plugin: default
    position: 0
    display_options:
      title: Reports
      fields:
        nid:
          id: nid
          table: node_field_data
          field: nid
          relationship: none
          group_type: group
          admin_label: ''
          entity_type: node
          entity_field: nid
          plugin_id: field
          label: ID
          exclude: false
          alter:
            alter_text: false
            text: ''
            make_link: false
            path: ''
            absolute: false
            external: false
            replace_spaces: false
            path_case: none
            trim_whitespace: false
            alt: ''
            rel: ''
            link_class: ''
            prefix: ''
            suffix: ''
            target: ''
            nl2br: false
            max_length: 0
            word_boundary: true
            ellipsis: true
            more_link: false
            more_link_text: ''
            more_link_path: ''
            strip_tags: false
            trim: false
            preserve_tags: ''
            html: false
          element_type: ''
          element_class: ''
          element_label_type: ''
          element_label_class: ''
          element_label_colon: true
          element_wrapper_type: ''
          element_wrapper_class: ''
          element_default_classes: true
          empty: ''
          hide_empty: false
          empty_zero: false
          hide_alter_empty: true
          click_sort_column: value
          type: number_integer
          settings:
            thousand_separator: ''
            prefix_suffix: true
          group_column: value
          group_columns: {  }
          group_rows: true
          delta_limit: 0
          delta_offset: 0
          delta_reversed: false
          delta_first_last: false
          multi_type: separator
          separator: ', '
          field_api_classes: false
        title:
          id: title
          table: node_field_data
          field: title
          relationship: none
          group_type: group
          admin_label: ''
          entity_type: node
          entity_field: title
          plugin_id: field
          label: ''
          exclude: false
          alter:
            alter_text: false
            make_link: false
            absolute: false
            word_boundary: false
            ellipsis: false
            strip_tags: false
            trim: false
            html: false
          element_type: ''
          element_class: ''
          element_label_type: ''
          element_label_class: ''
          element_label_colon: true
          element_wrapper_type: ''
          element_wrapper_class: ''
          element_default_classes: true
          empty: ''
          hide_empty: false
          empty_zero: false
          hide_alter_empty: true
          click_sort_column: value
          type: string
          settings:
            link_to_entity: true
          group_column: value
          group_columns: {  }
          group_rows: true
          delta_limit: 0
          delta_offset: 0
          delta_reversed: false
          delta_first_last: false
          multi_type: separator
          separator: ', '
          field_api_classes: false
        name:
          id: name
          table: media_field_data
          field: name
          relationship: reverse__media__field_media_of
          group_type: group
          admin_label: ''
          entity_type: media
          entity_field: name
          plugin_id: field
          label: 'media name'
          exclude: false
          alter:
            alter_text: false
            text: ''
            make_link: false
            path: ''
            absolute: false
            external: false
            replace_spaces: false
            path_case: none
            trim_whitespace: false
            alt: ''
            rel: ''
            link_class: ''
            prefix: ''
            suffix: ''
            target: ''
            nl2br: false
            max_length: 0
            word_boundary: true
            ellipsis: true
            more_link: false
            more_link_text: ''
            more_link_path: ''
            strip_tags: false
            trim: false
            preserve_tags: ''
            html: false
          element_type: ''
          element_class: ''
          element_label_type: ''
          element_label_class: ''
          element_label_colon: false
          element_wrapper_type: ''
          element_wrapper_class: ''
          element_default_classes: true
          empty: ''
          hide_empty: false
          empty_zero: false
          hide_alter_empty: true
          click_sort_column: value
          type: string
          settings:
            link_to_entity: false
          group_column: value
          group_columns: {  }
          group_rows: true
          delta_limit: 0
          delta_offset: 0
          delta_reversed: false
          delta_first_last: false
          multi_type: separator
          separator: ', '
          field_api_classes: false
        field_file_size_value:
          id: field_file_size_value
          table: media__field_file_size
          field: field_file_size_value
          relationship: reverse__media__field_media_of
          group_type: sum
          admin_label: ''
          plugin_id: field
          label: 'File size (field_file_size)'
          exclude: false
          alter:
            alter_text: true
            text: '{{ field_file_size_value__value }} '
            make_link: false
            path: ''
            absolute: false
            external: false
            replace_spaces: false
            path_case: none
            trim_whitespace: false
            alt: ''
            rel: ''
            link_class: ''
            prefix: ''
            suffix: ''
            target: ''
            nl2br: false
            max_length: 0
            word_boundary: true
            ellipsis: true
            more_link: false
            more_link_text: ''
            more_link_path: ''
            strip_tags: false
            trim: false
            preserve_tags: ''
            html: false
          element_type: ''
          element_class: ''
          element_label_type: ''
          element_label_class: ''
          element_label_colon: true
          element_wrapper_type: ''
          element_wrapper_class: ''
          element_default_classes: true
          empty: ''
          hide_empty: false
          empty_zero: false
          hide_alter_empty: true
          click_sort_column: value
          type: number_integer
          settings:
            thousand_separator: ''
            prefix_suffix: false
          group_column: null
          group_columns: null
          group_rows: true
          delta_limit: 0
          delta_offset: 0
          delta_reversed: false
          delta_first_last: false
          multi_type: separator
          separator: ', '
          field_api_classes: false
          set_precision: false
          precision: 0
          decimal: .
          format_plural: 0
          format_plural_string: !!binary MQNAY291bnQ=
          prefix: ''
          suffix: ''
        field_mime_type:
          id: field_mime_type
          table: media__field_mime_type
          field: field_mime_type
          relationship: reverse__media__field_media_of
          group_type: group
          admin_label: ''
          plugin_id: field
          label: 'MIME type'
          exclude: false
          alter:
            alter_text: false
            text: ''
            make_link: false
            path: ''
            absolute: false
            external: false
            replace_spaces: false
            path_case: none
            trim_whitespace: false
            alt: ''
            rel: ''
            link_class: ''
            prefix: ''
            suffix: ''
            target: ''
            nl2br: false
            max_length: 0
            word_boundary: true
            ellipsis: true
            more_link: false
            more_link_text: ''
            more_link_path: ''
            strip_tags: false
            trim: false
            preserve_tags: ''
            html: false
          element_type: ''
          element_class: ''
          element_label_type: ''
          element_label_class: ''
          element_label_colon: true
          element_wrapper_type: ''
          element_wrapper_class: ''
          element_default_classes: true
          empty: ''
          hide_empty: false
          empty_zero: false
          hide_alter_empty: true
          click_sort_column: value
          type: string
          settings:
            link_to_entity: false
          group_column: value
          group_columns: {  }
          group_rows: true
          delta_limit: 0
          delta_offset: 0
          delta_reversed: false
          delta_first_last: false
          multi_type: separator
          separator: ', '
          field_api_classes: false
      pager:
        type: mini
        options:
          offset: 0
          items_per_page: 0
          total_pages: null
          id: 0
          tags:
            next: ››
            previous: ‹‹
          expose:
            items_per_page: false
            items_per_page_label: 'Items per page'
            items_per_page_options: '5, 10, 25, 50'
            items_per_page_options_all: false
            items_per_page_options_all_label: '- All -'
            offset: false
            offset_label: Offset
      exposed_form:
        type: basic
        options:
          submit_button: Apply
          reset_button: false
          reset_button_label: Reset
          exposed_sorts_label: 'Sort by'
          expose_sort_order: true
          sort_asc_label: Asc
          sort_desc_label: Desc
      access:
        type: perm
        options:
          perm: 'access content'
      cache:
        type: tag
        options: {  }
      empty: {  }
      sorts:
        nid:
          id: nid
          table: node_field_data
          field: nid
          relationship: none
          group_type: group
          admin_label: ''
          entity_type: node
          entity_field: nid
          plugin_id: standard
          order: DESC
          expose:
            label: ''
            field_identifier: ''
          exposed: false
      arguments: {  }
      filters: {  }
      style:
        type: table
        options:
          grouping: {  }
          row_class: ''
          default_row_class: true
          columns:
            nid: nid
            title: title
            name: name
            field_file_size_value: field_file_size_value
            field_mime_type: field_mime_type
          default: '-1'
          info:
            nid:
              sortable: false
              default_sort_order: asc
              align: ''
              separator: ''
              empty_column: false
              responsive: ''
            title:
              sortable: false
              default_sort_order: asc
              align: ''
              separator: ''
              empty_column: false
              responsive: ''
            name:
              sortable: false
              default_sort_order: asc
              align: ''
              separator: ''
              empty_column: false
              responsive: ''
            field_file_size_value:
              sortable: false
              default_sort_order: asc
              align: ''
              separator: ''
              empty_column: false
              responsive: ''
            field_mime_type:
              sortable: false
              default_sort_order: asc
              align: ''
              separator: ''
              empty_column: false
              responsive: ''
          override: true
          sticky: false
          summary: ''
          empty_table: false
          caption: ''
          description: ''
      row:
        type: fields
        options:
          default_field_elements: true
          inline: {  }
          separator: ''
          hide_empty: false
      query:
        type: views_query
        options:
          query_comment: ''
          disable_sql_rewrite: false
          distinct: false
          replica: false
          query_tags: {  }
      relationships:
        reverse__media__field_media_of:
          id: reverse__media__field_media_of
          table: node_field_data
          field: reverse__media__field_media_of
          relationship: none
          group_type: group
          admin_label: field_media_of
          entity_type: node
          plugin_id: entity_reverse
          required: false
      group_by: false
      header: {  }
      footer: {  }
      display_extenders: {  }
    cache_metadata:
      max-age: -1
      contexts:
        - 'languages:language_content'
        - 'languages:language_interface'
        - url.query_args
        - 'user.node_grants:view'
        - user.permissions
      tags:
        - 'config:field.storage.media.field_file_size'
        - 'config:field.storage.media.field_mime_type'
  page_1:
    id: page_1
    display_title: Page
    display_plugin: page
    position: 1
    display_options:
      display_extenders:
        matomo:
          enabled: false
          keyword_gets: ''
          keyword_behavior: first
          keyword_concat_separator: ' '
          category_behavior: none
          category_gets: ''
          category_concat_separator: ' '
          category_fallback: ''
          category_facets: {  }
          category_facets_concat_separator: ', '
      path: reports
    cache_metadata:
      max-age: -1
      contexts:
        - 'languages:language_content'
        - 'languages:language_interface'
        - url.query_args
        - 'user.node_grants:view'
        - user.permissions
      tags:
        - 'config:field.storage.media.field_file_size'
        - 'config:field.storage.media.field_mime_type'
kylehuynh205 commented 1 year ago

Amy tried the module https://github.com/mjordan/islandora_repository_reports and found that it covers the following use cases:

But not:

After the demo, we saw that the module provides its reports as charts, which can be exported to CSV. However, the open question is whether we need to include the counts for sub-collections, because we are not sure how often this use case is needed. There is also an ongoing ticket related to this: https://github.com/mjordan/islandora_repository_reports/issues/24

Amy is going to fork the module and study the code a bit ONLY to see if we can extend this module for now, but there is no development involved yet.

amym-li commented 1 year ago

I looked through the module and found 2 straightforward ways of extending it:

Approach 1: Adding a new report type to the main module can be done by creating a new datasource file under /src/Plugin/DataSource and then adding it to the services.yml file

Approach 2: Creating a separate module for the new report type like the modules under /modules which will need to be enabled separately on the site.


Using approach 1, I created a new report type for displaying the number of media, grouped by collection (missing use case mentioned previously) [forked repo: amym-li/islandora_repository_reports]. However, it has the same issue where it does not include media counts for subcollections or compound objects.

Example: This graph displays the media counts for the top-level collections. There are ~323 media files in the repository total, but the graph only counts 10 total since it only counts the pieces of media that are immediate children of the collection.

image

amym-li commented 1 year ago

I pushed the new media-count-by-collection report type to a branch at digitalutsc/islandora_repository_reports. This report type also has the issue where it only counts the "first layer" of media (i.e. ignores media in subcollections and compound objects).


Some suggestions to resolve this issue:

Suggestion 1:

Recursively create a list of descendants and then sum up the node/media counts for each descendant.

In /src/Utils.php,

+  /**
+   * Gets the node ids of all descendants of a given node.
+   *
+   * Walks the field_member_of hierarchy level by level, issuing one entity
+   * query per level, so a node that is reachable through several parents is
+   * only looked up once.
+   *
+   * @param string|int|null $parent_id
+   *   The node whose descendants are wanted.
+   * @param array $discovered
+   *   Node ids to treat as already visited. They are never returned and the
+   *   traversal does not continue through them.
+   *
+   * @return array
+   *   The node ids of $parent_id's descendants, without duplicates.
+   */
+  public function getDescendants($parent_id, $discovered = []) {
+    if (is_null($parent_id)) {
+      return [];
+    }
+
+    // Key the visited set by node id so membership checks are cheap and
+    // "5" and 5 are treated as the same node.
+    $visited = array_fill_keys($discovered, TRUE);
+    $visited[$parent_id] = TRUE;
+
+    $descendants = [];
+    $frontier = [$parent_id];
+    while (!empty($frontier)) {
+      // Fetch the members of every node on the current level in one query.
+      // accessCheck() has to be set explicitly: omitting it is deprecated in
+      // Drupal 9.2 and throws an exception in Drupal 10. TRUE keeps the
+      // access-checked behaviour that used to be the implicit default.
+      $members = \Drupal::entityQuery('node')
+        ->accessCheck(TRUE)
+        ->condition('field_member_of', $frontier, 'IN')
+        ->execute();
+
+      // Only nodes seen for the first time are reported and expanded; this
+      // also guarantees termination if the hierarchy contains a cycle.
+      $frontier = [];
+      foreach ($members as $member_id) {
+        if (!isset($visited[$member_id])) {
+          $visited[$member_id] = TRUE;
+          $descendants[] = $member_id;
+          $frontier[] = $member_id;
+        }
+      }
+    }
+
+    return $descendants;
+  }

Example usage in /src/Plugin/DataSource/Collection.php,

public function getData() {
+   // Builds the node count per collection, adding the member counts of every
+   // descendant returned by getDescendants(), and mirrors the totals into
+   // $this->csvData.
    $utilities = \Drupal::service('islandora_repository_reports.utilities');
    if (count($utilities->getSelectedContentTypes()) == 0) {
      return [];
    }

    $entity_type_manager = \Drupal::service('entity_type.manager');
    $node_storage = $entity_type_manager->getStorage('node');
    $result = $node_storage->getAggregateQuery()
      ->groupBy('field_member_of')
      ->aggregate('field_member_of', 'COUNT')
      ->condition('type', $utilities->getSelectedContentTypes(), 'IN')
      ->execute();
+   // Parent id of each aggregate row, in the same order as $result. Built
+   // once here instead of on every iteration of the inner loop.
+   $parent_ids = array_column($result, 'field_member_of_target_id');
    $collection_counts = [];
    foreach ($result as $collection) {
      if (!is_null($collection['field_member_of_target_id'])) {
        if ($collection_node = \Drupal::entityTypeManager()->getStorage('node')->load($collection['field_member_of_target_id'])) {
          if ($utilities->nodeIsCollection($collection_node)) {
            $collection_counts[$collection_node->getTitle()] = $collection['field_member_of_count'];

+           // Get all child nodes belonging to this collection
+           $children = $utilities->getDescendants($collection['field_member_of_target_id']);
+
+           // Sum up the member_of counts for all children
+           foreach ($children as $child_id) {
+             $child_result = array_search($child_id, $parent_ids);
+             // Descendants without members of their own (e.g. leaf nodes)
+             // have no aggregate row. array_search() then returns FALSE,
+             // which would otherwise be read as $result[0] and add the
+             // first row's count by mistake.
+             if ($child_result !== FALSE) {
+               $collection_counts[$collection_node->getTitle()] += $result[$child_result]['field_member_of_count'];
+             }
+           }

          }
        }
      }
    }

    $this->csvData = [[t('Collection'), 'Count']];
    foreach ($collection_counts as $collection => $count) {
      $this->csvData[] = [$collection, $count];
    }

    return $collection_counts;
  }

Example usage in /src/Plugin/DataSource/MediaByCollection.php,

public function getData() {
+   // Builds the media count per collection, adding the media counts of every
+   // descendant returned by getDescendants(), and mirrors the totals into
+   // $this->csvData.
    $utilities = \Drupal::service('islandora_repository_reports.utilities');

    $entity_type_manager = \Drupal::service('entity_type.manager');
    $media_storage = $entity_type_manager->getStorage('media');
    $result = $media_storage->getAggregateQuery()
      ->groupBy('field_media_of')
      ->aggregate('field_media_of', 'COUNT')
      ->execute();
+   // Node id of each aggregate row, in the same order as $result. Built
+   // once here instead of on every iteration of the inner loop.
+   $parent_ids = array_column($result, 'field_media_of_target_id');
    $media_counts = [];
    foreach ($result as $collection) {
      if (!is_null($collection['field_media_of_target_id'])) {
        if ($collection_node = \Drupal::entityTypeManager()->getStorage('node')->load($collection['field_media_of_target_id'])) {
          if ($utilities->nodeIsCollection($collection_node)) {
            $media_counts[$collection_node->getTitle()] = $collection['field_media_of_count'];

+           // Get all child nodes belonging to this collection
+           $children = $utilities->getDescendants($collection['field_media_of_target_id']);
+
+           // Sum up the media_of counts for all children
+           foreach ($children as $child_id) {
+             $child_result = array_search($child_id, $parent_ids);
+             // Descendants with no media attached have no aggregate row.
+             // array_search() then returns FALSE, which would otherwise be
+             // read as $result[0] and add the first row's count by mistake.
+             if ($child_result !== FALSE) {
+               $media_counts[$collection_node->getTitle()] += $result[$child_result]['field_media_of_count'];
+             }
+           }

          }
        }
      }
    }

    $this->csvData = [[t('Collection'), 'Count']];
    foreach ($media_counts as $collection => $count) {
      $this->csvData[] = [$collection, $count];
    }

    return $media_counts;
  }

Suggestion 2:

Use a solr query to get a list of all descendant nodes.

Solr has an itm_field_descendant_of field that stores a list of the ids of the node's ancestors. (See https://github.com/mjordan/islandora_repository_reports/issues/24#issuecomment-631474177)

Example query: /select?q=itm_field_descendant_of:99 returns all nodes that have node 99 as an ancestor

Then loop through returned nodes and total the node/media counts.

kstapelfeldt commented 1 year ago

Slight modification: