webiny / webiny-js

Open-source serverless enterprise CMS. Includes a headless CMS, page builder, form builder, and file manager. Easy to customize and expand. Deploys to AWS.
https://www.webiny.com
Other
7.24k stars 590 forks source link

Headless CMS queries are significantly slower with AACL #4104

Open tonsho opened 2 months ago

tonsho commented 2 months ago

Version

5.39.3

Operating System

n/a

Browser

n/a

What are the steps to reproduce this bug?

  1. Enable AACL
  2. Prepare content that references other models
  3. Query
    query listParentContents {
      listParentContents(limit: 100) {
        data {
          name
          children {
            name
          }
        }
      }
    }

What is the expected behavior?

Queries should return as quickly as before the introduction of AACL.

What do you see instead?

Below are the results of preparing equivalent content for each version and measuring query response times.

v5.39.3 (DynamoDB, Lambda: 512 MB)

| Authorization Type | Query Time |
| --- | --- |
| Admin User | 2.46 sec |
| API Key (AACL) | 9.06 sec |

v5.38.1 (DynamoDB + Elasticsearch (t3.medium.search x 2), Lambda: 1024 MB)

| Authorization Type | Query Time |
| --- | --- |
| Admin User | 1.92 sec |
| API Key (AACL) | 5.61 sec |

v5.36.0 (DynamoDB + Elasticsearch (m5.large.search x 2), Lambda: 512 MB)

| Authorization Type | Query Time |
| --- | --- |
| Admin User | 0.88 sec |
| API Key (before AACL) | 0.86 sec |

Image 1

Additional information

The following is a script for preparing content for the above measurements.

# ------------------------
# Environment Settings
# ------------------------
import os

# Each setting can be supplied through an environment variable so that the
# endpoint and the admin token do not have to be written into the script.
# The literals are placeholder fallbacks used when the variable is unset.
graphql_endpoint = os.environ.get('WEBINY_GRAPHQL_ENDPOINT', 'https://***.cloudfront.net') # TODO: Set GraphQL API endpoint
locale = os.environ.get('WEBINY_LOCALE', 'en-US') # TODO: Set locale
authorization = os.environ.get('WEBINY_AUTHORIZATION', 'eyJraWQiOiJoZWJPQ1JFdGIyb...') # TODO: Set Admin User's token

# ------------------------
# Log Settings
# ------------------------
import sys
import logging
# One level is applied to both the root logger and its stdout handler.
# Use logging.DEBUG instead to also see the messages emitted via logger.debug.
log_level = logging.INFO

# Handler that writes log records to standard output.
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(log_level)

# Root logger: attach the handler and apply the same threshold.
logger = logging.getLogger()
logger.setLevel(log_level)
logger.addHandler(handler)

# ------------------------
# Setup Models and Data
# ------------------------
import json
import logging
import requests

logger = logging.getLogger()

class Webiny():
    """Small client for the Webiny GraphQL endpoints used by this script.

    Every request is a POST carrying the configured ``Authorization`` value
    and the ``X-Tenant: root`` header. Each public method returns the ``data``
    value of the resolver response and raises when the response reports an
    error.
    """

    def __init__(self, graphql_endpoint, locale, authrorization):
        """Store the connection settings.

        Args:
            graphql_endpoint: Base URL to which the API path is appended.
            locale: Locale code used in the CMS manage API path.
            authrorization: Value sent as the ``Authorization`` header. The
                misspelled name is kept so existing callers keep working.
        """
        self.graphql_endpoint = graphql_endpoint
        self.locale = locale
        self.authrorization = authrorization

    def call_cms_api(self, query, variables):
        """Run *query* against ``cms/manage/<locale>`` and return its data."""
        return self._call_api(f'cms/manage/{self.locale}', query, variables)

    def call_main_api(self, query, variables):
        """Run *query* against the ``graphql`` path and return its data."""
        return self._call_api('graphql', query, variables)

    def _call_api(self, path, query, variables):
        """POST a GraphQL document to *path* and return the unwrapped data.

        Args:
            path: Path appended to ``graphql_endpoint``.
            query: GraphQL document text.
            variables: Variables for the document, or ``None``.

        Raises:
            Exception: If the response has a top-level ``errors`` entry, if
                the resolver result carries a non-empty ``error``, or if the
                response does not have the expected shape.
        """
        ret = requests.post(
            url=f'{self.graphql_endpoint}/{path}',
            headers={
                'Content-Type': 'application/json',
                'Authorization': self.authrorization,
                'X-Tenant': 'root',
            },
            json={
                'query': query,
                'variables': variables,
            })
        ret_json = ret.json()
        logger.debug('mutation called: %s', json.dumps(ret_json))
        if 'errors' in ret_json:
            raise Exception(ret_json['errors'])

        return self._get_data_value(ret_json['data'])

    def _get_data_value(self, data):
        """Return the ``data`` value of the first nested resolver result.

        Descends through the first value of each mapping until one holding a
        ``data`` key is found, which unwraps both ``{op: {data, error}}`` and
        ``{namespace: {op: {data, error}}}`` responses.

        Raises:
            Exception: If no mapping with a ``data`` key is found, or if the
                one found also holds a non-empty ``error``.
        """
        envelope = data
        while not (isinstance(envelope, dict) and 'data' in envelope):
            if not isinstance(envelope, dict) or not envelope:
                raise Exception(f'Unexpected response shape: {data!r}')
            envelope = next(iter(envelope.values()))

        # Every query in this class asks for `error`; surface it here instead
        # of handing the caller a `None` that fails later on `ret['id']`.
        error = envelope.get('error')
        if error:
            raise Exception(error)

        return envelope['data']

    def create_content_model_group(self, data):
        """Create a content model group and return ``{'id': ...}``."""
        query = '''
            mutation createContentModelGroup($data: CmsContentModelGroupInput!) {
                createContentModelGroup(data: $data) {
                    data {
                        id
                    }
                    error {
                        message
                        code
                        data
                    }
                }
            }
        '''
        return self.call_cms_api(query, {'data': data})

    def create_content_model(self, data):
        """Create a content model and return ``{'modelId': ...}``."""
        query = '''
            mutation createContentModel($data: CmsContentModelCreateInput!) {
                createContentModel(data: $data) {
                    data {
                        modelId 
                    }
                    error {
                        message
                        code
                        data
                    }
                }
            }
        '''
        return self.call_cms_api(query, {'data': data})

    def register_content(self, model, data):
        """Create an entry of *model* and publish it.

        Args:
            model: Name spliced into the ``create<model>`` and
                ``publish<model>`` mutation names.
            data: Input for the create mutation.

        Returns:
            The ``data`` value of the publish mutation (``{'id': ...}``).
        """
        # Doubled braces are literal braces; only {model} is substituted.
        create_base = '''
            mutation MyMutation($data: {model}Input!) {{
                create{model}(data: $data) {{
                    data {{
                        id
                    }}
                    error {{
                        message
                        code
                        data
                    }}
                }}
            }}
        '''

        publish_base = '''
            mutation MyMutation($revision: ID!) {{
                publish{model}(revision: $revision) {{
                    data {{
                        id
                    }}
                    error {{
                        message
                        code
                        data
                    }}
                }}
            }}
        '''

        created = self.call_cms_api(create_base.format(model=model), {'data': data})
        revision_id = created['id']
        return self.call_cms_api(publish_base.format(model=model), {'revision': revision_id})

    def list_contents(self, plural_model_name):
        """Return the ``id``/``entryId`` records from ``list<plural_model_name>``."""
        query_base = '''
            query MyQuery {{
                list{plural_model_name} {{
                    data {{
                        id
                        entryId
                    }}
                    error {{
                        message
                        code
                        data
                    }}
                }}
            }}
        '''
        return self.call_cms_api(query_base.format(plural_model_name=plural_model_name), None)

    def remove_content(self, model, revision_id):
        """Delete revision *revision_id* of *model* and return the mutation's data."""
        delete_base = '''
            mutation MyMutation($revision: ID!) {{
                delete{model}(revision: $revision) {{
                    data
                    error {{
                        message
                        code
                        data
                    }}
                }}
            }}
        '''

        return self.call_cms_api(delete_base.format(model=model), {'revision': revision_id})

    def create_api_key(self, data):
        """Create an API key through the main API and return ``{'token': ...}``."""
        query = '''
            mutation createApiKey($data: SecurityApiKeyInput!) {
                security{
                    createApiKey(data: $data) {
                        data {
                            token
                        }
                        error {
                            message
                            code
                            data
                        }
                    }
                }
            }
        '''
        return self.call_main_api(query, {'data': data})

# ------------------------
# Content Models and API Key
# ------------------------
# Client authenticated with the admin token; first create the group that the
# two test models are assigned to.
webiny = Webiny(graphql_endpoint, locale, authorization)
ret = webiny.create_content_model_group({
    'name': 'TestContentModelGroup',
    'icon': 'far/star',
})
conent_model_group_id = ret['id']
print('Content Model Group:', conent_model_group_id)

# Child model: one text field "name", referenced by titleFieldId.
child_content_model = {
    'name': 'ChildContent',
    'singularApiName': 'ChildContent',
    'pluralApiName': 'ChildContents',
    'group': conent_model_group_id,
    'layout': [['tame087b']],
    'fields': [
        {
            'id': 'tame087b',
            'type': 'text',
            'label': 'name',
            'fieldId': 'name',
            'renderer': {'name': 'text-input'},
        },
    ],
    'tags': ['type:model'],
    'titleFieldId': 'name',
}

# Create the model; its modelId is needed by the parent model's ref field.
ret = webiny.create_content_model(child_content_model)
child_content_model_id = ret['modelId']
print(' - ChildContentModel:', child_content_model_id)

# Parent model: a text field "name" plus a multi-value ref field "children"
# whose allowed model is the child model created above it in this script.
parent_content_model = {
    'name': 'ParentContent',
    'singularApiName': 'ParentContent',
    'pluralApiName': 'ParentContents',
    'group': conent_model_group_id,
    'layout': [['tame087b'], ['o43tr6l3']],
    'fields': [
        {
            'id': 'tame087b',
            'type': 'text',
            'label': 'name',
            'fieldId': 'name',
            'renderer': {'name': 'text-input'},
        },
        {
            'id': 'o43tr6l3',
            'fieldId': 'children',
            'type': 'ref',
            'label': 'children',
            'multipleValues': True,
            'renderer': {'name': 'ref-advanced-multiple'},
            'settings': {
                'models': [{'modelId': child_content_model_id}],
            },
        },
    ],
    'tags': ['type:model'],
    'titleFieldId': 'name',
}

ret = webiny.create_content_model(parent_content_model)
parent_content_model_id = ret['modelId']
print(' - ParentContentModel:', parent_content_model_id)

# API key with "r" access to the test group (for the configured locale),
# to content models and to content entries. Its token is what the
# benchmark uses for the "API Key" measurement.
ret = webiny.create_api_key({
    'name': 'Read API Key',
    'description': 'Read API Key',
    'permissions': [
        {'name': 'content.i18n'},
        {'name': 'cms.endpoint.read'},
        {
            'name': 'cms.contentModelGroup',
            'groups': {locale: [conent_model_group_id]},
            'rwd': 'r',
        },
        {'name': 'cms.contentModel', 'rwd': 'r'},
        {'name': 'cms.contentEntry', 'rwd': 'r', 'pw': 'null'},
    ],
})
api_key = ret['token']
print('API Key:', api_key)

# ------------------------
# Contents
# ------------------------
# Register the benchmark data exactly once: one published child entry and
# 100 published parent entries that each reference it. Running this section
# a second time would add another child and another 100 parents.
webiny = Webiny(graphql_endpoint, locale, authorization)
print('Contents')
ret = webiny.register_content('ChildContent', {
    'name': 'child',
})
child_content_id = ret['id']
print(' - ChildContent:', child_content_id)

# One create + publish round trip per parent; the published ids are collected.
parent_content_ids = [
    webiny.register_content('ParentContent', {
        'name': f'parent {i}',
        'children': [{
            'modelId': child_content_model_id,
            'id': child_content_id,
        }],
    })['id'] for i in range(1, 101)
]
print(' - ParentContent:', parent_content_ids)

# ------------------------
# Performance Check
# ------------------------
import datetime
import logging
import requests
import statistics

logger = logging.getLogger()

def list_parent_contents(authorization):
    """Time one ``listParentContents`` query against the CMS read API.

    The request goes to ``<graphql_endpoint>/cms/read/<locale>`` with the
    ``X-Tenant: root`` header. The response body is only logged at DEBUG
    level; it is not inspected.

    Args:
        authorization: Value sent as the ``Authorization`` header.

    Returns:
        datetime.timedelta: Time spent inside the ``requests.post`` call.
    """
    # perf_counter is monotonic and high-resolution, so the measurement is
    # not affected by wall-clock adjustments the way datetime.now() is.
    import time

    query = '''
        query listParentContents {
          listParentContents(limit: 100) {
            data {
              name
              children {
                name
              }
            }
          }
        }
    '''
    start = time.perf_counter()
    ret = requests.post(
        url=f'{graphql_endpoint}/cms/read/{locale}',
        headers={
            'Content-Type': 'application/json',
            'Authorization': authorization,
            'X-Tenant': 'root',
        },
        json={
            'query': query,
        })
    # Callers use .total_seconds(), so keep returning a timedelta.
    elapsed = datetime.timedelta(seconds=time.perf_counter() - start)
    logger.debug(ret.content.decode())
    logger.info('### elapsed %s', elapsed)
    return elapsed

def trim_mean(data):
    """Return the mean of *data* with its smallest and largest value dropped.

    With fewer than three samples nothing would remain after trimming, so
    the plain mean of all samples is returned instead.

    Args:
        data: Iterable of numbers.

    Returns:
        The trimmed mean.

    Raises:
        statistics.StatisticsError: If *data* is empty.
    """
    sorted_data = sorted(data)
    if len(sorted_data) < 3:
        return statistics.mean(sorted_data)
    # Trim one sample from each end.
    return statistics.mean(sorted_data[1:-1])

# Five timed requests authenticated with the admin user's token.
print('--- Admin User ---')
durations = [
    list_parent_contents(authorization).total_seconds()
    for _attempt in range(5)
]
auth_mean = trim_mean(durations)
print('trim mean:', auth_mean)

# The same five timed requests authenticated with the read-only API key.
print('--- API Key ---')
durations = [
    list_parent_contents(api_key).total_seconds()
    for _attempt in range(5)
]
ak_mean = trim_mean(durations)
print('trim mean:', ak_mean)

Possible solution

When a model references other models, the permissions for the entries retrieved by the list query are verified one by one in Lambda; with AACL enabled, this is the step that becomes very time-consuming.