Add query-level caching to enhance performance

alixinne commented 8 years ago

Context

I have encountered performance issues when dealing with a project containing 35k strings in 13 translations. The project is using monolingual PO files.

Actual behaviour

The import process took around 25 minutes (2min per translation) to complete. Deleting the newly imported component was in the 70-75 minutes range.

Performance analysis

During the import process, the python process was using more than 95% CPU time, while the postgres process used the remaining 5%. Latency of disk I/O operations did not excess 10ms, so storage didn't seem to be the bottleneck. The same behavior was observed when deleting the component.

Running PostgreSQL with the pg_stat_statements extension enabled revealed that when deleting the project, the same 3 queries to fetch project, language and component properties were run multiple times per translation unit being deleted. See project_delete_request_stats.csv.txt

We can also guess that the 4th request to fetch translation info is called for every deleted unit, which should not be necessary.

Possible fix

Adding query-level caching through django-cache-machine improved performance by 50% when deleting a project (between 30 and 35 minutes). Although django-cache-machine provides automatic cache invalidation when updating models, more in-depth testing is required to ensure this doesn't break anything.

Adding django-cache-machine to Weblate can be done according to the docs:

diff --git a/requirements.txt b/requirements.txt
index a6c659e..38003cb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,3 +10,4 @@ django-crispy-forms>=1.4.0
 oauthlib>=0.6.3
 django_compressor>=1.5.0,!=2.0
 djangorestframework>=3.3
+django-cache-machine>=0.9
\ No newline at end of file
diff --git a/weblate/lang/models.py b/weblate/lang/models.py
index 5a163f4..ebb7f4e 100644
--- a/weblate/lang/models.py
+++ b/weblate/lang/models.py
@@ -39,6 +39,7 @@ from weblate.trans.site import get_site_url
 from weblate.appsettings import SIMPLIFY_LANGUAGES
 from weblate.logger import LOGGER

+from caching.base import CachingManager, CachingMixin

 def get_plural_type(code, pluralequation):
     '''
@@ -82,7 +83,7 @@ def get_english_lang():
         return 65535

-class LanguageManager(models.Manager):
+class LanguageManager(CachingManager):
     # pylint: disable=W0232

     _default_lang = None
@@ -367,7 +368,7 @@ def setup_lang(sender, **kwargs):

 @python_2_unicode_compatible
-class Language(models.Model, PercentMixin):
+class Language(models.Model, PercentMixin, CachingMixin):
     PLURAL_CHOICES = (
         (data.PLURAL_NONE, 'None'),
         (data.PLURAL_ONE_OTHER, 'One/other (classic plural)'),
diff --git a/weblate/trans/models/project.py b/weblate/trans/models/project.py
index c341210..d43d600 100644
--- a/weblate/trans/models/project.py
+++ b/weblate/trans/models/project.py
@@ -42,6 +42,8 @@ from weblate.trans.mixins import PercentMixin, URLMixin, PathMixin
 from weblate.trans.site import get_site_url
 from weblate.trans.data import data_dir

+from caching.base import CachingManager, CachingMixin
+

 def get_acl_cache_key(user):
     if user is None or user.id is None:
@@ -55,7 +57,7 @@ def get_acl_cache_key(user):
     return 'acl-project-{0}'.format(user_id)

-class ProjectManager(models.Manager):
+class ProjectManager(CachingManager):
     # pylint: disable=W0232

     def all_acl(self, user):
@@ -87,7 +89,7 @@ class ProjectManager(models.Manager):

 @python_2_unicode_compatible
-class Project(models.Model, PercentMixin, URLMixin, PathMixin):
+class Project(models.Model, PercentMixin, URLMixin, PathMixin, CachingMixin):
     name = models.CharField(
         verbose_name=ugettext_lazy('Project name'),
         max_length=100,
diff --git a/weblate/trans/models/subproject.py b/weblate/trans/models/subproject.py
index 1add7e5..dcff3d8 100644
--- a/weblate/trans/models/subproject.py
+++ b/weblate/trans/models/subproject.py
@@ -69,6 +69,7 @@ from weblate.accounts.models import (
 )
 from weblate.trans.models.changes import Change

+from caching.base import CachingManager, CachingMixin

 DEFAULT_COMMIT_MESSAGE = (
     'Translated using Weblate (%(language_name)s)\n\n'
@@ -88,7 +89,7 @@ MERGE_CHOICES = (
 )

-class SubProjectManager(models.Manager):
+class SubProjectManager(CachingManager):
     # pylint: disable=W0232

     def get_linked(self, val):
@@ -100,7 +101,7 @@ class SubProjectManager(models.Manager):

 @python_2_unicode_compatible
-class SubProject(models.Model, PercentMixin, URLMixin, PathMixin):
+class SubProject(models.Model, PercentMixin, URLMixin, PathMixin, CachingMixin):
     name = models.CharField(
         verbose_name=ugettext_lazy('Component name'),
         max_length=100,
diff --git a/weblate/trans/models/translation.py b/weblate/trans/models/translation.py
index 39b0c1a..83a45bf 100644
--- a/weblate/trans/models/translation.py
+++ b/weblate/trans/models/translation.py
@@ -51,8 +51,10 @@ from weblate.accounts.models import notify_new_string, get_author_name
 from weblate.trans.models.changes import Change
 from weblate.trans.checklists import TranslationChecklist

+from caching.base import CachingManager, CachingMixin

-class TranslationManager(models.Manager):
+
+class TranslationManager(CachingManager):
     # pylint: disable=W0232

     def check_sync(self, subproject, lang, code, path, force=False,
@@ -114,7 +116,7 @@ class TranslationManager(models.Manager):

 @python_2_unicode_compatible
-class Translation(models.Model, URLMixin, PercentMixin, LoggerMixin):
+class Translation(models.Model, URLMixin, PercentMixin, LoggerMixin, CachingMixin):
     subproject = models.ForeignKey('SubProject')
     language = models.ForeignKey(Language)
     revision = models.CharField(max_length=100, default='', blank=True)

Note that some pages that uses QuerySet's "values" which is not yet supported by django-cache-machine fail to load. A temporary fix to the cache-machine is as follows:

diff --git a/caching/base.py b/caching/base.py
index 9e0e324..9ffbc82 100644
--- a/caching/base.py
+++ b/caching/base.py
@@ -111,11 +111,14 @@ class CacheMachine(object):
         try:
             while True:
                 obj = next(iterator)
-                obj.from_cache = False
-                to_cache.append(obj)
+                if hasattr(obj, 'from_cache'):
+                    obj.from_cache = False
+                    to_cache.append(obj)
+                else:
+                    to_cache = None
                 yield obj
         except StopIteration:
-            if to_cache or config.CACHE_EMPTY_QUERYSETS:
+            if to_cache is not None and (to_cache or config.CACHE_EMPTY_QUERYSETS):
                 self.cache_objects(to_cache, query_key)
             raise

Server configuration

The testing server is a 4-CPU 2.6GHz, with 6GB RAM setup with:

CentOS 6-8.el6.centos.12.3.x86_64
nginx 1.10.1
uWSGI 2.0.13.1 in emperor mode, using Unix domain sockets with nginx
memcached 1.1.4, default config
PostgreSQL 9.5, using Unix domain sockets, settings based on pg_tune with some changes:

default_statistics_target = 50 # pgtune wizard 2016-06-20
maintenance_work_mem = 352MB # pgtune wizard 2016-06-20
constraint_exclusion = on # pgtune wizard 2016-06-20
checkpoint_completion_target = 0.9 # pgtune wizard 2016-06-20
effective_cache_size = 4GB # pgtune wizard 2016-06-20
work_mem = 36MB # pgtune wizard 2016-06-20
wal_buffers = 16MB # pgtune wizard 2016-06-20
wal_writer_delay = 5000ms
synchronous_commit = off
#checkpoint_segments = 16 # pgtune wizard 2016-06-20
max_wal_size = 512MB
shared_buffers = 1024MB # pgtune wizard 2016-06-20
max_connections = 80 # pgtune wizard 2016-06-20

Output of ./manage.py list_versions:

 * Weblate weblate-2.6-5-g88b9c27
 * Python 2.7.6
 * Django 1.9.7
 * six 1.10.0
 * python-social-auth 0.2.19
 * Translate Toolkit 2.0.0b2
 * Whoosh 2.7.4
 * Git 1.7.1
 * Pillow (PIL) 1.1.7
 * dateutil 2.5.3
 * lxml 3.6.0
 * django-crispy-forms 1.6.0
 * compressor 1.6
 * djangorestframework 3.3.3
 * pytz 2016.4
 * pyuca N/A
 * Database backends: django.db.backends.postgresql_psycopg2

nijel commented 8 years ago

How did you do the deletion? If using admin interface, 7c103164371e8d6e9898d04e8e671d0ef4aabdc6 might have helped in this case.

Anyway deleting indeed shouldn't be that slow, these queries seem to come from string representation of the translation object...

alixinne commented 8 years ago

I did the deletion through the admin interface, the measured times and requests only take into account actual deletion (when confirming after the preview stage).

My guess is that it has something to do with how Django handles model deletion. It looks like it's loading objects and their references before deleting them, which ends up loading for each unit, and associated change, their parent translation, sub-project and project.

It looks like it's more of a Django design issue not to rely on the database for cascade deletions to ensure compatibility among backends, which requires python-side processing of model relations.

alixinne commented 8 years ago

This seems to be the case: the Collector class responsible of collecting Django models for deletion can only delete objects relying on the database backend

[...] if there are no cascades, no parents and no signal listeners for the object class. [...]

See https://github.com/django/django/blob/c339a5a6f72690cd90d5a653dc108fbb60274a20/django/db/models/deletion.py#L120

nijel commented 8 years ago

There should be no listeners for Translation object (there are on above models, to handle filesystem cleanup, but that should not matter in this case).

alixinne commented 8 years ago

I think it has something to do with dependencies between models, I'll try to reproduce the issue on a simple Django app.

alixinne commented 8 years ago

There actually is a post_delete listener that is run for every deleted Unit: https://github.com/nijel/weblate/blob/master/weblate/trans/models/__init__.py#L157

The code in this listener seems to match the queries from the (without cache-machine) stats, which explains the slow deletion process. It would be interesting to see if the database-heavy part of this code could be converted to an "AFTER DELETE" trigger. This would require hand-crafting some SQL code, but should drastically improve the peformance of this post_delete listener.

alixinne commented 8 years ago

This would actually not remove the need for query-level caching, as import performance suffers from fetching the Source objects by their (checksum, subproject). As a source's (checksum, subproject) is not its primary key, cache-machine does not cache such requests, so this must be done manually.

diff --git a/weblate/trans/models/source.py b/weblate/trans/models/source.py
index b8a6737..e480e65 100644
--- a/weblate/trans/models/source.py
+++ b/weblate/trans/models/source.py
@@ -23,6 +23,8 @@ from django.utils.encoding import python_2_unicode_compatible
 from django.utils.translation import ugettext_lazy as _
 from weblate.trans.validators import validate_check_flags

+from caching.base import CachingManager, CachingMixin, cache, invalidator, DEFAULT_TIMEOUT
+
 PRIORITY_CHOICES = (
     (60, _('Very high')),
     (80, _('High')),
@@ -31,9 +33,39 @@ PRIORITY_CHOICES = (
     (140, _('Very low')),
 )

+class SourceManager(CachingManager):
+    def get_by_checksum(self, checksum, subproject, create=True):
+        created = False
+        key = "src:%s:%s" % (checksum, subproject.id)
+
+        # Try to get the source from cache
+        val = cache.get(key)
+        if val is None:
+            # Get or create the value
+            if create:
+                val, created = self.get_or_create(
+                    checksum=checksum,
+                    subproject_id=subproject.id
+                )
+            else:
+                val = self.get(
+                    checksum=checksum,
+                    subproject_id=subproject.id
+                )
+            # Add to the cache
+            cache.set(key, val, DEFAULT_TIMEOUT)
+            # Setup flush list for source key
+            invalidator.add_to_flush_list(
+                {val.flush_key(): [key]}
+            )
+        
+        if create:
+            return val, created
+        return val
+

 @python_2_unicode_compatible
-class Source(models.Model):
+class Source(models.Model, CachingMixin):
     checksum = models.CharField(max_length=40)
     subproject = models.ForeignKey('SubProject')
     timestamp = models.DateTimeField(auto_now_add=True)
@@ -47,6 +79,8 @@ class Source(models.Model):
         blank=True,
     )

+    objects = SourceManager()
+
     class Meta(object):
         permissions = (
             ('edit_priority', "Can edit priority"),
diff --git a/weblate/trans/models/unit.py b/weblate/trans/models/unit.py
index 2fe6c16..91c4746 100644
--- a/weblate/trans/models/unit.py
+++ b/weblate/trans/models/unit.py
@@ -68,7 +68,6 @@ def more_like_queue(pk, source, top, queue):
     result = more_like(pk, source, top)
     queue.put(result)

-
 class UnitManager(models.Manager):
     # pylint: disable=W0232

@@ -461,10 +460,8 @@ class Unit(models.Model, LoggerMixin):
             return

         # Ensure we track source string
-        source_info, source_created = Source.objects.get_or_create(
-            checksum=self.checksum,
-            subproject=self.translation.subproject
-        )
+        source_info, source_created = \
+            Source.objects.get_by_checksum(self.checksum, self.translation.subproject)
         contentsum_changed = self.contentsum != contentsum

         # Store updated values
@@ -1049,10 +1046,10 @@ class Unit(models.Model, LoggerMixin):
         Returns related source string object.
         """
         if self._source_info is None:
-            self._source_info = Source.objects.get(
-                checksum=self.checksum,
-                subproject=self.translation.subproject
-            )
+            self._source_info = Source.objects.get_by_checksum(
+                self.checksum,
+                self.translation.subproject,
+                create=False)
         return self._source_info

     def get_secondary_units(self, user):

Using cache-machine provides support for flush-lists, so cached entries are properly invalidated when their content change or they are deleted from the database.

nijel commented 8 years ago

Can you please submit the change as pull request?

Also, you're right, cleanup_deleted is the bottleneck in removal. It is really not needed in most cases, so maybe it would be better to keep these objects in database and do the cleanup job in background.

alixinne commented 8 years ago

As noted in the PR #1173 django-cache-machine has issues with recent versions of Django (1.7+), which requires the mentioned fix.

I guess part of the work done by the cleanup_delete method can be replaced by running updatechecks and cleanuptrans after bulk operations such as component import or delete.

WeblateOrg / weblate