Closed frikky closed 3 years ago
I profiled the loading time (with only `import liquid` in `loading.py`):
❯ PYTHONPROFILEIMPORTTIME=1 python loading.py (base) ─╯
import time: self [us] | cumulative | imported package
import time: 426 | 426 | _io
import time: 58 | 58 | marshal
import time: 610 | 610 | posix
import time: 853 | 1946 | _frozen_importlib_external
import time: 154 | 154 | time
import time: 351 | 504 | zipimport
import time: 77 | 77 | _codecs
import time: 619 | 695 | codecs
import time: 2522 | 2522 | encodings.aliases
import time: 1233 | 4450 | encodings
import time: 829 | 829 | encodings.utf_8
import time: 541 | 541 | _signal
import time: 448 | 448 | encodings.latin_1
import time: 105 | 105 | _abc
import time: 425 | 529 | abc
import time: 664 | 1192 | io
import time: 159 | 159 | _stat
import time: 373 | 532 | stat
import time: 1211 | 1211 | _collections_abc
import time: 180 | 180 | genericpath
import time: 391 | 571 | posixpath
import time: 762 | 3074 | os
import time: 338 | 338 | _sitebuiltins
import time: 93 | 93 | _locale
import time: 304 | 397 | _bootlocale
import time: 1006 | 1006 | types
import time: 762 | 762 | warnings
import time: 346 | 1108 | importlib
import time: 152 | 152 | importlib.machinery
import time: 328 | 328 | _heapq
import time: 377 | 704 | heapq
import time: 1764 | 1764 | itertools
import time: 307 | 307 | keyword
import time: 123 | 123 | _operator
import time: 10492 | 10615 | operator
import time: 341 | 341 | reprlib
import time: 91 | 91 | _collections
import time: 8600 | 22419 | collections
import time: 317 | 317 | collections.abc
import time: 75 | 75 | _functools
import time: 694 | 769 | functools
import time: 663 | 1431 | contextlib
import time: 1057 | 1057 | enum
import time: 102 | 102 | _sre
import time: 488 | 488 | sre_constants
import time: 518 | 1005 | sre_parse
import time: 502 | 1608 | sre_compile
import time: 263 | 263 | copyreg
import time: 896 | 3822 | re
import time: 12308 | 40295 | typing
import time: 1094 | 41540 | importlib.abc
import time: 560 | 43207 | importlib.util
import time: 1903 | 1903 | sitecustomize
import time: 151 | 151 | usercustomize
import time: 3388 | 53461 | site
import time: 380 | 380 | token
import time: 1405 | 1784 | tokenize
import time: 305 | 2089 | linecache
import time: 676 | 2764 | traceback
import time: 284 | 284 | _weakrefset
import time: 828 | 1112 | weakref
import time: 60 | 60 | _string
import time: 10185 | 10245 | string
import time: 1077 | 1077 | threading
import time: 151 | 151 | atexit
import time: 3358 | 18706 | logging
import time: 147 | 147 | org
import time: 29 | 176 | org.python
import time: 23 | 199 | org.python.core
import time: 349 | 547 | copy
import time: 4096 | 4096 | gettext
import time: 8075 | 12170 | argparse
import time: 372 | 372 | unicodedata
import time: 635 | 1007 | inflection
import time: 665 | 1672 | diot.transforms
import time: 1208 | 15596 | diot.diot
import time: 352 | 15948 | diot
import time: 1587 | 36240 | liquid.config
import time: 290 | 290 | fnmatch
import time: 158 | 158 | nt
import time: 116 | 116 | nt
import time: 109 | 109 | nt
import time: 116 | 116 | nt
import time: 497 | 994 | ntpath
import time: 87 | 87 | errno
import time: 187 | 187 | urllib
import time: 12645 | 12832 | urllib.parse
import time: 2419 | 16621 | pathlib
import time: 2043 | 2043 | textwrap
import time: 1928 | 1928 | rich
import time: 309 | 309 | math
import time: 439 | 439 | _datetime
import time: 1712 | 2459 | datetime
import time: 568 | 568 | rich._loop
import time: 937 | 937 | rich._pick
import time: 609 | 609 | rich._cell_widths
import time: 450 | 450 | rich._lru_cache
import time: 584 | 1642 | rich.cells
import time: 1713 | 3354 | rich._wrap
import time: 1463 | 1463 | typing_extensions
import time: 291 | 291 | _bisect
import time: 269 | 560 | bisect
import time: 184 | 184 | _random
import time: 137 | 137 | _sha512
import time: 544 | 1423 | random
import time: 376 | 376 | rich.errors
import time: 7032 | 7032 | signal
import time: 104 | 104 | pwd
import time: 631 | 631 | grp
import time: 1206 | 1206 | msvcrt
import time: 756 | 756 | _posixsubprocess
import time: 346 | 346 | select
import time: 932 | 932 | selectors
import time: 1572 | 12575 | subprocess
import time: 2601 | 15176 | platform
import time: 398 | 398 | colorsys
import time: 494 | 494 | rich.color_triplet
import time: 5194 | 5687 | rich.palette
import time: 461 | 6148 | rich._palettes
import time: 625 | 625 | rich.terminal_theme
import time: 6189 | 28534 | rich.color
import time: 2020 | 32352 | rich.style
import time: 1599 | 33950 | rich.segment
import time: 318 | 34268 | rich.jupyter
import time: 196 | 196 | rich.abc
import time: 224 | 420 | rich.protocol
import time: 1016 | 1435 | rich.measure
import time: 326 | 36028 | rich.constrain
import time: 468 | 37958 | rich.align
import time: 435 | 435 | rich.containers
import time: 200 | 200 | rich.control
import time: 10558 | 54009 | rich.text
import time: 429 | 54437 | rich._log_render
import time: 187 | 187 | _ast
import time: 1591 | 1778 | ast
import time: 217 | 217 | _opcode
import time: 495 | 712 | opcode
import time: 772 | 1484 | dis
import time: 14540 | 17800 | inspect
import time: 471 | 471 | zlib
import time: 327 | 327 | _compression
import time: 288 | 288 | _bz2
import time: 424 | 1038 | bz2
import time: 812 | 812 | _lzma
import time: 389 | 1200 | lzma
import time: 1526 | 4234 | shutil
import time: 1163 | 1163 | dataclasses
import time: 2896 | 2896 | termios
import time: 5459 | 8354 | getpass
import time: 966 | 966 | rich.default_styles
import time: 3499 | 3499 | configparser
import time: 1974 | 5473 | rich.theme
import time: 3153 | 9591 | rich.themes
import time: 4054 | 4054 | rich._emoji_codes
import time: 657 | 4710 | rich._emoji_replace
import time: 459 | 459 | rich.highlighter
import time: 11365 | 11365 | rich.markup
import time: 4510 | 4510 | pkgutil
import time: 688 | 688 | sysconfig
import time: 531 | 531 | _sysconfigdata__linux_x86_64-linux-gnu
import time: 2933 | 8659 | pydoc
import time: 671 | 9330 | rich.pager
import time: 316 | 316 | array
import time: 4177 | 4492 | rich.pretty
import time: 857 | 857 | rich.box
import time: 515 | 515 | rich.padding
import time: 520 | 1891 | rich.panel
import time: 459 | 459 | rich._ratio
import time: 5352 | 5810 | rich.table
import time: 915 | 8616 | rich.scope
import time: 688 | 688 | rich.screen
import time: 410 | 410 | rich.styled
import time: 13714 | 94918 | rich.console
import time: 365 | 365 | __future__
import time: 675 | 675 | pygments
import time: 1555 | 1555 | pygments.lexers._mapping
import time: 753 | 753 | pygments.modeline
import time: 245 | 245 | pygments.plugin
import time: 9923 | 9923 | pygments.util
import time: 844 | 13992 | pygments.lexers
import time: 570 | 570 | pygments.token
import time: 460 | 460 | rich.columns
import time: 571 | 571 | pygments.style
import time: 301 | 301 | pygments.styles
import time: 1120 | 1990 | rich.syntax
import time: 3698 | 21073 | rich.traceback
import time: 1246 | 176059 | rich.logging
import time: 9615 | 9615 | _hashlib
import time: 350 | 350 | _blake2
import time: 1198 | 11163 | hashlib
import time: 797 | 797 | regex._regex
import time: 5631 | 6427 | regex._regex_core
import time: 2299 | 2299 | locale
import time: 102 | 102 | regex.DEFAULT_VERSION
import time: 997 | 9825 | regex.regex
import time: 355 | 10179 | regex
import time: 7025 | 7025 | atomicwrites
import time: 3601 | 31967 | lark.utils
import time: 168 | 168 | future_builtins
import time: 469 | 637 | lark.tree
import time: 620 | 620 | lark.exceptions
import time: 574 | 574 | lark.lexer
import time: 514 | 1706 | lark.visitors
import time: 3377 | 3377 | _struct
import time: 634 | 4011 | struct
import time: 3930 | 3930 | _compat_pickle
import time: 632 | 632 | _pickle
import time: 146 | 146 | org
import time: 82 | 228 | org.python
import time: 30 | 257 | org.python.core
import time: 2053 | 10880 | pickle
import time: 604 | 604 | tempfile
import time: 2651 | 2651 | numbers
import time: 12228 | 12228 | lark.parse_tree_builder
import time: 295 | 295 | lark.parsers
import time: 8116 | 8116 | lark.grammar
import time: 1320 | 9729 | lark.parsers.grammar_analysis
import time: 455 | 455 | lark.parsers.earley_common
import time: 1635 | 1635 | lark.parsers.earley_forest
import time: 979 | 3068 | lark.parsers.earley
import time: 376 | 376 | lark.parsers.xearley
import time: 553 | 553 | lark.parsers.cyk
import time: 388 | 388 | lark.parsers.lalr_analysis
import time: 325 | 325 | lark.parsers.lalr_interactive_parser
import time: 569 | 1281 | lark.parsers.lalr_parser
import time: 272 | 272 | lark.common
import time: 3109 | 18386 | lark.parser_frontends
import time: 2464 | 35728 | lark.load_grammar
import time: 1022 | 48233 | lark.lark
import time: 1294 | 83835 | lark
import time: 383 | 383 | liquid.exceptions
import time: 5269 | 284208 | liquid.utils
import time: 471 | 471 | liquid.tags.manager
import time: 211 | 211 | liquid.tags.grammar
import time: 1349 | 1349 | html.entities
import time: 998 | 2347 | html
import time: 532 | 2879 | liquid.filters
import time: 870 | 3748 | liquid.tags.transformer
import time: 3686 | 7644 | liquid.tags.tag
import time: 7547 | 7547 | liquid.tags.tag__end
import time: 557 | 557 | liquid.tags.tag__literal
import time: 100773 | 100773 | liquid.tags.tag__output
import time: 347 | 347 | liquid.tags.tag__root
import time: 70344 | 70344 | liquid.tags.tag_assign
import time: 55620 | 55620 | liquid.tags.tag_capture
import time: 841 | 56460 | liquid.tags.tag_block
import time: 1489 | 1489 | liquid.tags.tag_break
import time: 593 | 593 | liquid.tags.tag_case
import time: 236 | 236 | liquid.tags.tag_comment
import time: 59908 | 59908 | liquid.tags.tag_config
import time: 353 | 353 | liquid.tags.tag_continue
import time: 75745 | 75745 | liquid.tags.tag_cycle
import time: 341 | 341 | liquid.tags.tag_decrement
import time: 229 | 229 | liquid.tags.tag_else
import time: 66640 | 66640 | liquid.tags.tag_if
import time: 345 | 66984 | liquid.tags.tag_elsif
import time: 1857 | 1857 | liquid.tags.tag_extends
import time: 81572 | 81572 | liquid.tags.tag_for
import time: 64875 | 64875 | liquid.tags.tag_include
import time: 383 | 383 | liquid.tags.tag_increment
import time: 212 | 212 | liquid.tags.tag_raw
import time: 72907 | 72907 | liquid.tags.tag_tablerow
import time: 673 | 673 | liquid.tags.tag_unless
import time: 319 | 319 | liquid.tags.tag_when
import time: 6978 | 679786 | liquid.tags
import time: 3406 | 683191 | liquid.nodes
import time: 527 | 683718 | liquid.parser
import time: 212 | 212 | liquid.python
import time: 3237 | 3237 | liquid.python.tags.inherited
import time: 1320 | 1320 | liquid.python.tags.transformer
import time: 219200 | 219200 | liquid.python.tags.tag_if
import time: 659431 | 879949 | liquid.python.tags.tag__inherited
import time: 256079 | 256079 | liquid.python.tags.tag_assign
import time: 213139 | 213139 | liquid.python.tags.tag_else
import time: 248695 | 248695 | liquid.python.tags.tag_for
import time: 311 | 311 | liquid.python.tags.tag_from
import time: 183 | 183 | liquid.python.tags.tag_import
import time: 170 | 170 | liquid.python.tags.tag_python
import time: 643 | 643 | liquid.python.tags.tag_unless
import time: 289983 | 289983 | liquid.python.tags.tag_while
import time: 1551 | 1893936 | liquid.python.tags
import time: 2265 | 1896411 | liquid.python.parser
import time: 451 | 451 | liquid.python.filters
import time: 723 | 2901748 | liquid.liquid
import time: 732 | 2902479 | liquid
The time was wasted on loading and parsing the grammar file, repeatedly.
We can cache the parsed grammar here: https://github.com/pwwang/liquidpy/blob/55772c2ce4346012d7a63a9d5c5c4a819e28d2c4/liquid/tags/grammar.py#L26
It is also possible to load it lazily at runtime by importing `BASE_GRAMMAR` inside the functions:
Would you like to submit a PR for this?
Or if it is a problem for you, I can work on this.
Would you like to submit a PR for this?
Or if it is a problem for you, I can work on this.
Thanks a lot for the amazing and quick help!
I'd love some help with it of course (don't we all), but if you don't have time I'll take a look in the next month sometime
@frikky `liquidpy` was initially implemented by compiling the template into a piece of Python code, which was then executed in an `eval`-like way. However, that for sure brought some security issues. Later, I decided to use a lexer/parser to parse the template, which is why `lark` was used. But this also has a problem: the flexibility to define/extend tags.
The ideal case is that we write a grammar for all tags, and load it once and for all. As I said, it's difficult to create a new tag or even modify the behavior of existing tags if someone wants to extend `liquidpy`. That's why it is implemented the current way: defining the grammar for each tag and using it to update the base one. At runtime, however, the updated grammar gets parsed for each tag, which causes redundant parsing, at least for the base grammar.
So here the solution would be either:
- The `eval`-like way, but using a safer interpreter (such as https://github.com/newville/asteval). This uses the Python interpreter itself, which will be way faster than any other interpreter we invented by introducing whichever 3rd-party lexer/parser.

I would like to keep the flexibility of extending `liquidpy` as we are using it in other projects of our own. Let me know if there are any concerns for you if we solve it the 2nd way. If not, we will start working on it, and try our best to keep the APIs unchanged.
I would like to keep the flexibility of extending `liquidpy` as we are using it in other projects of our own. Let me know if there are any concerns for you if we solve it the 2nd way. If not, we will start working on it, and try our best to keep the APIs unchanged.
Thanks for another great explanation!
Most of our issue is with speed, as we already allow our users code execution within all the code they run. It may indeed be a security concern in general, and something to be wary of for others (including your projects), but to us it's all the same. For example, we further make it "insecure" by passing in `**globals()` to give our users access to (mostly) everything.
That's why I would indeed love the second option, as speed triumphs in our scenario.
Tell me if and when you need some coding or testing help, and we'll try to put some resources on it! We can also share the most common use-cases in our scenario for it.
Appreciate it! :+1:
@frikky The whole package is now refactored based on jinja2. We have 4 modes:
- `standard`: Implementing the standard liquid template engine
- `jekyll`: Implementing jekyll-compatible liquid template engine
- `shopify`: Implementing shopify-compatible liquid template engine
- `wild`: A wild mode that supports arbitrary code execution inside the template.

The loading is pretty fast now.
> PYTHONPROFILEIMPORTTIME=1 python loading.py import time: self [us] | cumulative | imported package import time: 146 | 146 | zipimport import time: 817 | 817 | _frozen_importlib_external import time: 77 | 77 | _codecs import time: 736 | 812 | codecs import time: 689 | 689 | encodings.aliases import time: 1530 | 3030 | encodings import time: 439 | 439 | encodings.utf_8 import time: 163 | 163 | _signal import time: 536 | 536 | encodings.latin_1 import time: 59 | 59 | _abc import time: 417 | 475 | abc import time: 442 | 917 | io import time: 131 | 131 | _stat import time: 347 | 478 | stat import time: 1245 | 1245 | _collections_abc import time: 381 | 381 | genericpath import time: 834 | 1214 | posixpath import time: 1031 | 3967 | os import time: 402 | 402 | _sitebuiltins import time: 126 | 126 | _locale import time: 471 | 596 | _bootlocale import time: 530 | 530 | types import time: 471 | 471 | warnings import time: 491 | 962 | importlib import time: 387 | 387 | importlib.machinery import time: 977 | 1363 | importlib.abc import time: 98 | 98 | _operator import time: 1081 | 1179 | operator import time: 383 | 383 | keyword import time: 646 | 646 | _heapq import time: 408 | 1054 | heapq import time: 143 | 143 | itertools import time: 371 | 371 | reprlib import time: 88 | 88 | _collections import time: 1374 | 4590 | collections import time: 73 | 73 | _functools import time: 766 | 838 | functools import time: 955 | 6383 | contextlib import time: 934 | 9641 | importlib.util import time: 938 | 938 | sitecustomize import time: 231 | 231 | usercustomize import time: 5562 | 21863 | site import time: 393 | 393 | collections.abc import time: 993 | 993 | enum import time: 126 | 126 | _sre import time: 683 | 683 | sre_constants import time: 771 | 1453 | sre_parse import time: 630 | 2208 | sre_compile import time: 483 | 483 | copyreg import time: 883 | 4567 | re import time: 1955 | 6914 | typing import time: 418 | 418 | _weakrefset import time: 786 | 1204 | weakref import time: 427 
| 427 | _json import time: 727 | 1153 | json.scanner import time: 825 | 1978 | json.decoder import time: 711 | 711 | json.encoder import time: 580 | 3268 | json import time: 159 | 159 | errno import time: 277 | 277 | time import time: 355 | 355 | token import time: 1252 | 1607 | tokenize import time: 341 | 1947 | linecache import time: 564 | 2511 | traceback import time: 913 | 3700 | threading import time: 617 | 617 | _struct import time: 334 | 951 | struct import time: 1094 | 1094 | _compat_pickle import time: 327 | 327 | org import time: 42 | 369 | org.python import time: 41 | 409 | org.python.core import time: 686 | 686 | _pickle import time: 1432 | 4570 | pickle import time: 635 | 635 | urllib import time: 1461 | 2096 | urllib.parse import time: 390 | 7055 | jinja2._compat import time: 51 | 51 | _string import time: 1468 | 1519 | string import time: 584 | 584 | markupsafe._compat import time: 356 | 356 | markupsafe._speedups import time: 1031 | 3488 | markupsafe import time: 1850 | 19517 | jinja2.utils import time: 2119 | 21636 | jinja2.nodes import time: 476 | 476 | math import time: 1749 | 1749 | _hashlib import time: 428 | 428 | _blake2 import time: 522 | 522 | _sha3 import time: 698 | 3396 | hashlib import time: 620 | 620 | _bisect import time: 465 | 1084 | bisect import time: 460 | 460 | _random import time: 748 | 5687 | random import time: 543 | 543 | jinja2.exceptions import time: 1475 | 2017 | jinja2.runtime import time: 1029 | 9208 | jinja2.filters import time: 685 | 685 | numbers import time: 1232 | 1917 | _decimal import time: 353 | 2270 | decimal import time: 736 | 3005 | jinja2.tests import time: 453 | 12665 | jinja2.defaults import time: 504 | 504 | unicodedata import time: 305 | 305 | jinja2._identifier import time: 4178 | 4986 | jinja2.lexer import time: 549 | 549 | jinja2.parser import time: 228 | 228 | org import time: 106 | 334 | org.python import time: 27 | 360 | org.python.core import time: 378 | 738 | copy import time: 360 | 360 | 
jinja2.visitor import time: 304 | 304 | jinja2.optimizer import time: 411 | 411 | jinja2.idtracking import time: 350 | 350 | __future__ import time: 1039 | 3198 | jinja2.compiler import time: 1105 | 45341 | jinja2.environment import time: 827 | 827 | jinja2.loaders import time: 401 | 401 | fnmatch import time: 856 | 856 | zlib import time: 534 | 534 | _compression import time: 565 | 565 | _bz2 import time: 605 | 1704 | bz2 import time: 842 | 842 | _lzma import time: 616 | 1458 | lzma import time: 97 | 97 | pwd import time: 575 | 575 | grp import time: 934 | 6023 | shutil import time: 658 | 6680 | tempfile import time: 650 | 7330 | jinja2.bccache import time: 422 | 422 | concurrent import time: 55 | 55 | atexit import time: 1307 | 1362 | logging import time: 810 | 2171 | concurrent.futures._base import time: 595 | 3187 | concurrent.futures import time: 747 | 747 | _socket import time: 641 | 641 | select import time: 1111 | 1751 | selectors import time: 3067 | 5564 | socket import time: 1208 | 1208 | signal import time: 484 | 484 | _posixsubprocess import time: 852 | 2543 | subprocess import time: 1669 | 1669 | _ssl import time: 440 | 440 | binascii import time: 507 | 947 | base64 import time: 3189 | 5804 | ssl import time: 472 | 472 | asyncio.constants import time: 382 | 382 | _opcode import time: 451 | 833 | opcode import time: 677 | 1510 | dis import time: 2057 | 3566 | inspect import time: 303 | 303 | asyncio.format_helpers import time: 364 | 666 | asyncio.base_futures import time: 261 | 261 | asyncio.log import time: 556 | 5048 | asyncio.coroutines import time: 461 | 461 | _contextvars import time: 372 | 832 | contextvars import time: 353 | 353 | asyncio.base_tasks import time: 762 | 1115 | _asyncio import time: 1078 | 3024 | asyncio.events import time: 548 | 548 | asyncio.futures import time: 566 | 566 | asyncio.protocols import time: 473 | 473 | asyncio.transports import time: 620 | 1092 | asyncio.sslproto import time: 788 | 788 | asyncio.tasks import time: 
1458 | 30090 | asyncio.base_events import time: 1090 | 1090 | asyncio.locks import time: 357 | 357 | asyncio.runners import time: 498 | 498 | asyncio.queues import time: 655 | 655 | asyncio.streams import time: 444 | 444 | asyncio.subprocess import time: 452 | 452 | asyncio.base_subprocess import time: 742 | 742 | asyncio.selector_events import time: 817 | 2010 | asyncio.unix_events import time: 666 | 35807 | asyncio import time: 495 | 36302 | jinja2.asyncsupport import time: 422 | 422 | jinja2.asyncfilters import time: 786 | 91005 | jinja2 import time: 465 | 465 | liquid.filters import time: 1525 | 1525 | html.entities import time: 740 | 2264 | html import time: 365 | 365 | liquid.filters.manager import time: 804 | 3896 | liquid.filters.standard import time: 659 | 102472 | liquid.liquid import time: 314 | 314 | liquid.utils import time: 401 | 715 | liquid.patching import time: 10045 | 113232 | liquid
We believe this is a better and easier way to port liquid and its variants into python. Unfortunately, this leads to incompatibility with previous versions.
Before I release the new version, please take a look at the documentation and maybe part of the code (already merged into the master branch), to see if you are still interested in using it in your library.
You are very welcome to contribute as well. There are still some TODOs, including adding tests for modes other than standard and porting the rest of the filters from shopify and jekyll.
This is indeed pretty wild - great work! I don't know enough about the innards of it to have any opinion of how it's implemented, but trust it does indeed look way faster, and will have some feedback as we see our user-base make more use of it :)
Thanks a lot!! :heart:
Hey!
We recently started using Liquidpy in the Shuffle project. This is an amazing thing you've built, which will help out our community a lot, probably leading to more PR's over time like #33 :)
There's one major problem though: all our scripts now take ~2-3 seconds to start (significantly worse depending on specs), and use a lot more RAM than before (e.g. had to bump containers from 128MB to 256MB in cloud).
Is there any code we could look at for improving the load time? We'd like to get it down to milliseconds like we had before.
Thanks! :+1: