pllua / pllua-deprecated

[DEPRECATED] This repository is no longer maintained. Please follow https://github.com/pllua/pllua
197 stars 16 forks source link

性能优化测试.sql #72

Open OpenMIS opened 5 years ago

OpenMIS commented 5 years ago

--创建临时表 create temp table foo(a int, b int, c int, d int, e int); --随机200万条记录 insert into foo select random()100, random()100, random()100, random()100, random()*100 from generate_series(1,2000000);

--使用内置的C语言实现的方法 select count(*) filter (where a = least(a,b,c,d,e)) from foo; --execution time: 531 ms; total time: 547 ms

CREATE FUNCTION min2(integer,integer,integer,integer,integer) RETURNS integer AS 'DllTest2', 'min2' LANGUAGE C STRICT;

--使用C语言写的扩展实现的方法 select count(*) filter (where a = min2(a,b,c,d,e)) from foo; --execution time: 531 ms; total time: 546 ms

--使用pgsql调用内置的C语言实现的least函数 CREATE OR REPLACE FUNCTION public.myleast1a(a integer, b integer, c integer, d integer, e integer ) RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT AS $function$ declare

begin return least(a,b,c,d,e); end; $function$;

--执行测试函数 select count(*) filter (where a = myleast1a(a,b,c,d,e)) from foo; --execution time: 2.672 sec; total time: 2.688 sec

--使用pgsql,在里面使用sql。【注意】此种性能非常差,不能这么用。 CREATE OR REPLACE FUNCTION public.myleast2(VARIADIC integer[]) RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT AS $function$ declare result int; a int; begin return (select min(v) from unnest($1) g(v)); end; $function$

--执行测试函数 select count(*) filter (where a = myleast2(a,b,c,d,e)) from foo; --execution time: 30.813 sec; total time: 30.813 sec

--使用单语句的SQL,【注意】性能比上面好点,但也非常差。 CREATE OR REPLACE FUNCTION public.myleast3(VARIADIC integer[]) RETURNS integer LANGUAGE sql IMMUTABLE STRICT AS $function$select min(v) from unnest($1) g(v)$function$

--执行测试函数 select count(*) filter (where a = myleast3(a,b,c,d,e)) from foo; --execution time: 20.031 sec; total time: 20.031 sec

--使用pgsql CREATE OR REPLACE FUNCTION public.myleast1(VARIADIC integer[]) RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT AS $function$ declare result int; a int; begin foreach a in array $1 loop if result is null then result := a; elseif a < result then result := a; end if; end loop; return result; end; $function$;

--执行测试函数 select count(*) filter (where a = myleast1(a,b,c,d,e)) from foo; --execution time: 8.000 sec; total time: 8.016 sec

--使用pgsql调用内置的C语言实现的least函数 CREATE OR REPLACE FUNCTION public.myleast1a(VARIADIC integer[]) RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT AS $function$ begin return least($1); end; $function$;

--执行测试函数 select count(*) filter (where a = myleast1a(a,b,c,d,e)) from foo;

--使用pgsql,减少if语句 CREATE OR REPLACE FUNCTION public.myleast1b(VARIADIC integer[]) RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT AS $function$ declare result int; a int; begin foreach a in array $1 loop if result is null or a < result then result := a; end if; end loop; return result; end; $function$;

--执行测试函数 select count(*) filter (where a = myleast1b(a,b,c,d,e)) from foo; --execution time: 7.187 sec; total time: 7.187 sec

--使用pgsql,使用case替代if语句 CREATE OR REPLACE FUNCTION public.myleast1c(VARIADIC integer[]) RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT AS $function$ declare result int; a int; begin foreach a in array $1 loop result := case when a < result then a else coalesce(result, a) end; end loop; return result; end; $function$;

--执行测试函数 select count(*) filter (where a = myleast1c(a,b,c,d,e)) from foo; --execution time: 6.687 sec; total time: 6.703 sec

--以下都是使用python 3.6.5 64位 windows 环境 --纯python数据库函数 CREATE OR REPLACE FUNCTION public.myleast6(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$

r = None for x in a: if r is None or x < r: r = x return r $function$

--执行测试函数 select count(*) filter (where a = myleast6(a,b,c,d,e)) from foo; --execution time: 7.344 sec; total time: 7.344 sec

--纯python数据库函数,在内部使用jit --【注意】在数据库函数里直接使用jit,如果在高压调用此数据库函数时会非常慢,因为每次执行都会调用jit。 CREATE OR REPLACE FUNCTION public.myleast6a(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$

from numba import jit

@jit def test2(b): r = None for x in b: if r is None or x < r: r = x return r

return test2(a) $function$

----执行测试函数 select count(*) filter (where a = myleast6a(a,b,c,d,e)) from foo; --8分钟还未计算完,强制终止掉上面测试


python模块: module1

导入jit

from numba import jit

@jit def test2(b): r = None for x in b: if r is None or x < r: r = x return r

def test3(b): r = None for x in b: if r is None or x < r: r = x return r

def test4(b): return min(b)

--python数据库函数调用python模块使用jit CREATE OR REPLACE FUNCTION public.myleast6b(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$

import module1 return module1.test2(a) $function$;

----执行测试函数 select count(*) filter (where a = myleast6b(a,b,c,d,e)) from foo; --execution time: 00:01:07; total time: 00:01:07

--python数据库函数调用python模块不使用jit CREATE OR REPLACE FUNCTION public.myleast6c(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$

import module1 return module1.test3(a) $function$;

----执行测试函数 select count(*) filter (where a = myleast6c(a,b,c,d,e)) from foo; --execution time: 9.672 sec; total time: 9.688 sec

--python数据库函数调用python模块直接使用内部min函数,不使用jit CREATE OR REPLACE FUNCTION public.myleast6d(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$ import module1 return module1.test4(a) $function$;

----执行测试函数 select count(*) filter (where a = myleast6d(a,b,c,d,e)) from foo; --execution time: 10.609 sec; total time: 10.625 sec

--python数据库函数调用python模块直接使用内部min函数,不使用jit CREATE OR REPLACE FUNCTION public.myleast6e(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$ return min(a) $function$;

----执行测试函数 select count(*) filter (where a = myleast6e(a,b,c,d,e)) from foo; --execution time: 7.906 sec; total time: 7.922 sec

--使用python调用内置的C语言实现的least函数,【注意】此方式存在性能问题。 CREATE OR REPLACE FUNCTION public.myleast6f(a integer, b integer, c integer, d integer, e integer ) RETURNS integer LANGUAGE plpython3u AS $function$ if "plan" in SD: plan = SD["plan"] else: plan = plpy.prepare("SELECT least($1, $2,$3,$4,$5) as f1", ["int", "int", "int", "int", "int"]) SD["plan"] = plan

rv = plpy.execute(plan, [a,b,c,d,e], 1) t = rv[0]["f1"] return t $function$;

--执行测试函数 select count(*) filter (where a = myleast6f(a,b,c,d,e)) from foo; --execution time: 34.141 sec; total time: 34.141 sec

--使用GD对象缓存jit的函数 --【注意】此方法不用每次执行调用jit,但也说明jit不适合非科学计算领域,比常规方式速度反而下降非常多。 CREATE OR REPLACE FUNCTION public.myleast6g(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$

if "test2" in GD: test2 = GD["test2"] else :
from numba import jit

@jit def test2(b): r = None for x in b: if r is None or x < r: r = x return r GD["test2"] = test2

return test2(a) $function$

--执行测试函数 select count(*) filter (where a = myleast6g(a,b,c,d,e)) from foo; --execution time: 00:01:02; total time: 00:01:02

--#import imp --#imp.reload(module1)

CREATE or replace FUNCTION myleast7_lua(a int, b int, c int, d int, e int) RETURNS int AS $$ return math.min(a,b,c,d,e); $$ LANGUAGE pllua;

select count(*) filter (where a = myleast7_lua(a,b,c,d,e)) from foo; --execution time: 8.851

--删除测试函数 drop FUNCTION public.myleast1(VARIADIC integer[]); drop FUNCTION public.myleast1a(a integer, b integer, c integer, d integer, e integer) drop FUNCTION public.myleast1b(VARIADIC integer[]); drop FUNCTION public.myleast1c(VARIADIC integer[]); drop FUNCTION public.myleast2(VARIADIC integer[]); drop FUNCTION public.myleast3(VARIADIC integer[]); drop FUNCTION public.myleast6(VARIADIC a integer[]); drop FUNCTION public.myleast6a(VARIADIC a integer[]); drop FUNCTION public.myleast6b(VARIADIC a integer[]); drop FUNCTION public.myleast6c(VARIADIC a integer[]); drop FUNCTION public.myleast6d(VARIADIC a integer[]); drop FUNCTION public.myleast6e(VARIADIC a integer[]); drop FUNCTION public.myleast6f(a integer, b integer, c integer, d integer, e integer); drop FUNCTION public.myleast6g(VARIADIC a integer[]); drop FUNCTION public.myleast7_lua(a integer, b integer, c integer, d integer, e integer);

--删除临时表 drop table foo;

/* 【结论】 1)C语言实现的函数最快远超过其他语言, 需要时间0.531秒,比最快的脚本语言6.687秒快12.59倍 2)pl/pgsql > pl/python3u > pllua

*/