--创建临时表
create temp table foo(a int, b int, c int, d int, e int);
--随机200万条记录
insert into foo select random()100, random()100, random()100, random()100, random()*100 from generate_series(1,2000000);
--使用内置的C语言实现的方法
select count(*) filter (where a = least(a,b,c,d,e)) from foo;
--execution time: 531 ms; total time: 547 ms
CREATE FUNCTION min2(integer,integer,integer,integer,integer) RETURNS integer
AS 'DllTest2', 'min2'
LANGUAGE C STRICT;
--使用C语言写的扩展实现的方法
select count(*) filter (where a = min2(a,b,c,d,e)) from foo;
--execution time: 531 ms; total time: 546 ms
--使用pgsql调用内置的C语言实现的least函数
CREATE OR REPLACE FUNCTION public.myleast1a(a integer, b integer, c integer, d integer, e integer )
RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT
AS $function$
declare
begin
return least(a,b,c,d,e);
end;
$function$;
--执行测试函数
select count(*) filter (where a = myleast1a(a,b,c,d,e)) from foo;
--execution time: 2.672 sec; total time: 2.688 sec
--使用pgsql,在里面使用sql。【注意】此种性能非常差,不能这么用。
CREATE OR REPLACE FUNCTION public.myleast2(VARIADIC integer[])
RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT
AS $function$
declare result int;
a int;
begin
return (select min(v) from unnest($1) g(v));
end;
$function$
--执行测试函数
select count(*) filter (where a = myleast2(a,b,c,d,e)) from foo;
--execution time: 30.813 sec; total time: 30.813 sec
--使用单语句的SQL,【注意】性能比上面好点,但也非常差。
CREATE OR REPLACE FUNCTION public.myleast3(VARIADIC integer[])
RETURNS integer LANGUAGE sql IMMUTABLE STRICT
AS $function$select min(v) from unnest($1) g(v)$function$
--执行测试函数
select count(*) filter (where a = myleast3(a,b,c,d,e)) from foo;
--execution time: 20.031 sec; total time: 20.031 sec
--使用pgsql
CREATE OR REPLACE FUNCTION public.myleast1(VARIADIC integer[])
RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT
AS $function$
declare result int;
a int;
begin
foreach a in array $1
loop
if result is null then
result := a;
elseif a < result then
result := a;
end if;
end loop;
return result;
end;
$function$;
--执行测试函数
select count(*) filter (where a = myleast1(a,b,c,d,e)) from foo;
--execution time: 8.000 sec; total time: 8.016 sec
--使用pgsql调用内置的C语言实现的least函数
CREATE OR REPLACE FUNCTION public.myleast1a(VARIADIC integer[])
RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT
AS $function$
begin
return least($1);
end;
$function$;
--执行测试函数
select count(*) filter (where a = myleast1a(a,b,c,d,e)) from foo;
--使用pgsql,减少if语句
CREATE OR REPLACE FUNCTION public.myleast1b(VARIADIC integer[])
RETURNS integer
LANGUAGE plpgsql
IMMUTABLE STRICT
AS $function$
declare result int;
a int;
begin
foreach a in array $1
loop
if result is null or a < result then
result := a;
end if;
end loop;
return result;
end;
$function$;
--执行测试函数
select count(*) filter (where a = myleast1b(a,b,c,d,e)) from foo;
--execution time: 7.187 sec; total time: 7.187 sec
--使用pgsql,使用case替代if语句
CREATE OR REPLACE FUNCTION public.myleast1c(VARIADIC integer[])
RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT
AS $function$
declare result int;
a int;
begin
foreach a in array $1
loop
result := case when a < result then a else coalesce(result, a) end;
end loop;
return result;
end;
$function$;
--执行测试函数
select count(*) filter (where a = myleast1c(a,b,c,d,e)) from foo;
--execution time: 6.687 sec; total time: 6.703 sec
--以下都是使用python 3.6.5 64位 windows 环境
--纯python数据库函数
CREATE OR REPLACE FUNCTION public.myleast6(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$
r = None
for x in a:
if r is None or x < r:
r = x
return r
$function$
--执行测试函数
select count(*) filter (where a = myleast6(a,b,c,d,e)) from foo;
--execution time: 7.344 sec; total time: 7.344 sec
--纯python数据库函数,在内部使用jit
--【注意】在数据库函数里直接使用jit,如果在高压调用此数据库函数时会非常慢,因为每次执行都会调用jit。
CREATE OR REPLACE FUNCTION public.myleast6a(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$
from numba import jit
@jit
def test2(b):
r = None
for x in b:
if r is None or x < r:
r = x
return r
return test2(a)
$function$
----执行测试函数
select count(*) filter (where a = myleast6a(a,b,c,d,e)) from foo;
--8分钟还未计算完,强制终止掉上面测试
python模块: module1
导入jit
from numba import jit
@jit
def test2(b):
r = None
for x in b:
if r is None or x < r:
r = x
return r
def test3(b):
r = None
for x in b:
if r is None or x < r:
r = x
return r
def test4(b):
return min(b)
--python数据库函数调用python模块使用jit
CREATE OR REPLACE FUNCTION public.myleast6b(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$
----执行测试函数
select count(*) filter (where a = myleast6c(a,b,c,d,e)) from foo;
--execution time: 9.672 sec; total time: 9.688 sec
--python数据库函数调用python模块直接使用内部min函数,不使用jit
CREATE OR REPLACE FUNCTION public.myleast6d(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$
import module1
return module1.test4(a)
$function$;
----执行测试函数
select count(*) filter (where a = myleast6d(a,b,c,d,e)) from foo;
--execution time: 10.609 sec; total time: 10.625 sec
--python数据库函数调用python模块直接使用内部min函数,不使用jit
CREATE OR REPLACE FUNCTION public.myleast6e(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$
return min(a)
$function$;
----执行测试函数
select count(*) filter (where a = myleast6e(a,b,c,d,e)) from foo;
--execution time: 7.906 sec; total time: 7.922 sec
--使用python调用内置的C语言实现的least函数,【注意】此方式存在性能问题。
CREATE OR REPLACE FUNCTION public.myleast6f(a integer, b integer, c integer, d integer, e integer )
RETURNS integer LANGUAGE plpython3u
AS $function$
if "plan" in SD:
plan = SD["plan"]
else:
plan = plpy.prepare("SELECT least($1, $2,$3,$4,$5) as f1", ["int", "int", "int", "int", "int"])
SD["plan"] = plan
rv = plpy.execute(plan, [a,b,c,d,e], 1)
t = rv[0]["f1"]
return t
$function$;
--执行测试函数
select count(*) filter (where a = myleast6f(a,b,c,d,e)) from foo;
--execution time: 34.141 sec; total time: 34.141 sec
--使用GD对象缓存jit的函数
--【注意】此方法不用每次执行调用jit,但也说明jit不适合非科学计算领域,比常规方式速度反而下降非常多。
CREATE OR REPLACE FUNCTION public.myleast6g(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$
if "test2" in GD:
test2 = GD["test2"]
else :
from numba import jit
@jit
def test2(b):
r = None
for x in b:
if r is None or x < r:
r = x
return r
GD["test2"] = test2
return test2(a)
$function$
--执行测试函数
select count(*) filter (where a = myleast6g(a,b,c,d,e)) from foo;
--execution time: 00:01:02; total time: 00:01:02
--#import imp
--#imp.reload(module1)
CREATE or replace FUNCTION myleast7_lua(a int, b int, c int, d int, e int) RETURNS int AS $$
return math.min(a,b,c,d,e);
$$ LANGUAGE pllua;
select count(*) filter (where a = myleast7_lua(a,b,c,d,e)) from foo;
--execution time: 8.851
--删除测试函数
drop FUNCTION public.myleast1(VARIADIC integer[]);
drop FUNCTION public.myleast1a(a integer, b integer, c integer, d integer, e integer)
drop FUNCTION public.myleast1b(VARIADIC integer[]);
drop FUNCTION public.myleast1c(VARIADIC integer[]);
drop FUNCTION public.myleast2(VARIADIC integer[]);
drop FUNCTION public.myleast3(VARIADIC integer[]);
drop FUNCTION public.myleast6(VARIADIC a integer[]);
drop FUNCTION public.myleast6a(VARIADIC a integer[]);
drop FUNCTION public.myleast6b(VARIADIC a integer[]);
drop FUNCTION public.myleast6c(VARIADIC a integer[]);
drop FUNCTION public.myleast6d(VARIADIC a integer[]);
drop FUNCTION public.myleast6e(VARIADIC a integer[]);
drop FUNCTION public.myleast6f(a integer, b integer, c integer, d integer, e integer);
drop FUNCTION public.myleast6g(VARIADIC a integer[]);
drop FUNCTION public.myleast7_lua(a integer, b integer, c integer, d integer, e integer);
--创建临时表 create temp table foo(a int, b int, c int, d int, e int); --随机200万条记录 insert into foo select random()100, random()100, random()100, random()100, random()*100 from generate_series(1,2000000);
--使用内置的C语言实现的方法 select count(*) filter (where a = least(a,b,c,d,e)) from foo; --execution time: 531 ms; total time: 547 ms
CREATE FUNCTION min2(integer,integer,integer,integer,integer) RETURNS integer AS 'DllTest2', 'min2' LANGUAGE C STRICT;
--使用C语言写的扩展实现的方法 select count(*) filter (where a = min2(a,b,c,d,e)) from foo; --execution time: 531 ms; total time: 546 ms
--使用pgsql调用内置的C语言实现的least函数 CREATE OR REPLACE FUNCTION public.myleast1a(a integer, b integer, c integer, d integer, e integer ) RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT AS $function$ declare
begin return least(a,b,c,d,e); end; $function$;
--执行测试函数 select count(*) filter (where a = myleast1a(a,b,c,d,e)) from foo; --execution time: 2.672 sec; total time: 2.688 sec
--使用pgsql,在里面使用sql。【注意】此种性能非常差,不能这么用。 CREATE OR REPLACE FUNCTION public.myleast2(VARIADIC integer[]) RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT AS $function$ declare result int; a int; begin return (select min(v) from unnest($1) g(v)); end; $function$
--执行测试函数 select count(*) filter (where a = myleast2(a,b,c,d,e)) from foo; --execution time: 30.813 sec; total time: 30.813 sec
--使用单语句的SQL,【注意】性能比上面好点,但也非常差。 CREATE OR REPLACE FUNCTION public.myleast3(VARIADIC integer[]) RETURNS integer LANGUAGE sql IMMUTABLE STRICT AS $function$select min(v) from unnest($1) g(v)$function$
--执行测试函数 select count(*) filter (where a = myleast3(a,b,c,d,e)) from foo; --execution time: 20.031 sec; total time: 20.031 sec
--使用pgsql CREATE OR REPLACE FUNCTION public.myleast1(VARIADIC integer[]) RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT AS $function$ declare result int; a int; begin foreach a in array $1 loop if result is null then result := a; elseif a < result then result := a; end if; end loop; return result; end; $function$;
--执行测试函数 select count(*) filter (where a = myleast1(a,b,c,d,e)) from foo; --execution time: 8.000 sec; total time: 8.016 sec
--使用pgsql调用内置的C语言实现的least函数 CREATE OR REPLACE FUNCTION public.myleast1a(VARIADIC integer[]) RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT AS $function$ begin return least($1); end; $function$;
--执行测试函数 select count(*) filter (where a = myleast1a(a,b,c,d,e)) from foo;
--使用pgsql,减少if语句 CREATE OR REPLACE FUNCTION public.myleast1b(VARIADIC integer[]) RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT AS $function$ declare result int; a int; begin foreach a in array $1 loop if result is null or a < result then result := a; end if; end loop; return result; end; $function$;
--执行测试函数 select count(*) filter (where a = myleast1b(a,b,c,d,e)) from foo; --execution time: 7.187 sec; total time: 7.187 sec
--使用pgsql,使用case替代if语句 CREATE OR REPLACE FUNCTION public.myleast1c(VARIADIC integer[]) RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT AS $function$ declare result int; a int; begin foreach a in array $1 loop result := case when a < result then a else coalesce(result, a) end; end loop; return result; end; $function$;
--执行测试函数 select count(*) filter (where a = myleast1c(a,b,c,d,e)) from foo; --execution time: 6.687 sec; total time: 6.703 sec
--以下都是使用python 3.6.5 64位 windows 环境 --纯python数据库函数 CREATE OR REPLACE FUNCTION public.myleast6(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$
r = None for x in a: if r is None or x < r: r = x return r $function$
--执行测试函数 select count(*) filter (where a = myleast6(a,b,c,d,e)) from foo; --execution time: 7.344 sec; total time: 7.344 sec
--纯python数据库函数,在内部使用jit --【注意】在数据库函数里直接使用jit,如果在高压调用此数据库函数时会非常慢,因为每次执行都会调用jit。 CREATE OR REPLACE FUNCTION public.myleast6a(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$
from numba import jit
@jit def test2(b): r = None for x in b: if r is None or x < r: r = x return r
return test2(a) $function$
----执行测试函数 select count(*) filter (where a = myleast6a(a,b,c,d,e)) from foo; --8分钟还未计算完,强制终止掉上面测试
python模块: module1
导入jit
from numba import jit
@jit def test2(b): r = None for x in b: if r is None or x < r: r = x return r
def test3(b): r = None for x in b: if r is None or x < r: r = x return r
def test4(b): return min(b)
--python数据库函数调用python模块使用jit CREATE OR REPLACE FUNCTION public.myleast6b(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$
import module1 return module1.test2(a) $function$;
----执行测试函数 select count(*) filter (where a = myleast6b(a,b,c,d,e)) from foo; --execution time: 00:01:07; total time: 00:01:07
--python数据库函数调用python模块不使用jit CREATE OR REPLACE FUNCTION public.myleast6c(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$
import module1 return module1.test3(a) $function$;
----执行测试函数 select count(*) filter (where a = myleast6c(a,b,c,d,e)) from foo; --execution time: 9.672 sec; total time: 9.688 sec
--python数据库函数调用python模块直接使用内部min函数,不使用jit CREATE OR REPLACE FUNCTION public.myleast6d(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$ import module1 return module1.test4(a) $function$;
----执行测试函数 select count(*) filter (where a = myleast6d(a,b,c,d,e)) from foo; --execution time: 10.609 sec; total time: 10.625 sec
--python数据库函数调用python模块直接使用内部min函数,不使用jit CREATE OR REPLACE FUNCTION public.myleast6e(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$ return min(a) $function$;
----执行测试函数 select count(*) filter (where a = myleast6e(a,b,c,d,e)) from foo; --execution time: 7.906 sec; total time: 7.922 sec
--使用python调用内置的C语言实现的least函数,【注意】此方式存在性能问题。 CREATE OR REPLACE FUNCTION public.myleast6f(a integer, b integer, c integer, d integer, e integer ) RETURNS integer LANGUAGE plpython3u AS $function$ if "plan" in SD: plan = SD["plan"] else: plan = plpy.prepare("SELECT least($1, $2,$3,$4,$5) as f1", ["int", "int", "int", "int", "int"]) SD["plan"] = plan
rv = plpy.execute(plan, [a,b,c,d,e], 1) t = rv[0]["f1"] return t $function$;
--执行测试函数 select count(*) filter (where a = myleast6f(a,b,c,d,e)) from foo; --execution time: 34.141 sec; total time: 34.141 sec
--使用GD对象缓存jit的函数 --【注意】此方法不用每次执行调用jit,但也说明jit不适合非科学计算领域,比常规方式速度反而下降非常多。 CREATE OR REPLACE FUNCTION public.myleast6g(VARIADIC a integer[]) RETURNS integer LANGUAGE plpython3u AS $function$
if "test2" in GD: test2 = GD["test2"] else :
from numba import jit
@jit def test2(b): r = None for x in b: if r is None or x < r: r = x return r GD["test2"] = test2
return test2(a) $function$
--执行测试函数 select count(*) filter (where a = myleast6g(a,b,c,d,e)) from foo; --execution time: 00:01:02; total time: 00:01:02
--#import imp --#imp.reload(module1)
CREATE or replace FUNCTION myleast7_lua(a int, b int, c int, d int, e int) RETURNS int AS $$ return math.min(a,b,c,d,e); $$ LANGUAGE pllua;
select count(*) filter (where a = myleast7_lua(a,b,c,d,e)) from foo; --execution time: 8.851
--删除测试函数 drop FUNCTION public.myleast1(VARIADIC integer[]); drop FUNCTION public.myleast1a(a integer, b integer, c integer, d integer, e integer) drop FUNCTION public.myleast1b(VARIADIC integer[]); drop FUNCTION public.myleast1c(VARIADIC integer[]); drop FUNCTION public.myleast2(VARIADIC integer[]); drop FUNCTION public.myleast3(VARIADIC integer[]); drop FUNCTION public.myleast6(VARIADIC a integer[]); drop FUNCTION public.myleast6a(VARIADIC a integer[]); drop FUNCTION public.myleast6b(VARIADIC a integer[]); drop FUNCTION public.myleast6c(VARIADIC a integer[]); drop FUNCTION public.myleast6d(VARIADIC a integer[]); drop FUNCTION public.myleast6e(VARIADIC a integer[]); drop FUNCTION public.myleast6f(a integer, b integer, c integer, d integer, e integer); drop FUNCTION public.myleast6g(VARIADIC a integer[]); drop FUNCTION public.myleast7_lua(a integer, b integer, c integer, d integer, e integer);
--删除临时表 drop table foo;
/* 【结论】 1)C语言实现的函数最快远超过其他语言, 需要时间0.531秒,比最快的脚本语言6.687秒快12.59倍 2)pl/pgsql > pl/python3u > pllua
*/