Pavel Stehule's blog

Tuesday, March 26, 2013

frequent mistakes in plpgsql design

Hello
last days I had to correct some typical errors in audited code. There are:

non-use coalesce:

-- bad
BEGIN
  IF x1 IS NOT NULL THEN
    s := s || x1 || ',';
  ELSE
    s := s || 'NULL, ';
  END IF;
  IF x2 IS NOT NULL THEN ...

-- good
BEGIN
  s := coalesce(x1 || ',', 'NULL,') || ...

using implicit cast date to text:

-- bad
BEGIN
  month = substring(current_date::text FROM 6 FOR 2)::int;

-- good
BEGIN
  month = substring(to_char(current_date,'YYYY-MM-DD') FROM 6 FOR 2)::int;

-- but better
BEGIN
  month = EXTRACT(month FROM current_date);

using EXECUTE instead PERFORM
using explicit cursor instead FOR IN SELECT

unsecured dynamic SQL:

--bad
EXECUTE 'SELECT ' || column_name || ' FROM ' || table_name
           || ' WHERE ' || column_name || e'=\'' || some_variable || e'\''
  INTO var;

--good
EXECUTE 'SELECT ' quote_ident(column_name) || ' FROM ' || quote_ident(table_name)
           || WHERE ' || quote_ident(column_name) || '=' || quote_literal(some_variable)
  INTO var;

-- or on 8.4 and higher
EXECUTE 'SELECT ' quote_ident(column_name) || ' FROM ' || table_name::regclass
           || WHERE ' || quote_ident(column_name) || '= $1'
  INTO var
  USING some_variable

--bad
CREATE OR REPLACE FUNCTION create_table(schemaname varchar, tablename varchar)
RETURNS void AS $$
BEGIN
  EXECUTE 'CREATE TABLE ' || coalesce(schemaname || '.','') || tablename;
  RETRUN;
END;
$$ LANGUAGE plpgsql;

--good
CREATE OR REPLACE FUNCTION create_table(schemaname varchar, tablename varchar)
RETURNS void AS $$
BEGIN
  EXECUTE 'CREATE TABLE ' || coalesce(quote_ident(schemaname) || '.','') || quote_ident(tablename);
  RETURN;
END;
$$ LANGUAGE plpgsql;

long lines - 100 chars per line is perfect
too short functions - Don't wrap any and only one SQL statement

Don't use PL/pgSQL for deep recursion calls
Due internal design principles PL/pgSQL should not be well optimized for recursion calls. PL/pgSQL supports recursion, and for not too deeps calls can provide enough to satisfy sb performance, but it is very slow for deep recursion. Nice example is taken from presentation http://plv8-talk.herokuapp.com. It is perfect example how don't use PL/pgSQL ever.

-- non recursion form
CREATE OR REPLACE FUNCTION public.psqlfibnr(n integer)
 RETURNS integer
 LANGUAGE plpgsql
 IMMUTABLE STRICT
AS $function$
DECLARE 
  prev1 int = 0;
  prev2 int = 1;
  result int = 0;
BEGIN
  FOR i IN 1..n
  LOOP
    result := prev1 + prev2;
    prev2 := prev1;
    prev1 := result;
  END LOOP;
  RETURN result;
END;
$function$

-- recursion form
CREATE OR REPLACE FUNCTION public.psqlfibr(n integer)
 RETURNS integer
 LANGUAGE plpgsql
 IMMUTABLE STRICT
AS $function$
BEGIN
  IF n < 2 THEN
    RETURN n;
  END IF;
  RETURN psqlfib(n-1) + psqlfib(n-2);
END;
$function$

-- non recursive calls
postgres=# select n, psqlfibnr(n)
              from generate_series(0,35,5) as n;
 n  | psqlfibnr 
----+-----------
  0 |         0
  5 |         5
 10 |        55
 15 |       610
 20 |      6765
 25 |     75025
 30 |    832040
 35 |   9227465
(8 rows)

Time: 1.178 ms

-- recursive calls
postgres=# select n, psqlfib(n)
              from generate_series(0,35,5) as n;
 n  | psqlfib
----+---------
  0 |       0
  5 |       5
 10 |      55
 15 |     610
 20 |    6765
 25 |   75025
 30 |  832040
 35 | 9227465
(8 rows)

Time: 282992.820 ms

Speed of recursion calls in PL/pgSQL is not comparatable with recursion optimized languages like Javascript.

Don't use procedural code when you can use natural SQL tools. Use UNIQUE INDEX instead custom triggers for ensuring unequivocalness.

-- bad usage
CREATE OR REPLACE FUNCTION ensure_unique()
RETURNS TRIGGER AS $$
BEGIN
  IF EXISTS(SELECT * FROM tab WHERE id = NEW.id) THEN
    RAISE ERROR 'id is not unique';
  END IF;
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- good (use unique index)
CREATE UNIQUE INDEX ON tab(id);

Labels: plpgsql, postgresql

Sunday, October 23, 2011

Change a unknown record's field in PL/pgSQL

Hello
Week ago was a request on stackoverflow about change of composite variable's field by dynamic SQL. I wrote a first very simply solution:

CREATE OR REPLACE FUNCTION public.setfield(a anyelement, text, text)
RETURNS anyelement
LANGUAGE plpgsql
AS $function$
begin
  create temp table aux as select $1.*;
  execute 'update aux set ' || quote_ident($2) || ' = ' || quote_literal($3);
  select into $1 * from aux;
  drop table aux;
  return $1;
end;
$function$

It's works, but it's slow and it consumes lot of shared memory (impracticable for repeated using in one transaction). Next version was better - it is faster and isn't hungry:

CREATE OR REPLACE FUNCTION x.setfield2(anyelement, text, text)
 RETURNS anyelement
 LANGUAGE plpgsql
AS $function$
DECLARE 
  _name text;
  _values text[];
  _value text;
  _attnum int;
BEGIN
  FOR _name, _attnum
     IN SELECT a.attname, a.attnum
           FROM pg_catalog.pg_attribute a 
          WHERE a.attrelid = (SELECT typrelid
                                 FROM pg_type
                                WHERE oid = pg_typeof($1)::oid) 
  LOOP
    IF _name = $2 THEN
      _value := $3;
    ELSE
      EXECUTE 'SELECT (($1).' || quote_ident(_name) || ')::text' INTO _value USING $1;
    END IF;
    _values[_attnum] :=  COALESCE('"' || replace(replace(_value, '"', '""'), '''', '''''') || '"', '');
  END LOOP;
  EXECUTE 'SELECT (' || pg_typeof($1)::text || '''(' || array_to_string(_values,',') || ')'').*' INTO $1; 
  RETURN $1;
END;
$function$;

There are a few slower points: EXECUTE in loop, array's update in loop. But Erwin Brandstetter found a probably best and most simply solution - there are no more ways on PL/pgSQL level.

CREATE OR REPLACE FUNCTION public.setfield3(anyelement, text, text)
RETURNS anyelement
AS $body$
DECLARE
 _list text;

BEGIN
_list := (
   SELECT string_agg(x.fld, ',')
   FROM   (
      SELECT CASE WHEN a.attname = $2
              THEN quote_literal($3)
              ELSE quote_ident(a.attname)
             END AS fld
      FROM   pg_catalog.pg_attribute a 
      WHERE  a.attrelid = (SELECT typrelid
                           FROM   pg_type
                           WHERE  oid = pg_typeof($1)::oid) 
      ORDER BY a.attnum
   ) x
);

EXECUTE '
SELECT ' || _list || '
FROM   (SELECT $1.*) x'
USING  $1
INTO   $1;

RETURN $1;
END;
$body$ LANGUAGE plpgsql;

There is no PL/pgSQL's loop and there is just one EXECUTE. More - this code is just simple - without lot of string (quoting) operations.

Update: last Ervin' superfast version

CREATE FUNCTION f_setfield(INOUT _comp_val anyelement, _field text, _val text)
  RETURNS anyelement AS
$func$
BEGIN

EXECUTE 'SELECT ' || array_to_string(ARRAY(
      SELECT CASE WHEN attname = _field
                THEN '$2'
                ELSE '($1).' || quote_ident(attname)
             END AS fld
      FROM   pg_catalog.pg_attribute
      WHERE  attrelid = pg_typeof(_comp_val)::text::regclass
      AND    attnum > 0
      AND    attisdropped = FALSE
      ORDER  BY attnum
      ), ',')
USING  _comp_val, _val
INTO   _comp_val;

END
$func$ LANGUAGE plpgsql;

Very nice, Erwin!

Labels: plpgsql, postgresql

Tuesday, March 10, 2009

Experimental psql

Hello

I spent some on psql's face lifting. Some new features are really experimental. I don't expect early adopting it for core. It's more game for now. epsql will be available with release 8.4. What epsql can do?

UTF8 boxes

one new border style, five new border lines styles

Stored function's listing

should help with debuging


postgres=# select test(10);
ERROR:  division by zero
CONTEXT:  PL/pgSQL function "test" line 3 at assignment

postgres=# \lf test
*** CREATE OR REPLACE FUNCTION public.test(a integer)
***  RETURNS integer
***  LANGUAGE plpgsql
*** AS $function$
1 declare b int;
2 begin
3   b := a/0;
4   return a;
5 end;
*** $function$

Cursor's support

allows using data from database for statement parameters

\fetch

postgres=# BEGIN;
BEGIN
postgres=# DECLARE c CURSOR FOR SELECT * FROM pg_database;
DECLARE CURSOR
postgres=# \fetch c \echo :datname :datcollate
template1 cs_CZ.UTF-8
postgres=# \fetch c \echo :datname :datcollate
template0 cs_CZ.UTF-8
postgres=# \fetch c \echo :datname :datcollate
postgres cs_CZ.UTF-8
postgres=# \fetch c \echo :datname :datcollate
ERROR:  (not available)
postgres=# COMMIT;
COMMIT

\fetchall

postgres=# CREATE TABLE test1(a int); CREATE TABLE test2(a int);
CREATE TABLE
CREATE TABLE
postgres=# BEGIN;
BEGIN
postgres=# DECLARE c CURSOR FOR SELECT tablename FROM pg_tables WHERE tablename LIKE 'test%';
DECLARE CURSOR
postgres=# \fetchall c DROP TABLE :tablename;
DROP TABLE
DROP TABLE
postgres=# COMMIT;
COMMIT
postgres=#

I invite any ideas, comments.

Pavel

Labels: postgresql

Wednesday, August 27, 2008

default parameters for PL functions

Hello

I completed one task - defaults for PL functions. Using is simple - it is same as Firebird's 2.x defaults.

postgres=# create or replace function x1(int = 1,int = 2,int= 3)
        returns int as $$
          select $1+$2+$3;
        $$ language sql;
CREATE FUNCTION
postgres=# select x1();
x1
----
6
(1 row)

postgres=# select x1(10);;
x1
----
15
(1 row)

postgres=# select x1(10,20);
x1
----
33
(1 row)

postgres=# select x1(10,20,30);
x1
----
60
(1 row)

This is first step before named parameters feature - and less controversy. Second step will be difficult - there are two opinions about named parameter's syntax: variant a) using Oracle's syntax name => expression and variant b) use own syntax based on keyword "AS" expression AS name. I prefer variant @a - I thing so it's more readable (SQL use AS for labeling). Variant @b is safe from compatibility views. There was discussion on pg_hackers - without any conclusion. So I hope so at least defaults will be committed.

bye
Pavel

Labels: 8.4, postgresql

Saturday, December 1, 2007

Using $_SHARED as table cache in plperl

Oracle has new function result cache. It's nice idea. There are some use cases, mostly in www applications. There are nothing similar in PostgreSQL. I found simple similar solution in plperl (with important disadvantage to Oracle solution). It works, but without any pooling mechanisms its not really effective.

I created some tables:

CREATE TABLE Books(
id serial PRIMARY KEY,
name VARCHAR(20));

CREATE TABLE Sale(
book_id integer REFERENCES Books(id),
inserted timestamp DEFAULT(CURRENT_TIMESTAMP)
);

INSERT INTO Books VALUES(1,'Dracula');
INSERT INTO Books VALUES(2,'Nosferatu');
INSERT INTO Books VALUES(3,'Bacula');

INSERT INTO Sale VALUES(1, '2007-10-11');
INSERT INTO Sale VALUES(2, '2007-10-12');
INSERT INTO Sale VALUES(2, '2007-10-13');
INSERT INTO Sale VALUES(3, '2007-10-10');

and I created function Top10Books that returns top ten books.

-- Top10
CREATE OR REPLACE FUNCTION Top10Books(IN date, OUT ordr integer, OUT name varchar(20))
RETURNS SETOF RECORD
AS $$
BEGIN
ordr := 0;
FOR name IN SELECT b.name
              FROM Books b
                   JOIN
                   Sale s
                   ON b.id = s.book_id
             WHERE s.inserted BETWEEN date_trunc('month', $1)
                                  AND date_trunc('month', $1)
           + interval '1month' - interval '1day'
             GROUP BY b.name
             ORDER BY count(*) DESC
             LIMIT 10
LOOP
ordr := ordr + 1;
RETURN NEXT;
END LOOP;
RETURN;
END;
$$ LANGUAGE plpgsql;

One my customer put similar function on every page on some site (with really destructive impact to load on db server. Correct solution is to use some php cache or some similar tool. With plperl I am able cache result in Postgres too:

CREATE OR REPLACE FUNCTION Top10BooksCached(IN date, IN bool,
       OUT ordr integer, OUT name varchar(20))
RETURNS SETOF RECORD
AS $$
return $_SHARED{tableof_top10book}
    if (defined ($_SHARED{tableof_top10book}) and not (defined($_[1]) and $_[1] eq "t"));
if (not defined($_SHARED{plan_for_top10books}))
{
   $_SHARED{plan_for_top10books} = spi_prepare(
           'SELECT b.name                                                                                                                                                                                                
              FROM Books b                                                                                                                                                                                               
                   JOIN                                                                                                                                                                                                  
                   Sale s                                                                                                                                                                                                
                   ON b.id = s.book_id                                                                                                                                                                                   
             WHERE s.inserted BETWEEN date_trunc(\'month\', $1)                                                                                                                                                          
                                  AND date_trunc(\'month\', $1)
    + interval \'1month\' - interval \'1day\'                                                                                                                
             GROUP BY b.name                                                                                                                                                                                             
             ORDER BY count(*) DESC                                                                                                                                                                                      
             LIMIT 10' , 'DATE');
}
my $row;
my $i = 0;
my $heap;
my $sth = spi_query_prepared($_SHARED{plan_for_top10books}, $_[0]);
while (defined ($row = spi_fetchrow($sth))) {
    push @$heap, {ordr => ++$i, name => $row->{name}}
}
$_SHARED{tableof_top10book} =  $heap ;
return $_SHARED{tableof_top10book};
$$ LANGUAGE plperlu;

or with cache expiration

CREATE OR REPLACE FUNCTION Top10BooksCached(IN date, IN integer,
     OUT ordr integer, OUT name varchar(20))
RETURNS SETOF RECORD
AS $$
return $_SHARED{tableof_top10book}
    if (defined ($_SHARED{tableof_top10book})
            and defined($_SHARED{actualised_top10book})
            and ($_SHARED{actualised_top10book} + $_[1] > time));
if (not defined($_SHARED{plan_for_top10books}))
{
   $_SHARED{plan_for_top10books} = spi_prepare(
           'SELECT b.name                                                                                                                                                                                                
              FROM Books b                                                                                                                                                                                               
                   JOIN                                                                                                                                                                                                  
                   Sale s                                                                                                                                                                                                
                   ON b.id = s.book_id                                                                                                                                                                                   
             WHERE s.inserted BETWEEN date_trunc(\'month\', $1)                                                                                                                                                          
                                  AND date_trunc(\'month\', $1)
    + interval \'1month\' - interval \'1day\'                                                                                                                
             GROUP BY b.name                                                                                                                                                                                             
             ORDER BY count(*) DESC                                                                                                                                                                                      
             LIMIT 10' , 'DATE');
}
my $row;
my $i = 0;
my $heap;
my $sth = spi_query_prepared($_SHARED{plan_for_top10books}, $_[0]);
while (defined ($row = spi_fetchrow($sth))) {
    push @$heap, {ordr => ++$i, name => $row->{name}}
}
$_SHARED{tableof_top10book} =  $heap ;
$_SHARED{actualised_top10book} = time;
return $_SHARED{tableof_top10book};
$$ LANGUAGE plperlu;

An usage is simple:

-- first call is slow
postgres=# select * from Top10BooksCached(current_date, 300);
ordr |   name
------+-----------
1 | Nosferatu
2 | Bacula
3 | Dracula
(3 rows)

Time: 128,965 ms
-- second call is fast
postgres=# select * from Top10BooksCached(current_date, 300);
ordr |   name
------+-----------
1 | Nosferatu
2 | Bacula
3 | Dracula
(3 rows)

Time: 11,911 ms

Use it carefully! It's not good for big tables, and what more, this function can returns out-of-date values.

Labels: plperl, postgresql

Tuesday, November 20, 2007

Stacked recordset (multirecordset)

So I am little bit far now. Multirecorsets are available with current protocol, but some changes in libpq are necessary :(. Procedure's output can be much more dynamic than from SRF functions. There are not limits. I wrote small regress test. Its based on Roland Bauman's sample (that was be used in http://www.pgsql.cz/index.php/MySQL5:_Gentle_introduction_to_stored_procedures_of_MySQL5).

CREATE TYPE gender AS ENUM('m', 'f');

CREATE TABLE employees (
id serial PRIMARY KEY,
shop_id int,
gender gender,
name varchar(32),
salary int
);

CREATE TABLE shops (
id serial PRIMARY KEY,
shop varchar(32)
);

INSERT INTO shops (shop)
VALUES ('Zurich'), ('New York'), ('London');

INSERT INTO employees (shop_id, gender, name, salary)
VALUES
(1, 'm', 'Jon Simpson', 4500),
(1, 'f', 'Barbara Breitenmoser', 4700),
(2, 'f', 'Kirsten Ruegg', 5600),
(3, 'm', 'Ralph Teller', 5100),
(3, 'm', 'Peter Jonson', 5200);

Procedure crosstab generate and execute SQL like

SELECT shop,
SUM(CASE gender WHEN 'f' THEN salary ELSE 0 END) AS f,
SUM(CASE gender WHEN 'm' THEN salary ELSE 0 END) AS m,
SUM(salary) AS total
FROM employees e INNER JOIN shops s ON e.shop_id = s.id
GROUP BY shop

Source code:

CREATE OR REPLACE procedure crosstab(dimx_name varchar(32), dimx_source varchar(32),
                                  dimy_name varchar(32), dimy_source varchar(32),
                                  expr varchar(32)) AS
$$
DECLARE
cols_expr varchar;
xtab_expr varchar;
BEGIN
 EXECUTE  'SELECT array_to_string(ARRAY(SELECT ''SUM(CASE ' || dimx_name || ' WHEN '''''' || x.'
        || dimx_name || ' || '''''' THEN ' || expr || ' ELSE 0 END) AS "'' || x.'|| dimx_name
        || ' || ''"'' FROM (SELECT DISTINCT '
        || dimx_name || ' ' || dimx_source || ') x),'', '')' INTO cols_expr;
 xtab_expr := 'SELECT '|| dimy_name ||', ' || cols_expr || ', SUM(' || expr || ') AS Total '
      || dimy_source || ' GROUP BY '|| dimy_name;
EXECUTE xtab_expr;
END;
$$ LANGUAGE plpgsql;

Test output:

postgres=# CALL crosstab('gender', 'FROM employees','shop','FROM employees e INNER JOIN shops s ON e.shop_id = s.id','salary');
shop   |   m   |  f   | total
---------+-------+------+-------
New York |     0 | 5600 |  5600
Zurich   |  4500 | 4700 |  9200
London   | 10300 |    0 | 10300
(3 rows)

CALL 0
postgres=# CALL crosstab('shop', 'FROM shops', 'gender','FROM employees e INNER JOIN shops s ON e.shop_id = s.id','salary');
gender | London | New York | Zurich | total
-------+--------+----------+--------+-------
m      |  10300 |        0 |   4500 | 14800
f      |      0 |     5600 |   4700 | 10300
(2 rows)

CALL 0

And one really multirecordset's sample:

create or replace procedure free(int) as $$
declare s varchar = 'SELECT 1';
begin
 for i in 2..$1 loop
   s := s || ',' || to_char(i,'999');
   execute s;
 end loop; 
end$$ language plpgsql;

Output:

postgres=# call free(6);
?column? | ?column?
----------+----------
       1 |        2
(1 row)

?column? | ?column? | ?column?
----------+----------+----------
       1 |        2 |        3
(1 row)

?column? | ?column? | ?column? | ?column?
----------+----------+----------+----------
       1 |        2 |        3 |        4
(1 row)

?column? | ?column? | ?column? | ?column? | ?column?
----------+----------+----------+----------+----------
       1 |        2 |        3 |        4 |        5
(1 row)

?column? | ?column? | ?column? | ?column? | ?column? | ?column?
----------+----------+----------+----------+----------+----------
       1 |        2 |        3 |        4 |        5 |        6
(1 row)

CALL 0

Labels: plpgsql, postgresql

Wednesday, November 7, 2007

First real procedures on PostgreSQL

I am working on an enhancing of plpgsql. I would to add to plpgsql procedures and methods. Methods are flagged functions with different calling convention. Procedures are total different beast. Now I am able to run procedures without parameters. I would to support parameter passing by reference. It allows to take reference for any variable and potentially some interesting features (like string builder or array builder etc). What is best, I need not modify system tables. Procedure is function (from pg_proc perspective) that returns PROCEDURE type (it will be used for exit status).


postgres=#
create or replace procedure print()
as $$
begin
raise notice 'ahoj';
end;
$$ language plpgsql;
CREATE PROCEDURE

postgres=#
create or replace function test()
returns void as $$
begin
call print();
return;
end;
$$ language plpgsql;
CREATE FUNCTION

postgres=# select test();
NOTICE:  Found ()
CONTEXT:  PL/pgSQL function "test" line 4456553 at CALL
NOTICE:  OID: 16821
CONTEXT:  PL/pgSQL function "test" line 4456553 at CALL
NOTICE:  ahoj
CONTEXT:  PL/pgSQL function "test" line 4456553 at CALL
test
------

(1 row)

postgres=# \df print
                  List of functions
Schema | Name  | Result data type | Argument data types
--------+-------+------------------+---------------------
public | print | procedure        |
(1 row)

There is lot of work still, but first step is done.

Labels: plpgsql, postgresql

Pavel Stehule's blog

Tuesday, March 26, 2013

frequent mistakes in plpgsql design

Sunday, October 23, 2011

Change a unknown record's field in PL/pgSQL

Tuesday, March 10, 2009

Experimental psql

UTF8 boxes

Stored function's listing

Cursor's support

Wednesday, August 27, 2008

default parameters for PL functions

Saturday, December 1, 2007

Using $_SHARED as table cache in plperl

Tuesday, November 20, 2007

Stacked recordset (multirecordset)

Wednesday, November 7, 2007

First real procedures on PostgreSQL

About Me

Links

Previous Posts

Archives