Tuesday, March 26, 2013

frequent mistakes in plpgsql design

Hello
last days I had to correct some typical errors in audited code. There are:
  • non-use coalesce:
    -- bad
    BEGIN
      IF x1 IS NOT NULL THEN
        s := s || x1 || ',';
      ELSE
        s := s || 'NULL, ';
      END IF;
      IF x2 IS NOT NULL THEN ...
    
    -- good
    BEGIN
      s := coalesce(x1 || ',', 'NULL,') || ...
    
  • using implicit cast date to text:
    -- bad
    BEGIN
      month = substring(current_date::text FROM 6 FOR 2)::int;
    
    -- good
    BEGIN
      month = substring(to_char(current_date,'YYYY-MM-DD') FROM 6 FOR 2)::int;
    
    -- but better
    BEGIN
      month = EXTRACT(month FROM current_date);
    
  • using EXECUTE instead PERFORM
  • using explicit cursor instead FOR IN SELECT
  • unsecured dynamic SQL:
    --bad
    EXECUTE 'SELECT ' || column_name || ' FROM ' || table_name
               || ' WHERE ' || column_name || e'=\'' || some_variable || e'\''
      INTO var;
    
    --good
    EXECUTE 'SELECT ' quote_ident(column_name) || ' FROM ' || quote_ident(table_name)
               || WHERE ' || quote_ident(column_name) || '=' || quote_literal(some_variable)
      INTO var;
    
    -- or on 8.4 and higher
    EXECUTE 'SELECT ' quote_ident(column_name) || ' FROM ' || table_name::regclass
               || WHERE ' || quote_ident(column_name) || '= $1'
      INTO var
      USING some_variable
    
    --bad
    CREATE OR REPLACE FUNCTION create_table(schemaname varchar, tablename varchar)
    RETURNS void AS $$
    BEGIN
      EXECUTE 'CREATE TABLE ' || coalesce(schemaname || '.','') || tablename;
      RETRUN;
    END;
    $$ LANGUAGE plpgsql;
    
    --good
    CREATE OR REPLACE FUNCTION create_table(schemaname varchar, tablename varchar)
    RETURNS void AS $$
    BEGIN
      EXECUTE 'CREATE TABLE ' || coalesce(quote_ident(schemaname) || '.','') || quote_ident(tablename);
      RETURN;
    END;
    $$ LANGUAGE plpgsql;
    
  • long lines - 100 chars per line is perfect
  • too short functions - Don't wrap any and only one SQL statement
  • Don't use PL/pgSQL for deep recursion calls
    Due internal design principles PL/pgSQL should not be well optimized for recursion calls. PL/pgSQL supports recursion, and for not too deeps calls can provide enough to satisfy sb performance, but it is very slow for deep recursion. Nice example is taken from presentation http://plv8-talk.herokuapp.com. It is perfect example how don't use PL/pgSQL ever.
    -- non recursion form
    CREATE OR REPLACE FUNCTION public.psqlfibnr(n integer)
     RETURNS integer
     LANGUAGE plpgsql
     IMMUTABLE STRICT
    AS $function$
    DECLARE 
      prev1 int = 0;
      prev2 int = 1;
      result int = 0;
    BEGIN
      FOR i IN 1..n
      LOOP
        result := prev1 + prev2;
        prev2 := prev1;
        prev1 := result;
      END LOOP;
      RETURN result;
    END;
    $function$
    
    -- recursion form
    CREATE OR REPLACE FUNCTION public.psqlfibr(n integer)
     RETURNS integer
     LANGUAGE plpgsql
     IMMUTABLE STRICT
    AS $function$
    BEGIN
      IF n < 2 THEN
        RETURN n;
      END IF;
      RETURN psqlfib(n-1) + psqlfib(n-2);
    END;
    $function$
    
    -- non recursive calls
    postgres=# select n, psqlfibnr(n)
                  from generate_series(0,35,5) as n;
     n  | psqlfibnr 
    ----+-----------
      0 |         0
      5 |         5
     10 |        55
     15 |       610
     20 |      6765
     25 |     75025
     30 |    832040
     35 |   9227465
    (8 rows)
    
    Time: 1.178 ms
    
    -- recursive calls
    postgres=# select n, psqlfib(n)
                  from generate_series(0,35,5) as n;
     n  | psqlfib
    ----+---------
      0 |       0
      5 |       5
     10 |      55
     15 |     610
     20 |    6765
     25 |   75025
     30 |  832040
     35 | 9227465
    (8 rows)
    
    Time: 282992.820 ms
    
    Speed of recursion calls in PL/pgSQL is not comparatable with recursion optimized languages like Javascript.

Don't use procedural code when you can use natural SQL tools. Use UNIQUE INDEX instead custom triggers for ensuring unequivocalness.
-- bad usage
CREATE OR REPLACE FUNCTION ensure_unique()
RETURNS TRIGGER AS $$
BEGIN
  IF EXISTS(SELECT * FROM tab WHERE id = NEW.id) THEN
    RAISE ERROR 'id is not unique';
  END IF;
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- good (use unique index)
CREATE UNIQUE INDEX ON tab(id);

Labels: ,

Sunday, October 23, 2011

Change a unknown record's field in PL/pgSQL

Hello
Week ago was a request on stackoverflow about change of composite variable's field by dynamic SQL. I wrote a first very simply solution:
CREATE OR REPLACE FUNCTION public.setfield(a anyelement, text, text)
RETURNS anyelement
LANGUAGE plpgsql
AS $function$
begin
  create temp table aux as select $1.*;
  execute 'update aux set ' || quote_ident($2) || ' = ' || quote_literal($3);
  select into $1 * from aux;
  drop table aux;
  return $1;
end;
$function$
It's works, but it's slow and it consumes lot of shared memory (impracticable for repeated using in one transaction). Next version was better - it is faster and isn't hungry:
CREATE OR REPLACE FUNCTION x.setfield2(anyelement, text, text)
 RETURNS anyelement
 LANGUAGE plpgsql
AS $function$
DECLARE 
  _name text;
  _values text[];
  _value text;
  _attnum int;
BEGIN
  FOR _name, _attnum
     IN SELECT a.attname, a.attnum
           FROM pg_catalog.pg_attribute a 
          WHERE a.attrelid = (SELECT typrelid
                                 FROM pg_type
                                WHERE oid = pg_typeof($1)::oid) 
  LOOP
    IF _name = $2 THEN
      _value := $3;
    ELSE
      EXECUTE 'SELECT (($1).' || quote_ident(_name) || ')::text' INTO _value USING $1;
    END IF;
    _values[_attnum] :=  COALESCE('"' || replace(replace(_value, '"', '""'), '''', '''''') || '"', '');
  END LOOP;
  EXECUTE 'SELECT (' || pg_typeof($1)::text || '''(' || array_to_string(_values,',') || ')'').*' INTO $1; 
  RETURN $1;
END;
$function$;
There are a few slower points: EXECUTE in loop, array's update in loop. But Erwin Brandstetter found a probably best and most simply solution - there are no more ways on PL/pgSQL level.
CREATE OR REPLACE FUNCTION public.setfield3(anyelement, text, text)
RETURNS anyelement
AS $body$
DECLARE
 _list text;

BEGIN
_list := (
   SELECT string_agg(x.fld, ',')
   FROM   (
      SELECT CASE WHEN a.attname = $2
              THEN quote_literal($3)
              ELSE quote_ident(a.attname)
             END AS fld
      FROM   pg_catalog.pg_attribute a 
      WHERE  a.attrelid = (SELECT typrelid
                           FROM   pg_type
                           WHERE  oid = pg_typeof($1)::oid) 
      ORDER BY a.attnum
   ) x
);

EXECUTE '
SELECT ' || _list || '
FROM   (SELECT $1.*) x'
USING  $1
INTO   $1;

RETURN $1;
END;
$body$ LANGUAGE plpgsql;
There is no PL/pgSQL's loop and there is just one EXECUTE. More - this code is just simple - without lot of string (quoting) operations.

Update: last Ervin' superfast version
CREATE FUNCTION f_setfield(INOUT _comp_val anyelement, _field text, _val text)
  RETURNS anyelement AS
$func$
BEGIN

EXECUTE 'SELECT ' || array_to_string(ARRAY(
      SELECT CASE WHEN attname = _field
                THEN '$2'
                ELSE '($1).' || quote_ident(attname)
             END AS fld
      FROM   pg_catalog.pg_attribute
      WHERE  attrelid = pg_typeof(_comp_val)::text::regclass
      AND    attnum > 0
      AND    attisdropped = FALSE
      ORDER  BY attnum
      ), ',')
USING  _comp_val, _val
INTO   _comp_val;

END
$func$ LANGUAGE plpgsql;
Very nice, Erwin!

Labels: ,

Tuesday, March 10, 2009

Experimental psql

Hello

I spent some on psql's face lifting. Some new features are really experimental. I don't expect early adopting it for core. It's more game for now. epsql will be available with release 8.4. What epsql can do?

UTF8 boxes

  • one new border style, five new border lines styles

Stored function's listing

  • should help with debuging

    postgres=# select test(10);
    ERROR: division by zero
    CONTEXT: PL/pgSQL function "test" line 3 at assignment

    postgres=# \lf test
    *** CREATE OR REPLACE FUNCTION public.test(a integer)
    *** RETURNS integer
    *** LANGUAGE plpgsql
    *** AS $function$
    1 declare b int;
    2 begin
    3 b := a/0;
    4 return a;
    5 end;
    *** $function$

Cursor's support

  • allows using data from database for statement parameters
    • \fetch
      postgres=# BEGIN;
      BEGIN
      postgres=# DECLARE c CURSOR FOR SELECT * FROM pg_database;
      DECLARE CURSOR
      postgres=# \fetch c \echo :datname :datcollate
      template1 cs_CZ.UTF-8
      postgres=# \fetch c \echo :datname :datcollate
      template0 cs_CZ.UTF-8
      postgres=# \fetch c \echo :datname :datcollate
      postgres cs_CZ.UTF-8
      postgres=# \fetch c \echo :datname :datcollate
      ERROR: (not available)
      postgres=# COMMIT;
      COMMIT
    • \fetchall
      postgres=# CREATE TABLE test1(a int); CREATE TABLE test2(a int);
      CREATE TABLE
      CREATE TABLE
      postgres=# BEGIN;
      BEGIN
      postgres=# DECLARE c CURSOR FOR SELECT tablename FROM pg_tables WHERE tablename LIKE 'test%';
      DECLARE CURSOR
      postgres=# \fetchall c DROP TABLE :tablename;
      DROP TABLE
      DROP TABLE
      postgres=# COMMIT;
      COMMIT
      postgres=#
I invite any ideas, comments.

Pavel

Labels:

Wednesday, August 27, 2008

default parameters for PL functions

Hello

I completed one task - defaults for PL functions. Using is simple - it is same as Firebird's 2.x defaults.
postgres=# create or replace function x1(int = 1,int = 2,int= 3)
returns int as $$
select $1+$2+$3;
$$ language sql;
CREATE FUNCTION
postgres=# select x1();
x1
----
6
(1 row)

postgres=# select x1(10);;
x1
----
15
(1 row)

postgres=# select x1(10,20);
x1
----
33
(1 row)

postgres=# select x1(10,20,30);
x1
----
60
(1 row)
This is first step before named parameters feature - and less controversy. Second step will be difficult - there are two opinions about named parameter's syntax: variant a) using Oracle's syntax name => expression and variant b) use own syntax based on keyword "AS" expression AS name. I prefer variant @a - I thing so it's more readable (SQL use AS for labeling). Variant @b is safe from compatibility views. There was discussion on pg_hackers - without any conclusion. So I hope so at least defaults will be committed.

bye
Pavel

Labels: ,

Saturday, December 1, 2007

Using $_SHARED as table cache in plperl

Oracle has new function result cache. It's nice idea. There are some use cases, mostly in www applications. There are nothing similar in PostgreSQL. I found simple similar solution in plperl (with important disadvantage to Oracle solution). It works, but without any pooling mechanisms its not really effective.

I created some tables:
CREATE TABLE Books(
id serial PRIMARY KEY,
name VARCHAR(20));

CREATE TABLE Sale(
book_id integer REFERENCES Books(id),
inserted timestamp DEFAULT(CURRENT_TIMESTAMP)
);

INSERT INTO Books VALUES(1,'Dracula');
INSERT INTO Books VALUES(2,'Nosferatu');
INSERT INTO Books VALUES(3,'Bacula');

INSERT INTO Sale VALUES(1, '2007-10-11');
INSERT INTO Sale VALUES(2, '2007-10-12');
INSERT INTO Sale VALUES(2, '2007-10-13');
INSERT INTO Sale VALUES(3, '2007-10-10');
and I created function Top10Books that returns top ten books.
-- Top10
CREATE OR REPLACE FUNCTION Top10Books(IN date, OUT ordr integer, OUT name varchar(20))
RETURNS SETOF RECORD
AS $$
BEGIN
ordr := 0;
FOR name IN SELECT b.name
FROM Books b
JOIN
Sale s
ON b.id = s.book_id
WHERE s.inserted BETWEEN date_trunc('month', $1)
AND date_trunc('month', $1)
+ interval '1month' - interval '1day'
GROUP BY b.name
ORDER BY count(*) DESC
LIMIT 10
LOOP
ordr := ordr + 1;
RETURN NEXT;
END LOOP;
RETURN;
END;
$$ LANGUAGE plpgsql;
One my customer put similar function on every page on some site (with really destructive impact to load on db server. Correct solution is to use some php cache or some similar tool. With plperl I am able cache result in Postgres too:
CREATE OR REPLACE FUNCTION Top10BooksCached(IN date, IN bool,
OUT ordr integer, OUT name varchar(20))
RETURNS SETOF RECORD
AS $$
return $_SHARED{tableof_top10book}
if (defined ($_SHARED{tableof_top10book}) and not (defined($_[1]) and $_[1] eq "t"));
if (not defined($_SHARED{plan_for_top10books}))
{
$_SHARED{plan_for_top10books} = spi_prepare(
'SELECT b.name
FROM Books b
JOIN
Sale s
ON b.id = s.book_id
WHERE s.inserted BETWEEN date_trunc(\'month\', $1)
AND date_trunc(\'month\', $1)
+ interval \'1month\' - interval \'1day\'
GROUP BY b.name
ORDER BY count(*) DESC
LIMIT 10' , 'DATE');
}
my $row;
my $i = 0;
my $heap;
my $sth = spi_query_prepared($_SHARED{plan_for_top10books}, $_[0]);
while (defined ($row = spi_fetchrow($sth))) {
push @$heap, {ordr => ++$i, name => $row->{name}}
}
$_SHARED{tableof_top10book} = $heap ;
return $_SHARED{tableof_top10book};
$$ LANGUAGE plperlu;
or with cache expiration
CREATE OR REPLACE FUNCTION Top10BooksCached(IN date, IN integer,
OUT ordr integer, OUT name varchar(20))
RETURNS SETOF RECORD
AS $$
return $_SHARED{tableof_top10book}
if (defined ($_SHARED{tableof_top10book})
and defined($_SHARED{actualised_top10book})
and ($_SHARED{actualised_top10book} + $_[1] > time));
if (not defined($_SHARED{plan_for_top10books}))
{
$_SHARED{plan_for_top10books} = spi_prepare(
'SELECT b.name
FROM Books b
JOIN
Sale s
ON b.id = s.book_id
WHERE s.inserted BETWEEN date_trunc(\'month\', $1)
AND date_trunc(\'month\', $1)
+ interval \'1month\' - interval \'1day\'
GROUP BY b.name
ORDER BY count(*) DESC
LIMIT 10' , 'DATE');
}
my $row;
my $i = 0;
my $heap;
my $sth = spi_query_prepared($_SHARED{plan_for_top10books}, $_[0]);
while (defined ($row = spi_fetchrow($sth))) {
push @$heap, {ordr => ++$i, name => $row->{name}}
}
$_SHARED{tableof_top10book} = $heap ;
$_SHARED{actualised_top10book} = time;
return $_SHARED{tableof_top10book};
$$ LANGUAGE plperlu;
An usage is simple:
-- first call is slow
postgres=# select * from Top10BooksCached(current_date, 300);
ordr | name
------+-----------
1 | Nosferatu
2 | Bacula
3 | Dracula
(3 rows)

Time: 128,965 ms
-- second call is fast
postgres=# select * from Top10BooksCached(current_date, 300);
ordr | name
------+-----------
1 | Nosferatu
2 | Bacula
3 | Dracula
(3 rows)

Time: 11,911 ms
Use it carefully! It's not good for big tables, and what more, this function can returns out-of-date values.

Labels: ,

Tuesday, November 20, 2007

Stacked recordset (multirecordset)

So I am little bit far now. Multirecorsets are available with current protocol, but some changes in libpq are necessary :(. Procedure's output can be much more dynamic than from SRF functions. There are not limits. I wrote small regress test. Its based on Roland Bauman's sample (that was be used in http://www.pgsql.cz/index.php/MySQL5:_Gentle_introduction_to_stored_procedures_of_MySQL5).
CREATE TYPE gender AS ENUM('m', 'f');

CREATE TABLE employees (
id serial PRIMARY KEY,
shop_id int,
gender gender,
name varchar(32),
salary int
);

CREATE TABLE shops (
id serial PRIMARY KEY,
shop varchar(32)
);

INSERT INTO shops (shop)
VALUES ('Zurich'), ('New York'), ('London');

INSERT INTO employees (shop_id, gender, name, salary)
VALUES
(1, 'm', 'Jon Simpson', 4500),
(1, 'f', 'Barbara Breitenmoser', 4700),
(2, 'f', 'Kirsten Ruegg', 5600),
(3, 'm', 'Ralph Teller', 5100),
(3, 'm', 'Peter Jonson', 5200);
Procedure crosstab generate and execute SQL like
SELECT shop,
SUM(CASE gender WHEN 'f' THEN salary ELSE 0 END) AS f,
SUM(CASE gender WHEN 'm' THEN salary ELSE 0 END) AS m,
SUM(salary) AS total
FROM employees e INNER JOIN shops s ON e.shop_id = s.id
GROUP BY shop
Source code:
CREATE OR REPLACE procedure crosstab(dimx_name varchar(32), dimx_source varchar(32),
dimy_name varchar(32), dimy_source varchar(32),
expr varchar(32)) AS
$$
DECLARE
cols_expr varchar;
xtab_expr varchar;
BEGIN
EXECUTE 'SELECT array_to_string(ARRAY(SELECT ''SUM(CASE ' || dimx_name || ' WHEN '''''' || x.'
|| dimx_name || ' || '''''' THEN ' || expr || ' ELSE 0 END) AS "'' || x.'|| dimx_name
|| ' || ''"'' FROM (SELECT DISTINCT '
|| dimx_name || ' ' || dimx_source || ') x),'', '')' INTO cols_expr;
xtab_expr := 'SELECT '|| dimy_name ||', ' || cols_expr || ', SUM(' || expr || ') AS Total '
|| dimy_source || ' GROUP BY '|| dimy_name;
EXECUTE xtab_expr;
END;
$$ LANGUAGE plpgsql;
Test output:
postgres=# CALL crosstab('gender', 'FROM employees','shop','FROM employees e INNER JOIN shops s ON e.shop_id = s.id','salary');
shop | m | f | total
---------+-------+------+-------
New York | 0 | 5600 | 5600
Zurich | 4500 | 4700 | 9200
London | 10300 | 0 | 10300
(3 rows)

CALL 0
postgres=# CALL crosstab('shop', 'FROM shops', 'gender','FROM employees e INNER JOIN shops s ON e.shop_id = s.id','salary');
gender | London | New York | Zurich | total
-------+--------+----------+--------+-------
m | 10300 | 0 | 4500 | 14800
f | 0 | 5600 | 4700 | 10300
(2 rows)

CALL 0
And one really multirecordset's sample:
create or replace procedure free(int) as $$
declare s varchar = 'SELECT 1';
begin
for i in 2..$1 loop
s := s || ',' || to_char(i,'999');
execute s;
end loop;
end$$ language plpgsql;
Output:
postgres=# call free(6);
?column? | ?column?
----------+----------
1 | 2
(1 row)

?column? | ?column? | ?column?
----------+----------+----------
1 | 2 | 3
(1 row)

?column? | ?column? | ?column? | ?column?
----------+----------+----------+----------
1 | 2 | 3 | 4
(1 row)

?column? | ?column? | ?column? | ?column? | ?column?
----------+----------+----------+----------+----------
1 | 2 | 3 | 4 | 5
(1 row)

?column? | ?column? | ?column? | ?column? | ?column? | ?column?
----------+----------+----------+----------+----------+----------
1 | 2 | 3 | 4 | 5 | 6
(1 row)

CALL 0

Labels: ,

Wednesday, November 7, 2007

First real procedures on PostgreSQL

I am working on an enhancing of plpgsql. I would to add to plpgsql procedures and methods. Methods are flagged functions with different calling convention. Procedures are total different beast. Now I am able to run procedures without parameters. I would to support parameter passing by reference. It allows to take reference for any variable and potentially some interesting features (like string builder or array builder etc). What is best, I need not modify system tables. Procedure is function (from pg_proc perspective) that returns PROCEDURE type (it will be used for exit status).

postgres=#
create or replace procedure print()
as $$
begin
raise notice 'ahoj';
end;
$$ language plpgsql;
CREATE PROCEDURE

postgres=#
create or replace function test()
returns void as $$
begin
call print();
return;
end;
$$ language plpgsql;
CREATE FUNCTION

postgres=# select test();
NOTICE: Found ()
CONTEXT: PL/pgSQL function "test" line 4456553 at CALL
NOTICE: OID: 16821
CONTEXT: PL/pgSQL function "test" line 4456553 at CALL
NOTICE: ahoj
CONTEXT: PL/pgSQL function "test" line 4456553 at CALL
test
------

(1 row)

postgres=# \df print
List of functions
Schema | Name | Result data type | Argument data types
--------+-------+------------------+---------------------
public | print | procedure |
(1 row)

There is lot of work still, but first step is done.

Labels: ,