Friday, August 29, 2008

updatable cursor's test

Hello

I had to update every row of table with an result of external function. It was possibility for testing of some patterns:
postgres=# create table testcursor(i integer primary key, v integer);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "testcursor_pkey" for table "testcursor"
CREATE TABLE
postgres=# insert into testcursor select i, 0 from generate_series(1,100000) g(i);
INSERT 0 100000
postgres=# analyze testcursor;
ANALYZE

/* external function sample */
create or replace function ext_fce(a integer, b integer)
returns int as $
declare r int;
begin
/* protect section */
begin
r := b;
exception when others then
r := null;
end;
return r;
end;
$$ language plpgsql;
1. test - standard update statement
postgres=# update testcursor set v = ext_fce(i, 30);
UPDATE 100000
Time: 4369,246 ms
2. test - update with updateable cursors
create or replace function testc2(_v integer)
returns void as $$
declare
c cursor for select i from testcursor;
_i integer;
begin
open c;
fetch c into _i;
while found loop
update testcursor set v = ext_fce(_i, _v)
where current of c;
fetch c into _i;
end loop;
close c;
end;
$$ language plpgsql;
postgres=# select testc2(20);
testc2
--------

(1 row)

Time: 8434,985 ms
3. test - update with PK
create or replace function testc3(_v integer)
returns void as $$
declare _i integer;
begin
for _i in select i from testcursor loop
update testcursor set v = ext_fce(_i,_v) where i = _i;
end loop;
end;
$$ language plpgsql;
postgres=# select testc3(30);
testc3
--------

(1 row)

Time: 9959,209 ms
Result: Using updateable cursor is about 90% slower than one statement. Usig pk is about 17% slower than updateable cursor. - So updateable cursors has sense and it is well to use it. But every iteration is much slower than one statement's update.

Wednesday, August 27, 2008

default parameters for PL functions

Hello

I completed one task - defaults for PL functions. Using is simple - it is same as Firebird's 2.x defaults.
postgres=# create or replace function x1(int = 1,int = 2,int= 3)
returns int as $$
select $1+$2+$3;
$$ language sql;
CREATE FUNCTION
postgres=# select x1();
x1
----
6
(1 row)

postgres=# select x1(10);;
x1
----
15
(1 row)

postgres=# select x1(10,20);
x1
----
33
(1 row)

postgres=# select x1(10,20,30);
x1
----
60
(1 row)
This is first step before named parameters feature - and less controversy. Second step will be difficult - there are two opinions about named parameter's syntax: variant a) using Oracle's syntax name => expression and variant b) use own syntax based on keyword "AS" expression AS name. I prefer variant @a - I thing so it's more readable (SQL use AS for labeling). Variant @b is safe from compatibility views. There was discussion on pg_hackers - without any conclusion. So I hope so at least defaults will be committed.

bye
Pavel

Labels: ,

Monday, August 25, 2008

Using cursors for generating cross tables

I am working on procedure support in PostgreSQL more than one year. Missing procedures is one disadvantage of PostgreSQL. Because procedures are not called from SELECT statement, then it couldn't respect some rules like functions. Procedures are mainly used for explicit transaction controlling and for generating mutable results (like dynamic record set or stacked record set). Dynamic record set is interesting feature when we nothing know about result set's columns. It's typical for cross tables. Because PostgreSQL doesn't support procedures, we cannot return dynamic (mutable number of columns) queries directly, but we can return dynamic cursor. This method of generating cross tables is inspired by Roland Bauman's http://rpbouman.blogspot.com blog.
Cross table is based on query's pattern:
SELECT shop,
SUM(CASE gender WHEN 'f' THEN salary ELSE 0 END) AS f,
SUM(CASE gender WHEN 'm' THEN salary ELSE 0 END) AS m,
SUM(salary) AS total
FROM employees INNER JOIN shops USING (shop_id)
GROUP BY shop

for data:
CREATE TABLE employees (
id serial PRIMARY KEY,
shop_id int,
gender char(1),
name varchar(32),
salary int
);

CREATE TABLE shops (
id serial PRIMARY KEY,
shop varchar(32)
);

INSERT INTO shops(shop) VALUES('Zurich'),('New York'),('London');

INSERT INTO employees (shop_id, gender, name, salary)
VALUES
(1, 'm', 'Jon Simpson', 4500),
(1, 'f', 'Barbara Breitenmoser',4700),
(2, 'f', 'KirstenRuegg',5600),
(3, 'm', 'Ralp Teller',5100),
(3, 'm', 'Peter Jonson',4700);
I write function, that generate necessary SELECT statement and open dynamic cursor.
CREATE OR REPLACE FUNCTION do_cross_cursor(dimx_name varchar,
    dimx_source varchar, dimy_name varchar,
    dimy_source varchar, expr varchar)
RETURNS refcursor AS $$
DECLARE
col_list text[] := '{}';
query text;
r RECORD;
result refcursor := 'result';
BEGIN
FOR r IN EXECUTE 'SELECT DISTINCT '
  || dimx_name || '::text AS val ' || dimx_source
LOOP
col_list := array_append(col_list, 'SUM(CASE ' || dimx_name
 || ' WHEN ' || quote_literal(r.val) || ' THEN ' || expr
 || ' ELSE 0 END) AS ' || quote_ident(r.val) || '');
END LOOP;
query := 'SELECT ' || dimy_name || ', '
 || array_to_string(col_list, ',')
 || ', SUM(' || expr || ') AS Total '
 || dimy_source || ' GROUP BY ' || dimy_name;
OPEN result NO SCROLL FOR EXECUTE query;
RETURN result;
END;
$$ LANGUAGE plpgsql STRICT;
Because cursors should be used only in transaction, I have to use explicit transaction:
BEGIN;
SELECT do_cross_cursor('gender', 'FROM employees','shop',
     'FROM employees e JOIN shops s ON s.id = e.shop_id',
     'salary');
FETCH ALL FROM result;
END;

BEGIN;
SELECT do_cross_cursor('shop', 'FROM shops','gender',
      'FROM employees e JOIN shops s ON s.id = e.shop_id',
      'salary');
FETCH ALL FROM result;
END;
There is result:
postgres=# BEGIN;
BEGIN
postgres=#   SELECT do_cross_cursor('gender', 'FROM employees',
  'shop',
  'FROM employees e JOIN shops s ON s.id = e.shop_id', 'salary');
do_cross_cursor
-----------------
result
(1 row)

postgres=#   FETCH ALL FROM result;
 shop   |  m   |  f   | total
----------+------+------+-------
New York |    0 | 5600 |  5600
Zurich   | 4500 | 4700 |  9200
London   | 9800 |    0 |  9800
(3 rows)

postgres=# END;BEGIN;
COMMIT
BEGIN
postgres=#   SELECT do_cross_cursor('shop', 'FROM shops','gender',
 'FROM employees e JOIN shops s ON s.id = e.shop_id', 'salary');
do_cross_cursor
-----------------
result
(1 row)

postgres=#   FETCH ALL FROM result;
gender | New York | Zurich | London | total
--------+----------+--------+--------+-------
m      |        0 |   4500 |   9800 | 14300
f      |     5600 |   4700 |      0 | 10300
(2 rows)

postgres=# END;
COMMIT
A alternative for this solution is using a tablefuc extension:
postgres=# CREATE VIEW report AS SELECT shop, gender, salary 
                                    FROM shops, employees 
                                   WHERE shop_id = shops.id;
CREATE VIEW
postgres=# SELECT shop, gender, sum(salary) FROM report GROUP BY 1,2 
 UNION ALL SELECT shop, 'X',    sum(salary) FROM report GROUP BY 1 
 UNION ALL SELECT NULL, gender, sum(salary) FROM report GROUP BY 2 
 UNION ALL SELECT NULL, 'X',    sum(salary) FROM report ORDER BY 1;
   shop   | gender |  sum  
----------+--------+-------
 London   | m      |  9800
 London   | X      |  9800
 New York | f      |  5600
 New York | X      |  5600
 Zurich   | m      |  4500
 Zurich   | X      |  9200
 Zurich   | f      |  4700
          | m      | 14300
          | f      | 10300
          | X      | 24600
(10 rows)

postgres=# SELECT * FROM crosstab(
         'SELECT shop, gender, sum(salary) FROM report GROUP BY 1,2 
UNION ALL SELECT shop, ''X'',  sum(salary) FROM report GROUP BY 1 
UNION ALL SELECT NULL, gender, sum(salary) FROM report GROUP BY 2 
UNION ALL SELECT NULL, ''X'',  sum(salary) FROM report ORDER BY 1',
         'VALUES(''f''),(''m''),(''X'')') AS ct(shop varchar, 
                                                f bigint, 
                                                m bigint, 
                                                "total" bigint);
   shop   |   f   |   m   | total 
----------+-------+-------+-------
 London   |       |  9800 |  9800
 New York |  5600 |       |  5600
 Zurich   |  4700 |  4500 |  9200
          | 10300 | 14300 | 24600
(4 rows)