Thursday, October 28, 2010

Simple counter for batch operations

Hello I am working with large transformations now. It is some like:
CREATE TABLE source_table(..);
CREATE TABLE destination_table(..);

/* transformation function */
CREATE OR REPLACE FUNCTION to_destination_table(source_table) RETURNS RECORD AS $$ ...

/* transformation */
INSERT INTO destination_table
   SELECT (r).*
  FROM (SELECT to_destionation_table(_source) r
           FROM source _source) x
The batch operation is relative long. so I had to wrote simple counter. It is simple function, that counts a call and returns input value without any change. My function has a three parameters, first is a value for forwarding, second is a frequency of notification, and third parameter specifies a printing input value:
nic=# select sum(counter(v, 20, false)) from generate_series(1,100) g(v);
NOTICE:  processed 20 rows
NOTICE:  processed 40 rows
NOTICE:  processed 60 rows
NOTICE:  processed 80 rows
NOTICE:  processed 100 rows
 sum  
------
 5050
(1 row)

nic=# select sum(counter(v, 20, true)) from generate_series(1,100) g(v);
NOTICE:  processed 20 rows, current value is '20'
NOTICE:  processed 40 rows, current value is '40'
NOTICE:  processed 60 rows, current value is '60'
NOTICE:  processed 80 rows, current value is '80'
NOTICE:  processed 100 rows, current value is '100'
 sum  
------
 5050
(1 row)
This function is very simple - see source code:
#include "funcapi.h"
#include "utils/lsyscache.h"

PG_FUNCTION_INFO_V1(pst_counter);

Datum pst_counter(PG_FUNCTION_ARGS);

typedef struct
{
 long int iterations;
 int freq;
 Oid  typoutput;
} counter_cache;

/*
 * raise notice every n call,
 * returns input without change
 */
Datum
pst_counter(PG_FUNCTION_ARGS)
{
 Datum value = PG_GETARG_DATUM(0);
 counter_cache *ptr = (counter_cache *) fcinfo->flinfo->fn_extra;

 if (ptr == NULL)
 {

  fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
          sizeof(counter_cache));
  ptr = (counter_cache *) fcinfo->flinfo->fn_extra;
  ptr->iterations = 0;
  ptr->typoutput = InvalidOid;

  if (PG_ARGISNULL(1))
   elog(ERROR, "second parameter (output frequency) must not be NULL");

  ptr->freq = PG_GETARG_INT32(1);

  if (!PG_ARGISNULL(2) && PG_GETARG_BOOL(2))
  {
   Oid valtype;
   Oid typoutput;
   bool typIsVarlena;

   valtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
   getTypeOutputInfo(valtype, &typoutput, &typIsVarlena);
   ptr->typoutput = typoutput;
  }
 }
 
 if (++ptr->iterations % ptr->freq == 0)
 {
  if (!OidIsValid(ptr->typoutput))
  {
   elog(NOTICE, "processed %ld rows", ptr->iterations);
  }
  else
  {
   /* show a processed row, when it's requested */
   if (PG_ARGISNULL(0))
    elog(NOTICE, "processed %ld rows, current value is null", ptr->iterations);
   else
   {
    elog(NOTICE, "processed %ld rows, current value is '%s'", ptr->iterations,
              OidOutputFunctionCall(ptr->typoutput, value));
   }
  }
 }
 

 PG_RETURN_DATUM(value);
}


CREATE OR REPLACE FUNCTION counter(anyelement, int, bool)
RETURNS anyelement
AS 'MODULE_PATHNAME','pst_counter' LANGUAGE C;

And usage isn't complex too:
/* transformation */
INSERT INTO destination_table
   SELECT (r).*
  FROM (SELECT counter(to_destionation_table(_source), 1000, true) r
           FROM source _source) x
real output:
psql84:convert.sql:183: NOTICE:  processed 1230000 rows, current value is '(959674199,"2010-10-25 23:50:29.404568","2010-10-25 23:50:29.491842",,3,501,190000029,XXXXXX,f,9,21)'
psql84:convert.sql:183: NOTICE:  processed 1240000 rows, current value is '(959473959,"2010-10-25 19:20:52.498152","2010-10-25 19:20:52.564384",,3,501,189854563,XXXXXX,f,9,23)'
psql84:convert.sql:183: NOTICE:  processed 1250000 rows, current value is '(958884965,"2010-10-25 08:53:30.026877","2010-10-25 08:53:30.106039",,3,101,189849045,XXXXXX,f,13,48)'
psql84:convert.sql:183: NOTICE:  processed 1260000 rows, current value is '(959483959,"2010-10-25 19:32:02.136491","2010-10-25 19:32:04.070922",,3,500,189969079,XXXXXX,f,9,48)'
psql84:convert.sql:183: NOTICE:  processed 1270000 rows, current value is '(959488959,"2010-10-25 19:36:40.691078","2010-10-25 19:36:41.155615",,3,500,189037026,,f,9,)'
psql84:convert.sql:183: NOTICE:  processed 1280000 rows, current value is '(959493959,"2010-10-25 19:41:59.21508","2010-10-25 19:41:59.244398",,3,101,189971167,XXXXXX,f,13,42)'
psql84:convert.sql:183: NOTICE:  processed 1290000 rows, current value is '(959498959,"2010-10-25 19:49:22.494294","2010-10-25 19:49:22.516124",,3,1010,189971599,XXXXXX,f,9,53)'

0 Comments:

Post a Comment

Subscribe to Post Comments [Atom]

<< Home