Monday, February 28, 2011

Query to find top 20 most used tables in Postgresql

0 comments

The following query returns the top 20 most used tables in the database, based on the statistics collected by PostgreSQL.

select c.relname as name, c.reltuples::numeric as No_of_records, c.relpages as No_of_pages,
   pg_size_pretty(pg_relation_size(c.relname)) as size,
   t.seq_scan, t.seq_tup_read, t.idx_scan, t.idx_tup_fetch, t.n_tup_ins, t.n_tup_upd, t.n_tup_del,
   COALESCE(t.idx_tup_fetch,0) + COALESCE(t.seq_tup_read,0) as total_read
from pg_stat_user_tables as t
inner join pg_class as c on (t.relid = c.oid)
where c.relkind = 'r'
order by total_read desc
limit 20;
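These counters are cumulative since the statistics were last reset. To measure activity over a specific window instead, a superuser can clear them first and run the report again later:

select pg_stat_reset();   -- resets the statistics counters for the current database (superuser only)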
Continue reading →
Thursday, February 24, 2011

Mysql function to replace multiple occurrences of a character in a string

0 comments

"replace_multiple_occurrences_of_char_f"- this function is used to replace one or more occurances of a character in a string with a given character.
For example, in the string "ahmad------------iftekhar-rumman" , we can replace all the occurances of '-'(hyphen) with ' '(space) and take the output as "ahmad iftekhar rumman"

-------------------------
Input Parameters:
-------------------------
prm_strInput: varchar(255) - the input string
prm_from_char: varchar(1) - the character to replace in the given string
prm_to_char: varchar(1) - the replacement character

---------
Return:
-----------
varchar(255)

---------------
Source Code:
------------------

delimiter //
DROP FUNCTION IF EXISTS replace_multiple_occurrences_of_char_f //
CREATE FUNCTION replace_multiple_occurrences_of_char_f (prm_strInput VARCHAR(255), prm_from_char VARCHAR(1), prm_to_char VARCHAR(1))
RETURNS VARCHAR(255)
DETERMINISTIC
BEGIN
DECLARE nNumSpaces,spacePos1,spacePos2,v_strLen SMALLINT;
DECLARE i INT DEFAULT 0;
DECLARE v_parseStr varchar(255) DEFAULT prm_to_char;
SET spacePos1 = 0;
SET spacePos2 = 1;
SET prm_strInput = TRIM(prm_strInput);
# Count how many occurrences of the source character the string contains
SET nNumSpaces = LENGTH(prm_strInput) - LENGTH(REPLACE(prm_strInput,prm_from_char,''));
# Wrap the string with the source character so every token is delimited on both sides
SET prm_strInput = CONCAT(prm_from_char,prm_strInput,prm_from_char);
WHILE (i <= nNumSpaces)  DO

   # Locate the next pair of delimiters; the token lies between them
   SET spacePos1 = LOCATE(prm_from_char,prm_strInput,spacePos1 + 1);

   SET spacePos2 = LOCATE(prm_from_char,prm_strInput,spacePos1 + 1);
   SET v_strLen = (spacePos2-spacePos1)-1;
   IF v_strLen > 0 THEN
     # Copy the token and append a single replacement character after it
     SET v_parseStr = CONCAT(v_parseStr, SUBSTRING(prm_strInput,spacePos1 + 1, v_strLen ),prm_to_char);
   END IF;
   SET i = i + 1;
END WHILE;

   # Strip the leading and trailing replacement characters added above
   SET v_parseStr = SUBSTRING(v_parseStr, 2, length(v_parseStr)-2);
   return v_parseStr;
END
//
------------------
Usage Example:
------------------
select replace_multiple_occurrences_of_char_f('ahmad------------iftekhar-rumman','-',' ');
ahmad iftekhar rumman
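For comparison, the built-in REPLACE() substitutes one output character for every hyphen, so a run of hyphens becomes a run of spaces rather than a single space:

select REPLACE('ahmad------------iftekhar-rumman','-',' ');
# the words come back separated by as many spaces as there were hyphens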

Continue reading →

Mysql function to remove non-alphanumeric characters from a string

12 comments


"remove_non_alphanum_char_f" - this function is used remove non-alphanumeric character from a string
Input: varchar(255)
Output: varchar(255)

-------------------
Source Code:
------------------

delimiter //
DROP FUNCTION IF EXISTS remove_non_alphanum_char_f //
CREATE FUNCTION remove_non_alphanum_char_f (prm_strInput varchar(255))
RETURNS VARCHAR(255)
DETERMINISTIC
BEGIN
  DECLARE i INT DEFAULT 1;
  DECLARE v_char VARCHAR(1);
  DECLARE v_parseStr VARCHAR(255) DEFAULT ' ';
 
WHILE (i <= LENGTH(prm_strInput) )  DO
 
  SET v_char = SUBSTR(prm_strInput,i,1);
  IF v_char REGEXP  '^[A-Za-z0-9 ]+$' THEN  #alphanumeric
    
        SET v_parseStr = CONCAT(v_parseStr,v_char);  

  END IF;
  SET i = i + 1;
END WHILE;
RETURN trim(v_parseStr);
END
//

------------------
Usage Example:
------------------

select remove_non_alphanum_char_f('Sakib &^*))( scored a brilliant ---=== %% century') ;
Sakib  scored a brilliant   century
1 row in set (0.00 sec)
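Because spaces are kept, the stripped characters can leave runs of blanks, as in the output above. One option, assuming the replace_multiple_occurrences_of_char_f helper from the post above is also installed, is to collapse them afterwards:

select replace_multiple_occurrences_of_char_f(
         remove_non_alphanum_char_f('Sakib &^*))( scored a brilliant ---=== %% century'),
         ' ', ' ');
# Sakib scored a brilliant century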

Continue reading →

Mysql initCaps Function

0 comments


initCaps takes a string as input and returns the string with each word's first character in uppercase and the rest in lowercase.

Input: varchar(255)
Output: varchar(255)

-------------------
Source Code:

-------------------

delimiter //
DROP FUNCTION IF EXISTS initCaps //
CREATE FUNCTION initCaps (prm_strInput varchar(255))
RETURNS VARCHAR(255)
DETERMINISTIC
BEGIN
DECLARE nNumSpaces,spacePos1,spacePos2,v_strLen SMALLINT;
DECLARE i INT DEFAULT 0;
DECLARE v_parseStr varchar(255) DEFAULT ' ';
SET spacePos1 = 0;
SET spacePos2 = 1;
SET prm_strInput = TRIM(prm_strInput);
# Count the word separators (spaces) in the input
SET nNumSpaces = LENGTH(prm_strInput) - LENGTH(REPLACE(prm_strInput,' ',''));
# Wrap the string in spaces so that every word is delimited on both sides
SET prm_strInput = CONCAT(' ',prm_strInput,' ');
WHILE (i <= nNumSpaces)  DO

   # Locate the next pair of spaces; the word lies between them
   SET spacePos1 = LOCATE(' ',prm_strInput,spacePos1 + 1);

   SET spacePos2 = LOCATE(' ',prm_strInput,spacePos1 + 1);
   SET v_strLen = (spacePos2-spacePos1) - 1;
   IF v_strLen > 0 THEN
     # Uppercase the first character of the word and lowercase the rest
     SET v_parseStr = CONCAT(v_parseStr, UPPER(SUBSTRING(prm_strInput,spacePos1 + 1,1)), LOWER(SUBSTRING(prm_strInput,spacePos1 + 2,v_strLen -1)),' ');
  
   END IF;
   SET i = i + 1;
END WHILE;
 
   return trim(v_parseStr);
END
//

----------------------

Usage Example:
----------------------
mysql> select initCaps('Ahmad iftekhar RUMMAN') as c;
Ahmad Iftekhar Rumman
Continue reading →
Saturday, February 19, 2011

Sizing shared_buffers of Postgresql

0 comments
Database: Postgresql 9.0.1.
The pg_buffercache contrib module is required to execute the following query; shared_buffers can then be sized based on the result.
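On 9.0 the module is installed by running its SQL script in the database you want to inspect (the path below assumes a source install under /usr/local/pgsql and will differ for packaged builds):

psql -d postgres -f /usr/local/pgsql/share/contrib/pg_buffercache.sql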
SELECT usagecount, count(*), isdirty,
       round((count(*)/max(total_cache.cnt)::float*100)::numeric,2) as percent_of_total_cache
FROM pg_buffercache,
     (select count(*) as cnt from pg_buffercache) as total_cache
GROUP BY isdirty, usagecount
ORDER BY isdirty, usagecount;

 usagecount | count  | isdirty | percent_of_total_cache
------------+--------+---------+------------------------
          0 |  44204 | f       |                  16.86
          1 |  39288 | f       |                  14.99
          2 |  18917 | f       |                   7.22
          3 |  10702 | f       |                   4.08
          4 |  39549 | f       |                  15.09
          5 | 109484 | f       |                  41.76
(6 rows)

 usagecount | count  | isdirty | percent_of_total_cache
------------+--------+---------+------------------------
          0 |  44204 | f       |                  16.86
          1 |  39288 | f       |                  14.99
          2 |  18917 | f       |                   7.22
          3 |  10702 | f       |                   4.08
          4 |  39546 | f       |                  15.09
          5 | 109435 | f       |                  41.75
          5 |     52 | t       |                   0.02
(7 rows)

 usagecount | count  | isdirty | percent_of_total_cache
------------+--------+---------+------------------------
          0 |  44204 | f       |                  16.86
          1 |  39288 | f       |                  14.99
          2 |  18917 | f       |                   7.22
          3 |  10702 | f       |                   4.08
          4 |  39546 | f       |                  15.09
          5 | 109487 | f       |                  41.77
(6 rows)

Inspecting the results, I found that more than 50% of the buffer cache blocks had a high usage count (4 or 5).
This was strong evidence that I needed to increase shared_buffers. As I had a dedicated database server with 32 GB of RAM, I increased the value from 2 GB to 4 GB.
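For reference, the change itself is a single line in postgresql.conf; shared_buffers cannot be changed with a reload, so a server restart is needed for it to take effect:

shared_buffers = 4GB        # was 2GB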

Continue reading →
Thursday, February 17, 2011

Script to purge Mysql binary logs

0 comments


purge_bin_log.sh is a script to purge (remove) Mysql binary logs.

It is designed to run every night and remove binary logs older than 7 days.
Here, the db.conf file stores the database connection parameters.

-----------
db.conf
-----------

#!/bin/bash
# DB connection parameters
DBHOST=0.0.0.0
DBNAME=test
DBUSER=user
DBUSERPWD=std123456
DBPORT=3306

-----------------------
purge_bin_log.sh
-----------------------

#!/bin/bash
DBHOST=$(cat db.conf | grep -w DBHOST | awk -F'=' '{ print $2 }' )
echo "DB HOST = "$DBHOST

DBNAME=$(cat db.conf | grep -w DBNAME | awk -F'=' '{ print $2 }' )
echo "DB NAME = "$DBNAME

DBUSER=$(cat db.conf | grep -w DBUSER | awk -F'=' '{ print $2 }' )
echo "DB USER = "$DBUSER

DBUSERPWD=$(cat db.conf | grep -w DBUSERPWD | awk -F'=' '{ print $2 }' )
echo "DB PASSWORD = " $DBUSERPWD

DBPORT=$(cat db.conf | grep -w DBPORT | awk -F'=' '{ print $2 }' )
echo "DB PORT = "$DBPORT

purgedt=$(mysql $DBNAME -h $DBHOST -u $DBUSER -P $DBPORT --password=$DBUSERPWD -e "select date_sub(current_date(), interval 7 day)" -s -N )
echo "Purge binary logs before $purgedt started at $(date +%m%d%y_%H:%M)"
str="PURGE BINARY LOGS BEFORE '"$purgedt"'"
mysql $DBNAME -h $DBHOST -u $DBUSER -P $DBPORT --password=$DBUSERPWD -e "$str"
success=$?
if [ $success = 0 ];
then
   echo "Purge binary logs before $purgedt completed at $(date +%m%d%y_%H:%M)"
else
  echo "!!!Failed!!!"
fi     #if [ $success = 0 ] then
exit 0
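A small design note: since db.conf is itself valid shell syntax, an alternative to the grep/awk parsing above would be to source it at the top of the script (a sketch, assuming db.conf sits in the same directory as purge_bin_log.sh):

. /root/dbbackup/backup_scripts/db.conf   # loads DBHOST, DBNAME, DBUSER, DBUSERPWD, DBPORT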

A cron job is also created to execute the script at 00:01 every night.
01 00 * * * /root/dbbackup/backup_scripts/purge_bin_log.sh > /root/dbbackup/backup_scripts/purge_bin_log.log
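As an alternative to scheduling PURGE BINARY LOGS, the server can expire binary logs on its own through the expire_logs_days variable in my.cnf, which gives the same 7-day retention:

[mysqld]
expire_logs_days = 7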

Continue reading →
Tuesday, February 15, 2011

Backup script for Large Mysql database running for Wordpress with Hyperdb

0 comments
I had to take a backup of a Mysql 5.1 database that was running a Wordpress application with the Hyperdb plugin. The application had almost 93000 blogs, and as each blog consisted of 8 tables, there were 786000 tables in the database. All of the tables used the MyISAM storage engine, and the number of blogs was increasing day by day.

The structure of the DB was as follows:

SHOW DATABASES
imported_data
rdb
rdb0
rdb1
....
rdb93 

Here,

imported_data - used to store data that were imported from the legacy site.

rdb - used to store all the common Wordpress tables and blog 1.

rdbN - used to store the data for the blogs. Blogs were partitioned across databases using the formula blog_id/1000; that is, blog 0 was stored in rdb0 and blog 92134 was stored in rdb92.

At first, I tried the stock mysqldump program, and it took 22 hours to complete the whole database backup. I was in a fix, so I went with the following steps.


Modified mysqldump.c

....
static char *get_actual_table_name(const char *old_table_name, MEM_ROOT *root)
{
....
DBUG_PRINT("Rumman", ("SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_NAME = \'%s\'",old_table_name));
my_snprintf(query, sizeof(query), "SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_NAME = \'%s\'",old_table_name
              );
 ...
}
...

Compiled mysql into /root/dbbackup/mysql_r
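(The build commands themselves are not recorded here; for a 5.1-era source tree this would have been roughly the usual autotools sequence, given only as a sketch:)

cd mysql-5.1.x                               # the patched source tree
./configure --prefix=/root/dbbackup/mysql_r
make && make install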

Backup Scripts 

I used the following two scripts for the digitalsports backup:

1. init_backup.sh : This script initiates the backup for all the blogs (at present 93000). It creates backup slots and then executes backup_script.sh for each slot in parallel. Some important variables in this script are:

SCRIPT - where backup_script.sh is located
LOGDIR - directory where all the log files are created
PARALLEL - how many backup processes we want to start simultaneously
2. backup_script.sh - this script calls the modified mysqldump. It takes two parameters, $START_BLOG_ID and $END_BLOG_ID. Within a for loop, it backs up the blogs and creates a separate sql file for each blog, named blog_<blog_id>.sql.

MYSQLDUMP_r - where the modified mysqldump binary is located

PRODUCTION_TIME - the backup processes are stopped after this time

Both these scripts are added below.

The backup process works as follows:

  • Take the maximum blog id first
  • Make slots for the backup scripts using $PARALLEL and $STEP
  • Make $BACKUP_DIR and its sub-directories. For each day there is a sub-directory named rdb_<%m%d%y>
  • Call backup_script.sh and start mysqldump for each blog
  • If stop.tmp is found in $BACKUP_DIR, the backup processes stop (see the example after this list)
  • If the server time passes $PRODUCTION_TIME, the backup processes stop
  • To resume the backup, execute init_backup.sh again; it removes stop.tmp and resumes from the progress_<NUMBER> files in $BACKUP_DIR
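For example, to ask a running backup to stop gracefully, it is enough to create the stop marker by hand in that day's backup directory:

touch /BACKUP1/rdb_$(date +%m%d%y)/stop.tmp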


-------------------------

 init_backup.sh  

-------------------------

#!/bin/bash

SCRIPT=/root/dbbackup/backup_scripts/backup_script.sh
LOGDIR=/BACKUP1/log
MIN_BLOG_ID=50
LAST_BLOG_ID=$MIN_BLOG_ID
PARALLEL=4

DBHOST=0.0.0.0
DB=rdb
DBUSER=ruser
DBUSERPWD=********
DBPORT=3306

DT=$(date +%m%d%y)
BACKUP_DIR=/BACKUP1/rdb_"$DT"
LOGDIR=$BACKUP_DIR/log_dir
if [ ! -d $BACKUP_DIR ]; then
   echo "Making backup directory at $BACKUP_DIR with sub-directories"
   mkdir -p $BACKUP_DIR
   mysql -h $DBHOST -P $DBPORT -u $DBUSER --password=$DBUSERPWD -e "show databases like 'rdb%'" -N -s > $BACKUP_DIR/dblist.lst
   for v_dbname in `cat $BACKUP_DIR/dblist.lst`
   do
      BACKUP_DIR=/BACKUP1/rdb_"$DT"/"$v_dbname"
      mkdir -p $BACKUP_DIR
   done  #for v_dbname in `cat $BACKUP_DIR/dblist.lst`
   BACKUP_DIR=/BACKUP1/rdb_"$DT"/r_imported_data
   mkdir -p $BACKUP_DIR
   BACKUP_DIR=/BACKUP1/rdb_"$DT"
   echo "done"
   echo "Making log directory"
   LOGDIR=$BACKUP_DIR/log_dir
   mkdir -p $LOGDIR/error
   echo "done"
else
  rm -f $BACKUP_DIR/stop.tmp
  echo "Resuming Backup"
fi


if [ ! -f "$BACKUP_DIR"/MAX_BLOG ];
then
  echo "MAX_BLOG file not found"
    MAX_BLOG_ID=$(mysql -h $DBHOST -P $DBPORT -u $DBUSER --password=$DBUSERPWD -e "select max(blog_id) from rdb.wp_blogs" -N -s )
    echo  "$MAX_BLOG_ID" > $BACKUP_DIR/MAX_BLOG
else
  echo "MAX_BLOG file found"
  MAX_BLOG_ID=$(cat "$BACKUP_DIR"/MAX_BLOG)
fi    # if [ -f $BACKUP_DIR/MAX_BLOG ];
echo "MAX BLOG = " $MAX_BLOG_ID

let STEP=$MAX_BLOG_ID/$PARALLEL

echo "$SCRIPT -1 0 $BACKUP_DIR  >> $LOGDIR/script_rdb_and_r_imported_data.log.$(date +%m%d%y) &"
$SCRIPT -1 0 $BACKUP_DIR  >> $LOGDIR/script_rdb_and_r_imported_data.log.$(date +%m%d%y) &

while [ $MIN_BLOG_ID -le $MAX_BLOG_ID ]
do                                                                


  let LAST_BLOG_ID="$MIN_BLOG_ID+$STEP"

  if [ "$LAST_BLOG_ID" -gt "$MAX_BLOG_ID" ];
  then
    let LAST_BLOG_ID="$MAX_BLOG_ID"
  fi

  if [ -f $BACKUP_DIR/progress_"$LAST_BLOG_ID" ];
  then
    MIN_BLOG_ID=$(cat $BACKUP_DIR/progress_"$LAST_BLOG_ID")
    echo $MIN_BLOG_ID
  fi #if [ -f $BACKUP_DIR/progress_$MIN_BLOG_ID_$LAST_BLOG_ID ];

  echo "$SCRIPT $MIN_BLOG_ID $LAST_BLOG_ID $BACKUP_DIR >> $LOGDIR/script_"$LAST_BLOG_ID".log.$(date +%m%d%y)  &"
  $SCRIPT $MIN_BLOG_ID $LAST_BLOG_ID $BACKUP_DIR >> $LOGDIR/script_"$LAST_BLOG_ID".log.$(date +%m%d%y)  &
  let MIN_BLOG_ID="$LAST_BLOG_ID+1"
  
done #end while [ $LAST_BLOG_ID -lt $MAX_BLOG_ID]  

  
--------------------------

backup_script.sh

--------------------------


#!/bin/bash

DBHOST=0.0.0.0
DB=rdb_old
DBUSER=ruser
DBUSERPWD=******
DBPORT=3306
PRODUCTION_TIME=090000

MYSQLDUMP_r=/root/dbbackup/mysql_r/bin/mysqldump

if [ -z $1 ]; then
   echo "!!!Start Blog ID for backup cannot be blank!!!"
   exit
elif [ $1 -ne "-1" ] && [ -z $2 ]; then
   echo "!!!End Blog ID for backup cannot be blank!!!"
   exit
fi

START_BLOG_ID=$1
END_BLOG_ID=$2
PARENT_BACKUP_DIR=$3
LOGDIR=$PARENT_BACKUP_DIR/log_dir
OUTFILE=/tmp/blogs_"$START_BLOG_ID"_"$END_BLOG_ID".out
DT=$(date +%m%d%y)
if [ $START_BLOG_ID -eq "-1" ];
then
  BACKUP_DIR=$PARENT_BACKUP_DIR/rdb
  if [ ! -f $BACKUP_DIR/rdb.sql.$(date +%m%d%y) ];
  then
    echo "rdb backup started on the $(date +%m%d%y_%H%M) ..."
    DB=rdb
    $MYSQLDUMP_r $DB -h $DBHOST -u $DBUSER -P $DBPORT --password=$DBUSERPWD --log-error=$LOGDIR/error/rdb.sql.err > $BACKUP_DIR/rdb.sql.$(date +%m%d%y)
      mysqldump_success=$?
      if [ $mysqldump_success -eq '0' ];
      then
         rm -f $LOGDIR/error/rdb.sql.err
      else
         cat $LOGDIR/error/rdb.sql.err
      fi   #if [ $mysqldump_success -eq '0' ];

      echo "rDB backup completed on the $(date +%m%d%y_%H%M)"
      touch $LOGDIR/backup_completed_rdb_$(date +%m%d%y_%H%M).tmp
      if [ -f /BACKUP1/rdb_"$DT"/stop.tmp ];
        then
              echo "Stopped"
              touch $LOGDIR/stopped_after_rdb
              exit 1
        fi #if [ -f /BACKUP1/rdb_"$DT"/stop.tmp ];
  fi #if [ ! -f $BACKUP_DIR/rdb.sql.$(date +%m%d%y) ];

  BACKUP_DIR=/BACKUP1/rdb_"$DT"/r_imported_data
  if [ ! -f $BACKUP_DIR/r_imported_data.sql.$(date +%m%d%y)  ];
  then
    echo "r_imported_databackup started on the $(date +%m%d%y_%H%M) ..."
      $MYSQLDUMP_r r_imported_data -h $DBHOST -u $DBUSER -P $DBPORT --password=$DBUSERPWD --log-error=$LOGDIR/error/r_imported_data.sql.err > $BACKUP_DIR/r_imported_data.sql.$(date +%m%d%y)
      mysqldump_success=$?
      if [ $mysqldump_success -eq '0' ];
      then
         rm -f $LOGDIR/error/r_imported_data.sql.err
      else
       cat $LOGDIR/error/r_imported_data.sql.err
      fi
      echo "r_imported_data backup completed on the $(date +%m%d%y_%H%M)"
      touch $LOGDIR/backup_completed_r_imported_data_$(date +%m%d%y_%H%M).tmp
      exit
  fi    #if [ ! -f $BACKUP_DIR/r_imported_data.sql.$(date +%m%d%y)  ];

fi  
#Per Blog backup
rm -f $OUTFILE
mysql $DB -h $DBHOST -u $DBUSER -P $DBPORT --password=$DBUSERPWD -e "select blog_id  from wp_blogs where blog_id between $START_BLOG_ID and $END_BLOG_ID " -s -N > $OUTFILE
LAST_BLOG_HYPERDB=$(mysql -h $DBHOST -P $DBPORT -u $DBUSER --password=$DBUSERPWD -e "select config_value from r_imported_data.r_config where config_key='last_blog_clustered'" -N -s )
echo $LAST_BLOG_HYPERDB

if [ -s $OUTFILE ]; then
    echo "... Backup started for blog ID $START_BLOG_ID to $END_BLOG_ID on the $(date +%m%d%y_%H%M) ..."
        for i in `cat $OUTFILE`
        do
          
           #If time passed after $PRODUCTION_TIME then stop
           if [ $(date +%H%M%S) -gt $PRODUCTION_TIME ];
           then
              echo "Stoppped as time passed $PRODUCTION_TIME"
              touch $PARENT_BACKUP_DIR/stop.tmp
           fi #if [ $(date +%H%M%S) -gt $PRODUCTION_TIME  ];
          
            #Check for stop
                if [ -f $PARENT_BACKUP_DIR/stop.tmp ];
                then
                  echo "Stopped"
                  touch $LOGDIR/stopped_blog_"$i"
                  exit 1
                fi
                #Find database name
                if [ $i -le $LAST_BLOG_HYPERDB ];
                then
                    let DBNUM="$i"/1000
                    DB=rdb"$DBNUM"
                else
                  DB=rdb_old
                fi  
      
                BACKUP_DIR=$PARENT_BACKUP_DIR/"$DB"
                echo $BACKUP_DIR
                echo $DB
                echo "Backup Blog  $i ..."
                $MYSQLDUMP_r $DB -h $DBHOST -u $DBUSER -P $DBPORT --password=$DBUSERPWD  \
                wp_"$i"_commentmeta            \
                wp_"$i"_comments               \
                wp_"$i"_links                  \
                wp_"$i"_options                \
                wp_"$i"_postmeta               \
                wp_"$i"_posts                  \
                wp_"$i"_term_relationships     \
                wp_"$i"_term_taxonomy          \
                wp_"$i"_terms  --skip-lock-tables --log-error=$LOGDIR/error/blog_"$i".sql.err > $BACKUP_DIR/blog_"$i".sql.$(date +%m%d%y)
                mysqldump_success=$?
                if [ $mysqldump_success -eq '0' ];
            then
               rm -f $LOGDIR/error/blog_"$i".sql.err
               echo "done"
            else
               cat $LOGDIR/error/blog_"$i".sql.err
               echo "!!!Failed!!!"
            fi
            echo "$i" > $PARENT_BACKUP_DIR/progress_"$END_BLOG_ID"
        done
    echo "... Backup end for blog ID  $START_BLOG_ID to $END_BLOG_ID on the $(date +%m%d%y_%H%M) ..."
    touch $LOGDIR/backup_compled_"$START_BLOG_ID"_"$END_BLOG_ID"_$(date +%m%d%y_%H%M).tmp
fi   
Continue reading →
Sunday, February 13, 2011

Set Unix Shared Memory (shm) parameter values for Postgresql in CentOS

0 comments
I had to set up Postgresql 9.0.1 in a virtual machine running CentOS 5 with 2 GB of RAM.

Two important Unix shared memory parameters are SHMMAX and SHMALL.

Here,
SHMMAX is the maximum size (in bytes) for a single shared memory segment
and
SHMALL is the total amount of shared memory (in pages) that all processes on the server can use.

I followed these steps to calculate the above parameters for my Postgresql DB server:

page_size=`getconf PAGE_SIZE`        # size of one memory page, in bytes
phys_pages=`getconf _PHYS_PAGES`     # total number of physical memory pages
let shmall=$phys_pages/2             # let shared memory use up to half of RAM (in pages)
echo $shmall
let shmmax=$shmall*$page_size        # the same limit expressed in bytes, for a single segment
echo $shmmax

Here,
_PHYS_PAGES: the total number of physical memory pages in the system.

PAGE_SIZE: the size of one memory page in bytes. A page is a fixed-length block of main memory that is contiguous in both physical and virtual memory addressing.

I got the following values for my DB server which was running with 2 GB RAM:

shmall=262166
shmmax=1073831936



That is, the maximum size of a single shared memory segment (shmmax) is about 1 GB, and all processes on the server together can use up to about 1 GB (shmall * page_size) of shared memory.

Now, I modified the values of the above two parameters using the sysctl interface:

$ sysctl -w kernel.shmmax=1073831936
$ sysctl -w kernel.shmall=262166

Check that the values are set;

$ cat /proc/sys/kernel/shmmax
1073831936
$ cat /proc/sys/kernel/shmall
262166


In addition, I preserved these values across reboots in the file /etc/sysctl.conf.
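For reference, the corresponding lines in /etc/sysctl.conf are simply:

kernel.shmmax = 1073831936
kernel.shmall = 262166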
Continue reading →
Tuesday, February 8, 2011

Postgresql failover with Pgpool II replication

16 comments
Postgresql failover with Pgpool II replication can be configured for a 24x7 database system where no downtime is allowed. The following describes the steps we took to configure Pgpool II replication.
1. Environment

    * Two identical servers 10.0.0.36 as 'Node 1' and 10.0.0.34 as 'Node 2' with Centos 5

    * ssh key exchange already exists between the two nodes.

    * Postgresql 8.3.8 has been installed in each server

    * Pgpool II 2.2.5 has been installed in 10.0.0.36

    * At Node 1, $PGDATA=/usr/local/pgsql/data

    * At Node 2, $PGDATA=/usr/local/pgsql/data

1.1 WAL archiving is enabled on both nodes.

At Node_1:

archive_mode = on
archive_command = 'rsync %p postgres@10.0.0.34:/var/lib/pgsql/wal_archive_36/%f'

At Node_2:

archive_mode = on
archive_command = 'cp %p /var/lib/pgsql/wal_archive_34/%f'
 1.2 Pgpool II installation
As the root user, perform the following:
cd /downloads/
tar -xzvf pgpool-II-2.2.5.tar.gz
cd pgpool-II-2.2.5
./configure --prefix=/opt/pgpoolII   --with-pgsql-libdir=/usr/local/pgsql/lib   --with-pgsql-includedir=/usr/local/pgsql/include
make && make install

 1.2.1 Configuration
cd /opt/pgpoolII/
cp etc/pcp.conf.sample etc/pcp.conf
cp etc/pgpool.conf.sample etc/pgpool.conf
/opt/pgpoolII/bin/pg_md5 -p
password:postgres
e8a48653851e28c69d0506508fb27fc5
vi etc/pcp.conf
...
postgres:e8a48653851e28c69d0506508fb27fc5

 1.2.2 Modify parameters at pgpool.conf

Here we define the ports for pgpool and the pgpool communication manager (pcp), the listen addresses and a number of other things.

vi etc/pgpool.conf
...
listen_addresses ='*'
...
replication_mode = true

logdir = '/opt/pgpoolII/log'
.....
pid_file_name='/var/run/pgpool/pgpool.pid'
...
health_check_period= 5
health_check_user = 'postgres'
...
failover_command = 'echo host:%h new master id:%m old master id:%M>/opt/pgpoolII/log/failover.log'
failback_command = 'echo host:%h new master id:%m old master id:%M>/opt/pgpoolII/log/failback.log'
...
backend_hostname0 = '10.0.0.36'
backend_port0 = 5432
backend_weight0 = 1
backend_data_directory0 = '/usr/local/pgsql/data'
backend_hostname1 = '10.0.0.34'
backend_port1 = 5432
backend_weight1 = 1
backend_data_directory1 = '/usr/local/pgsql/data'
..
 1.2.3 Make directory for pgpool pid file:

Login as root and make a directory pgpool in /var/run
cd /var/run
mkdir pgpool
chown -R postgres:postgres pgpool
 1.2.4 Install online recovery functions at both the nodes:
cd /downloads/pgpool-II-2.2.5/sql/pgpool-recovery/
make install
psql -f pgpool-recovery.sql template1
 1.2.5 pg_hba.conf configuration

As pgpool is running at 10.0.0.35 and the connections between pgpool and postgresql should be trusted, we configured pg_hba.conf as follows:
At Node_1:
host    all         all         10.0.0.35/32       trust
host    all         all         0.0.0.0/0          reject
At Node_2:
host    all         all         10.0.0.35/32       trust
host    all         all         0.0.0.0/0          reject
pool_hba.conf configuration of pgpool
In our test environment, we did not enable the pgpool hba configuration. An important point is that Pgpool II does not support md5 authentication.

 2. Copy database from Node_1 to Node_2


2.1 At Node_1:

Assume postgresql is running.
psql -U postgres
> select pg_start_backup('initial_backup');
> \q
cd /usr/local/pgsql
rsync -avz data/* postgres@node_2:/usr/local/pgsql/data
psql -U postgres
> select pg_stop_backup();
> \q
2.2 At Node_2:
cd /usr/local/pgsql/data
rm postmaster.pid
Start postgresql at Node_2
pg_ctl start
Now the postgresql DB is running on both nodes. We will now start Pgpool II.

 3. Start Pgpool II

At this stage, we start pgpool in debug mode.

/opt/pgpoolII/bin/pgpool  -f /opt/pgpoolII/etc/pgpool.conf -F /opt/pgpoolII/etc/pcp.conf -a /opt/pgpoolII/etc/pool_hba.conf -d -n > /opt/pgpoolII/log/pgpool.log 2>&1 &
Check log files to see if pgpool is working.

tail /opt/pgpoolII/log/pgpool.log
...
2009-10-29 18:49:07 DEBUG: pid 12752: num_backends: 2 num_backends: 2 total_weight: 2.000000
2009-10-29 18:49:07 DEBUG: pid 12752: backend 0 weight: 1073741823.500000
2009-10-29 18:49:07 DEBUG: pid 12752: backend 1 weight: 1073741823.500000
2009-10-29 18:49:07 DEBUG: pid 12753: I am 12753
...
2009-10-29 18:49:07 DEBUG: pid 12784: I am 12784
2009-10-29 18:49:07 LOG:   pid 12752: pgpool successfully started
2009-10-29 18:49:07 DEBUG: pid 12785: I am PCP 12785
...
2009-10-29 18:51:53 DEBUG: pid 17586: starting health checking
2009-10-29 18:51:53 DEBUG: pid 17586: health_check: 0 th DB node status: 1
2009-10-29 18:51:53 DEBUG: pid 17586: health_check: 1 th DB node status: 1
...
 4. Check if replication is working 

    - postgres is running on all nodes

    - pgpool is running on port 9999 on node 1

    - shell session on node1 established

 4.1 create a test database and insert some data
psql -p 9999
If the above command is successful, we can confirm that Pgpool is working. Then do the following steps.
createdb -p 9999 bench_replication
pgbench -i -p 9999 bench_replication
psql -p 9999 bench_replication
bench_replication=# insert into history (tid, bid,aid,mtime,filler) (select 1,1,1,now(),i::text from
(select generate_series(1,1000000) as i) as q);
 4.2 Check data in each node

At Node_1:
psql -p 9999 bench_replication
select count(*) from history;
Count
--------
1000000
(1 row)
At Node_1:
psql -p 5432 bench_replication
select count(*) from history;
Count
--------
1000000
(1 row)
At Node_2:
psql -p 5432 bench_replication
select count(*) from history;
Count
--------
1000000
(1 row)
The results indicate that the replication is working.

 5. Enable online recovery


5.1 Configure pgpool.conf parameters:
...
recovery_user =  'postgres'
recovery_1st_stage_command =  'copy_base_backup'
recovery_2nd_stage_command = 'pgpool_recovery_pitr'
...
 Reload pgpool to reflect the changes.
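With the layout used above, the reload can be issued through pgpool's own command-line reload mode (a sketch):

/opt/pgpoolII/bin/pgpool -f /opt/pgpoolII/etc/pgpool.conf reload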

5.2 Create scripts
I have written scripts for both nodes. If anyone wants to set up online recovery from one side only, they may use only the required scripts.
------------------------------------------------
 copy_base_backup at Node_1:
 ------------------------------------------------

#! /bin/sh
psql -c "select pg_start_backup('pgpool_recovery')" postgres
echo "restore_command = 'cp /var/lib/pgsql/wal_archives_36/%f %p'">/var/lib/pgsql/data/recovery.conf
tar -C /var/lib/pgsql/data -zcf pgsql.tar.gz base global pg_clog pg_multixact pg_subtrans pg_tblspc pg_twophase pg_xlog recovery.conf
psql -c "select pg_stop_backup()" postgres
scp pgsql.tar.gz 10.0.0.34:/var/lib/pgsql/data
 #Expand a database backup
ssh -T 10.0.0.34 'cd /var/lib/pgsql/data; tar zxf pgsql.tar.gz' 2>/dev/null 1>/dev/null
rm -f recovery.conf
-------------------------------------------------
copy_base_backup at Node_2:
-------------------------------------------------
#! /bin/sh
 BINPATH=/opt/PostgreSQL/8.3/bin
$BINPATH/psql -c "select pg_start_backup('pgpool_recovery')" postgres
echo "restore_command = 'scp postgres@10.0.0.34:/var/lib/pgsql/wal_archives_34/%f %p'">/var/lib/pgsql/data/recovery.conf
tar -C /var/lib/pgsql/data -zcf pgsql.tar.gz base global pg_clog pg_multixact pg_subtrans pg_tblspc pg_twophase pg_xlog recovery.conf
$BINPATH/psql -c "select pg_stop_backup()" postgres
scp pgsql.tar.gz 10.0.0.36:/var/lib/pgsql/data
# Expand a database backup
ssh -T 10.0.0.36 'cd /var/lib/pgsql/data; tar zxf pgsql.tar.gz' 2>/dev/null 1>/dev/null
rm -f recovery.conf
-----------------------------------
pgpool_recovery_pitr:
-----------------------------------
#! /bin/sh
psql -c "select pg_switch_xlog()" postgres
--------------------------------------------------
pgpool_remote_start at Node_1:
--------------------------------------------------
#! /bin/sh
if [ $# -ne 2 ]
then
echo "pgpool_remote_start remote_host remote_datadir"
exit 1
fi
DEST=$1
DESTDIR=$2
PGCTL=/opt/PostgreSQL/8.3/bin/pg_ctl    #postgresql bin directory at Node_2
#Startup PostgreSQL server
ssh -T $DEST $PGCTL -w -D $DESTDIR start 2>/dev/null 1>/dev/null 
--------------------------------------------------
pgpool_remote_start at Node_2:
--------------------------------------------------
#! /bin/sh
if [ $# -ne 2 ]
then
echo "pgpool_remote_start remote_host remote_datadir"
exit 1
fi
DEST=$1
DESTDIR=$2
PGCTL=/usr/local/pgsql/bin/pg_ctl  #Path for postgresql bin directory at Node_1
#Startup PostgreSQL server
ssh -T $DEST $PGCTL -w -D $DESTDIR start 2>/dev/null 1>/dev/null 


6. Test failover:
To test that failover works, I killed the postgres processes on node_2 while an update statement was running at node_1 through port 9999.
At Node_1:
psql -p 9999 bench_replication
update history set tid=2;

At Node_2:
pkill postgres
or
pg_ctl stop -m immediate

Check that at Node_1 the update is still running.

tail pgpool.log
...
2009-11-12 13:22:23 DEBUG: pid 13719: detect_error: kind: E
2009-11-12 13:22:23 DEBUG: pid 13719: detect_stop_postmaster_error: receive admin shutdown error from a node.
2009-11-12 13:22:23 LOG:   pid 13719: notice_backend_error: 1 fail over request from pid 13719
2009-11-12 13:22:23 DEBUG: pid 13715: failover_handler called
2009-11-12 13:22:23 DEBUG: pid 13715: failover_handler: starting to select new master node
2009-11-12 13:22:23 LOG:   pid 13715: starting degeneration. shutdown host 10.0.0.54(5432)
2009-11-12 13:22:23 DEBUG: pid 13715: VN:: Master_node_id is changing from 0 to 0
2009-11-12 13:22:23 LOG:   pid 13715: failover_handler: do not restart pgpool. same master node 0 was selected
2009-11-12 13:22:23 LOG:   pid 13715: failover done. shutdown host 10.0.0.54(5432)
2009-11-12 13:22:23 LOG:   pid 13715: execute command: echo host:10.0.0.54 new master id:0 old master id:0>/opt/pgpoolII/log/failover.log
2009-11-12 13:22:23 DEBUG: pid 13715: reap_handler called
2009-11-12 13:22:23 DEBUG: pid 13715: reap_handler: call wait3
2009-11-12 13:22:23 DEBUG: pid 13715: reap_handler: normally exited
2009-11-12 13:22:28 DEBUG: pid 13715: starting health checking
2009-11-12 13:22:28 DEBUG: pid 13715: health_check: 0 th DB node status: 2
2009-11-12 13:22:28 DEBUG: pid 13715: health_check: 1 th DB node status: 3
...


7. Online Recovery and Re-attach node:

After failover, if we want to re-attach the failed node (Node_2, which is backend 1 in pgpool.conf), we need to apply the changes that happened through Pgpool while it was down. As a result, an online recovery of that node is required.
/opt/pgpoolII/bin/pcp_recovery_node  20 10.0.0.36 9898 postgres postgres 1
Parameters are:
20 - timeout in seconds
10.0.0.36 - host ip where pgpool is running
9898 - port at which pgpool communication manager listens
postgres, postgres - the username and password from pcp.conf
1 - ID of the node we want to attach (refers to the backend number in pgpool.conf)

pgpool.log messages:
...
DEBUG: pid 4411: pcp_child: start online recovery
LOG: pid 4411: starting recovering node 1
DEBUG: pid 4411: exec_checkpoint: start checkpoint
DEBUG: pid 4411: exec_checkpoint: finish checkpoint
LOG: pid 4411: CHECKPOINT in the 1st stage done
LOG: pid 4411: starting recovery command: "SELECT pgpool_recovery('copy_base_backup', '10.0.0.34','/usr/local/pgsql/data')"
DEBUG: pid 4411: exec_recovery: start recovery
DEBUG: pid 29658: starting health checking
DEBUG: pid 29658: health_check: 0 the DB node status: 2
DEBUG: pid 29658: health_check: 1 the DB node status: 3
DEBUG: pid 4411: exec_recovery: finish recovery
LOG: pid 4411: 1st stage is done
LOG: pid 4411: starting 2nd stage
LOG: pid 4411: all connections from clients have been closed
DEBUG: pid 4411: exec_checkpoint: start checkpoint
DEBUG: pid 4411: exec_checkpoint: finish checkpoint
LOG: pid 4411: CHECKPOINT in the 2nd stage done
LOG: pid 4411: starting recovery command: "SELECT pgpool_recovery('pgpool_recovery_pitr', '10.0.0.34','/usr/local/pgsql/data')"
DEBUG: pid 4411: exec_recovery: start recovery
DEBUG: pid 4411: exec_recovery: finish recovery
DEBUG: pid 4411: exec_remote_start: start pgpool_remote_start
DEBUG: pid 29658: starting health checking
DEBUG: pid 4411: exec_remote_start: finish pgpool_remote_start
DEBUG: pid 29658: starting health checking
LOG: pid 4411: 1 node restarted
LOG: pid 4411: send_failback_request: fail back 1 th node request from pid 4411
LOG: pid 4411: recovery done
DEBUG: pid 29658: failover_handler called
DEBUG: pid 29658: failover_handler: starting to select new master node...


8. Database Downtime:
During the production period, as long as pgpool can communicate with at least one node, the system needs no downtime from the database end. However, during online recovery, before the 2nd stage, all client connections to pgpool must be closed. It is designed this way so that no transaction is missed during the recovery and re-attachment of a node.
Continue reading →
