Wednesday, September 2, 2015

Database System study sql

REM SQL*Plus session setup for the system study report.
REM Headings, pagination and row-count feedback are switched off so the
REM spool file holds only the '|'-delimited SS_* data lines.
set serveroutput on
set linesize 100
set pagesize 0
set heading off
set feedback off
set colsep "|"
column bytes format 999999999999999.99
break on grantee
REM Report destination (an alternative target used previously: d:\sysstudy.log)
spool D:\xxx\sow\system_study_report.log
-- Tablespace inventory: one SS_TABLESPACE line per row of dba_tablespaces.
SELECT 'SS_TABLESPACE'||'|'||tablespace_name
       ||'|'||extent_management
       ||'|'||segment_space_management
       ||'|'||initial_extent
       ||'|'||contents
  FROM dba_tablespaces;
-- Data files with their size in MB (truncated to 2 decimals).
SELECT 'SS_DATAFILE'||'|'||tablespace_name
       ||'|'||file_name
       ||'|'||TRUNC(bytes/1024/1024, 2)
  FROM dba_data_files;
-- Temp files with their size in MB (truncated to 2 decimals).
SELECT 'SS_TEMPFILE'||'|'||tablespace_name
       ||'|'||file_name
       ||'|'||TRUNC(bytes/1024/1024, 2)
  FROM dba_temp_files;
-- Control file names.
SELECT 'SS_CONTROL'||'|'||name
  FROM v$controlfile;
-- Redo log members with their group number and group size in MB.
-- The group number field and the v$log / v$logfile join predicate were missing
-- (the select list contained an empty '||||' and the statement had no WHERE or
-- terminator); both are restored so each member is paired with its own group.
select 'SS_REDO'||'|'||a.group#||'|'||b.member||'|'||trunc((a.bytes/1024/1024),2)
from v$log a, v$logfile b
where a.group#=b.group#;
-- Rollback segment storage settings, joined to v$rollstat on usn = segment_id.
-- status is qualified with dba_rollback_segs in the select list.
SELECT 'SS_ROLLBACK'||'|'||segment_name||'|'||tablespace_name
       ||'|'||initial_extent||'|'||next_extent
       ||'|'||min_extents||'|'||max_extents
       ||'|'||optsize||'|'||dba_rollback_segs.status
  FROM dba_rollback_segs, v$rollstat
 WHERE segment_id = usn;
-- Database accounts: creation date, default/temporary tablespace and profile.
SELECT 'SS_USERS'||'|'||username
       ||'|'||TO_CHAR(created,'DD-MON-YYYY')
       ||'|'||default_tablespace
       ||'|'||temporary_tablespace
       ||'|'||profile
  FROM dba_users;
-- Roles granted to database users, leaving out SYS, SYSTEM and OUTLN.
SELECT 'SS_PRIVS'||'|'||grantee||'|'|| granted_role
  FROM dba_role_privs
 WHERE grantee IN (SELECT username FROM dba_users)
   AND grantee NOT IN ('SYS','SYSTEM','OUTLN');
-- Initialization parameters, one SS_INIT line (name|value) per selected parameter.
-- NOTE(review): the IN (...) list of parameter names is missing after "where name in",
-- so the statement is incomplete as it stands - restore the intended list and the
-- terminating ';' before running.
select 'SS_INIT'||'|'||name||'|'||value
from v$parameter
where name in

REM - Findings Section
-- Totals in MB (truncated to 2 decimals): X = all segments, Y = all data files,
-- Z = all temp files.
-- NOTE(review): SELECT ... INTO is PL/SQL syntax; no enclosing DECLARE/BEGIN/END block
-- and no statement that prints X, Y and Z is visible here - presumably lost; confirm
-- and restore before relying on these three statements.
SELECT trunc(sum(bytes/1024/1024),2) INTO X FROM dba_segments;
SELECT trunc(sum(bytes/1024/1024),2) INTO Y FROM dba_data_files;
SELECT trunc(sum(bytes/1024/1024),2) INTO Z FROM dba_temp_files;
-- Space usage per tablespace: total MB, used MB, free MB, used % and free %.
--   a = total data file size, u = allocated extents, f = free space (all in MB).
-- The total/free fields, the FROM keyword and the ORDER BY column were missing;
-- as written, the stray "/*" in the select list even opened a block comment.
-- NOTE(review): the original sort column is unknown - tablespace name is used
-- here to give a stable order; adjust if a different order is expected.
select 'SS_TSSPSTAT'||'|'||f.tablespace_name||'|'||a.total||'|'||u.used||'|'||f.free||'|'||round((u.used/a.total)*100)||'|'||round((f.free/a.total)*100)
from
(select tablespace_name, sum(bytes/(1024*1024)) total from dba_data_files group by tablespace_name) a,
(select tablespace_name, round(sum(bytes/(1024*1024))) used from dba_extents group by tablespace_name) u,
(select tablespace_name, round(sum(bytes/(1024*1024))) free from dba_free_space group by tablespace_name) f
WHERE a.tablespace_name = f.tablespace_name
and a.tablespace_name = u.tablespace_name
order by f.tablespace_name;
---WHERE next_extent > (select MAX(f.bytes)
---from dba_free_space f
---where f.tablespace_name = s.tablespace_name);
-- Segments with more than 20 extents outside the SYS, SYSTEM and SCOTT schemas;
-- the last field is next_extent divided down to MB.
SELECT 'SS_FRAG'||'|'||owner||'|'||segment_name||'|'||segment_type
       ||'|'||tablespace_name||'|'||extents
       ||'|'||next_extent/1024/1024
  FROM dba_segments
 WHERE extents > 20
   AND owner NOT IN ('SYS','SYSTEM','SCOTT');

-- Numeric display format for a column named VALUE.
col value for 9999999999.99
-- One SS_SGA line per v$sga row, value converted to MB (truncated to 2 decimals).
select 'SS_SGA'||'|'||trunc((value/1024/1024),2) from v$sga ;
-- X = total SGA in MB; Y = total MB of v$sgastat rows whose name contains 'free memory'.
-- NOTE(review): SELECT ... INTO is PL/SQL syntax; the enclosing block and the statement
-- that printed X and Y are not visible here - presumably lost; confirm.
select trunc(sum(value/1024/1024),2)  INTO X from v$sga;
select trunc(sum(bytes/1024/1024),2) INTO Y FROM V$SGASTAT WHERE NAME LIKE '%free memory%';
-- Library cache statistics, one SS_SGA_LCS line per v$librarycache row.
select 'SS_SGA_LCS'||'|'||GETS||'|'||GETHITS||'|'||trunc(GETHITRATIO,2)||'|'||PINS||'|'||PINHITS||'|'||trunc(PINHITRATIO,2)||'|'||RELOADS||'|'||INVALIDATIONS from v$librarycache;
-- X = average gethitratio across v$librarycache rows, as a percentage.
-- NOTE(review): same INTO issue as above - no PL/SQL block or output call is visible.
select trunc(sum(gethitratio)/count(*) *100,2) INTO X from v$librarycache;
-- Dictionary (row) cache statistics per parameter; hit % = (1 - getmisses/gets) * 100.
-- The FROM clause and terminator were missing, which made this select run into the
-- next one. Rows with gets = 0 are skipped so the ratio cannot divide by zero.
SELECT 'SS_SGA_DCS'||'|'||PARAMETER||'|'||GETS||'|'||GETMISSES||'|'||trunc((1-GETMISSES/GETS)*100,2)
from v$rowcache
where gets > 0;
-- Overall dictionary cache hit ratio summed across all parameters.
select 'SS_SGA_DCHR'||'|'||sum(gets)||'|'||sum(getmisses)||'|'||trunc((1-(sum(getmisses)/sum(gets)))*100,2)
from v$rowcache;
-- Buffer cache hit ratio: (1 - physical reads / (db block gets + consistent gets)) * 100.
-- Each v$sysstat alias must be pinned to its statistic by name; the "<alias>.name ="
-- halves of the three predicates were missing, leaving an invalid WHERE clause.
SELECT 'SS_SGA_DBCHR'||'|'||trunc((1-PHY.VALUE/(cur.value+con.value))*100,2) from v$sysstat phy,v$sysstat con,v$sysstat cur
where cur.name='db block gets'
and con.name='consistent gets'
and phy.name='physical reads';
-- Raw values of the three statistics used in the buffer cache hit ratio.
SELECT 'SS_SGA_DBCS'||'|'||name||'|'||TRUNC(value,2)
  FROM v$sysstat
 WHERE name IN ('db block gets','consistent gets','physical reads');
-- 'cache buffers lru chain' latch: (1 - sleeps/gets) as a percentage.
SELECT 'SS_LRUC'||'|'||name||'|'||TRUNC((1-sleeps/gets)*100,2)
  FROM v$latch
 WHERE name = 'cache buffers lru chain';
-- Wait count and time for the 'segment header' block class.
SELECT 'SS_SHW'||'|'||class||'|'||count||'|'||time
  FROM v$waitstat
 WHERE class = 'segment header';
-- System-wide total of 'buffer busy waits' events.
SELECT 'SS_BBW'||'|'||event||'|'||total_waits
  FROM v$system_event
 WHERE event = 'buffer busy waits';
-- Segments whose header block matches the file (p1) and block (p2) of a session
-- currently waiting on 'buffer busy waits'.
-- The statement had no terminator and ran into the fragments that followed it.
select 'SS_SEBBW'||'|'||s.segment_name||'|'||s.segment_type||'|'||s.freelists||'|'||w.wait_time||'|'||w.seconds_in_wait||'|'||w.state
from dba_segments s,v$session_wait w
where w.event='buffer busy waits'
and w.p1=s.header_file
and w.p2=s.header_block;
-- Sort statistics: disk sorts, memory sorts and (1 - disk/memory) * 100.
-- Only orphan WHERE fragments, a SELECT ... INTO and a bare DBMS_OUTPUT.PUT_LINE call
-- remained here (PL/SQL with no enclosing block), so the same SS_SORT line is produced
-- with one plain select instead. NULLIF keeps the ratio from dividing by zero when no
-- memory sorts have been recorded (the last field is then empty).
-- NOTE(review): field order (disk, then memory) follows the order of the original
-- fragments - confirm against the report consumer.
select 'SS_SORT'||'|'||d.value||'|'||m.value||'|'||(1-d.value/nullif(m.value,0))*100
from v$sysstat d, v$sysstat m
where d.name='sorts (disk)' and m.name='sorts (memory)';
-- Index lookup ratio: (1 - long table scans / (long + short table scans)) * 100.
-- The "<alias>.name =" halves of both predicates were missing. s is bound to the
-- short-table statistic and l to the long-table statistic, matching the alias
-- initials and the order of the literals in the original WHERE clause.
SELECT 'SS_IDX_LKP'||'|'||trunc((1-l.VALUE/(l.value+s.value))*100,2) FROM V$SYSsTAT l,v$sysstat s
WHERE s.name='table scans (short tables)' and l.name='table scans (long tables)';
-- Log buffer size as configured (v$parameter) and as allocated (v$sgastat).
-- The first statement lacked its ';' and merged with the second into invalid SQL.
select 'SS_SGA_LBCAU'||'|'||'Specified in INIT'||'|'||to_number(value,'999999999') from v$parameter where name ='log_buffer';
select 'SS_SGA_LBCAU'||'|'||'Allocated by OS'||'|'||bytes from v$sgastat where name ='log_buffer';
-- Sessions currently waiting for log buffer space.
-- The pattern previously ended in 'space %' (trailing blank before the wildcard),
-- which cannot match the event name 'log buffer space'; the blank is removed.
select 'SS_SGA_LBCW'||'|'||sid||'|'||event||'|'||seconds_in_wait||'|'||state
from v$session_wait where event like '%log buffer space%';
-- Show NAME columns 30 characters wide.
column name format a30
-- Raw redo statistics: space requests, buffer allocation retries and entries.
SELECT 'SS_SGA_LBCS'||'|'||name||'|'||value
  FROM v$sysstat
 WHERE name IN ('redo log space requests','redo buffer allocation retries','redo entries');
-- Redo log buffer hit ratio: (1 - allocation retries / redo entries) * 100.
-- re is the 'redo buffer allocation retries' row and r the 'redo entries' row;
-- the "<alias>.name =" halves of both predicates were missing.
select 'SS_SGA_LBCHR'||'|'||(1-(re.value/r.value ))*100
from v$sysstat re ,v$sysstat r
where re.name='redo buffer allocation retries' and r.name='redo entries';
-- Wait count and time for every block class in v$waitstat.
SELECT 'SS_OVERALL_WAITS'||'|'||class||'|'||count||'|'||time FROM V$WAITSTAT;
-- NOTE(review): the three lines below are WHERE clauses with no SELECT ... FROM in front
-- of them, so they are not complete statements. They look like the tails of three
-- queries filtering on EVENT for log file switch waits (completion, checkpoint
-- incomplete, archiving needed) - the select lists and source view are not visible
-- here; restore them before running.
WHERE EVENT LIKE 'log file switch completion%';
WHERE EVENT LIKE 'log file switch (check%';
WHERE EVENT LIKE 'log file switch (arch%';
-- Rollback segment activity per segment; v$rollname supplies the segment name for
-- each v$rollstat row (joined on usn). The name field was missing from the select
-- list, leaving an empty '||||' that is not valid SQL.
select 'SS_RBSTAT'||'|'||b.name||'|'||a.WRITES||'|'||a.GETS||'|'||a.WAITS||'|'||a.HWMSIZE||'|'||a.SHRINKS||'|'||a.WRAPS||'|'||a.STATUS
from v$rollstat a, v$rollname b
where a.usn=b.usn;
-- Latches that have recorded at least one miss.
SELECT 'SS_LATCH'||'|'||name||'|'||gets||'|'||misses||'|'||sleeps
       ||'|'||immediate_gets||'|'||immediate_misses
  FROM v$latch
 WHERE misses <> 0;
REM Close the report file.
spool off

Saturday, July 11, 2015

vcs cluster oracle 11g

VCS cluster for oracle

Many companies seek to have their information systems available to their customers at all hours of the day or night.  This typically means that key technical personnel must remain on call perpetually, and be able to respond to emergencies on short notice.  Then, when a server problem is detected, rapid response is mandatory. 

In spite of rapid response by reliable DBAs, there will typically be significant downtime in case of a server failure.  This lapse has led DBAs and System Administrators to consider cost-effective ways to meet a 24x7 uptime requirement.   Especially attractive would be some option that could automatically detect and recover from a server disaster.  It would also be best to avoid creating custom solutions that rely on unproven scripts or monitoring programs. 

These stringent requirements are addressed by an architecture commonly called “HA,” for High Availability.  Veritas Cluster Server, or VCS, is one example of an HA system.   The goal of all HA systems is the same: minimize downtime due to server failure.  The type of technology used in these HA systems is not new, nor is it especially exotic.  Many corporations requiring 24x7 availability use VCS or a similar product.  Other examples of HA systems are HP MC Service Guard and IBM HACMP.  Although this paper emphasizes the Veritas HA product, many of the principles described here are equally applicable to the HP and IBM products.


As shown in Figure 1, a typical cluster has two nodes.   VCS requires that a “service group” be defined for each database and associated applications.  Each service group contains everything that is relevant to that particular database and application.  Then, when failover occurs, everything in that Service Group transfers to the other node.

For instance, in Figure 1, Service Group “A” contains a database, certain areas on the shared disk, and a “Virtual” IP address, or VIP.  This VIP points to whatever node the Service Group is currently associated with.  Thus, when the Service Group is on A, the VIP will point to the IP address of node A.  Upon failover, the VIP will point to the alternate node IP address.   Testing shows a typical time to transfer the entire service group is about two minutes (one minute to detect failure plus one minute to transfer everything in the service group).

Since there are both database and network-related resources in a service group, the DBA will work together with the Systems Administrator to configure VCS.  The Systems Administrator will take the lead, first creating the primary VCS configuration file, which is called main.cf.  This file lists the various Resource Types that constitute a Service Group in the cluster.   For instance, some typical Resource Types are:  DiskGroup, IP, and Volume.   At this point, it is not necessary to define the Oracle-specific resources.  That may be done after all the disk and network related resources are set up.

Veritas provides an excellent GUI tool, called hagui, to assist in the initial setup.  This tool is a very convenient way to complete the definitions needed in the main.cf file.  In addition, hagui can display all the resources defined for any service group, and the status of the entire VCS cluster. 

Typical dependencies and resources for a VCS cluster are shown in Figure 2.  The main diagram on the right shows how the various resources relate to one another.  The bottom portion of the figure shows the resources that must be enabled first.  The very top of the tree shows the resources that are enabled last—for instance, the Oracle Listener, as well as the database itself.  Resources are typically shown in blue, meaning that the resource is fully available.

B Service Group







Figure 1. VCS Cluster Architecture

Figure 2. Typical hagui Display




The primary advantage of VCS (as well as other HA systems) is that failover of the database (and related applications, if desired) occurs with no loss of data, and no intervention by the DBA or Systems Administrator.  At the time of this failover, there is no need for the DBA to locate and apply the latest redo information, as required for a Hot-Standby configuration.  Everything up to the last commit is saved.  This occurs because the database is simply doing a shutdown abort, followed by a restart.  All Oracle data files are brought over together to the other node.

Due to the Virtual IP address defined for a service group, when failover occurs, new connections to the database are automatically routed to the correct node with no intervention whatsoever.  This is possible because each client, in its tnsnames file,  specifies a virtual host name, which “behind the scenes” really points to a specific server in the HA cluster.


 Some of the VCS failover criteria are configurable.  For example, a certain number of Listener restart attempts may be specified before a failover.  Also, the DBA may optionally specify that two different types of checks may be performed on both the database and the listener, or opt for a simpler single-check mechanism. 

If there are applications running on the same server as the database, these applications can be included in the same Service Group so that they failover along with the database.  (Note that this may require writing a separate “agent” to handle the application monitoring and restart.)


Veritas VCS is far simpler to implement than Advanced Replication or OPS (Oracle Parallel Server).  Unlike OPS, no data or user segmentation is required, because there is only one instance running at one time for a service group.  Additionally, when preparing for VCS, no modification to the application is required; in fact, the application does not “know” that the database has any failover capability—it “looks” like any other database.  

Finally, future databases can be added to the HA cluster with only moderate effort.  Once the basic setup is complete, the configuration can be modified to include new Oracle instances if needed.  This involves creation of a new Service Group to house all resources associated with the new database. 


Preparing an Oracle database for VCS is very similar to building a ‘vanilla’ database—but there are some differences.


The Oracle executables may be placed on either the local or the shared disk.  There are some advantages to each method.

·         Located on Shared Disk.  If  there will only be a few databases involved for the entire VCS cluster, then ORACLE_HOME can easily be installed on each of the few Service Groups, along with all the database files.  In this setup, after database failover, the ORACLE_HOME goes along with the database files to the other node.  The main disadvantage of this approach is that each time a new database (and service group) is created, a complete Oracle install must be performed again, with the new set of executables placed in a new shared disk area.

·         Located on Local Disk.  If there will be many databases ultimately defined for the cluster, it is probably easier to just perform a single Oracle install for each node, and place ORACLE_HOME on the local disk.  Thus, if there are two nodes, an Oracle install is performed just two times—with no further installs (except for any future Oracle patches, etc.).  In this setup, the ORACLE_HOME on each local disk must be identical, so that after failover, each database will start properly.  Another advantage to this approach is that the Oracle executables can be upgraded one node at a time, while the database is active on the other node.

No matter which approach is chosen, it is critical that the installs be consistently performed, and that the node configuration matches.


After the issue of ORACLE_HOME is resolved, and all installs are complete, the DBA should identify the volume group and its file systems that will be “shared”  between the nodes in the cluster.   Note that the term shared does NOT mean that a file system is simultaneously accessed by both nodes (as done in OPS).   Instead, it means that a file system is either on one node or the other.  For instance, file systems /u02-/u04 might be reserved for one database; and /u05-/u07 for another. 

When creating the new database, be sure to place ALL oracle data files (including redo and .ctl files) in the shared volume group.   Do not intermix files from different databases on the same shared volume, because after failover, some database files would be “missing” when the shared file systems move to the other node.


The admin/db directory can be located on either the shared or local disk. Placing it on the shared disk is probably more suitable, however, because after failover all the dump destinations plus a single init.ora file will follow the database.   Putting the admin area on the local disk is workable, but then a “duplicate” admin directory needs to be created on the other node.

Setting up the admin area will require a few symbolic links.  If ORACLE_HOME is installed on the local disk, a symbolic link can be created from  the ‘usual’ /admin/SID to the new /admin on the shared volume.  For example:

    ln -s /sharedvg/admin/SID  $ORACLE_BASE/admin/SID

Be sure to repeat all link definitions on each node, so that the /admin/SID area for each node points to the same shared volume directory. 

Regardless of where exactly the admin area is situated, it is crucial that upon failover, the admin directory and all subdirectories can be found, along with the init.ora file.


At first, one might think that the usual one-listener-for-all-databases approach will also work for VCS.  However, this is one area where VCS requires a departure from regular database configuration.

Assuming that monitoring of the Oracle Listener is desired, a separate listener (and port) for each database is required.  This is necessary because VCS will shutdown the listener for a service group upon failover.   This makes it impractical to use one listener for all.  Therefore, one listener is defined for each service group.  This also means that the traditional name, LISTENER, cannot be used; rather, a new name is specified for each listener.   Upon failover, the appropriate listener is shutdown (if possible) on the original node, and restarted on the alternate node.

Each listener uses the Virtual IP address defined for its service group, rather than the actual server hostname.


It is critical that each node in the cluster be configured consistently, depending on whether ORACLE_HOME is on the local or shared disk.  For instance, the oracle user on each node must have proper environment variables.  This means similar (if not identical) .profile files on each node.   Also, the various cron jobs scheduled on each node should be examined to see if they could be impacted after a failover.

For each database, it is important to ensure that the proper password file will be accessible when the database fails over.  (This is only an issue if Oracle is installed on the local disk, since the password file is typically stored in $ORACLE_HOME/dbs.)

Since VCS is actually in control of database and listener startup, it is necessary to disable any form of automatic startup or shutdown that is outside VCS.  Thus, in the oratab file on each node, each database should be listed, but with ‘N’ specified rather than the usual ‘Y.’  This is necessary because VCS will control startup and shutdown of every database included in the HA definition.


Veritas Corporation likes to partition their application software into “agents.”   Thus, VCS uses two agents to monitor the database and listener.   These agents are the key to the entire VCS fault detection system, because they determine when a critical failure has actually occurred, and what to do when failures are detected.

The agent characteristics for Oracle use are defined using two Resource Types:  Oracle and Sqlnet.  As always, the hagui utility is most helpful in defining these agents.  When the hagui utility is used, as shown in Figure 3, it populates the various entries within the Oracle and Sqlnet areas in the main.cf file.  Of course, these entries may simply be entered directly, using vi, if desired.

Custom agents can also be created to monitor other processes, such as a critical application that might need special handling in case of failover.


Database checking consists of both a primary and a secondary check.  The secondary check is optional, whereas the Primary is always configured.   Due to the ease in setting up both checks, there seems to be little reason to not enable both.


In this check, the agent simply looks for the background UNIX processes (pmon, smon, etc).  This check occurs every one minute.  It should be obvious to experienced DBAs that the presence of these background processes does NOT guarantee that the database is actually usable.  For instance, many types of internal errors will leave some or all of these processes running, even though the database is completely unusable!    Hence the suggestion to also enable the secondary check.

As shown in Figure 3, the DBA can use the hagui tool to populate the following attributes:

SID                                              [instance name]
Owner                                         [oracle]

Home                                           [value of ORACLE_HOME]
Pfile                                             [path to init.ora file]
User, Pword, Table                    [used for secondary database monitoring]

Figure 3.  Database Agent Setup


Besides the simple checking for the background processes controlled by the primary check, VCS can be configured to perform a simple update transaction.   This secondary check is automatically enabled when the following Oracle attributes are defined: MonScript (which defines the script executed), User, Pword,  and Table.

In order to prepare the secondary check,  several database actions need to be performed:

  • create an oracle user to be used for performing this transaction for each database to be monitored
  • Grant minimal privileges, such as Connect, Resource
  • In this user’s schema, create a table with one column:  TSTAMP (date format). 
  • Insert one row into the table and commit. 
  • Confirm that this user can perform simple update of the table.

 For example:

-- Set up the monitoring account and its one-row table for the secondary check.
Create user dbcheck identified by dbcheck;
Grant connect, resource to dbcheck;
Connect dbcheck/dbcheck
Create table DBTEST ( TSTAMP DATE);
Insert into DBTEST values (SYSDATE );
-- Commit so the row is permanent and visible to the monitoring session.
Commit;


Besides the database agent, VCS requires that the DBA configure another agent just for checking the Listener(s).  As shown in Figure 4, the hagui tool can be used to configure the listener agent. 

Ensure that the following attributes are defined, either via the hagui tool, or by directly editing the configuration file.

Owner                 [typically, oracle],
Home                   [i.e., $ORACLE_HOME],
TnsAdmin           [typically $ORACLE_HOME/network/admin],
Listener               [e.g., LISTENER_GROUP1]
MonScript           [typically, ./bin/Sqlnet/LsnrTest.pl]

The attribute MonScript is used for secondary listener monitoring.  It simply issues an lsnrctl status command.



Figure 4.  Listener Agent

The parameter RestartLimit must be manually entered into the VCS configuration file.  This will allow VCS to attempt listener restart before failing over.   A setting of  three means that VCS will try 3 times to restart that particular listener before initiating a failover of the respective database.  The count is reset when VCS sets this ‘resource’ offline.


As part of the HA design, it is critical to consider the various options for archiving.  Since there are two completely different types of disk available, it is reasonable to consider duplicate sets of archive logs.   Thus, the DBA may prudently decide to have two sets of archive logs-one set on local, one on shared.

Setting this up is not technically difficult, but it would be easy to forget to test all configurations.  The DBA should confirm that the archive logs write correctly to all destinations.   Archive log directories must be setup for each database on each node, so that upon failover, archive logs are written. 

The archive destination entries in the init.ora file should specify destination 1 and destination 2, with seconds for reopen attempts:

log_archive_dest_1 = "location=/u00/arch/khawk reopen=180"
log_archive_dest_2 = "location=/u09/arch/khawk reopen=180"


The client tnsnames.ora file should always specify the Service Group (virtual) IP address, not the actual host name.  Upon failover, this IP address will automatically change so as to point to the correct node.  Once the client tnsnames file is setup, no change to the file is ever required, as long as the service group virtual IP address is not redefined.


Upon failover, there will typically be a short (typically a few seconds) delay, as database crash recovery is automatically activated.  However, in extreme cases, where checkpointing is infrequently performed, this time could become significant.   In order to reduce startup time, it is necessary to understand what actions are being performed once startup is commanded. 

Instance recovery time will be the sum of time-to-roll-forward un-checkpointed transactions, plus time to rollback uncommitted transactions.   The second value, rollback time, has (according to Oracle Tuning Guide documentation) been drastically reduced due to the new ‘Rollback on Demand’ feature.  This just leaves the time to roll forward.

Roll forward time is proportional to frequency of checkpoints.   Fortunately, the DBA has a plethora of ways to control checkpoint frequency, thereby guaranteeing a reasonable  time for rolling forward.  The simplest way is through the sizing of redo logs.  Once a redo log is full, a log switch occurs, along with a checkpoint. 

A more precise, but much more complicated, method to control checkpointing is via the parameter FAST_START_IO_TARGET.  The Oracle Tuning Guide has very detailed charts showing how to use this parameter to control crash recovery startup time.   With this method, the number of I/O operations is limited, thereby putting a threshold on recovery time.  I suspect most DBAs will find this extra complication unwarranted.



If complications arise as the Systems Administrator is performing the initial definition of Service Groups, Network addressing, etc., consider contracting with a professional implementation consultant for a few days to perform the basic setup.   This is especially true if there are any unusual network configurations for the cluster.  


Normally, the init.ora parameter PROCESSES is important, but a slightly erroneous setting does not typically have catastrophic consequences.  Usually, upon reaching this limit, new database connections are simply refused, but there is no harm to the existing user connections.   When convenient, the DBA simply raises the limit.

With VCS, however, this parameter is absolutely critical!  This can be understood by considering the types of tests that the VCS agents are performing.   The key is the second database check.  The “secondary” database check performs a connection to the database, and then executes a simple transaction.  If the Processes parameter is too low, VCS will be unable to even connect.  Thus, the database check will fail, thus leading to an undesired failover.   To add to the confusion, the VCS log will indicate that the database “became offline unexpectedly, on its own.”


When VCS desires to perform a failover, a shutdown abort will typically be performed.  Unfortunately,  the OS (especially Solaris 2.6) will often not release shared memory and semaphores.  This means that the original node will be unavailable to restart of that instance.  Should the DBA attempt a switch back to the original node, the instance will be unable to start.   After a failure of this sort, the hagui utility can be used to display the status of all resources in the cluster, as shown in the Figure 5.

It is therefore crucial to detect this problem and clear shared memory after a failover has occurred.  One simple but effective solution is to set a cron job to notify the DBA if more shared memory segments are detected than there are instances running.

The DBA can detect and correct the shared memory problem by using the Unix commands  ipcs and ipcrm.   The command ipcs -a lists all interprocess resources active on the server.  This includes shared memory segments as well as semaphores.  The command ipcrm allows the DBA to remove the resource.

In order to remove the shared memory segment, it is necessary to identify which segment relates to the database “no longer there.”   Of course, if there is only one database active per node, this is not an issue.   Otherwise, a shared memory segment can be matched to a particular instance by the memory required for that instance.  Simply look at the memory indicators displayed upon instance startup, or estimate database memory based on buffer sizes and Shared Pool size. 

Another method to identify the shared memory assigned to an instance is to use the Oracle-supplied program sysresv.   Ensure that the environment variables ORACLE_HOME and ORACLE_SID are set prior to running.

Figure 5.  VCS Failure due to Shared Memory


It is important to understand that VCS controls only the shared disk systems, not the local disks.   Thus, after server reboot, the OS should mount the local disks as usual.  Do not include the local file system in any VCS group.


Limitations and difficulties with VCS are actually limitations relevant to any of the HA solutions--whether Veritas, HP, or IBM.  The most difficult factor in implementing HA solutions is the need for thorough testing to ensure that the HA solution implemented will handle all relevant failure scenarios.   Without thorough testing, it is possible that the HA solution could actually provide less availability!  For instance, an improper setup might lead to frequent failovers to the alternate node, causing annoying breaks in service.

In reality, of course, it is impossible to check all conceivable failure situations; thus, a test plan must be designed to check the types of failures that can realistically occur.  It is also imperative that testing be conducted to ensure that all related applications fail over together.  It is not too helpful if the database correctly fails over, but the application is left behind on a useless node.


It is not realistic to employ a junior level DBA to setup and maintain VCS systems.   The reason is simple: the “stakes” are higher, with severe consequences for mistakes.  With a ‘regular’ Oracle database setup, the DBA can actually make a lot of mistakes, but yet the database will keep on running (albeit at a reduced performance level).  With VCS, a mistake likely means that upon failure the database will probably not failover correctly; in other words, bad setup means that there really isn’t any ‘HA.’ 

In a recent case, one group of local disks was mistakenly specified as being under VCS control; after a system reboot, the file system containing the Oracle executables was not mounted, leading to downtime.   Although this slip was discovered and corrected soon after implementation, it happened despite extensive testing by both the DBA and System Administrator.

Additionally, the servicing of databases that are configured for HA is slightly more complex.  For instance, one cannot simply perform a shutdown; rather, VCS must be ‘frozen’ first, and then a shutdown can occur.


Whenever the database restarts after a failover, some applications may need to be restarted.  This can be accomplished via a new type of database trigger that fires after startup of the database.   The code of the trigger invokes a user-created Java stored procedure, that in turn runs any desired Unix script.   The steps to configure this are:

·  Create special trigger
·  Setup Java Virtual Machine in the database
·  Create Java Stored Procedure


This is simple Pl/SQL code that calls a Unix shell script with any name.   It is critical that the ‘reset’ script be located on a shared volume, so that it will always be executed, even after failover.  Notice also that the ‘sh’ command requires the full path.

executecmd('/usr/bin/sh [path]/vreset');


  • Ensure that Java Shared Pool is set > 50 mb
  • As SYS, run $ORACLE_HOME/javavm/install/initjvm
  • Ensure that CLASSPATH is set. 
  • For user to run the java procedure, grant JAVASYSPRIV



·         Create java source file (source in Oracle Note 109095.1) called ExecuteCmd.java
·         Compile source into class file:  javac ExecuteCmd.java
·         Load java class into database: loadjava -u user/password ExecuteCmd.class
·         Create Java procedure in database


With proper diligence and attention to detail, Veritas VCS can provide a highly effective HA solution.  Users of the system will appreciate the rapid failover capability that doesn’t depend on DBA intervention to activate.

A key factor in ensuring success with VCS, as well as with any HA product, is thorough testing of the configuration by experienced DBAs and System Administrators.  Although not trivial to configure, with reasonable care VCS can maximize uptime of critical 24x7 applications and databases.


The software versions assumed for purposes of this paper are: Veritas Database Edition for Oracle 2.1 (includes VCS 1.1.2); Oracle Enterprise Edition, Operating System Solaris 2.6.

Veritas Corporation,  VCS Reference Guide

Veritas Corporation,  VCS Oracle Agent Guide

Oracle Corporation, Note 109095.1, How to do a System Call from a JAVA Stored Procedure.

Special thanks to John Stucki of Veritas Corporation for his valuable assistance and suggestions.


Chris Lawson consults in the San Francisco bay area, where he specializes in performance tuning of financial applications.  He is a regular presenter at the Northern California Oracle Users Group (NOCOUG).   His previous papers, including the “Ten Database Mysteries”  series and “Oracle DBA: Physician or Magician?” may be found at  Chris may be reached at chris_lawson 

 Many companies seek to have their information systems available to their customers at all hours of the day or night.  This typically means that key technical personnel must remain on call perpetually, and be able to respond to emergencies on short notice.  Then, when a server problem is detected, rapid response is mandatory. 

In spite of rapid response by reliable DBAs, there will typically be significant downtime in case of a server failure.  This lapse has led DBAs and System Administrators to consider cost-effective ways to meet a 24x7 uptime requirement.   Especially attractive would be some option that could automatically detect and recover from a server disaster.  It would also be best to avoid creating custom solutions that rely on unproven scripts or monitoring programs. 

These stringent requirements are addressed by an architecture commonly called “HA,” for High Availability.  Veritas Cluster Server, or VCS, is one example of an HA system.   The goal of all HA systems is the same: minimize downtime due to server failure.  The type of technology used in these HA systems is not new, nor is it especially exotic.  Many corporations requiring 24x7 availability use VCS or a similar product.  Other examples of HA systems are HP MC Service Guard and IBM HACMP.  Although this paper emphasizes the Veritas HA product, many of the principles described here are equally applicable to the HP and IBM products.

Enterprise Manager Grid Control Advanced Installation and Configuration

I Reconfiguring Oracle Management Agent and Discovering New Targets This appendix describes the commands you must run to reconfigure Oracle Management Agent and discover new targets that were installed after the installation of the Management Agent. In particular, this appendix covers the following: Discovering New Targets with Standalone Management Agents Converting Standalone Management Agents to Cluster-Based Management Agents Discovering New Targets with Cluster-Based Management Agents Reconfiguring Cluster-Based Management Agents Note: The -t argument mentioned in the commands of this appendix indicates that you do not want the Management Agent to start after the operation (reconfiguration or discovery) ends. Discovering New Targets with Standalone Management Agents By default, the targets that were already installed on the host before installing the Management Agent are automatically discovered in Enterprise Manager Grid Control. However, the new targets installed after installing the Management Agent are not automatically discovered. Therefore, if you install any new target, and if you want to discover and monitor it in Enterprise Manager Grid Control, run the following command from the Oracle home of the Management Agent that is installed on the host: $ORACLE_HOME/bin/agentca -d [ -t -i ] Converting Standalone Management Agents to Cluster-Based Management Agents After installing standalone Management Agents on multiple hosts, if you create a cluster of those hosts, you must ideally convert those standalone Management Agents to cluster-compatible Management Agents. This section describes how you can convert in graphical and silent mode. Converting in Graphical Mode Converting in Silent Mode Converting in Graphical Mode To convert a standalone Management Agent to a cluster-compatible Management Agent in a graphical mode, follow these steps: In Grid Control, click Deployments. On the Deployments page, from the Agent Installation section, click Install Agent. 
On the following page, click Fresh Install. On the Installation Details page, do the following: In the Source Software section, retain the default selection, that is, Default, from Management Server location. In the Version section, select the appropriate version of the already-installed standalone Management Agents. Note that to use Deployment Procedures, the Management Agents must be at least 10g Release 2. In the Host section, select the required platform, provide a list of hosts that are part of the cluster, check Cluster Install, click Populate Defaults to automatically specify a list of cluster nodes, and then specify a cluster name. In the OS Credentials section, provide the host credentials. In the Destination section, specify the installation base directory on the nodes that form the cluster. In the Additional Parameters section, supply any other optional parameters you want to run. Click Continue. Note: If you see the following error messages on the Prerequisite Details page, ignore them and click Continue to proceed with the installation: Some or all Agent home locations (Install base dir/agent10g or Install Base Dir/agent11g) provided are not empty directories The Agent Oracle Home ( InstallBaseDir/agent10g or InstallBaseDir/agent11g), is already registered with the inventory An agent home exists in the hosts Converting in Silent Mode To convert a standalone Management Agent to a cluster-compatible Management Agent in a silent mode, follow these steps: $ORACLE_HOME/bin/agentca -f -c "{}" [ -t -i -n ] Discovering New Targets with Cluster-Based Management Agents By default, the targets that were already installed on the nodes of a cluster before converting a standalone Management Agent to a cluster-compatible Management Agent are automatically discovered in Enterprise Manager Grid Control. However, the new targets installed on a node after converting a standalone Management Agent to a cluster-compatible Management Agent are not automatically discovered. 
Therefore, if you install any new target on a node of a cluster, and if you want to discover and monitor it in Enterprise Manager Grid Control, run the following command from the Oracle home of the Management Agent on that node: $ORACLE_HOME/bin/agentca -d -c "{}" [ -t -i -n ] Reconfiguring Cluster-Based Management Agents If you add new nodes to an existing cluster, you must reconfigure the cluster-based Management Agent to update the central inventory with the information of those new nodes, and discover all the targets installed on those new nodes. To do so, run the following command from the Oracle home of the Management Agent on every node of the cluster: $ORACLE_HOME/bin/agentca -f -c "{}" [ -t -i -n ] Note: When the agentca script runs, it takes a backup of the EMSTATE directory on the local node, and creates a new EMSTATE directory.