-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathrman_backup_subs.sh
756 lines (649 loc) · 28.7 KB
/
rman_backup_subs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
#----------------------------------------------------------------------------
# ********* RMAN backups script low-level subroutines rman_backup_subs.ksh **
#
# This subscript includes functions used by the main script rman_backup.ksh
#
#
#------------------------------------------------------------------
# === Checks and info before backups run ==========================
#------------------------------------------------------------------
function get_database_info
{ #1. get Oracle release number
dosql Release "SELECT SUBSTR(V,1,INSTR(v,'.')-1) FROM (SELECT MIN(V) V FROM (SELECT distinct(version) v FROM PRODUCT_COMPONENT_VERSION))"
echo "INFO: Major release of the database $db is $Release."
Ver10up=1
if [ "$Release" -le "9" ] ; then
Ver10up=0
echo "INFO: FRA is not supported in Oracle 9."
fi
#2. get database DG role
#-- cut out first word - e.g. "physical" and keep only 'PRIMARY' or 'STANDBY'
dosql DB_ROLE "SELECT SUBSTR(R,INSTR(R,' ')+1) FROM (SELECT DATABASE_ROLE R FROM V\$DATABASE)"
echo "INFO: Database Role: $DB_ROLE"
#3. check if this database is a RAC cluster
dosql RAC "SELECT CASE WHEN COUNT(*)<=1 THEN 'Single Instance' ELSE 'RAC' END FROM GV\$INSTANCE"
echo "INFO: Database Type: $RAC"
if [ "x$RAC" = "xRAC" ]; then
echo "INFO: Check if $BASE_PATH is shared across RAC cluster nodes."
#Snapshot control files and this script's lock files have to be on a shared filesystem.
#TODO: actually check this? Not hard to implement.
fi
#4. Show spfile and control files parameters
dosql sp_ctlf "SELECT RPAD(UPPER(NAME),rownum*8)||': '||DISPLAY_VALUE FROM V\$PARAMETER WHERE NAME IN ('spfile','control_files') ORDER BY NAME DESC"
echo "INFO: $sp_ctlf"
#5. Get Database name
dosql DB_NAME 'SELECT NAME FROM V$DATABASE'
echo "INFO: Database Name: $DB_NAME"
if [ $Ver10up -eq 1 ]; then
#6. Show if BCT is enabled
#TODO: Check V$BACKUP_DATAFILE.USED_CHANGE_TRACKING if it was actually used for an incremental backup
dosql BCT 'SELECT STATUS FROM V$BLOCK_CHANGE_TRACKING'
echo "INFO: Block Change Tracking: $BCT"
#7. Check if Flashback Database is enabled
dosql FLASHBACK "SELECT DECODE(FLASHBACK_ON, 'YES','ENABLED', 'DISABLED') FROM V\$DATABASE"
if [ $FLASHBACK = 'ENABLED' ]; then
dosql FLASH_RET "SELECT TRUNC(value/60)||' hours and '||mod(value,60)||' minutes' FROM V\$PARAMETER WHERE NAME='db_flashback_retention_target'"
echo "INFO: Database Flashback: $FLASHBACK, retention target is $FLASH_RET"
else
echo "INFO: Database Flashback: $FLASHBACK"
fi
fi
}
#-------------------------------------------
function check_best_practices {
dosql ctlf_record_keep_time "select value from v\$parameter where name='control_file_record_keep_time'"
if [ "$ctlf_record_keep_time" -lt $(( $RECOVERY_WINDOW +10 )) ] ; then
echo "WARN: CONTROL_FILE_RECORD_KEEP_TIME init parameter is $ctlf_record_keep_time. It is too low."
echo "WARN: Recommended value is $(( $RECOVERY_WINDOW +10 )) based on your recovery window and Oracle Note 829755.1."
if [ "$FIX_BEST_PRACTICES" -eq "1" ] ; then
dosql noreturn "ALTER SYSTEM SET CONTROL_FILE_RECORD_KEEP_TIME=$(( $RECOVERY_WINDOW +10 )) scope=BOTH"
echo "WARN: CONTROL_FILE_RECORD_KEEP_TIME fixed."
fi
fi
}
#-------------------------------------------
function check_FRA {
NO_FRA="10" #no fra return code
FRA_THRESHOLD=$1 #first parameter
[ $Ver10up -ne 1 ] && return 0 #there is no FRA in pre-10g
dosql FRA_PRC_USED_SOFT 'SELECT round(space_used*100/space_limit,0) pct FROM V$RECOVERY_FILE_DEST'
dosql FRA_PRC_USED_HARD 'SELECT round((space_used-space_reclaimable)*100/space_limit,0) pct FROM V$RECOVERY_FILE_DEST'
if [ "x$FRA_PRC_USED_SOFT" = "x" ] ; then
echo "ERROR: Can't detect Flash Recovery Area for $db."
email "Can not detect FRA" <<EMAIL
Can't detect Flash Recovery Area for $db.
The DB_RECOVERY_FILE_DEST and DB_RECOVERY_FILE_DEST_SIZE init params must be set.
http://download.oracle.com/docs/cd/B19306_01/backup.102/b14192/setup005.htm :
"Use of the flash recovery area is STRONGLY recommended"
EMAIL
return $NO_FRA
fi
echo "INFO: FLASH RECOVERY AREA is ${FRA_PRC_USED_SOFT}% FULL (including reclaimable space)"
echo "INFO: FLASH RECOVERY AREA is ${FRA_PRC_USED_HARD}% FULL (minus reclaimable space)"
if [ $FRA_PRC_USED_HARD -ge $FRA_THRESHOLD ] ; then
dosql FRA_USAGE 'set head on echo on
SELECT * FROM V$FLASH_RECOVERY_AREA_USAGE WHERE PERCENT_SPACE_USED>0'
email "flash_recovery_area is ${FRA_PRC_USED_HARD}% FULL" <<EMAIL
This is a WARNING.
FLASH RECOVERY AREA of $db@`hostname` database is ${FRA_PRC_USED_HARD}% FULL.
Components consuming recovery area space:
$FRA_USAGE
EMAIL
fi
#TODO: check if filesystem or ASM diskgroup where FRA is based has *more*
# free space than the FRA itself!
}
#-------------------------------------------
function DataGuard_check_and_prepare
{ #For DG databases we should connect using TNS (not just target=/), see Doc ID 1604302.1
#Assuming Oracle Wallet is already setup, for user SYS
rman_target="/@$db"
case $DB_ROLE in
PRIMARY)
check_primary_DG_health
if [ ! -z "$outofsync_standbys" ]; then
echo "WARN: Because of DG out-of-sync issue, this Primary will be used for backups."
else
#Only if secondary is in sync, it will be used to offload backups.
echo "INFO: $db is part of DG cluster. Standby database will be used for backup activities."
if [ "x$MODE" = 'xXCHK' ]; then
echo "INFO: Exiting."
return 1 #skip current database
else
echo "INFO: Backup type change: $MODE -> CTRL"
echo "INFO: Only control file and spfile backups will run on primary instance."
MODE="CTRL" #primary DG db: will run only control and spfile file backups
fi
fi
;;
STANDBY)
check_standby_DG_health
#TODO: if standby apply has fallen too far, don't do a stale backup?
;;
esac
}
function check_primary_DG_health
{ #Check if secondary database(s) are synchronized with this Primary database
dosql outofsync_standbys "SELECT DB_UNIQUE_NAME||' - '||SYNCHRONIZATION_STATUS FROM V\$ARCHIVE_DEST_STATUS WHERE TYPE<>'LOCAL' AND SYNCHRONIZATION_STATUS<>'OK' AND STATUS NOT IN ('INACTIVE','DEFERRED')"
#TODO: check for multiple-line result?
if [ -z "$outofsync_standbys" ]; then
echo "INFO: Primary DG health check: there are no out-of-sync standbys."
else
echo "WARN: Out-of-sync DG standbys: $outofsync_standbys."
fi
}
function check_standby_DG_health
{ #Check if this Secondary database is synchronized with Primary
dosql pridb 'SELECT PRIMARY_DB_UNIQUE_NAME FROM V$DATABASE'
dblogin="/@$pridb as sysdba"
#1. Get latest generated sequence# from *primary* database
dosql last_generated_seq 'SELECT MAX(SEQUENCE#) FROM V$ARCHIVED_LOG VAL, V$DATABASE VDB WHERE VAL.RESETLOGS_CHANGE# = VDB.RESETLOGS_CHANGE#' "$dblogin"
#2. Get latest received sequence# on this secondary database
dosql last_received_seq 'SELECT MAX(SEQUENCE#) FROM V$ARCHIVED_LOG VAL, V$DATABASE VDB WHERE VAL.RESETLOGS_CHANGE# = VDB.RESETLOGS_CHANGE#'
#3. Get latest applied sequence# on this secondary database
dosql last_applied_seq 'SELECT DISTINCT FHRBA_SEQ FROM X$KCVFH'
#Note: a reference to X$KCVFH was received from Oracle Support on one of SRs.
# A more documented way would be using query : SELECT MAX(SEQUENCE#) FROM V$ARCHIVED_LOG VAL,
# V$DATABASE VDB WHERE VAL.RESETLOGS_CHANGE# = VDB.RESETLOGS_CHANGE# AND VAL.APPLIED IN ('YES','IN-MEMORY')
# instead, but the latter fails if standby redo apply has fallen too far behind, so v$archived_log already
# doesn't have a record for latest applied log.
if [ $(( $last_received_seq +2 )) -lt $last_generated_seq \
-o $(( $last_applied_seq +2 )) -lt $last_generated_seq ]; then
echo "WARN: Standby $db is out of sync of $pridb primary seq# $last_generated_seq: latest received $last_received_seq, latest applied $last_applied_seq."
return 1
fi
echo "INFO: $pridb -> $db DG apply is healthy: primary seq# $last_generated_seq, secondary received $last_received_seq and applied $last_applied_seq."
}
#------------------------------------------------------------------
# === Lock files management =======================================
#------------------------------------------------------------------
function release_lock {
#This function removes lock files and is called directly, or as
#a signal handler for INT, TERM and EXIT signals
if [ "x$lock_fname" = "x" -o "x$lock_uuid" = "x" ]; then
#No lock file was created by this process
[ $BACKUP_DEBUG -eq 1 ] && echo "DEBUG: release_lock(): No lock was set."
#this might be okay if release_lock() 1st called explicitly, and then from exit trap.
return
fi
if [ -e $lock_fname ]; then
#1. confirm lock file was created by our process
if [ "x`cat $lock_fname`" != "x$lock_uuid" ]; then
[ $BACKUP_DEBUG -eq 1 ] && echo "WARN: '`cat $lock_fname`' != '$lock_uuid'."
echo "WARN: release_lock(): lock $lock_fname is from a different process. Exiting."
return
fi
#2. remove lock file
rm -f $lock_fname
else
echo "WARN: release_lock(): Lock $lock_fname doesn't exist."
fi
#3. finally, remove symlink to the lock file
rm -f $lock_fname.lock 2>/dev/null
echo "INFO: Released lock $lock_fname.lock by pid $$ on $HOSTNAME."
unset lock_fname lock_uuid
}
function proc_uuid {
host=$1;pid=$2
ps_cmd="ps -p $pid -o ppid,user,ruser,rgid,tty,args"
if [ "x$host" = "x$HOSTNAME" ]; then
uuid=`$ps_cmd |tail -1`
else
#backup process can be on a different host (e.g. a RAC database)
uuid=`ssh -o PasswordAuthentication=no -o StrictHostKeyChecking=no $host "$ps_cmd |tail -1"`
fi
if [ $? -ne 0 ] ; then
if [ "x$host" = "x$HOSTNAME" ]; then
echo "WARN: proc_uuid: Process $pid doesn't exist locally on $host." >&2
else
echo "WARN: proc_uuid: Couldn't check locking process on remote host $host or process $pid doesn't exist there." >&2
fi
echo "WARN: Process $pid doesn't exist on $host"
return 1
else
[ $BACKUP_DEBUG -eq 1 ] && echo "DEBUG: proc_uuid: Completed ps for process $pid on $host." >&2
fi
echo $host $pid $uuid #don't double quote: $IFS will remove extra spaces in $uuid
return 0
}
function validate_lock {
if [ ! -e $lock_fname ]; then
echo "WARN: $lock_fname.lock symlink exists, but not the actual lock file. Ignoring."
#we will just reuse symlink that already exists
return
fi
#The $lock_fname lock file exists too - read hostname and pid from it
eval set -A lock_data $(cat $lock_fname)
pid=${lock_data[1]};host=${lock_data[0]}
echo "INFO: validate_lock: lock file found: checking for locking process $pid on $host"
#validity check 1 - try to ps that process (it will fail if it doesn't exist)
remote_lock_uuid=$(proc_uuid $host $pid)
if [ $? -ne 0 ]; then
#Couldn't check if process exists. proc_uuid() also already showed WARN message
echo "WARN: validate_lock(): Couldn't check $pid on $host. $lock_fname is stale. Overwriting."
return
fi
#validity check 2 - compare proc_uuid() for a hostname and pid from the lock file
# and compare with it is actually is in the lock file.
#They will be different if now another process is running under the *same pid*.
if [ "x`cat $lock_fname`" != "x$remote_lock_uuid" ]; then
[ $BACKUP_DEBUG -eq 1 ] && echo "WARN: '`cat $lock_fname`' != '$remote_lock_uuid'."
echo "WARN: validate_lock(): $pid on $host is not a process that created the lock. $lock_fname is stale. Overwriting."
return
fi
#If we are here - lock file is valid. Exit nicely
echo "WARN: Lock $lock_fname.lock exists; Other RMAN process is still running. Exiting"
if [ "x$host" = "x$HOSTNAME" ]; then
#send email only if schedule overlap on the same host.
email "RMAN scripts schedule overlap" <<EMAIL
See log file $LOGFILE0 for more details:
Warning. Lock $lock_fname exists!
$simul_run_check RMAN function is already running. Exiting...
Other RMAN script running (host, pid, ppid, user, ruser, rgid, tty, args):
$remote_lock_uuid
Process is confirmed as still running.
EMAIL
else
[ $BACKUP_DEBUG -eq 1 ] && echo "DEBUG: Email hasn't been sent because schedule conflicts with another host."
fi
unset lock_fname lock_uuid
exit 1
}
function lock_it {
lock_fname="$BASE_PATH/lock/rmanbackup.$simul_run_check"
#Prepare identification string that is being written to the lock file.
lock_uuid=$(proc_uuid $HOSTNAME $$)
[ $? -ne 0 ] && exit 1 #ps -p $$ should always succeed
#1. Create a symlink. Note: $lock_fname may not exist yet
# Using symlinks because this is an atomic check. Works on NFS too.
ln -s $lock_fname $lock_fname.lock
if [ $? -ne 0 ] ; then
#Symlink exists, validate if lock file is not stale
validate_lock
rm -f $lock_fname
fi
#2. Write uuid to the lock file, and make sure release_lock is called even if script exits
trap release_lock INT TERM #don't add trap EXIT in a ksh function
echo $lock_uuid > $lock_fname
if [ $? -eq 0 ] ; then
echo "INFO: Acquired lock $lock_fname.lock by pid $$ on $HOSTNAME."
else
echo "WARN: Error writing to $lock_fname. Ignoring."
#it is important to run a backup - so just ignoring lock file problem
fi
}
#------------------------------------------------------------------
# === Subsroutines used for backup itself directly ================
#------------------------------------------------------------------
function prepare_channels {
if [ $Ver10up -ne 1 ]; then
#For 9i databases only (no FRA):
mkdir -p $ONDISK_LOCATION/$db
eval "CH_FORMAT=\"FORMAT '$BACKUP_FORMAT'\"" #eval() to expand $db and $tags
fi
c=1; while [[ $c -le $1 ]]
do
RMAN_CHANNELS="$RMAN_CHANNELS
ALLOCATE $2 CHANNEL ch$c $ALLOCATE_PARMS $CH_FORMAT;
SETLIMIT CHANNEL ch$c $CHANNEL_SETLIMIT;"
RMAN_RELEASE_CHANNELS="$RMAN_RELEASE_CHANNELS
RELEASE CHANNEL ch$c;"
(( c=c+1 ))
done
}
#maintenance channels are used by CHANGE, DELETE and CROSSCHECK commands.
function prepare_maintenance_channels {
#see ML Note 567555.1
# and http://docs.oracle.com/cd/B19306_01/backup.102/b14194/rcmsynta005.htm
#no release for maintenance channels
RMAN_RELEASE_CHANNELS=""
#maintenance channel type DISK is always allocated:
RMAN_CHANNELS="ALLOCATE CHANNEL FOR MAINTENANCE TYPE DISK;"
if [ $BACKUP_TYPE != 'DISK' ]; then
#SBT only if needed:
RMAN_CHANNELS="$RMAN_CHANNELS
ALLOCATE CHANNEL FOR MAINTENANCE $ALLOCATE_PARMS;"
fi
}
function rman_configures {
if [ "x$RAC" = "xRAC" ]; then
#RAC requires controlfile snapshot location to be in a shared filesystem that is
#readable by all instances. http://docs.oracle.com/cd/E11882_01/rac.112/e41960/rman.htm#RACAD851
#Assuming $BASE_PATH/scripts is on a shared filesystem:
SCRIPT="CONFIGURE SNAPSHOT CONTROLFILE NAME TO '$BASE_PATH/scripts/snap_${DB_NAME}.cf';
$SCRIPT"
fi
#Rest of CONFIGURE commands can be run only on a database of Primary role.
#Read 1519386.1: RMAN-5021 this configuration cannot be changed for a BACKUP or STANDBY.
#This could lead to standby having different retention policy from primary.
if [ "x$DB_ROLE" != 'xPRIMARY' ]; then
return
fi
SCRIPT="$RMAN_PRI_CONFIGURES
$SCRIPT"
if [ "x$DG" != 'xDG' ]; then
SCRIPT="CONFIGURE ARCHIVELOG DELETION POLICY CLEAR;
$SCRIPT"
return
fi
#APPLIED ON STANDBY works on 10g/11g, but APPLIED ON ALL STANDBY is 11g only.
#You should put any other configures into $RMAN_HEADER_SCRIPT, so it'll be applied to standby also.
if [ "$Release" -le "10" ] ; then
SCRIPT="CONFIGURE ARCHIVELOG DELETION POLICY TO APPLIED ON STANDBY;
$SCRIPT"
#In 10g DG you also want to set following according to DOC Id 728053.1
#ALTER SYSTEM SET "_LOG_DELETION_POLICY"='ALL' SCOPE=SPFILE;
#TODO: add to check_best_practices() function?
else
SCRIPT="CONFIGURE ARCHIVELOG DELETION POLICY TO APPLIED ON ALL STANDBY;
$SCRIPT"
fi
}
#-------------------------------------------
function archive_log_current {
if [ "x$DB_ROLE" = "xSTANDBY" ]; then
#If we are here, then it is a standby database.
#TODO: check. looks like 11.2.0.4+ tries to switchlog on primary automatically!?
dosql pridb 'SELECT PRIMARY_DB_UNIQUE_NAME FROM V$DATABASE'
if [ "x$pridb" = "x" ]; then
echo "WARN: There is no value in v$database.PRIMARY_DB_UNIQUE_NAME for $db database."
echo "Did this database ever received redo from primary database? Read http://goo.gl/CiV9Tl for PRIMARY_DB_UNIQUE_NAME"
echo "Continuing backup, but *** THIS HAS TO BE FIXED ***"
return
fi
echo "INFO: Current primary database is $pridb. Will issue ALTER SYSTEM ARCHIVE LOG CURRENT there."
dblogin="/@$pridb as sysdba" #assuming you already saved Oracle Wallet for this currently primary database for user SYS
else
#it's primary, so archive log switch should be done locally.
unset dblogin
fi
dosql noreturn "ALTER SYSTEM ARCHIVE LOG CURRENT" "$dblogin"
#TODO: ORA-16014: log xx sequence# xxx not archived, no available destinations
# should be ignored (especially for MODE=ARCH - archive log backups)
# This could happen e.g. if there is no space available but we
# have to run arch log backup anyway...
}
#-------------------------------------------
RENICE_SLEEP=25 #How many seconds to wait before attempt to renice
RENICE_WAITS=10 #How many attempts to wait for rman channels to start
# - So all channels will have time to fully startup.
#This function is used to limit RMAN's ability to hog CPU
# (especially helpful when used with bzip2 compression - 10g default)
function renice_rman {
#TODO: renice also RMAN channels running in parallel on another RAC nodes
# (using RAC nodes' password-less ssh setup)
c=1; while [[ $c -le $RENICE_WAITS ]]
do
sleep $RENICE_SLEEP #wait for all rman channels to start up
dosql PIDS "
SELECT p.spid FROM v\$session s join v\$process p on (s.paddr = p.addr)
WHERE lower(s.client_info) like 'rman%'
AND lower(s.program) like 'rman@%'
AND lower(s.module) like 'backup%'
AND s.logon_time > SYSDATE-$RENICE_SLEEP*$RENICE_WAITS/86400"
PIDS2=`echo "$PIDS" | tr "\n" "," | sed 's/,*$//'`
if [ "x$PIDS2" = "x" ]; then
[ $BACKUP_DEBUG -eq 1 ] && echo "renice_rman(): renice_attempts=$c, no RMAN channels found"
else
#found processes to renice
echo $PIDS | xargs $RENICE
echo "INFO: Reniced Oracle processes participating in RMAN backup to lower priority."
echo "List of RMAN related processes with PIDs $PIDS2:"
ps -o pid,nice,user,time,comm,args -p $PIDS2
return
fi
(( c=c+1 ))
done
if [[ $c -gt $RENICE_WAITS ]]; then
echo "WARN: renice_rman() could not detect active RMAN sessions. Waited too small?"
fi
#NOTE: On some platforms if RMAN creates LOCAL=YES connection, oracle dedicated processes will
# inherit nice property from RMAN processes that is already has priority reniced down.
}
#------------------------------------------------------------------
# === Checks, info and actions after backup ran ===================
#------------------------------------------------------------------
function report_backup_size
{ [ $Ver10up -ne 1 ] && return 0 #V$RMAN_OUTPUT and V$RMAN_STATUS are 10g+
#1. Get the first backup piece handle. Line format is different in debug and non-debug mode, e.g. respectively:
#RMAN-08530: piece handle=igp6dj9m_1_1 tag=ARCH_LOGS comment=API Version 2.0,MMS Version 9.0.0.84
#piece handle=dip6a1do_1_1 tag=ARCH_LOGS comment=API Version 2.0,MMS Version 9.0.0.84
handle=`egrep "^(RMAN-08530: )?piece handle=" $LOGFILE | head -n 1 | cut -d "=" -f 2 | cut -d " " -f 1`
if [ "x$handle" = "x" ] ; then
[ $BACKUP_DEBUG -eq 1 ] && echo "DEBUG: report_backup_size: can't find piece handle in the log file."
return
fi
#2. Query parent (session/rman) row from $RMAN_STATUS for the total backup size.
dosql used_mb "variable handle VARCHAR2(120);
begin :handle := '$handle';
end;
/
WITH q AS (
SELECT /*+ rule */
ROUND(SUM(INPUT_BYTES)/1024/1024,1) inp_mb, ROUND(SUM(OUTPUT_BYTES)/1024/1024,1) out_mb
FROM V\$RMAN_STATUS
WHERE ROW_TYPE='SESSION' and OPERATION='RMAN'
START WITH (RECID, STAMP) =
--(SELECT MAX(session_recid),MAX(session_stamp) FROM V\$RMAN_OUTPUT)
(SELECT /*+ rule */ s2.parent_recid, s2.parent_stamp
FROM sys.V_\$BACKUP_PIECE p JOIN V\$RMAN_STATUS s2 ON (p.rman_status_recid=s2.recid AND p.rman_status_stamp=s2.stamp)
WHERE handle=:handle)
CONNECT BY PRIOR RECID = PARENT_RECID AND PRIOR STAMP = PARENT_STAMP
) SELECT 'Backed up '|| TRIM(TO_CHAR(inp_mb,'99,999,999.9')) ||' Mb of data'
||CASE WHEN out_mb>0.1 and inp_mb/out_mb>1.02 THEN ' (output size is '||TRIM(TO_CHAR(out_mb,'99,999,999.9'))||' Mb)' END
||'.' as size_msg
FROM q"
#Reported output size is smaller for incremental backups and/or for backups with enabled compression.
echo "INFO: $used_mb"
}
#-------------------------------------------
function generate_clone_script {
#See http://docs.oracle.com/cd/B19306_01/backup.102/b14191/rcmdupdb.htm#i1008888
unset RMAN_CHANNELS RMAN_RELEASE_CHANNELS
prepare_channels 2 AUXILIARY
#In all below SQLs
# DECODE(SUBSTR(d.name,1,1), '+', SUBSTR(d.name,1,INSTR(d.name,'/')-1), d.name||'-new')
#used to leave on DG part if ASM is used, otherwise adds "-new" to a filename
dosql CLONE_DATANAMES "SELECT ' SET NEWNAME FOR DATAFILE '''||d.name||''' to '''||
DECODE(SUBSTR(d.name,1,1), '+', SUBSTR(d.name,1,INSTR(d.name,'/')-1), d.name||'-new')
||'''; -- '||t.name||', datafile#'||file#
FROM v\$datafile d JOIN v\$tablespace t ON (t.ts#=d.ts#)
ORDER BY t.name, file#"
dosql CLONE_TEMPNAMES "SELECT ' SET NEWNAME FOR TEMPFILE '''||name||''' to '''||
DECODE(SUBSTR(name,1,1), '+', SUBSTR(name,1,INSTR(name,'/')-1), name||'-new')
||'''; -- tempfile#'||file#
FROM v\$tempfile ORDER BY file#;
select ' SET UNTIL TIME \"to_date('''||to_char(SYSDATE,'Mon DD YYYY HH24:MI:SS')||''',''Mon DD YYYY HH24:MI:SS'')\";' from dual"
dosql CLONE_LOGNAMES "SET SERVEROUTPUT ON size 100000
DECLARE l_member pls_integer := 1; -- current member # in a group
BEGIN
dbms_output.put_line(' LOGFILE ');
FOR r IN (SELECT lf.group#, l.bytes/1024/1024 mb, l.members
, DECODE(SUBSTR(lf.member,1,1), '+', SUBSTR(lf.member,1,INSTR(lf.member,'/')-1), lf.member||'-new') f
FROM v\$logfile lf, v\$log l
WHERE lf.group# = l.group# AND lf.type='ONLINE' -- skip standby logs
ORDER BY lf.group#, member)
LOOP dbms_output.put( CASE WHEN l_member=1 THEN
case when r.group#=1 then ' ' else ' , ' end
|| 'GROUP ' || TO_CHAR(r.group#) || ' ('
ELSE ', '
END || ''''||r.f||'''');
l_member := l_member +1;
IF l_member -1 = r.members THEN
dbms_output.put_line(') SIZE ' || TO_CHAR(r.mb) || 'M ');
l_member := 1;
END IF;
END LOOP;
END;
/
select null from dual where 1=0
"
clone_script=$BASE_PATH/scripts/rmanclone_${db}_`date '+%Y%m%d'`.sh
cat <<CLONESCRIPT > $clone_script
#!/bin/sh
{
export ORACLE_SID=${db}new
export ORAENV_ASK=NO
. oraenv
#-- target is source database ($db) to be cloned
#-- auxiliary is a new database (${db}new) cloned out of target database
\$ORACLE_HOME/bin/rman target /@$db auxiliary / <<RMANSCRIPT
$RMAN_INIT
RUN { $RMAN_CHANNELS
$CLONE_DATANAMES
$CLONE_TEMPNAMES
DUPLICATE TARGET DATABASE TO ${db}new
$CLONE_LOGNAMES
;
}
RMANSCRIPT
} 2>&1 > $clone_script.log
CLONESCRIPT
echo "INFO: RMAN clone script generated as $clone_script"
}
#-------------------------------------------
function check_and_email_results {
rman_ignore_regex="WARNING: |RMAN-005(69|71): ==="
#List of RMAN errors to ignore: 1) all warnings;
#and 2) RMAN-00571 and RMAN-00569 are just used for RMAN error stack header (formatting)
#3) for Commvault, also ignore RMAN-06525: RMAN retention policy is set to none
# and RMAN-03002: failure of report command
[ "x$BACKUP_TYPE" = 'xCommvault' ] &&
rman_ignore_regex="$rman_ignore_regex|RMAN-0(3002|6525)"
[ $BACKUP_DEBUG -eq 1 ] && echo "DEBUG: RMAN errors to ignore regexp=$rman_ignore_regex"
#count number of RMAN errors:
errcount=`egrep "RMAN-[0-9]" $LOGFILE | egrep -v "$rman_ignore_regex" |wc -l`
if [ $errcount -ne 0 ] ; then
echo "Errors ($errcount) detected in $db rman backup"
email "RMAN errors" <<EMAIL
Log file $LOGFILE
Errors:
`egrep "(RMAN|ORA)-[0-9]" $LOGFILE | egrep -v "$rman_ignore_regex" |tail -n 15`
RMAN script used:
$SCRIPT
First 200 lines from the log file:
`head -n 200 $LOGFILE`
....
EMAIL
return
fi
#rest of the subroutine assumes no RMAN/ORA errors
case $MODE in
ARCH|CTRL) ;; #no "ok" emails for archived logs and control file backups
XCHK) #email for crosscheck and report results:
email "IMPORTANT: database recoverability reports!" <<EMAIL
Here are the main reports that show database recoverability and RMAN backups status:
`egrep -v "found to be 'AVAILABLE'|backup piece handle=|validation succeeded for archived log|archived log file name=" $LOGFILE`
For more details look at log file $LOGFILE on `hostname`.
EMAIL
;;
*) #email for all normal FULL/INCR backups:
#wc with redirection (<) does not print file name vs argumented filename
if [ `wc -l < $LOGFILE` -le 350 ] ; then
#If log file is smaller than 350 lines, email it completely:
email "RMAN backup complete" <<EMAIL
Complete contents of the log file $LOGFILE:
`cat $LOGFILE`
EMAIL
else
email "RMAN backup complete" <<EMAIL
First 200 and last 100 lines from log file $LOGFILE:
`head -n 200 $LOGFILE`
....
`tail -n 100 $LOGFILE`
EMAIL
fi
;;
esac
}
#-------------------------------------------
function report_runtime {
S2=$SECONDS
typeset -i minutes
minutes=$(( ($S2 - $S1)/60 ))
seconds=$(( ($S2 - $S1)%60 ))
echo "== Backup script took $minutes minutes $seconds seconds to complete."
}
#-------------------------------------------
function remove_old_files {
echo "INFO: Deleting old log files..."
find $BASE_PATH/log \( -mtime +65 -name "rmanbackup*.log" \
-o -mtime +4 -name "rmanbackup*.rman-trace" \
\) -print -exec rm {} \;
find $BASE_PATH/scripts \( -mtime +65 -name "rmanclone_*.sh" \
-o -mtime +35 -name "controlf-*.ctl-bkpcopy" \
-o -mtime +65 -name "*.*-bkpcopy" \
\) -print -exec rm {} \;
#TODO: add $BASE_PATH/scripts/snap_*.cf? It would be just one file per database...
}
#------------------------------------------------------------------
# === Basic functions =============================================
#------------------------------------------------------------------
function dosql {
typeset -n retvar=$1
login=${3:-/as sysdba}
[ $BACKUP_DEBUG -eq 1 ] && echo "DEBUG: sqlplus $login about to run: $2"
retvar=$( $ORACLE_HOME/bin/sqlplus -S "$login" <<EOF
WHENEVER OSERROR EXIT 5;
WHENEVER SQLERROR EXIT SQL.SQLCODE;
set echo off head off feed off newpage none pagesize 1000 linesize 200
$2;
exit;
EOF
)
#adding RMAN-00000 in case of errors so it will be catched by check_and_email_results() in case of errors
sqlplus_retcode=$?
if [ $sqlplus_retcode -eq 5 ] ; then
echo "ERROR: RMAN-00000: An OS error happened while running sqlplus script: $retvar"
unset retvar
elif [ $sqlplus_retcode -ne 0 ] ; then
echo "ERROR: RMAN-00000: An SQL error happened while running sqlplus script: $retvar"
unset retvar
else
[ $BACKUP_DEBUG -eq 1 ] && echo "DEBUG: sqlplus returned $retvar"
fi
}
#-------------------------------------------
function email {
subject=$1
mailx -s "$db@$HOSTNAME $subject *****" $DBA_EMAIL
echo "INFO: email with subject '$subject' sent to $DBA_EMAIL at `date`"
}
#------------------------------------------------------------------
# === Global variables management =================================
#------------------------------------------------------------------
function parse_params {
eval set -A params $(echo $db | tr ':' ' ')
db=${params[0]}
DG=${params[1]}
orig_MODE=$MODE
}
#-------------------------------------------
function reset_global_vars {
unset DB_ROLE RAC sp_ctlf SCRIPT RMAN_CHANNELS RMAN_RELEASE_CHANNELS PIDS CH_FORMAT
unset CLONE_DATANAMES CLONE_TEMPNAMES CLONE_LOGNAMES handle ctlf_record_keep_time
unset compressed Ver10up Release FRA_PRC_USED_SOFT FRA_PRC_USED_HARD FRA_THRESHOLD pridb
unset p c lines S1 S2 params minutes seconds errcount rman_debug clone_script dt
unset sqlplus_retcode BCT FLASHBACK FLASH_RET outofsync_standbys
unset host pid ps_cmd uuid lock_data remote_lock_uuid DB_NAME
unset last_generated_seq last_received_seq last_applied_seq
#Global variables not to clean (they don't change from a database to database): DG
#
PATH=$orig_PATH:$PATHS
MODE=$orig_MODE
#
S1=$SECONDS
rman_target="/"
#Use Oracle's oraenv to set oracle environment variables for current SID
ORACLE_SID=$db
#Check if ORACLE_SID is correct, so oraenv will not hang asking for correct ORACLE_SID
# (ORAENV_ASK doesn't control this)
ORACLE_HOME=`awk -F: '{if ($1 == "'$ORACLE_SID'") {print $2; exit}}' $ORATAB 2>/dev/null`
if [ "x$ORACLE_HOME" = "x" ]; then
echo "ERROR: RMAN-00000: $ORACLE_SID is not found in $ORATAB on $HOSTNAME. Skipping this database."
return 1
fi
ORAENV_ASK=NO
. oraenv
echo "INFO: ORACLE_HOME=$ORACLE_HOME"
}