First look at the load on the database (see
)
Def v_secs=3600 -- bucket size
Def v_days=1 -- total time analyze
Def v_bars=5 -- size of one AAS in characters
Def v_graph=80
col aveact format 999.99
col graph format a80
col fpct format 9.99
col spct format 9.99
col tpct format 9.99
col aas format 9.99
col pct1 format 999
col pct2 format 999
col first format a15
col second format a15
Def p_value=4
select to_char(start_time,'DD HH24:MI'),
--samples,
--total,
--waits,
--cpu,
(total/&v_secs) aas,
--round(fpct * (total/&v_secs),2) aas1,
fpct*100 pct1,
decode(fpct,null,null,first) first,
--round(spct * (total/&v_secs),2) aas2,
spct*100 pct2,
decode(spct,null,null,second) second,
-- substr, ie trunc, the whole graph to make sure it doesn't overflow
substr(
-- substr, ie trunc, the graph below the # of CPU cores line
-- draw the whole graph and trunc at # of cores line
substr(
rpad('+',round((cpu*&v_bars)/&v_secs),'+') ||
rpad('o',round((io*&v_bars)/&v_secs),'o') ||
rpad('-',round((waits*&v_bars)/&v_secs),'-') ||
rpad(' ',&p_value * &v_bars,' '),0,(&p_value * &v_bars)) ||
&p_value ||
-- draw the whole graph, then cut off the amount we drew before the # of cores
substr(
rpad('+',round((cpu*&v_bars)/&v_secs),'+') ||
rpad('o',round((io*&v_bars)/&v_secs),'o') ||
rpad('-',round((waits*&v_bars)/&v_secs),'-') ||
rpad(' ',&p_value * &v_bars,' '),(&p_value * &v_bars),( &v_graph-&v_bars*&p_value) )
,0,&v_graph)
graph
-- spct,
-- decode(spct,null,null,second) second,
-- tpct,
-- decode(tpct,null,null,third) third
from (
select start_time
, max(samples) samples
, sum(top.total) total
, round(max(decode(top.seq,1,pct,null)),2) fpct
, substr(max(decode(top.seq,1,decode(top.event,'ON CPU','CPU',event),null)),0,15) first
, round(max(decode(top.seq,2,pct,null)),2) spct
, substr(max(decode(top.seq,2,decode(top.event,'ON CPU','CPU',event),null)),0,15) second
, round(max(decode(top.seq,3,pct,null)),2) tpct
, substr(max(decode(top.seq,3,decode(top.event,'ON CPU','CPU',event),null)),0,10) third
, sum(waits) waits
, sum(io) io
, sum(cpu) cpu
from (
select
to_date(tday||' '||tmod*&v_secs,'YYMMDD SSSSS') start_time
, event
, total
, row_number() over ( partition by id order by total desc ) seq
, ratio_to_report( sum(total)) over ( partition by id ) pct
, max(samples) samples
, sum(decode(event,'ON CPU',total,0)) cpu
, sum(decode(event,'ON CPU',0,
'db file sequential read',0,
'db file scattered read',0,
'db file parallel read',0,
'direct path read',0,
'direct path read temp',0,
'direct path write',0,
'direct path write temp',0, total)) waits
, sum(decode(event,'db file sequential read',total,
'db file scattered read',total,
'db file parallel read',total,
'direct path read',total,
'direct path read temp',total,
'direct path write',total,
'direct path write temp',total, 0)) io
from (
select
to_char(sample_time,'YYMMDD') tday
, trunc(to_char(sample_time,'SSSSS')/&v_secs) tmod
, to_char(sample_time,'YYMMDD')||trunc(to_char(sample_time,'SSSSS')/&v_secs) id
, decode(ash.session_state,'ON CPU','ON CPU',ash.event) event
, sum(decode(session_state,'ON CPU',10,decode(session_type,'BACKGROUND',0,10))) total
, (max(sample_id)-min(sample_id)+1) samples
from
dba_hist_active_sess_history ash
where
-- sample_time > sysdate - &v_days
-- and sample_time < ( select min(sample_time) from v$active_session_history)
dbid=&DBID
group by trunc(to_char(sample_time,'SSSSS')/&v_secs)
, to_char(sample_time,'YYMMDD')
, decode(ash.session_state,'ON CPU','ON CPU',ash.event)
) chunks
group by id, tday, tmod, event, total
) top
group by start_time
) aveact
order by start_time
/
The output of the above query should look like
TO_CHAR( AAS PCT1 FIRST PCT2 SECOND GRAPH
-------- ----- ---- --------------- ---- --------------- -------------------------------------------
16 00:00 7.02 52 db file sequent 27 CPU ++++++++++oooooooooo4ooooooooooooooo--
16 01:00 3.80 57 CPU 36 db file sequent +++++++++++oooooooo 4
16 02:00 5.51 38 db file sequent 21 CPU ++++++oooooooooooo--4---------
16 03:00 2.89 69 db file sequent 20 CPU +++ooooooooooo 4
16 04:00 .34 45 db file sequent 28 CPU o 4
16 05:00 .78 58 db file sequent 24 CPU +ooo 4
16 06:00 .59 41 db file sequent 39 CPU +oo 4
16 07:00 .19 57 CPU 21 db file sequent + 4
16 08:00 .18 57 CPU 28 db file scatter + 4
16 09:00 .50 69 db file sequent 24 CPU +oo 4
16 10:00 1.79 79 db file sequent 13 CPU +ooooooo 4
16 11:00 2.51 76 db file sequent 12 CPU +ooooooooooo 4
The “graph” on the right shows the load over time each line is an hour by default.
The “+” represent CPU, “o” represent I/O and “-” represent a wait. The columns “FIRST” and “SECOND” represent the top two things happening on the database.
In the above data the main activity is time spent waiting for I/O. The next question is “what does I/O latency look like?” In this particular case, the wait is specifically on “db file sequential read”, so the next query looks at latency for “db file sequential read”
set pagesize 100
col event_name format a30
col avg_ms format 99999.99
col ct format 999,999,999
select
btime, event_name,
(time_ms_end-time_ms_beg)/nullif(count_end-count_beg,0) avg_ms,
(count_end-count_beg) ct
from (
select
e.event_name,
to_char(s.BEGIN_INTERVAL_TIME,'DD-MON-YY HH24:MI') btime,
total_waits count_end,
time_waited_micro/1000 time_ms_end,
Lag (e.time_waited_micro/1000)
OVER( PARTITION BY e.event_name ORDER BY s.snap_id) time_ms_beg,
Lag (e.total_waits)
OVER( PARTITION BY e.event_name ORDER BY s.snap_id) count_beg
from
DBA_HIST_SYSTEM_EVENT e,
DBA_HIST_SNAPSHOT s
where
s.snap_id=e.snap_id
--and e.wait_class in ( 'User I/O', 'System I/O')
-- and e.event_name in ( 'db file sequential read',
-- 'db file scattered read',
-- 'db file parallel read',
-- 'direct path read',
-- 'direct path read temp',
-- 'direct path write',
-- 'direct path write temp')
and e.event_name in ( 'db file sequential read')
and e.dbid=&DBID
and e.dbid=s.dbid
order by e.event_name, begin_interval_time
)
where (count_end-count_beg) > 0
order by event_name,btime
/
The above query has commented out all but ‘db file sequential read’ (ie single block reads) to make the output easier to read, but if there are other I/O waits then the query should be changed to get the latencies of those other types of I/O waits. The output of the above query should look something like
BTIME EVENT_NAME AVG_MS CT
--------------- ------------------------------ --------- ------------
15-JAN-12 15:04 db file sequential read 12.66 854,744
15-JAN-12 16:00 db file sequential read 9.21 1,242,296
15-JAN-12 17:00 db file sequential read 13.47 889,314
15-JAN-12 18:00 db file sequential read 6.16 1,109,994
15-JAN-12 19:00 db file sequential read 1.86 215,607
15-JAN-12 20:00 db file sequential read 2.12 185,727
15-JAN-12 21:00 db file sequential read 1.60 1,281,880
15-JAN-12 22:00 db file sequential read 7.16 327,645
15-JAN-12 23:00 db file sequential read 7.45 778,819
16-JAN-12 00:00 db file sequential read 19.02 674,674
16-JAN-12 01:00 db file sequential read 7.38 651,391
16-JAN-12 02:01 db file sequential read 6.21 1,194,226
16-JAN-12 03:00 db file sequential read 7.33 959,630
16-JAN-12 04:00 db file sequential read 2.59 201,810
16-JAN-12 05:00 db file sequential read 4.05 419,692
16-JAN-12 06:00 db file sequential read 2.71 300,792
16-JAN-12 07:00 db file sequential read .83 141,872
16-JAN-12 08:00 db file sequential read .84 157,132
16-JAN-12 09:00 db file sequential read 2.70 456,984
16-JAN-12 10:00 db file sequential read 4.07 1,234,105
16-JAN-12 11:00 db file sequential read 6.19 1,089,680
The above latencies look ok, ie under 10ms which is fine for I/O from physical spindle but the customer might be use to latencies from UNIX file cache reads or form SAN cache reads.
Where is the I/O coming from?
col type for a16
col "CPU" for 999999.9
col "IO" for 999999.9
select * from (
select
ash.SQL_ID , ash.SQL_PLAN_HASH_VALUE Plan_hash, aud.name type,
sum(decode(ash.session_state,'ON CPU',1,0)) "CPU",
sum(decode(ash.session_state,'WAITING',1,0)) -
sum(decode(ash.session_state,'WAITING', decode(wait_class, 'User I/O',1,0),0)) "WAIT" ,
sum(decode(ash.session_state,'WAITING', decode(wait_class, 'User I/O',1,0),0)) "IO" ,
sum(decode(ash.session_state,'ON CPU',1,1)) "TOTAL"
from dba_hist_active_sess_history ash,
audit_actions aud
where SQL_ID is not NULL
and ash.dbid=&DBID
and ash.sql_opcode=aud.action
group by sql_id, SQL_PLAN_HASH_VALUE , aud.name
order by sum(decode(session_state,'ON CPU',1,1)) desc
) where rownum < 20
/
Output looks like
SQL_ID PLAN_HASH TYPE CPU WAIT IO TOTAL
------------- ---------- ---------------- --------- ---------- --------- ----------
fgzp9yqqjcjvm 707845071 UPDATE 25.0 95 4081.0 4201
8u8y8mc1qxd98 131695425 SELECT 18.0 57 3754.0 3829
cfk8gy594h42s 3743737989 SELECT 2021.0 17 82.0 2120
cnx6ht8bdmf4c 0 PL/SQL EXECUTE 546.0 367 868.0 1781
gyj8wh7vx960y 1736948211 SELECT 197.0 11 1227.0 1435
b6jarjrw5d6tq 2817015872 INSERT 548.0 131 101.0 780
Look at the stats for the top SQL:
col avg_elapsed_sec for 9,999,999,999.999
col disk for 9,999,999.9
col lio for 9,999,999.9
col cpu_sec for 99,999,999.999
col io_time_sec for 9,999,999.999
col ap_time_sec for 9,999,999.999
col cc_time_sec for 9,999,999.999
select
to_char(snap.begin_interval_time,'DD-MON-YYYY HH24') ,
(sum(ELAPSED_TIME_DELTA)/nullif(sum(EXECUTIONS_DELTA),0))/1000000 avg_elapsed_sec,
sum(EXECUTIONS_DELTA) execs,
sum(DISK_READS_DELTA)/nullif(sum(EXECUTIONS_DELTA),0) disk,
sum(BUFFER_GETS_DELTA)/nullif(sum(EXECUTIONS_DELTA),0) lio,
sum(ROWS_PROCESSED_DELTA)/nullif(sum(EXECUTIONS_DELTA),0) rws,
(sum(CPU_TIME_DELTA)/nullif(sum(EXECUTIONS_DELTA),0))/1000000 cpu_sec,
(sum(IOWAIT_DELTA)/nullif(sum(EXECUTIONS_DELTA),0))/1000000 io_time_sec,
(sum(APWAIT_DELTA)/nullif(sum(EXECUTIONS_DELTA),0))/1000000 ap_time_sec,
(sum(CCWAIT_DELTA)/nullif(sum(EXECUTIONS_DELTA),0))/1000000 cc_time_sec,
sum(DIRECT_WRITES_DELTA)/nullif(sum(EXECUTIONS_DELTA),0) dio,
sum(PHYSICAL_READ_REQUESTS_DELTA)/nullif(sum(EXECUTIONS_DELTA),0) reads,
sum(PHYSICAL_WRITE_REQUESTS_DELTA)/nullif(sum(EXECUTIONS_DELTA),0) writes
from dba_hist_sqlstat sql,
dba_hist_snapshot snap
where sql.sql_id='&sql_id'
and sql.dbid=&dbid
and sql.dbid=snap.dbid
and snap.snap_id=sql.snap_id
group by to_char(snap.begin_interval_time,'DD-MON-YYYY HH24')
order by 1
/
Output looks like
TO_CHAR(SNAP.B AVG_ELAPSED EXECS DISK LIO RWS CPU_SEC IO_TIME_SEC AP_TIME_SEC CC_TIME_SEC DIO
-------------- ------------------ ---------- ------------ ------------ ---------- --------------- -------------- -------------- -------------- ---------- ---
15-JAN-2012 23 3,012.128 1 315,363.0 2,394,156.0 0 37.624 2,889.440 .000 .000 0
16-JAN-2012 05 1,111.465 1 252,377.0 2,625,538.0 0 28.885 1,090.000 .000 .000 0
Get the SQL Text
set long 10000
select sql_text from dba_hist_sqltext
where sql_id = '&1'
and rownum < 2
/
Look at I/O sizes for multiblock reads. For multiblock reads, it’s good to have an idea of what the I/O sizes are. The following query gives an upper bound. NOTE the averages and even mins can be highly misleading but the max should be a good indicator.
col event for a25
select event,round(min(p3)) mn,
round(avg(p3)) av,
round(max(p3)) mx,
count(*) cnt
from dba_hist_active_sess_history
--from v$active_session_history
where (event like 'db file%' or event like 'direct %')
and event not like '%parallel%'
and dbid=&DBID
group by event
order by event
/
looks like
EVENT MN AV MX CNT
------------------------- ---------- ---------- ---------- ----------
db file scattered read 2 16 16 892
db file sequential read 1 1 1 105
direct path read 1 1 1 1
direct path write 1 1 1 2
direct path write temp 4 29 31 17
Comments