# Database warm-up model: estimated transactions per second (tps) while the
# page cache fills, plotted against elapsed minutes.
#
# The PNG is written to standard output because no `set output` is given;
# redirect it to a file, e.g.  gnuplot <this script> > warmup.png
#
# Hardware and data set assumptions:
#   7200 rpm = 120 rps = 240 sequential reads per second
#   database size 1.5 GB, large table size 1.1 GB

# table & index sizes, in 8 kB pages
Pt = 163968
Pi = 27392
P = Pt + Pi

# cache & hard disk performance, in ms per page access
# Tch: cache access time (measured)
Tch = 0.22
# typical avg seek time: 3 ms high end to 12-15 ms for mobile drives
# Thd: disk access time; 4.17 ms = 60 s / (2*7200) rotational latency,
# i.e. 4.7 ms assumes a low seek time
Thd = 4.7 + Tch

# parametric formulas, transaction number i >= 0, n = number of pages
#
# NOTE: gnuplot truncates integer / integer division, and the page counts
# above are integers, so the quotient is forced to floating point with the
# -1.0 factor. Without it p(1000, P) would evaluate exp(0) and return 0.

# probability that a page is already loaded, i.e. cache hit rate
# (the cache miss rate is 1 - p(i,n) = exp(-i / n))
p(i,n) = 1.0 - exp(-1.0 * i / n)

# expected duration of transaction number i, in ms
#   = (Tslow - Tfast) exp(-i / n) + Tfast
d(i,n,Tslow,Tfast) = Tslow * (1.0 - p(i,n)) + Tfast * p(i,n)

# cumulative time of the first i transactions, in ms
# (integral of d from 0 to i)
t(i,n,Tslow,Tfast) = Tfast * i + n * (Tslow - Tfast) * (1.0 - exp(-1.0 * i / n))

# simple model: one access per transaction over all P pages
dS(i) = d(i,P,Thd,Tch)
tS(i) = t(i,P,Thd,Tch)
tpsS(i) = 1000.0 / dS(i)

# table & index model: one table access plus one index access per
# transaction, the cache time being split 20% / 80% between them
dTI(i) = d(i,Pt,Thd,0.2*Tch) + d(i,Pi,Thd,0.8*Tch)
tTI(i) = t(i,Pt,Thd,0.2*Tch) + t(i,Pi,Thd,0.8*Tch)
tpsTI(i) = 1000.0 / dTI(i)

# By computing the zeros of the second derivative of tps(i), we find that
# the maximum tps speed variation occurs when
#   exp(-i / n) = Tfast / (Tslow - Tfast)
# i.e.
#   i = n ln((Tslow - Tfast) / Tfast)
#   i ~ 191360 * 3.06 ~ 586000
# that is after about 16.4 mn for the simple model
# (tS(586000) ~ 986000 ms).

# generate a portable image
set terminal png

# compose
set title "database warmup formula"
set xlabel "minutes"
set xtics 2
set ytics 500
set ylabel "tps"
set key top left

# both plots are drawn on the same page, the second on top of the first
set multiplot
set parametric

# plot 2 models as curves (x = elapsed minutes, y = tps)
plot [i=0:1500000] [0:20] [0:5000] \
  tS(i)/60000.0,tpsS(i) title "simple model" lt rgb "red", \
  tTI(i)/60000.0,tpsTI(i) title "table+index model" lt rgb "blue"

# add points every 100,000 transactions: 16 samples over [0:1500000] fall on
# 0, 100000, ..., 1500000 (15 samples would be spaced 1500000/14 apart)
set samples 16
plot [i=0:1500000] [0:20] [0:5000] \
  tS(i)/60000.0,tpsS(i) with points notitle lt rgb "red", \
  tTI(i)/60000.0,tpsTI(i) with points notitle lt rgb "blue"

# optional extra curves that can be appended to the plot commands above:
#   number of transactions: tTI(i)/60000.0,i/350
#   the filling is linear:  tTI(i)/60000.0,5000*p(i,Pt)

# finish the multiplot page so that the image is completed
unset multiplot