# Simulating different ways to correct for bias in estimates of sd. sd.real = 15 # correct standard deviation n.samples = 3000 # number of samples to collect for each sample size sizes = c(2,3,5,10,20,60,100,200) # sample sizes to evaluate on n.sizes = length(sizes) plotResult = function(constant) { # Simulating data set.seed(42) i = 1 estimates = rep(NA, n.samples*n.sizes) for(size in sizes) { for(sample in 1:n.samples) { # Draw sample from population x = rnorm(size, 0, sd.real) # Calculate population SD given this sample estimates[i] = sqrt(sum((x - mean(x))^2) / (length(x) - constant)) i = i + 1 } } # Plot individual data points title = paste('constant=', round(constant, 3), ', error=', round(mean(estimates) - sd.real, 3)) plot(estimates, main=title, ylim=c(0, 2*sd.real)) # Plot estimates for each sample size abline(h=sd.real, col='red', lwd=3) # real sd for(i in 1:n.sizes) { abline(v=i*n.samples, col='grey') # vertical lines start = (i-1)*n.samples end = i*n.samples sd.sim = mean(estimates[start:end]) segments(start, sd.sim, end, sd.sim, col='blue', lwd=3, lend='butt') # estimated sd } } # Do simulations using different constants par(mfrow=c(2,2)) plotResult(0) # no correction, just sample sd (heavily biased) plotResult(sqrt(1)) # Bessel's correction (somewhat biased) plotResult(sqrt(2)) # Optimal? plotResult(1.5) # The "rule of thumb" unbiased correction