Main page: https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/crimtab
head(crimtab)
## 142.24 144.78 147.32 149.86 152.4 154.94 157.48 160.02 162.56 165.1 167.64
## 9.4 0 0 0 0 0 0 0 0 0 0 0
## 9.5 0 0 0 0 0 1 0 0 0 0 0
## 9.6 0 0 0 0 0 0 0 0 0 0 0
## 9.7 0 0 0 0 0 0 0 0 0 0 0
## 9.8 0 0 0 0 0 0 1 0 0 0 0
## 9.9 0 0 1 0 1 0 1 0 0 0 0
## 170.18 172.72 175.26 177.8 180.34 182.88 185.42 187.96 190.5 193.04 195.58
## 9.4 0 0 0 0 0 0 0 0 0 0 0
## 9.5 0 0 0 0 0 0 0 0 0 0 0
## 9.6 0 0 0 0 0 0 0 0 0 0 0
## 9.7 0 0 0 0 0 0 0 0 0 0 0
## 9.8 0 0 0 0 0 0 0 0 0 0 0
## 9.9 0 0 0 0 0 0 0 0 0 0 0
require(stats)
dim(crimtab)
## [1] 42 22
utils::str(crimtab)
## 'table' int [1:42, 1:22] 0 0 0 0 0 0 1 0 0 0 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:42] "9.4" "9.5" "9.6" "9.7" ...
## ..$ : chr [1:22] "142.24" "144.78" "147.32" "149.86" ...
## for nicer printing:
local({cT <- crimtab
colnames(cT) <- substring(colnames(cT), 2, 3)
print(cT, zero.print = " ")
})
## 42 44 47 49 52 54 57 60 62 65 67 70 72 75 77 80 82 85 87 90 93 95
## 9.4
## 9.5 1
## 9.6
## 9.7
## 9.8 1
## 9.9 1 1 1
## 10 1 1 2 2 1
## 10.1 1 3 1 1 1
## 10.2 2 2 2 1 2 1
## 10.3 1 1 3 2 2 3 5
## 10.4 1 1 2 3 3 4 3 3
## 10.5 1 3 7 6 4 3 1 3 1 1
## 10.6 1 4 5 9 14 6 3 1 1
## 10.7 1 2 4 9 14 16 15 7 3 1 2
## 10.8 2 5 6 14 27 10 7 1 2 1
## 10.9 2 6 14 24 27 14 10 4 1
## 11 2 6 12 15 31 37 27 17 10 6
## 11.1 3 3 12 22 26 24 26 24 7 4 1
## 11.2 3 2 7 21 30 38 29 27 20 4 1 1
## 11.3 1 5 10 24 26 39 26 24 7 2
## 11.4 3 4 9 29 56 58 26 22 10 11
## 11.5 5 11 17 33 57 38 34 25 11 2
## 11.6 2 1 4 13 37 39 48 38 27 12 2 2 1
## 11.7 2 9 17 30 37 48 45 24 9 9 2
## 11.8 1 2 11 15 35 41 34 29 10 5 1
## 11.9 1 1 2 12 10 27 32 35 19 10 9 3 1
## 12 1 4 8 19 42 39 22 16 8 2 2
## 12.1 2 4 13 22 28 15 27 10 4 1
## 12.2 1 2 5 6 23 17 16 11 8 1 1
## 12.3 4 8 10 13 20 23 6 5
## 12.4 1 1 1 2 7 12 4 7 7 1 1
## 12.5 1 1 3 12 11 8 6 8 2
## 12.6 1 3 5 7 8 6 3 1 1
## 12.7 1 1 7 5 5 8 2 2
## 12.8 1 2 3 1 8 5 3 1 1
## 12.9 1 2 2 1 1
## 13 3 1 1 2 1
## 13.1 1 1
## 13.2 1 1 1 3
## 13.3 1 1
## 13.4
## 13.5 1
## Repeat Student's experiment:
# 1) Reconstitute 3000 raw data for heights in inches and rounded to
# nearest integer as in Student's paper:
(heIn <- round(as.numeric(colnames(crimtab)) / 2.54))
## [1] 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
d.hei <- data.frame(height = rep(heIn, colSums(crimtab)))
# 2) shuffle the data:
set.seed(1)
d.hei <- d.hei[sample(1:3000), , drop = FALSE]
# 3) Make 750 samples each of size 4:
d.hei$sample <- as.factor(rep(1:750, each = 4))
# 4) Compute the means and standard deviations (n) for the 750 samples:
h.mean <- with(d.hei, tapply(height, sample, FUN = mean))
h.sd <- with(d.hei, tapply(height, sample, FUN = sd)) * sqrt(3/4)
# 5) Compute the difference between the mean of each sample and
# the mean of the population and then divide by the
# standard deviation of the sample:
zobs <- (h.mean - mean(d.hei[,"height"]))/h.sd
# 6) Replace infinite values by +/- 6 as in Student's paper:
zobs[infZ <- is.infinite(zobs)] # none of them
## named numeric(0)
zobs[infZ] <- 6 * sign(zobs[infZ])
# 7) Plot the distribution:
require(grDevices); require(graphics)
hist(x = zobs, probability = TRUE, xlab = "Student's z",
col = grey(0.8), border = grey(0.5),
main = "Distribution of Student's z score for 'crimtab' data")