Here I provide a exact one sample or paired sample test for mean(s), which is the exact version of permutation test to compare a single sample against a mean or to compare paired samples’ differences against a mean.
The algorithm in this function is based on Bryan F. J. Manly. 1997. Randomization, bootstrap and Monte Carlo methods in biology. 2nd edition. pp. 91-97. That is, all possible permutations (i.e. all possible cases of exchange between $x_i$ and $\mu$ in one-sample test or all possible cases of exchange between $x_{1i}$ and $x_{2i}$) in paired-sample test are processed to calculate a exact p-value.
R code
# Copyright 2014 Chen-Pan Liao | |
# This program is free software: you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 3 of the License, or | |
# any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
# To understand more details about this function, see: | |
# Bryan F. J. Manly. 1997. | |
# Randomization, bootstrap and Monte Carlo methods | |
# in biology. 2nd edition. pp. 91-97. | |
# Also see http://apansharing.blogspot.tw/2014/01/ | |
# a-r-function-exact-onepaired-sample.html | |
## main function | |
exactOneOrPairedSampleTest <- function( | |
x1, | |
x2 = NULL, | |
mu = 0, | |
alternative = c("t","g","l") | |
# x1 A numeric vector; necessary | |
# x2 A numeric vector; necessary for paired test | |
# mu A numeric value; | |
# for one sample test: H0: x1 = mu; | |
# for pired sample test: H0: x1 - x2 = mu. | |
# alternative A string to define the rejected area; | |
# "t" for H1: abs(difference) > abs(mu) | |
# "g" for H1: difference > mu | |
# "l" for H1: difference < mu | |
# where difference is x1 for one sample test | |
# or x1-x2 for paired sample | |
) { | |
# determing one sample or paired sampe | |
# var is.onesample | |
if (is.null(x2)) { | |
is.onesample <- T | |
} else { | |
is.onesample <- F | |
} | |
# initiation | |
# var DF, N | |
x1.name <- deparse(substitute(x1)) | |
x2.name <- deparse(substitute(x2)) | |
alternative <- alternative[1] | |
mu <- as.numeric(mu) | |
x1 <- as.numeric(x1) | |
if(is.onesample) { | |
DF <- data.frame (x1, mu = mu) | |
} else { | |
x2 <- as.numeric(x2) | |
DF <- data.frame (x1, x2) | |
} | |
DF <- na.omit(DF) | |
n <- length(DF[,1]) | |
N <- 2^n | |
# test statistic | |
# var diff, test | |
if (is.onesample) { | |
DF$diff <- DF$x1 - mu | |
} else { | |
DF$diff <- DF$x1 - DF$x2 - mu | |
} | |
test.0 = mean(DF$diff) | |
# binary matrix | |
# var bmat | |
bmat <- as.matrix(expand.grid(rep( list(c(1,-1)) , n))) | |
# exact permutation | |
test.perm <- numeric(N) | |
for (i in 1:N) { | |
diff.perm.sum <- DF$diff * bmat[i,] | |
test.perm[i] <- mean(diff.perm.sum) | |
} | |
# p-value | |
if (alternative == "t") { | |
rejected.N <- sum( abs(test.perm) >= abs(test.0) ) | |
} | |
if (alternative == "g") { | |
rejected.N <- sum(test.perm >= test.0) | |
} | |
if (alternative == "l") { | |
rejected.N <- sum(test.perm <= test.0) | |
} | |
p.value <- rejected.N / N | |
# return | |
out <- list( | |
x1 = x1, | |
x2 = x2, | |
x1.name = x1.name, | |
x2.name = x2.name, | |
n = n, | |
mu = mu, | |
test.0 = test.0, | |
is.onesample = is.onesample, | |
alternative = alternative, | |
test.perm = test.perm, | |
DF = DF, | |
N = N, | |
p.value = p.value, | |
rejected.N = rejected.N | |
) | |
class(out) <- "exactOneOrPairedSampleTest" | |
return (out); | |
} | |
## print function | |
print.exactOneOrPairedSampleTest <- function(w){ | |
if (w$is.onesample){ | |
cat("\n\tExact one sample test\n\n") | |
} else { | |
cat("\tExact paired sample test\n\n") | |
} | |
cat("Alternative hypothesis: ") | |
if (w$is.onesample){ | |
if (w$alternative == "t") { | |
cat("mean of", w$x1.name, "is not equal to", w$mu, "\n") | |
} | |
if (w$alternative == "g") { | |
cat("mean of", w$x1.name, "is greater than", w$mu) | |
} | |
if (w$alternative == "l") { | |
cat("mean of", w$x1.name, "is less than", w$mu) | |
} | |
} else { | |
if (w$alternative == "t") { | |
cat("mean of", w$x1.name, "-", w$x2.name, "is not equal to", w$mu) | |
} | |
if (w$alternative == "g") { | |
cat("means of", w$x1.name, "-", w$x2.name, "is greater than", w$mu) | |
} | |
if (w$alternative == "l") { | |
cat("means of", w$x1.name, "-", w$x2.name, "is less than", w$mu) | |
} | |
} | |
cat("\n") | |
if (w$is.onesample){ | |
cat( | |
paste( | |
"mean(", w$x1.name, ") - mu = ", w$test.0, "\n", | |
sep="" | |
)) | |
} | |
if (!w$is.onesample){ | |
cat( | |
paste( | |
"mean((", w$x1.name, ")-(", w$x2.name, ")) - mu = ", | |
w$test.0, | |
"\n", | |
sep="" | |
) | |
) | |
} | |
cat("Number of total permutation =", w$N, "\n") | |
cat("Number of rejected permutation =", w$rejected.N, "\n") | |
cat("P-value =", w$p.value, "\n") | |
} | |
## hist function | |
hist.exactOneOrPairedSampleTest <- function(w){ | |
if (w$is.onesample){ | |
hist( | |
w$test.perm, breaks=60, | |
xlab=paste( | |
"mean(", w$x1.name, ") - mu", | |
sep="" | |
), main="" | |
) | |
} else { | |
hist( | |
w$test.perm, breaks=60, | |
xlab=paste( | |
"mean((", w$x1.name, ")-(", w$x2.name, ")) - mu", | |
sep="" | |
), main="" | |
) | |
} | |
title("Histogram of exact permutation") | |
abline(v=w$test.0, lty=2) | |
if (w$alternative == "t") { | |
abline(v=w$test.0*(-1), lty=2) | |
} | |
} |
Arguments
The usage of this function exactOneOrPairedSampleTest()
is very similar to the R built-in function t.test()
. There are four arguments:
x1
: a numeric vector specifying $x_{1i}$x2 = NULL
: a numeric vector specifying $x_{2i}$ (in paired-sample case)mu = 0
: a number specifying true mean or true difference $\mu$alternative = c("t","g","l")
a single character specifying $H_0$: true mean of $x_1$ or mean of $x_{1i} - x_{2i}$ is equal, greater or less than $\mu$, respectively.
Example 1: one-sample test
Let $x_i = \{43,67,64,64,51,53,53,26,36,48,34,48,6 \}$, $i=1 \ldots 13$:
> x <- c(43,67,64,64,51,53,53,26,36,48,34,48,6)
Now we can call the above function exactOneOrPairedSampleTest()
to test $H_0$: true mean of $x_i = 56$ against $H_A$: true mean of $x_i \neq 56$:
> test1 <- exactOneOrPairedSampleTest(x, alternative="t", mu=56) > test1 Exact one sample test Alternative hypothesis: true mean is not equal to 56 mean(x) - mu = -10.3846153846154 Number of total permutation = 8192 Number of rejected permutation = 364 P-value = 0.04443359
The results show that:
- $\sum_{i=1}^{13} x_i / 13 - \mu = 45.61538 - 56 = -10.38462$;
- Totally $8192 = 2^{13}$ permutations are processed;
- 364 permutations do not support $H_0$;
- $P = 364/2^{13} = 0.0444$.
We can also call the function hist.exactOneOrPairedSampleTest()
:
> hist(test1)
As you can see, the vertical lines $x=\pm 10.38462$ shows the critical boundary of rejecting $H_0$. Note that it is a two-tail test.
Finally, you may call the returned object:
> str(test1) List of 14 $ x1 : num [1:13] 43 67 64 64 51 53 53 26 36 48 ... $ x2 : NULL $ x1.name : chr "x" $ x2.name : chr "NULL" $ n : int 13 $ mu : num 56 $ test.0 : num -10.4 $ is.onesample: logi TRUE $ alternative : chr "t" $ test.perm : num [1:8192] -10.38 -8.38 -12.08 -10.08 -11.62 ... $ DF :'data.frame': 13 obs. of 3 variables: ..$ x1 : num [1:13] 43 67 64 64 51 53 53 26 36 48 ... ..$ mu : num [1:13] 56 56 56 56 56 56 56 56 56 56 ... ..$ diff: num [1:13] -13 11 8 8 -5 -3 -3 -30 -20 -8 ... $ N : num 8192 $ p.value : num 0.0444 $ rejected.N : int 364 - attr(*, "class")= chr "exactOneOrPairedSampleTest"
to get more details of the results if you need them.
Example 2: paired-sample test
Let $x_{1i} = \{92, 0,72,80,57,76,81,67,50,77,90\}$ and $x_{2i} = \{43,67,64,64,51,53,53,26,36,48,34\}$, where each $i$ is paired and $i=1 \ldots 11$:
> x1 <- c(92, 0,72,80,57,76,81,67,50,77,90) > x2 <- c(43,67,64,64,51,53,53,26,36,48,34)
Now we can call the function exactOneOrPairedSampleTest()
to test $H_0$: true mean of $x_{1i} - x_{2i} \leq 10$ against $H_A$: true mean of $x_{1i} - x_{2i} > 10$:
> test2 <- exactOneOrPairedSampleTest(x1, x2, alternative="g", mu=10) > # equivalent to test2 <- exactOneOrPairedSampleTest(x1 - x2, alternative="g", mu=10) > test2 Exact paired sample test Alternative hypothesis: means of x1 - x2 is greater than 10 mean((x1)-(x2)) - mu = 8.45454545454546 Number of total permutation = 2048 Number of rejected permutation = 445 P-value = 0.2172852
The results show that:
- $\sum_{i=1}^{11} (x_{1i}-x_{2i}) / 11 - 10 = 8.45455$;
- Totally $2048 = 2^{11}$ permutations are processed;
- 445 permutations do not support $H_0$;
- $P = 445/2^{11} = 0.2172852$.
We can also call the function hist.exactOneOrPairedSampleTest()
:
> hist(test2)
As you can see, the vertical lines $x=8.45455$ shows the critical boundary of rejecting $H_0$. Note that this is a right-tail test.
Again, you may call the returned object:
> str(test2) List of 14 $ x1 : num [1:11] 92 0 72 80 57 76 81 67 50 77 ... $ x2 : num [1:11] 43 67 64 64 51 53 53 26 36 48 ... $ x1.name : chr "x1" $ x2.name : chr "x2" $ n : int 11 $ mu : num 10 $ test.0 : num 8.45 $ is.onesample: logi FALSE $ alternative : chr "g" $ test.perm : num [1:2048] 8.45 1.36 22.45 15.36 8.82 ... $ DF :'data.frame': 11 obs. of 3 variables: ..$ x1 : num [1:11] 92 0 72 80 57 76 81 67 50 77 ... ..$ x2 : num [1:11] 43 67 64 64 51 53 53 26 36 48 ... ..$ diff: num [1:11] 39 -77 -2 6 -4 13 18 31 4 19 ... $ N : num 2048 $ p.value : num 0.217 $ rejected.N : int 445 - attr(*, "class")= chr "exactOneOrPairedSampleTest"
to get more details of the results if you need them.