A R function applying G-test for 2-way contingency data in R is demonstrated. Yates’ correction and Williams’ correction are also included in the output.
R Source
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2013 Chen-Pan Liao | |
# This program is free software: you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 2 of the License, or | |
# any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
g.test.2way <- function( | |
x # must be a 2-way xtabs/matrix | |
) | |
{ | |
## initiating | |
data.o <- x | |
sum.total <- sum(x) | |
sum.row <- rowSums(x) | |
sum.col <- colSums(x) | |
n.row <- dim(x)[1] | |
n.col <- dim(x)[2] | |
chisq.df <- (n.col - 1) * (n.row - 1) | |
## check | |
if(n.row == 1) {stop("Numbers of row must be larger than 1")} | |
if(n.col == 1) {stop("Numbers of column must be larger than 1")} | |
if(mode(data.o) != "numeric") {stop("Mode of x must be numeric")} | |
if( | |
sum(class(data.o) != c("matrix", "xtabs")) == 0 | |
) { | |
stop("Class of x must be matrix or xtabs") | |
} | |
## matrix of expected value | |
data.e <- matrix(NA, ncol = n.col, nrow = n.row) | |
for (m in 1:n.row){ | |
for (n in 1:n.col){ | |
data.e[m,n] <- sum.col[n] * sum.row[m] / sum.total | |
} | |
} | |
## matrix of ovserved value with Yates' correction | |
data.o.y <- matrix(NA, ncol = n.col, nrow = n.row) | |
for (m in 1:n.row){ | |
for (n in 1:n.col){ | |
if (data.e[m,n] - data.o[m,n] > 0){ | |
data.o.y[m,n] <- data.o[m,n] + 0.5 | |
} else if (data.e[m,n] - data.o[m,n] < 0){ | |
data.o.y[m,n] <- data.o[m,n] - 0.5 | |
} else { | |
data.o.y[m,n] <- data.o[m,n] | |
} | |
} | |
} | |
## q_min for Williams' correction | |
q.min <- 1 + | |
(sum.total * sum(1 / sum.col) - 1) * | |
(sum.total * sum(1 / sum.row) - 1) / | |
(6 * (chisq.df) * sum.total) | |
## G-value | |
isnot.na.index <- !is.na(data.o * log(data.o / data.e)) | |
g <- 2 * sum((data.o * log(data.o / data.e))[isnot.na.index]) | |
isnot.na.index.y <- !is.na(data.o.y * log(data.o.y / data.e)) | |
g.y <- 2 * sum((data.o.y * log(data.o.y / data.e))[isnot.na.index]) | |
g.w <- g / q.min | |
## P-value | |
p.g <- 1 - pchisq(g, chisq.df) | |
p.g.y <- 1 - pchisq(g.y, chisq.df) | |
p.g.w <- 1 - pchisq(g.w, chisq.df) | |
## return | |
z <- list( | |
data.observed = data.o, | |
data.expected = data.e, | |
data.observed.Yates = data.o.y, | |
df = chisq.df, | |
q.min = q.min, | |
g = g, | |
g.Yates = g.y, | |
g.Williams = g.w, | |
p = p.g, | |
p.Yates = p.g.y, | |
p.Williams = p.g.w | |
) | |
class(z) <- "g.test.2way" | |
return(z) | |
} | |
## print function | |
print.g.test.2way <- function(x){ | |
cat("Observed value:\n") | |
print(x$data.observed) | |
cat("Expected value:\n") | |
print(x$data.expected) | |
cat("Observed value with Yates' correction:\n") | |
print(x$data.observed.Yates) | |
cat("Degree of freedom =", x$df, "\n") | |
cat(sprintf("G-test:\n\tG = %g,\tp = %g\n", x$g, x$p)) | |
cat(sprintf( | |
"G-test with Yates' correction:\n\tG = %g,\tp = %g\n", | |
x$g.Yates, x$p.Yates | |
)) | |
cat(sprintf( | |
"G-test with Williams' correction:\n\tG = %g,\tq_min = %g,\tp = %g\n", | |
x$g.Williams, x$q.min, x$p.Williams | |
)) | |
} |
Example
R code
my.2way.contingency.table <- matrix( c( 5,0,3,12,6, 4,2,7,23,11 ), nrow = 2, ncol=5, byrow=T ) g.test.result <- g.test.2way(my.2way.contingency.table) g.test.result str(g.test.result)
R output
> my.2way.contingency.table <- matrix( + c( + 5,0,3,12,6, + 4,2,7,23,11 + ), + nrow = 2, ncol=5, byrow=T + ) > > g.test.result <- g.test.2way(my.2way.contingency.table) > g.test.result Observed value: [,1] [,2] [,3] [,4] [,5] [1,] 5 0 3 12 6 [2,] 4 2 7 23 11 Expected value: [,1] [,2] [,3] [,4] [,5] [1,] 3.205479 0.7123288 3.561644 12.46575 6.054795 [2,] 5.794521 1.2876712 6.438356 22.53425 10.945205 Observed value with Yates' correction: [,1] [,2] [,3] [,4] [,5] [1,] 4.5 0.5 3.5 12.5 6.5 [2,] 4.5 1.5 6.5 22.5 10.5 Degree of freedom = 4 G-test: G = 3.41127, p = 0.491497 G-test with Yates' correction: G = 1.28744, p = 0.863503 G-test with Williams' correction: G = 3.07349, q_min = 1.1099, p = 0.545603 > str(g.test.result) List of 11 $ data.observed : num [1:2, 1:5] 5 4 0 2 3 7 12 23 6 11 $ data.expected : num [1:2, 1:5] 3.205 5.795 0.712 1.288 3.562 ... $ data.observed.Yates: num [1:2, 1:5] 4.5 4.5 0.5 1.5 3.5 6.5 12.5 22.5 6.5 10.5 $ df : num 4 $ q.min : num 1.11 $ g : num 3.41 $ g.Yates : num 1.29 $ g.Williams : num 3.07 $ p : num 0.491 $ p.Yates : num 0.864 $ p.Williams : num 0.546 - attr(*, "class")= chr "g.test.2way"
No comments:
Post a Comment