G-test for 2-way contingency data in R

A R function applying G-test for 2-way contingency data in R is demonstrated. Yates’ correction and Williams’ correction are also included in the output.

R Source

# Copyright 2013 Chen-Pan Liao
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
g.test.2way <- function(
x # must be a 2-way xtabs/matrix
)
{
## initiating
data.o <- x
sum.total <- sum(x)
sum.row <- rowSums(x)
sum.col <- colSums(x)
n.row <- dim(x)[1]
n.col <- dim(x)[2]
chisq.df <- (n.col - 1) * (n.row - 1)
## check
if(n.row == 1) {stop("Numbers of row must be larger than 1")}
if(n.col == 1) {stop("Numbers of column must be larger than 1")}
if(mode(data.o) != "numeric") {stop("Mode of x must be numeric")}
if(
sum(class(data.o) != c("matrix", "xtabs")) == 0
) {
stop("Class of x must be matrix or xtabs")
}
## matrix of expected value
data.e <- matrix(NA, ncol = n.col, nrow = n.row)
for (m in 1:n.row){
for (n in 1:n.col){
data.e[m,n] <- sum.col[n] * sum.row[m] / sum.total
}
}
## matrix of ovserved value with Yates' correction
data.o.y <- matrix(NA, ncol = n.col, nrow = n.row)
for (m in 1:n.row){
for (n in 1:n.col){
if (data.e[m,n] - data.o[m,n] > 0){
data.o.y[m,n] <- data.o[m,n] + 0.5
} else if (data.e[m,n] - data.o[m,n] < 0){
data.o.y[m,n] <- data.o[m,n] - 0.5
} else {
data.o.y[m,n] <- data.o[m,n]
}
}
}
## q_min for Williams' correction
q.min <- 1 +
(sum.total * sum(1 / sum.col) - 1) *
(sum.total * sum(1 / sum.row) - 1) /
(6 * (chisq.df) * sum.total)
## G-value
isnot.na.index <- !is.na(data.o * log(data.o / data.e))
g <- 2 * sum((data.o * log(data.o / data.e))[isnot.na.index])
isnot.na.index.y <- !is.na(data.o.y * log(data.o.y / data.e))
g.y <- 2 * sum((data.o.y * log(data.o.y / data.e))[isnot.na.index])
g.w <- g / q.min
## P-value
p.g <- 1 - pchisq(g, chisq.df)
p.g.y <- 1 - pchisq(g.y, chisq.df)
p.g.w <- 1 - pchisq(g.w, chisq.df)
## return
z <- list(
data.observed = data.o,
data.expected = data.e,
data.observed.Yates = data.o.y,
df = chisq.df,
q.min = q.min,
g = g,
g.Yates = g.y,
g.Williams = g.w,
p = p.g,
p.Yates = p.g.y,
p.Williams = p.g.w
)
class(z) <- "g.test.2way"
return(z)
}
## print function
print.g.test.2way <- function(x){
cat("Observed value:\n")
print(x$data.observed)
cat("Expected value:\n")
print(x$data.expected)
cat("Observed value with Yates' correction:\n")
print(x$data.observed.Yates)
cat("Degree of freedom =", x$df, "\n")
cat(sprintf("G-test:\n\tG = %g,\tp = %g\n", x$g, x$p))
cat(sprintf(
"G-test with Yates' correction:\n\tG = %g,\tp = %g\n",
x$g.Yates, x$p.Yates
))
cat(sprintf(
"G-test with Williams' correction:\n\tG = %g,\tq_min = %g,\tp = %g\n",
x$g.Williams, x$q.min, x$p.Williams
))
}
view raw g.test.2way.R hosted with ❤ by GitHub

Example

R code
my.2way.contingency.table <- matrix(
  c(
    5,0,3,12,6,
    4,2,7,23,11
  ),
  nrow = 2, ncol=5, byrow=T
)

g.test.result <- g.test.2way(my.2way.contingency.table)
g.test.result
str(g.test.result)
R output
> my.2way.contingency.table <- matrix(
+   c(
+     5,0,3,12,6,
+     4,2,7,23,11
+   ),
+   nrow = 2, ncol=5, byrow=T
+ )
> 
> g.test.result <- g.test.2way(my.2way.contingency.table)
> g.test.result
Observed value:
     [,1] [,2] [,3] [,4] [,5]
[1,]    5    0    3   12    6
[2,]    4    2    7   23   11
Expected value:
         [,1]      [,2]     [,3]     [,4]      [,5]
[1,] 3.205479 0.7123288 3.561644 12.46575  6.054795
[2,] 5.794521 1.2876712 6.438356 22.53425 10.945205
Observed value with Yates' correction:
     [,1] [,2] [,3] [,4] [,5]
[1,]  4.5  0.5  3.5 12.5  6.5
[2,]  4.5  1.5  6.5 22.5 10.5
Degree of freedom = 4 
G-test:
 G = 3.41127, p = 0.491497
G-test with Yates' correction:
 G = 1.28744, p = 0.863503
G-test with Williams' correction:
 G = 3.07349, q_min = 1.1099, p = 0.545603
> str(g.test.result)
List of 11
 $ data.observed      : num [1:2, 1:5] 5 4 0 2 3 7 12 23 6 11
 $ data.expected      : num [1:2, 1:5] 3.205 5.795 0.712 1.288 3.562 ...
 $ data.observed.Yates: num [1:2, 1:5] 4.5 4.5 0.5 1.5 3.5 6.5 12.5 22.5 6.5 10.5
 $ df                 : num 4
 $ q.min              : num 1.11
 $ g                  : num 3.41
 $ g.Yates            : num 1.29
 $ g.Williams         : num 3.07
 $ p                  : num 0.491
 $ p.Yates            : num 0.864
 $ p.Williams         : num 0.546
 - attr(*, "class")= chr "g.test.2way"