##############################################################
# #
# Title : Conducting Configurational Comparative Research #
# with Qualitative Comparative Analysis: #
# A Hands-On Tutorial for Applied Evaluation #
# Scholars and Practitioners #
# #
# Author: Alrik Thiem #
# #
# Date : 2 November 2016 #
# #
##############################################################
# load the QCApro package
library(QCApro)
# Phase I: From the raw data to the truth table
#-------------------------------------------------------------------------------
# read the data of Cragun et al. (2014); the first column of the CSV file
# supplies the row names
CRA <- read.csv("CragunEtal2014.csv", row.names = 1)
CRA
# calibrate the endogenous variable pf into two crisp factors: hpf (high
# patient follow-through) is 1 where the pf label matches one of the bands
# 41_55, 56_70, 71_85 or 85_100, and lpf (low patient follow-through) is 1
# where the pf label matches 0_10; every other row is coded 0
CRA$hpf <- as.numeric(grepl("41_55|56_70|71_85|85_100", CRA$pf))
CRA$lpf <- as.numeric(grepl("0_10", CRA$pf))
CRA
# put all factor labels except that of the first column into upper case
# (lower-case letters often indicate negation in QCA, which is not wanted here)
names(CRA) <- c(names(CRA)[1], toupper(names(CRA)[-1]))
# truth table for the outcome HPF, built from all columns of CRA except the
# raw variable pf and the rival outcome LPF; cases are listed per row
tt.HPF <- truthTable(
  CRA[, setdiff(names(CRA), c("pf", "LPF"))],
  outcome = "HPF",
  show.cases = TRUE
)
tt.HPF
# truth table for the outcome LPF, built from all columns of CRA except the
# raw variable pf and the rival outcome HPF; cases are listed per row
tt.LPF <- truthTable(
  CRA[, setdiff(names(CRA), c("pf", "HPF"))],
  outcome = "LPF",
  show.cases = TRUE
)
tt.LPF
# Phase II: From the truth table to the PI chart
#-------------------------------------------------------------------------------
# prime implicant (PI) chart for HPF, taken from the eQMC() result
chart.HPF <- eQMC(tt.HPF)$PIchart
chart.HPF
# prime implicant (PI) chart for LPF, taken from the eQMC() result
chart.LPF <- eQMC(tt.LPF)$PIchart
chart.LPF
# a search for simple necessary conditions for high/low patient follow-through
# that disregards difference-making, as recommended by Ragin (2000, 254-255) or
# Schneider and Wagemann (2012, 197-219), outputs RR for HPF; yet RR is not
# even a potential INUS condition for HPF, which can be verified by inspecting
# the corresponding PI chart
# minimally necessary conditions for HPF (inclusion cut-off 0.8)
rownames(
  superSubset(
    CRA[, setdiff(names(CRA), c("pf", "LPF"))],
    outcome = "HPF",
    incl.cut = 0.8
  )$incl.cov
)
# minimally necessary conditions for LPF (inclusion cut-off 0.8)
rownames(
  superSubset(
    CRA[, setdiff(names(CRA), c("pf", "HPF"))],
    outcome = "LPF",
    incl.cut = 0.8
  )$incl.cov
)
# show the PI chart for HPF again for comparison with the conditions above
chart.HPF
# Phase III: From the PI chart to the solution
#-------------------------------------------------------------------------------
# solutions for HPF, requesting details and the cases behind each term
sol.HPF <- eQMC(
  tt.HPF,
  show.cases = TRUE,
  details = TRUE
)
sol.HPF
# solutions for LPF, requesting details and the cases behind each term
sol.LPF <- eQMC(
  tt.LPF,
  show.cases = TRUE,
  details = TRUE
)
sol.LPF
#------------------------------------------------------------------------------#
# additional issues mentioned in the article #
#------------------------------------------------------------------------------#
# the creation of two separate factors HPF/LPF is not necessary; QCApro can
# work with multivalent factors, both exogenous and endogenous (see footnote 7
# in the article)
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# build the trivalent factor PF from the two crisp factors:
#   0 = low patient follow-through    (every row not matched below)
#   1 = medium patient follow-through (LPF == 0 and HPF == 0)
#   2 = high patient follow-through   (LPF == 0 and HPF == 1)
CRA$PF <- rep(0, nrow(CRA))
CRA$PF[CRA$LPF == 0 & CRA$HPF == 0] <- 1
CRA$PF[CRA$LPF == 0 & CRA$HPF == 1] <- 2
# truth table (including unobserved rows) and solution for high patient
# follow-through (PF{2}); pf, LPF and HPF are excluded from the data
tt.PF <- truthTable(
  CRA[, setdiff(names(CRA), c("pf", "LPF", "HPF"))],
  outcome = "PF{2}",
  show.cases = TRUE,
  complete = TRUE
)
eQMC(tt.PF)
#-------------------------------------------------------------------------------
# always produce parsimonious solutions; conservative and intermediate solutions
# make causal inferences that go beyond the data by assuming that some
# (intermediate) or all (conservative) remainders are data points that occur
# in conjunction with the negation of the outcome being analyzed; it is not true
# that the conservative solution is "[c]onservative because [...] the
# researcher [...] is exclusively guided by the empirical information
# at hand" (Schneider and Wagemann 2012, 162); in fact, conservative solutions
# are based on manipulations to the empirical information at hand
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# the conservative solution for high patient follow-through
sol.cs <- eQMC(tt.PF, sol.type = "cs", details = TRUE, show.cases = TRUE)
sol.cs
# the remainders (minterms that are not instantiated by observations): rows of
# the truth table whose OUT value is "?", keeping the columns up to and
# including OUT
rem <- tt.PF$tt[tt.PF$tt$OUT == "?", seq_len(match("OUT", names(tt.PF$tt)))]
head(rem)
tail(rem)
# artificially instantiate all remainders as negative minterms, using single
# (random) observations that show low (0) or medium (1) patient follow-through,
# but not high (2) patient follow-through; the seed is fixed so that the
# randomly drawn values, and thus the inflated data set, are identical on
# every run of the script
set.seed(2016)
rem$OUT <- sample(c(0, 1), nrow(rem), replace = TRUE)
# rename OUT to PF so that the columns line up with those of CRA in rbind()
names(rem)[names(rem) == "OUT"] <- "PF"
# the new inflated data set: the original observations followed by the
# artificial ones
CRA.infl.cs <- rbind(CRA[, setdiff(names(CRA), c("pf", "HPF", "LPF"))], rem)
# the new truth table for the inflated data set
tt.PF.infl.cs <- truthTable(CRA.infl.cs, outcome = "PF{2}")
tt.PF.infl.cs
# the solution for the new, inflated data set is the same as the conservative
# solution for the original, non-inflated data set; that is, the conservative
# solution introduces counterfactual cases as real cases; the lower
# the diversity index score (the ratio between the number of all observed
# minterms and the number of all logically possible minterms), the more extreme
# the generation of artificial data by the conservative search strategy of QCA
sol.infl.cs <- eQMC(tt.PF.infl.cs)
sol.infl.cs
# test, element by element, whether the solution for the inflated data set is
# exactly the same as for the original, non-inflated data set
unlist(sol.infl.cs$solution) == unlist(sol.cs$solution)
#-------------------------------------------------------------------------------
# the intermediate solution for high patient follow-through; directional
# expectations derived from Cragun et al. (2016, 265-266); the vector is
# defined once because the same expectations are reused further below for the
# inflated data sets
dir.exp.PF <- c("-", "-", "1", "1", "0", "0")
sol.im <- eQMC(tt.PF, dir.exp = dir.exp.PF)
# the so-called "difficult counterfactuals"; there are four sets because
# each model from the parsimonious solution generates its own set
diffcount <- lapply(
  sol.im$i.sol[c("C1P1", "C1P2", "C1P3", "C1P4")],
  function (x) {data.frame(x$DC)}
)
# 14 difficult counterfactuals for each of the four solutions
str(diffcount)
# artificially instantiate all difficult counterfactuals as negative minterms,
# using single (random) observations that show low (0) or medium (1) patient
# follow-through, but not high (2) patient follow-through; the seed is fixed so
# that the inflated data sets are identical on every run of the script
set.seed(2016)
diffcount <- lapply(diffcount, function (x) {
  x$PF <- sample(c(0, 1), nrow(x), replace = TRUE)
  x
})
# the new inflated data sets: the original observations followed by the
# artificial ones, one data set per set of difficult counterfactuals
CRA.infl.im <- lapply(diffcount, function (x) {
  rbind(CRA[, setdiff(names(CRA), c("pf", "HPF", "LPF"))], x)
})
str(CRA.infl.im)
# the four new truth tables for the inflated data sets
tt.PF.infl.im <- lapply(CRA.infl.im, function (x) {
  truthTable(x, outcome = "PF{2}")
})
tt.PF.infl.im
# the solution for the new, inflated data set is the same as the intermediate
# solution for the original, non-inflated data set; that is, the intermediate
# solution introduces counterfactual cases as real cases
sol.infl.im <- lapply(tt.PF.infl.im, function (x) {
  eQMC(x, dir.exp = dir.exp.PF)
})
sol.infl.im
# test, element by element, whether the solutions for the inflated data sets
# are exactly the same as for the original, non-inflated data set: for each
# model name x, the C1P1 intermediate solution of inflated data set x is
# compared with intermediate solution x of the original data; list elements
# are selected by name with [[ ]] rather than by evaluating pasted code
lapply(names(sol.infl.im), function (x) {
  unlist(sol.infl.im[[x]]$i.sol$C1P1$solution) ==
    unlist(sol.im$i.sol[[x]]$solution)
})
# LESSON: If you do not want to infer more than what your data warrants, never
# use conservative or intermediate solutions, but only parsimonious ones.
# References
#-------------------------------------------------------------------------------
# Cragun, D., DeBate, R. D., Vadaparampil, S. T., Baldwin, J., Hampel, H., &
# Pal, T. (2014). Comparing universal Lynch syndrome tumor-screening programs
# to evaluate associations between implementation strategies and patient
# follow-through. Genetics in Medicine, 16, 773-782.
#
# Cragun, D., Pal, T., Vadaparampil, S. T., Baldwin, J., Hampel, H., & DeBate,
# R. D. (2016). Qualitative comparative analysis: A hybrid method for
# identifying factors associated with program effectiveness. Journal of Mixed
# Methods Research, 10, 251-272.
#
# Ragin, C. C. (2000). Fuzzy-set social science. Chicago: University of Chicago
# Press.
#
# Schneider, C. Q., & Wagemann, C. (2012). Set-theoretic methods for the social
# sciences: A guide to Qualitative Comparative Analysis (QCA). Cambridge:
# Cambridge University Press.