library(rio) library(FactoMineR) d <- import("~/Downloads/Cultural tastes online (2023)(1-11).xlsx") # pare it down pnames <- d[[6]] while (any(duplicated(pnames))) { pnames[duplicated(pnames)] <- paste(pnames[duplicated(pnames)], "+") } rownames(d) <- pnames d <- d[7:14] names(d) <- c( "SM_freq", "SM_platform", "SM_posts", "MT_freq", "MT_platform", "MT_type", "MS_freq", "MS_platform" ) # a color palette cols <- c( "#CC6677", "#332288", "#117733", "#88CCEE", "#882255", "#44AA99", "#999933", "#AA4499" ) # separate columns multicols <- c(2,5,8) for(cnum in multicols){ items <- strsplit(d[[cnum]],';') item_names <- unique(unlist(items)) new_names <- paste(names(d)[cnum],gsub("\\W+",'_',item_names),sep="-") for(i in seq(length(item_names))){ newcol <- sapply(items,function(x){item_names[i] %in% x}) d[[new_names[i]]] <- newcol } } d <- d[-multicols] # reorder d <- d[c(1,2,6:16,3,4,17:27,5,28:32)] # MCA m <- MCA(d, graph = FALSE) cats <- apply(d, 2, function(x) nlevels(as.factor(x))) cats <- c(smfreq=3,smpost=3,smplatform=22,mtfreq=3,mtkind=3,mtplatform=22,msfreq=3,msplatform=10) m_vars <- data.frame( m$var$coord, Variable = rep(names(cats), cats), Col = rep(cols, cats)) m_obs <- data.frame(m$ind$coord) # social structure pdf('site/data/cultural_tastes_ca_resps.pdf',width=12,height=10) plot(NA, xlim = range(m_obs[[1]]), ylim = range(m_obs[[2]]), main = "Respondents", xlab = "dim 1", ylab = "dim 2" ) text(jitter(m_obs[[1]]), jitter(m_obs[[2]]), labels = rownames(m_obs), cex = .5) dev.off() # cultural tastes pdf('site/data/cultural_tastes_ca_tastes.pdf',width=12,height=10) plot(NA, xlim = range(m_vars[[1]]), ylim = range(m_vars[[2]]), main = "Tastes", xlab = "dim 1", ylab = "dim 2" ) for (i in seq_len(nrow(m_vars))) { lines(c(0, m_vars[i, 1]), c(0, m_vars[i, 2]), col = m_vars[i, "Col"], lty = 3, lwd = 1.5) text(m_vars[i, 1], m_vars[i, 2], label = rownames(m_vars)[i], cex = .7, col = m_vars[i, "Col"]) } dev.off() # both pdf('site/data/cultural_tastes_ca_both.pdf',width=12,height=10) plot(NA, xlim = range(c(m_obs[[1]],m_vars[[1]])), ylim = range(c(m_obs[[2]]),m_vars[[2]]), main = "Respondents and tastes", xlab = "dim 1", ylab = "dim 2" ) text(jitter(m_obs[[1]]), jitter(m_obs[[2]]), labels = rownames(m_obs), cex = .5) for (i in seq_len(nrow(m_vars))) { lines(c(0, m_vars[i, 1]), c(0, m_vars[i, 2]), col = m_vars[i, "Col"], lty = 3, lwd = 1.5) text(m_vars[i, 1], m_vars[i, 2], label = rownames(m_vars)[i], cex = .7, col = m_vars[i, "Col"]) } dev.off() # anonymize and prep for download dta <- d[-which(as.character(sapply(d,class))=='character')] names(dta) <- (gsub("SM_","social_media_",names(dta))) names(dta) <- (gsub("MT_","movies_tv_",names(dta))) names(dta) <- (gsub("MS_","music_",names(dta))) names(dta) <- tolower(names(dta)) # get rid of unused columns dta <- dta[sapply(dta,function(c){ if(class(c)=="character"){ return(TRUE) }else{ return(sum(c)>0) } })] rownames(dta) <- NULL # write to file write.csv(dta, file = "site/data/cultural_tastes.csv", row.names=FALSE)