Skip to content

data.dic

kamclean edited this page Jun 22, 2017 · 1 revision

# Build a simple data dictionary for a data frame.
#
# Arguments:
#   df  A data frame.
#
# Returns a data frame with one row per column of `df` and four character
# columns:
#   colnames  the column name, taken verbatim from names(df)
#   Type      the type token printed by str(); "Factor"/"Ord.factor" are
#             reported as "Factor", "chr" as "Character", "num" as
#             "Numerical"; any other token (e.g. "int", "logi") is kept as
#             printed by str(), without surrounding whitespace
#   Values    the remainder of the str() summary (sample values, or
#             `<n> levels ...` for factors), without surrounding whitespace
#   NA (%)    percentage of missing entries in the column, rounded to one
#             decimal place and suffixed with "%"
data.dic <- function(df) {
  stopifnot(is.data.frame(df))

  # Summarise each column separately instead of parsing the str() output of
  # the whole data frame: that way neither column names nor values that
  # contain ":" can corrupt the parsing. give.length = FALSE mirrors what
  # str() does for data frame columns (no "[1:n]" length marker).
  summarise_column <- function(x) {
    out <- utils::capture.output(utils::str(x, give.length = FALSE))
    # Only the first line carries the type / values summary.
    if (length(out) == 0L) "" else out[[1L]]
  }
  summaries <- trimws(
    vapply(df, summarise_column, character(1), USE.NAMES = FALSE)
  )

  # The type token is everything up to the first whitespace; the rest of the
  # line is the values summary.
  type <- sub("[[:space:]].*$", "", summaries)
  values <- trimws(substring(summaries, nchar(type) + 1L))

  # Factors print as `w/ <n> levels <levels>: <integer codes>`. Keep only
  # `<n> levels <levels>`.
  is_factor <- type %in% c("Factor", "Ord.factor")
  values[is_factor] <- sub("^w/[[:space:]]*", "", values[is_factor])
  values[is_factor] <- sub(":[ 0-9NA.]*$", "", values[is_factor])

  # Replace the str() abbreviations with readable labels.
  type_labels <- c(
    Ord.factor = "Factor",
    Factor = "Factor",
    chr = "Character",
    num = "Numerical"
  )
  has_label <- type %in% names(type_labels)
  type[has_label] <- unname(type_labels[type[has_label]])

  # Percentage of missing entries per column.
  n_missing <- vapply(
    df,
    function(x) sum(is.na(x)),
    numeric(1),
    USE.NAMES = FALSE
  )
  pct_missing <- round(n_missing / nrow(df) * 100, 1)
  # paste0() would turn a zero-length input into "%", so guard the
  # zero-column case explicitly.
  na_pct <- if (length(pct_missing) > 0L) {
    paste0(format(pct_missing, trim = TRUE), "%")
  } else {
    character(0)
  }

  dd <- data.frame(
    colnames = as.character(names(df)),
    Type = type,
    Values = values,
    row.names = NULL,
    stringsAsFactors = FALSE
  )
  dd[["NA (%)"]] <- na_pct
  dd
}

Clone this wiki locally