# data.dic
#' Build a data dictionary for a data frame
#'
#' Summarises every column of `df` in one row: its name, a type label, the
#' content preview printed by `str()`, and the percentage of missing values.
#'
#' The type label is "Factor" (factors and ordered factors), "Character"
#' (`chr`) or "Numerical" (`num`). Any other type keeps the raw token printed
#' by `str()`, including its surrounding spaces (e.g. " int "), so existing
#' comparisons against those values keep working.
#'
#' @param df A data frame.
#' @return A data frame with one row per column of `df` and the character
#'   columns `colnames`, `Type`, `Values` and `NA (%)`.
#' @examples
#' data.dic(iris)
data.dic <- function(df) {
  stopifnot(is.data.frame(df))

  col_names <- names(df)
  n_cols <- length(col_names)

  # A data frame without columns has nothing to describe; return the usual
  # layout with zero rows instead of failing further down.
  if (n_cols == 0L) {
    return(data.frame(
      colnames = character(0),
      Type = character(0),
      Values = character(0),
      `NA (%)` = character(0),
      check.names = FALSE,
      stringsAsFactors = FALSE
    ))
  }

  # list.len = n_cols keeps str() from truncating wide data frames.
  str_lines <- utils::capture.output(utils::str(df, list.len = n_cols))

  # Keep only the top-level " $ name: ..." lines. This drops the header as
  # well as attribute lines and nested components that str() may add.
  var_lines <- str_lines[startsWith(str_lines, " $ ")]
  if (length(var_lines) != n_cols) {
    stop(
      "could not match the str() output to the columns of `df`",
      call. = FALSE
    )
  }

  # Remove the " $ <name><padding>:" prefix. Using the known column name
  # instead of splitting on ":" keeps names and values that contain a colon
  # intact. If a line does not start with the expected name, fall back to
  # cutting at the first colon.
  has_name <- startsWith(var_lines, paste0(" $ ", col_names))
  described <- ifelse(
    has_name,
    sub("^ *:", "", substring(var_lines, nchar(col_names) + 4L)),
    sub("^[^:]*:", "", var_lines)
  )

  # The type token runs up to and including the second space of the
  # description (e.g. " int "); everything after it is the value preview.
  space_hits <- gregexpr(" ", described, fixed = TRUE)
  split_at <- vapply(
    space_hits,
    function(hit) if (length(hit) >= 2L) hit[2L] else NA_integer_,
    integer(1)
  )
  no_split <- is.na(split_at)
  split_at[no_split] <- nchar(described)[no_split]

  type_raw <- substr(described, 1L, split_at)
  values <- substring(described, split_at + 1L)

  # For factors keep the level summary and drop the trailing integer codes
  # (": 1 2 3 ..."), then remove the leading "w/" marker that str() prints.
  is_factor <- type_raw %in% c(" Factor ", " Ord.factor ")
  values[is_factor] <- sub(": [0-9NA. ]*$", "", values[is_factor])
  values <- sub("^w/", "", values)

  # Translate the str() type tokens that have a friendly label.
  type_labels <- c(
    " Ord.factor " = "Factor",
    " Factor " = "Factor",
    " chr " = "Character",
    " num " = "Numerical"
  )
  label_idx <- match(type_raw, names(type_labels))
  type <- ifelse(is.na(label_idx), type_raw, unname(type_labels[label_idx]))

  # Share of missing values per column, rounded to one decimal place.
  na_counts <- vapply(
    df,
    function(x) sum(is.na(x)),
    integer(1),
    USE.NAMES = FALSE
  )
  na_pct <- round(na_counts / nrow(df) * 100, 1)

  out <- data.frame(
    colnames = col_names,
    Type = type,
    Values = values,
    stringsAsFactors = FALSE
  )
  out[["NA (%)"]] <- paste0(format(na_pct, trim = TRUE), "%")
  out
}