diff --git a/.Rbuildignore b/.Rbuildignore index 561a653e..59863e10 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,4 +9,5 @@ ^pkgdown$ ^\.circleci$ ^\.circleci/config\.yml$ -^\.github$ \ No newline at end of file +^\.github$ +^cran-comments\.md$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 00000000..2d19fc76 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 00000000..c8c5b62f --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,51 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + +name: R-CMD-check.yaml + +permissions: read-all + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual", "--compact-vignettes=gs+qpdf")' diff --git a/DESCRIPTION b/DESCRIPTION index 872132bf..1c5c6270 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,11 @@ Package: dsBase -Title: DataSHIELD Server Site Base Functions -Description: DataSHIELD Server Site Base Functions. -Version: 6.3.2 +Title: 'DataSHIELD' Server Site Base Functions +Description: Base 'DataSHIELD' functions for the server side. 'DataSHIELD' is a software package which allows + you to do non-disclosive federated analysis on sensitive data. 'DataSHIELD' analytic functions have + been designed to only share non disclosive summary statistics, with built in automated output + checking based on statistical disclosure control. With data sites setting the threshold values for + the automated output checks. For more details, see 'citation("dsBase")'. +Version: 6.3.3 Authors@R: c(person(given = "Paul", family = "Burton", role = c("aut")), @@ -28,6 +32,11 @@ Authors@R: c(person(given = "Paul", family = "Avraam", role = c("aut"), comment = c(ORCID = "0000-0001-8908-2441")), + person(given = "Yannick", + family = "Marcon", + role = c("aut"), + email = "yannick.marcon@obiba.org", + comment = c(ORCID = "0000-0003-0138-2023")), person(given = "Stuart", family = "Wheater", role = c("aut", "cre"), @@ -50,180 +59,5 @@ Imports: childsds Suggests: testthat -AggregateMethods: - asFactorDS1, - asListDS, - aucDS, - boxPlotGGDS, - checkNegValueDS, - classDS, - colnamesDS, - corTestDS, - corDS, - covDS, - dataFrameSubsetDS1, - densityGridDS, - extractQuantilesDS1, - extractQuantilesDS2, - dimDS, - gamlssDS, - glmDS1, - glmDS2, - glmerSLMADS2, - glmPredictDS.ag, - glmSLMADS1, - glmSLMADS2, - glmSummaryDS.ag, - heatmapPlotDS, - hetcorDS, - histogramDS1, - histogramDS2, - isNaDS, - isValidDS, - kurtosisDS1, - kurtosisDS2, - lengthDS, - levelsDS, - lexisDS1, - listDisclosureSettingsDS, - lmerSLMADS2, - lsDS, - matrixDetDS1, - meanDS, - meanSdGpDS, - messageDS, - metadataDS, - miceDS, - minMaxRandDS, - namesDS, - numNaDS, - quantileMeanDS, - rangeDS, - ranksSecureDS1, - ranksSecureDS3, - rmDS, - scatterPlotDS, - scoreVectDS, - setSeedDS, - skewnessDS1, - skewnessDS2, - table1DDS, - table2DDS, - tableDS, - tableDS2, - tapplyDS, - testObjExistsDS, - varDS, - exists=base::exists, - is.character=base::is.character, - is.factor=base::is.factor, - is.list=base::is.list, - is.null=base::is.null, - is.numeric=base::is.numeric, - NROW=base::NROW, - t.test=stats::t.test -AssignMethods: - absDS, - asCharacterDS, - asDataMatrixDS, - asFactorDS2, - asFactorSimpleDS, - asIntegerDS, - asListDS, - asLogicalDS, - asMatrixDS, - asNumericDS, - blackBoxDS, - blackBoxRanksDS, - BooleDS, - boxPlotGG_data_TreatmentDS, - boxPlotGG_data_Treatment_numericDS, - bp_standardsDS, - cbindDS, - cDS, - changeRefGroupDS, - completeCasesDS, - dataFrameDS, - dataFrameFillDS, - dataFrameSortDS, - dataFrameSubsetDS2, - dmtC2SDS, - elsplineDS, - glmerSLMADS.assign, - glmPredictDS.as, - glmSLMADS.assign, - glmSummaryDS.as, - getWGSRDS, - igb_standardsDS, - listDS, - lexisDS2, - lexisDS3, - lmerSLMADS.assign, - lsplineDS, - matrixDetDS2, - matrixDiagDS, - matrixDimnamesDS, - matrixDS, - matrixInvertDS, - matrixMultDS, - matrixTransposeDS, - mergeDS, - nsDS, - qlsplineDS, - ranksSecureDS2, - ranksSecureDS4, - ranksSecureDS5, - rbindDS, - rBinomDS, - recodeLevelsDS, - recodeValuesDS, - repDS, - replaceNaDS, - reShapeDS, - rNormDS, - rowColCalcDS, - rPoisDS, - rUnifDS, - sampleDS, - seqDS, - sqrtDS, - subsetByClassDS, - subsetDS, - tableDS.assign, - tapplyDS.assign, - uniqueDS, - unListDS, - vectorDS, - as.character=base::as.character, - as.null=base::as.null, - as.numeric=base::as.numeric, - attach=base::attach, - c=dsBase::vectorDS, - complete.cases=stats::complete.cases, - list=base::list, - exp=base::exp, - log=base::log, - sqrt=base::sqrt, - abs=base::abs, - sin=base::sin, - cos=base::cos, - tan=base::tan, - asin=base::asin, - acos=base::acos, - atan=base::atan, - sum=base::sum, - unlist=base::unlist -Options: - datashield.privacyLevel=5, - default.datashield.privacyControlLevel="banana", - default.nfilter.glm=0.33, - default.nfilter.kNN=3, - default.nfilter.string=80, - default.nfilter.subset=3, - default.nfilter.stringShort=20, - default.nfilter.tab=3, - default.nfilter.noise=0.25, - default.nfilter.levels.density=0.33, - default.nfilter.levels.max=40 RoxygenNote: 7.3.2 Encoding: UTF-8 diff --git a/R/BooleDS.R b/R/BooleDS.R index e4c04548..1ad5e14f 100644 --- a/R/BooleDS.R +++ b/R/BooleDS.R @@ -18,9 +18,11 @@ #' @param na.assign.text A character string taking values 'NA', '1' or '0'. If 'NA' #' then any NA values in the #' input vector remain as NAs in the output vector. If '1' or '0' NA values in the -#' input vector are -#' all converted to 1 or 0 respectively.#' @return the levels of the input variable. +#' input vector are all converted to 1 or 0 respectively. +#' #' @author DataSHIELD Development Team +#' +#' @return the levels of the input variable. #' @export #' BooleDS <- function(V1.name=NULL, V2.name=NULL, Boolean.operator.n=NULL, na.assign.text, numeric.output=TRUE){ diff --git a/R/absDS.R b/R/absDS.R index cd27699a..1f7dc518 100644 --- a/R/absDS.R +++ b/R/absDS.R @@ -6,21 +6,19 @@ #' @param x a string character, the name of a numeric or integer vector #' @return the object specified by the \code{newobj} argument #' of \code{ds.abs} (or default name \code{abs.newobj}) -#' which is written to the serverside. The output object is of class numeric +#' which is written to the serverside. The output object is of class numeric #' or integer. #' @author Demetris Avraam for DataSHIELD Development Team #' @export #' -absDS <- function(x){ - - x.var <- eval(parse(text=x), envir = parent.frame()) +absDS <- function(x) { + x.var <- eval(parse(text = x), envir = parent.frame()) # compute the absolute values of x out <- abs(x.var) - + # assign the outcome to the data servers return(out) - } # ASSIGN FUNCTION # absDS diff --git a/R/asCharacterDS.R b/R/asCharacterDS.R index 1d6013e5..f8e0d1ec 100644 --- a/R/asCharacterDS.R +++ b/R/asCharacterDS.R @@ -1,4 +1,4 @@ -#' +#' #' @title Coerces an R object into class character #' @description this function is based on the native R function \code{as.character} #' @details See help for function \code{as.character} in native R @@ -11,14 +11,12 @@ #' details see help on the clientside function \code{ds.asCharacter} #' @author Amadou Gaye, Paul Burton, Demetris Avraam for DataSHIELD Development Team #' @export -#' -asCharacterDS <- function (x.name){ - - x<-eval(parse(text=x.name), envir = parent.frame()) +#' +asCharacterDS <- function(x.name) { + x <- eval(parse(text = x.name), envir = parent.frame()) output <- as.character(x) return(output) - } # ASSIGN FUNCTION # asCharacterDS diff --git a/R/asDataMatrixDS.R b/R/asDataMatrixDS.R index 5add2f63..5b4e5d71 100644 --- a/R/asDataMatrixDS.R +++ b/R/asDataMatrixDS.R @@ -16,15 +16,13 @@ #' details see help on the clientside function \code{ds.asDataMatrix} #' @author Paul Burton for DataSHIELD Development Team #' @export -asDataMatrixDS <- function (x.name){ - -if(is.character(x.name)){ - x<-eval(parse(text=x.name), envir = parent.frame()) - - }else{ - studysideMessage<-"ERROR: x.name must be specified as a character string" - stop(studysideMessage, call. = FALSE) - } +asDataMatrixDS <- function(x.name) { + if (is.character(x.name)) { + x <- eval(parse(text = x.name), envir = parent.frame()) + } else { + studysideMessage <- "ERROR: x.name must be specified as a character string" + stop(studysideMessage, call. = FALSE) + } output <- data.matrix(x) diff --git a/R/asFactorDS2.R b/R/asFactorDS2.R index c744bfc5..74af1d67 100644 --- a/R/asFactorDS2.R +++ b/R/asFactorDS2.R @@ -9,7 +9,7 @@ #' @param all.unique.levels.transmit the levels that the variable will be transmitted to. #' @param fixed.dummy.vars a boolean that determines whether the new object will be represented as #' a vector or as a matrix of dummy variables indicating the factor level of each data point. -#' If this argyment is set to FALSE (default) then the input variable is converted to a factor and +#' If this argument is set to FALSE (default) then the input variable is converted to a factor and #' assigned as a vector. If is set to TRUE then the input variable is converted to a factor but #' assigned as a matrix of dummy variables. #' @param baseline.level a number indicating the baseline level to be used in the creation of the diff --git a/R/blackBoxDS.R b/R/blackBoxDS.R index 82096a9d..8e7e33f1 100644 --- a/R/blackBoxDS.R +++ b/R/blackBoxDS.R @@ -65,7 +65,10 @@ blackBoxDS <- function(input.var.name=NULL, #nfilter.noise <- as.numeric(thr$nfilter.noise) #nfilter.levels <- as.numeric(thr$nfilter.levels) ######################################################## - + + # back-up current .Random.seed and revert on.exit + old_seed <- .Random.seed + on.exit(.Random.seed <- old_seed, add = TRUE) input.var <- eval(parse(text=input.var.name), envir = parent.frame()) @@ -311,7 +314,7 @@ utils::head(rank.intermediate.value.matrix) utils::tail(rank.intermediate.value.matrix) -cat("\nRANKS IN ALL COLUMNS ABOVE SHOULD BE THE SAME\n") +message("\nRANKS IN ALL COLUMNS ABOVE SHOULD BE THE SAME\n") control.vector control.value @@ -365,7 +368,7 @@ if(sum(round(rank(blackbox.output.df[,3])-rank(blackbox.output.df[,4]),2)==0)!=n of memory") stop(error.message, call. = FALSE) }else{ - cat("\nPROCESSING SUCCESSFUL, ALL RANKS AGREE FOR ALL TRANSFORMATIONS\n\n") + message("\nPROCESSING SUCCESSFUL, ALL RANKS AGREE FOR ALL TRANSFORMATIONS\n\n") } diff --git a/R/blackBoxRanksDS.R b/R/blackBoxRanksDS.R index ac950401..cb2fc21f 100644 --- a/R/blackBoxRanksDS.R +++ b/R/blackBoxRanksDS.R @@ -61,6 +61,9 @@ blackBoxRanksDS <- function(input.var.name=NULL, shared.seedval){ #START FUNC #nfilter.levels <- as.numeric(thr$nfilter.levels) ######################################################## + # back-up current .Random.seed and revert on.exit + old_seed <- .Random.seed + on.exit(.Random.seed <- old_seed, add = TRUE) input.var <- eval(parse(text=input.var.name), envir = parent.frame()) input.global.ranks<-input.var @@ -189,7 +192,7 @@ colnames(rank.intermediate.value.matrix)<-c("input.global.ranks.orig","input.var 1:6,"ID.seq.real.orig") -cat("\nRANKS IN ALL COLUMNS ABOVE SHOULD BE THE SAME\n") +message("\nRANKS IN ALL COLUMNS ABOVE SHOULD BE THE SAME\n") control.vector control.value @@ -255,7 +258,7 @@ if(sum(round(rank(blackbox.ranks.df[,5])-rank(blackbox.ranks.df[,8]),2)==0)!=num of memory") stop(error.message, call. = FALSE) }else{ - cat("\nPROCESSING SUCCESSFUL, ALL RANKS AGREE FOR ALL TRANSFORMATIONS\n\n") + message("\nPROCESSING SUCCESSFUL, ALL RANKS AGREE FOR ALL TRANSFORMATIONS\n\n") } diff --git a/R/checkPermissivePrivacyControlLevel.R b/R/checkPermissivePrivacyControlLevel.R index 7b57f7d3..64fd3ae6 100644 --- a/R/checkPermissivePrivacyControlLevel.R +++ b/R/checkPermissivePrivacyControlLevel.R @@ -1,11 +1,14 @@ #' #' @title checkPermissivePrivacyControlLevel -#' @description This serverside function check that the server is running in "permissive" privacy control level. +#' @description This server-side function check that the server is running in "permissive" privacy control level. #' @details Tests whether the R option "datashield.privacyControlLevel" is set to "permissive", if it isn't #' will cause a call to stop() with the message "BLOCKED: The server is running in 'non-permissive' mode which #' has caused this method to be blocked". #' @param privacyControlLevels is a vector of strings which contains the privacy control level names which are permitted by the calling method. -#' @author Wheater, Dr SM., DataSHIELD Team. +#' +#' @author Wheater, Dr SM., DataSHIELD Development Team. +#' +#' @return No return value, called for side effects #' @export #' checkPermissivePrivacyControlLevel <- function(privacyControlLevels){ diff --git a/R/covDS.R b/R/covDS.R index eeb933ba..9f645b62 100644 --- a/R/covDS.R +++ b/R/covDS.R @@ -16,7 +16,7 @@ #' @return a list that includes a matrix with elements the sum of products between each two variables, a matrix with #' elements the sum of the values of each variable, a matrix with elements the number of complete cases in each #' pair of variables, a list with the number of missing values in each variable separately (columnwise) and the number -#' of missing values casewise or pairwise depending on the arqument \code{use}, and an error message which indicates +#' of missing values casewise or pairwise depending on the argument \code{use}, and an error message which indicates #' whether or not the input variables pass the disclosure controls. The first disclosure control checks that the number #' of variables is not bigger than a percentage of the individual-level records (the allowed percentage is pre-specified #' by the 'nfilter.glm'). The second disclosure control checks that none of them is dichotomous with a level having fewer diff --git a/R/extract.R b/R/extract.R index 455dc744..5c1dc669 100644 --- a/R/extract.R +++ b/R/extract.R @@ -4,6 +4,7 @@ #' @details Not required #' @param input a vector or a list of characters #' @keywords internal +#' @noRd #' @return a vector of characters #' extract <- function(input){ diff --git a/R/glmDS1.R b/R/glmDS1.R index 4c78cee5..a76d615c 100644 --- a/R/glmDS1.R +++ b/R/glmDS1.R @@ -1,6 +1,6 @@ #' #' @title glmDS1 called by ds.glm -#' @description This is the first serverside aggregate function called by ds.glm +#' @description This is the first server-side aggregate function called by ds.glm #' @details It is an #' aggregation function that sets up the model structure #' and creates the starting beta.vector that feeds, via ds.glm, into glmDS2 to enable @@ -14,7 +14,10 @@ #' @param offset the offset #' @param data an #' optional character string specifying a data.frame object holding the data to be -#' analysed under the specified model +#' analysed under the specified model +#' +#' @return List with values from GLM model. +#' #' @author Burton PR for DataSHIELD Development Team #' @export #' diff --git a/R/glmDS2.R b/R/glmDS2.R index b9dd2efb..9287075c 100644 --- a/R/glmDS2.R +++ b/R/glmDS2.R @@ -1,6 +1,6 @@ #' #' @title glmDS2 called by ds.glm -#' @description This is the second serverside aggregate function called by ds.glm. +#' @description This is the second server-side aggregate function called by ds.glm. #' @details It is an aggregate function that uses the model structure and starting #' beta.vector constructed by glmDS1 to iteratively fit the generalized linear model #' that has been specified. The function glmDS2 also carries out a series of disclosure @@ -18,7 +18,10 @@ #' @param weights an optional variable providing regression weights #' @param dataName an optional character string specifying a data.frame object holding #' the data to be analysed under the specified model same +#' #' @author Paul Burton, for DataSHIELD Development Team +#' +#' @return List with values from GLM model #' @export #' glmDS2 <- function (formula, family, beta.vect, offset, weights, dataName) { diff --git a/R/heatmapPlotDS.R b/R/heatmapPlotDS.R index 69ff4346..6dccbd5b 100644 --- a/R/heatmapPlotDS.R +++ b/R/heatmapPlotDS.R @@ -11,7 +11,7 @@ #' plot of non-disclosive graphs (e.g. scatter plots, heatmap plots, contour plots, etc). #' @param x the name of a numeric vector, the x-variable. #' @param y the name of a numeric vector, the y-variable. -#' @param k the number of the nearest neghbours for which their centroid is calculated if the +#' @param k the number of the nearest neighbours for which their centroid is calculated if the #' \code{method.indicator} is equal to 1 (i.e. deterministic method). #' @param noise the percentage of the initial variance that is used as the variance of the embedded #' noise if the \code{method.indicator} is equal to 2 (i.e. probabilistic method). @@ -38,6 +38,10 @@ heatmapPlotDS <- function(x, y, k, noise, method.indicator){ #nfilter.levels.max <- as.numeric(thr$nfilter.levels.max) # ################################################################### + # back-up current .Random.seed and revert on.exit + old_seed <- .Random.seed + on.exit(.Random.seed <- old_seed, add = TRUE) + # Cbind the columns of the two variables and remove any rows that include NAs data.table <- cbind.data.frame(x, y) data.complete <- stats::na.omit(data.table) diff --git a/R/histogramDS1.R b/R/histogramDS1.R index a9ebafda..a79d2f52 100644 --- a/R/histogramDS1.R +++ b/R/histogramDS1.R @@ -3,7 +3,7 @@ #' @description this function returns the minimum and maximum of the input numeric vector which #' depends on the argument \code{method.indicator}. If the method.indicator is set to 1 (i.e. the #' 'smallCellsRule' is used) the computed minimum and maximum values are multiplied by a very small -#' random number. If the method.indicator is set to 2 (i.e. the 'deteministic' method is used) the +#' random number. If the method.indicator is set to 2 (i.e. the 'deterministic' method is used) the #' function returns the minimum and maximum values of the vector with the scaled centroids. If the #' method.indicator is set to 3 (i.e. the 'probabilistic' method is used) the function returns the #' minimum and maximum values of the generated 'noisy' vector. @@ -12,7 +12,7 @@ #' control that is used for the generation of the histogram. If the value is equal to 1 then the #' 'smallCellsRule' is used. If the value is equal to 2 then the 'deterministic' method is used. #' If the value is set to 3 then the 'probabilistic' method is used. -#' @param k the number of the nearest neghbours for which their centroid is calculated if the +#' @param k the number of the nearest neighbours for which their centroid is calculated if the #' \code{method.indicator} is equal to 2 (i.e. deterministic method). #' @param noise the percentage of the initial variance that is used as the variance of the embedded #' noise if the \code{method.indicator} is equal to 3 (i.e. probabilistic method). @@ -36,6 +36,10 @@ histogramDS1 <- function(xvect, method.indicator, k, noise){ nfilter.levels.max <- as.numeric(thr$nfilter.levels.max) # ################################################################## + # back-up current .Random.seed and revert on.exit + old_seed <- .Random.seed + on.exit(.Random.seed <- old_seed, add = TRUE) + # print an error message if the input vector is not a numeric if(!(is.numeric(xvect))){ output <- "The input vector is not a numeric!" diff --git a/R/histogramDS2.R b/R/histogramDS2.R index 34b3e143..1f7a8acc 100644 --- a/R/histogramDS2.R +++ b/R/histogramDS2.R @@ -14,7 +14,7 @@ #' control that is used for the generation of the histogram. If the value is equal to 1 then the #' 'smallCellsRule' is used. If the value is equal to 2 then the 'deterministic' method is used. #' If the value is set to 3 then the 'probabilistic' method is used. -#' @param k the number of the nearest neghbours for which their centroid is calculated if the +#' @param k the number of the nearest neighbours for which their centroid is calculated if the #' \code{method.indicator} is equal to 2 (i.e. deterministic method). #' @param noise the percentage of the initial variance that is used as the variance of the embedded #' noise if the \code{method.indicator} is equal to 3 (i.e. probabilistic method). @@ -38,6 +38,10 @@ histogramDS2 <- function (xvect, num.breaks, min, max, method.indicator, k, nois # nfilter.levels.max <- as.numeric(thr$nfilter.levels.max) # ################################################################## + # back-up current .Random.seed and revert on.exit + old_seed <- .Random.seed + on.exit(.Random.seed <- old_seed, add = TRUE) + if (method.indicator==1){ # Check if the number of breaks meets the DataSHIELD privacy criteria (disclosure control for diff --git a/R/lexisDS1.R b/R/lexisDS1.R index 0c2a7534..6c0f8de7 100644 --- a/R/lexisDS1.R +++ b/R/lexisDS1.R @@ -1,11 +1,14 @@ #' #' @title lexisDS1 #' -#' @description The first serverside function called by ds.lexis. +#' @description The first server-side function called by ds.lexis. #' @details This is an aggregate function. #' For more details see the extensive header for ds.lexis. #' @param exitCol a character string specifying the variable holding the time that each individual is censored or fails +#' #' @author Burton PR +#' +#' @return List with `max.time` #' @export #' lexisDS1 <- function(exitCol=NULL){ diff --git a/R/lexisDS2.R b/R/lexisDS2.R index 14b3cfdb..e4c08117 100644 --- a/R/lexisDS2.R +++ b/R/lexisDS2.R @@ -7,7 +7,7 @@ #' regression. lexisDS2 also #' carries out a series of disclosure checks and if the arguments or data fail any of #' those tests, -#' creation of the exapanded dataframe is blocked and an appropriate serverside error +#' creation of the expanded dataframe is blocked and an appropriate serverside error #' message is stored. #' For more details see the extensive header for ds.lexis. #' @param datatext a clientside provided character string specifying the data.frame @@ -18,7 +18,7 @@ #' @param maxmaxtime a clientside generated object specifying the maximum follow up #' time in any of the sources #' @param idCol a clientside generated character string specifying the variable -#' holding the IDs of indivuals in the data set to be expanded +#' holding the IDs of individuals in the data set to be expanded #' @param entryCol a clientside specified character string identifying the variable #' holding the time that each individual starts follow up #' @param exitCol a clientside specified character string identifying the variable @@ -29,7 +29,10 @@ #' column names of additional variables to include in the #' final expanded table. If the 'variables' argument is not set (is null) but the #' 'data' argument is set the full data.frame will be expanded and carried forward +#' #' @author Burton PR +#' +#' @return List with `expanded.table` #' @export #' lexisDS2 <- function(datatext=NULL, intervalWidth, maxmaxtime, idCol, entryCol, exitCol, statusCol, vartext=NULL){ @@ -192,13 +195,13 @@ lexisDS2 <- function(datatext=NULL, intervalWidth, maxmaxtime, idCol, entryCol, period.surv<-end.breaks-start.breaks - print(start.breaks) - print(end.breaks) + message(paste0(start.breaks, collapse = ", ")) + message(paste0(end.breaks, collapse = ", ")) totints<-length(end.breaks) totsubs<-dim(DF)[1] - print(totints) - print(totsubs) + message(totints) + message(totsubs) survival.matrix<-matrix(data=0,totsubs,totints) diff --git a/R/lexisDS3.R b/R/lexisDS3.R index 71fb3b29..75a31906 100644 --- a/R/lexisDS3.R +++ b/R/lexisDS3.R @@ -8,7 +8,8 @@ #' returned output from ds.lexis. Specifically, without lexisDS3 the output consists of a table within #' a list, but lexisDS3 converts this directly into a dataframe. #' For more details see the extensive header for ds.lexis. -#' +#' +#' @return Data frame with `messageobj` object #' @export #' lexisDS3 <- function(){ diff --git a/R/listDisclosureSettingsDS.R b/R/listDisclosureSettingsDS.R index 67d3d4f8..7d4aedc8 100644 --- a/R/listDisclosureSettingsDS.R +++ b/R/listDisclosureSettingsDS.R @@ -4,6 +4,8 @@ #' ds.listDisclosureSettings #' @details For more details see the extensive header for ds.listDisclosureSettings #' @author Paul Burton, Demetris Avraam for DataSHIELD Development Team +#' +#' @return List with DataSHIELD disclosure settings #' @export #' listDisclosureSettingsDS <- function(){ diff --git a/R/matrixMultDS.R b/R/matrixMultDS.R index 0a3408da..67f4ae64 100644 --- a/R/matrixMultDS.R +++ b/R/matrixMultDS.R @@ -2,7 +2,7 @@ #' @title matrixMultDS serverside assign function called by ds.matrixMult #' @description Calculates the matrix product of two matrices and writes output to serverside #' @details Undertakes standard matrix multiplication where with input matrices A and B with -#' dimensions A: mxn and B: nxp the output C has dimensions mxp and each elemnt C[i,j] has +#' dimensions A: mxn and B: nxp the output C has dimensions mxp and each element C[i,j] has #' value equal to the dot product of row i of A and column j of B where the dot product #' is obtained as sum(A[i,1]*B[1,j] + A[i,2]*B[2,j] + .... + A[i,n]*B[n,j]). This calculation #' is only valid if the number of columns of A is the same as the number of rows of B diff --git a/R/meanSdGpDS.R b/R/meanSdGpDS.R index b8047845..41fdb721 100644 --- a/R/meanSdGpDS.R +++ b/R/meanSdGpDS.R @@ -1,13 +1,15 @@ #' #' @title MeanSdGpDS -#' @description Serverside function called by ds.meanSdGp +#' @description Server-side function called by ds.meanSdGp #' @details Computes the mean and standard deviation across groups defined by one #' factor -#' @param X a clientside supplied character string identifying the variable for which +#' @param X a client-side supplied character string identifying the variable for which #' means/SDs are to be calculated -#' @param INDEX a clientside supplied character string identifying the factor across +#' @param INDEX a client-side supplied character string identifying the factor across #' which means/SDs are to be calculated #' @author Burton PR +#' +#' @return List with results from the group statistics #' @export #' meanSdGpDS <- function (X, INDEX){ diff --git a/R/minMaxRandDS.R b/R/minMaxRandDS.R index fbc452e6..0346eb75 100644 --- a/R/minMaxRandDS.R +++ b/R/minMaxRandDS.R @@ -19,7 +19,11 @@ #' @export #' minMaxRandDS <- function(input.var.name){ #START FUNC - + + # back-up current .Random.seed and revert on.exit + old_seed <- .Random.seed + on.exit(.Random.seed <- old_seed, add = TRUE) + input.var <- eval(parse(text=input.var.name), envir = parent.frame()) #create seed that is unknown and cannot be repeated diff --git a/R/rangeDS.R b/R/rangeDS.R index 2151dd1d..6cf7689c 100644 --- a/R/rangeDS.R +++ b/R/rangeDS.R @@ -1,33 +1,35 @@ -#' +#' #' @title returns the minimum and maximum of a numeric vector -#' @description this function is similar to R function \code{range} but instead to not return -#' the real minimum and maximum, the computed values are multiplied by a very small random number. -#' @param xvect a numerical +#' @description this function is similar to R function \code{range} but instead to not return +#' the real minimum and maximum, the computed values are multiplied by a very small random number. +#' @param xvect a numerical #' @return a numeric vector which contains the minimum and the maximum values of the vector #' @author Amadou Gaye, Demetris Avraam for DataSHIELD Development Team #' @export #' -rangeDS <- function(xvect){ - +rangeDS <- function(xvect) { + # back-up current .Random.seed and revert on.exit + old_seed <- .Random.seed + on.exit(.Random.seed <- old_seed, add = TRUE) + # print an error message if the input vector is not a numeric - if(!(is.numeric(xvect))){ + if (!(is.numeric(xvect))) { output <- "The input vector is not a numeric!" - }else{ - + } else { # the study-specific seed for random number generation seed <- getOption("datashield.seed") - if (is.null(seed)) - stop("rangeDS requires 'datashield.seed' R option to operate", call.=FALSE) + if (is.null(seed)) { + stop("rangeDS requires 'datashield.seed' R option to operate", call. = FALSE) + } set.seed(seed) - - rr <- c(min(xvect, na.rm=TRUE), max(xvect, na.rm=TRUE)) + + rr <- c(min(xvect, na.rm = TRUE), max(xvect, na.rm = TRUE)) random1 <- stats::runif(1, 0.95, 1) random2 <- stats::runif(1, 1, 1.05) - output <- c(rr[1]*random1, rr[2]*random2) + output <- c(rr[1] * random1, rr[2] * random2) } - - return (output) - + + return(output) } # AGGREGATE FUNCTION # rangeDS diff --git a/R/scatterPlotDS.R b/R/scatterPlotDS.R index 7d5514ab..bf229041 100644 --- a/R/scatterPlotDS.R +++ b/R/scatterPlotDS.R @@ -19,7 +19,7 @@ #' @param method.indicator an integer either 1 or 2. If the user selects the deterministic #' method in the client side function the method.indicator is set to 1 while if the user selects #' the probabilistic method this argument is set to 2. -#' @param k the number of the nearest neghbours for which their centroid is calculated if the +#' @param k the number of the nearest neighbours for which their centroid is calculated if the #' deterministic method is selected. #' @param noise the percentage of the initial variance that is used as the variance of the embedded #' noise if the probabilistic method is selected. @@ -43,6 +43,10 @@ scatterPlotDS <- function(x, y, method.indicator, k, noise){ #nfilter.levels.max <- as.numeric(thr$nfilter.levels.max) # ################################################################### + # back-up current .Random.seed and revert on.exit + old_seed <- .Random.seed + on.exit(.Random.seed <- old_seed, add = TRUE) + # Cbind the columns of the two variables and remove any rows that include NAs data.table <- cbind.data.frame(x, y) data.complete <- stats::na.omit(data.table) diff --git a/R/setFilterDS.R b/R/setFilterDS.R index a7814160..8a84f0bb 100644 --- a/R/setFilterDS.R +++ b/R/setFilterDS.R @@ -7,6 +7,7 @@ #' @param x a dummy argument #' @return an integer between 1 and 5 #' @keywords internal +#' @noRd #' @author Gaye, A. #' setFilterDS <- function(x=getOption("datashield.privacyLevel", default=5)){ diff --git a/R/subsetByClassHelper1.R b/R/subsetByClassHelper1.R index 1e5a9065..7c288da3 100644 --- a/R/subsetByClassHelper1.R +++ b/R/subsetByClassHelper1.R @@ -7,6 +7,7 @@ #' @param filter the minimum number observation (i.e. rows) that are allowed. #' @return a list which contains the subsets. #' @keywords internal +#' @noRd #' @author Gaye, A. #' subsetByClassHelper1 <- function(xvect=NULL, xname=NULL, filter=NULL){ diff --git a/R/subsetByClassHelper2.R b/R/subsetByClassHelper2.R index a888030c..f59cc907 100644 --- a/R/subsetByClassHelper2.R +++ b/R/subsetByClassHelper2.R @@ -12,6 +12,7 @@ #' @return a list which contains the subsets, their names and an integer that indicates how many columns were #' not factors. #' @keywords internal +#' @noRd #' @author Gaye, A. #' subsetByClassHelper2 <- function(df=NULL, iter=NULL, filter=NULL){ diff --git a/R/subsetByClassHelper3.R b/R/subsetByClassHelper3.R index c43962eb..de401f62 100644 --- a/R/subsetByClassHelper3.R +++ b/R/subsetByClassHelper3.R @@ -10,6 +10,7 @@ #' @return a list which contains the subsets, their names and an integer that indicates if #' the variable specified by user is a factor. #' @keywords internal +#' @noRd #' @author Gaye, A. #' subsetByClassHelper3 <- function(df=NULL, indx1=NULL, filter=NULL){ diff --git a/R/subsetDS.R b/R/subsetDS.R index 3760902a..7ba57ac2 100644 --- a/R/subsetDS.R +++ b/R/subsetDS.R @@ -1,10 +1,10 @@ #' #' @title Generates a valid subset of a table or a vector #' @description The function uses the R classical subsetting with squared brackets '[]' and allows also to -#' subset using a logical oprator and a threshold. The object to subset from must be a vector (factor, numeric -#' or charcater) or a table (data.frame or matrix). +#' subset using a logical operator and a threshold. The object to subset from must be a vector (factor, numeric +#' or character) or a table (data.frame or matrix). #' @details If the input data is a table: The user specifies the rows and/or columns to include in the subset if the input -#' object is a table; the columns can be refered to by their names. The name of a vector (i.e. a variable) can also be provided +#' object is a table; the columns can be referred to by their names. The name of a vector (i.e. a variable) can also be provided #' with a logical operator and a threshold (see example 3). #' If the input data is a vector: when the parameters 'rows', 'logical' and 'threshold' are all provided the last two are ignored ( #' 'rows' has precedence over the other two parameters then). @@ -21,9 +21,9 @@ #' @param th a numeric, the threshold to use in conjunction with the logical parameter. This parameter is ignored #' if the input data is not a vector. #' @param varname a character, if the input data is a table, if this parameter is provided along with the 'logical' and 'threshold' -#' parameters, a subtable is based the threshold applied to the speicified variable. This parameter is however ignored if the parameter +#' parameters, a subtable is based the threshold applied to the specified variable. This parameter is however ignored if the parameter #' 'rows' and/or 'cols' are provided. -#' @return a subset of the vector, matric or dataframe as specified is stored on the server side +#' @return a subset of the vector, matrix or dataframe as specified is stored on the server side #' @author Gaye, A. #' @export #' diff --git a/R/table1DDS.R b/R/table1DDS.R index fb67c544..d73e3867 100644 --- a/R/table1DDS.R +++ b/R/table1DDS.R @@ -3,7 +3,7 @@ #' @description This function generates a 1-dimensional table where potentially disclosive cells. #' (based on the set threshold) are replaced by a missing value ('NA'). #' @details It generates a 1-dimensional tables where valid (non-disclosive) 1-dimensional tables are defined -#' as data from sources where no table cells have counts between 1 and the set threshold. When the ouput table +#' as data from sources where no table cells have counts between 1 and the set threshold. When the output table #' is invalid all cells but the total count are replaced by missing values. Only the total count is visible #' on the table returned to the client site. A message is also returned with the 1-dimensional; the message #' says "invalid table - invalid counts present" if the table is invalid and 'valid table' otherwise. diff --git a/R/table2DDS.R b/R/table2DDS.R index 41de343c..5b4f5dd1 100644 --- a/R/table2DDS.R +++ b/R/table2DDS.R @@ -3,7 +3,7 @@ #' @description This function generates a 2-dimensional contingency table where potentially disclosive cells #' (based on a set threshold) are replaced by a missing value ('NA'). #' @details It generates 2-dimensional contingency tables where valid (non-disclosive) tables are defined -#' as those where none of their cells have counts between 1 and the set threshold "nfilter.tab". When the ouput table +#' as those where none of their cells have counts between 1 and the set threshold "nfilter.tab". When the output table #' is invalid all cells except the total counts are replaced by missing values. Only the total counts are visible #' on the table returned to the client side. A message is also returned with the 2-dimensional table; the message #' says "invalid table - invalid counts present" if the table is invalid and 'valid table' otherwise. diff --git a/R/testObjExistsDS.R b/R/testObjExistsDS.R index 1b860c6c..9c3a26e7 100644 --- a/R/testObjExistsDS.R +++ b/R/testObjExistsDS.R @@ -1,13 +1,16 @@ #' #' @title testObjExistsDS -#' @description The serverside function called by ds.testObjExists +#' @description The server-side function called by ds.testObjExists #' @details Tests whether a given object exists in #' all sources. It is called at the end of all #' recently written assign functions to check the new (assigned) object has been #' created in all sources -#' @param test.obj.name a clientside provided character string specifying the variable +#' @param test.obj.name a client-side provided character string specifying the variable #' whose presence is to be tested in each data source +#' #' @author Burton PR +#' +#' @return List with `test.obj.exists` and `test.obj.class` #' @export #' testObjExistsDS <- function(test.obj.name=NULL){ diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 00000000..6586ae7c --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,20 @@ +ENV <- new.env() + +.onLoad = function(libname, pkgname) { + + #### !!! If making changes, update: .onLoad(), set_opts(), show_opts(), .check_options() + + options( + datashield.privacyLevel = 5, + default.datashield.privacyControlLevel = "banana", + default.nfilter.glm = 0.33, + default.nfilter.kNN = 3, + default.nfilter.string = 80, + default.nfilter.subset = 3, + default.nfilter.stringShort = 20, + default.nfilter.tab = 3, + default.nfilter.noise = 0.25, + default.nfilter.levels.density = 0.33, + default.nfilter.levels.max = 40 + ) +} \ No newline at end of file diff --git a/docs/404.html b/docs/404.html index 6b3e8c72..c2a9f93c 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ dsBase - 6.3.2 + 6.3.3 @@ -73,7 +73,7 @@

Page not found (404)