(This abstract was borrowed
- from another version of this item.)},
- owner = {matifou},
- timestamp = {2014.05.21},
- url = {http://ideas.repec.org/a/eee/econom/v142y2008i2p615-635.html}
-}
-
-@ARTICLE{Lee2008,
- author = {David S. Lee},
- title = {Randomized experiments from non-random selection in U.S. House elections},
- journal = {Journal of Econometrics},
- year = {2008},
- volume = {142},
- pages = {675-697},
- owner = {mat},
- timestamp = {2013.04.17}
-}
-
-@ARTICLE{LeeLemieux2010,
- author = {Lee, David S. and Thomas Lemieux},
- title = {Regression Discontinuity Designs in Economics},
- journal = {Journal of Economic Literature},
- year = {2010},
- volume = {48},
- number = {2},
- pages = {281-355},
- owner = {mat},
- timestamp = {2012.11.19}
-}
-
-@ARTICLE{McCrary2008,
- author = {McCrary, Justin},
- title = {Manipulation of the Running Variable in the Regression Discontinuity
- Design: A Density Test},
- journal = {Journal of Econometrics},
- year = {2008},
- volume = {142},
- pages = {698-714},
- owner = {mat},
- timestamp = {2013.04.17}
-}
-
-@TECHREPORT{Porter2003,
- author = {Porter, Jack},
- title = {Estimation in the Regression Discontinuity Model},
- institution = {University of Wisconsin, Madison, Department of Economics},
- year = {2003},
- owner = {mat},
- timestamp = {2013.04.17}
-}
-
-@ARTICLE{RuppertSheatherEtAl1995,
- author = {Ruppert, D. and Sheather, S. J. and Wand, M. P.},
- title = {An effective bandwidth selector for local least squares regression},
- journal = {Journal of the American Statistical Association},
- year = {1995},
- volume = {90},
- pages = {1257-1270},
- owner = {mat},
- timestamp = {2013.04.17}
-}
-
-@comment{jabref-meta: selector_publisher:}
-
-@comment{jabref-meta: selector_author:}
-
-@comment{jabref-meta: selector_journal:}
-
-@comment{jabref-meta: selector_keywords:}
-
diff --git a/RDDtools/vignettes/RDDtools.lyx b/RDDtools/vignettes/RDDtools.lyx
deleted file mode 100644
index 0f0cfce..0000000
--- a/RDDtools/vignettes/RDDtools.lyx
+++ /dev/null
@@ -1,2394 +0,0 @@
-#LyX 2.1 created this file. For more info see http://www.lyx.org/
-\lyxformat 474
-\begin_document
-\begin_header
-\textclass jss
-\begin_preamble
-
-\usepackage{amsmath}
-\usepackage{nameref}
-
-%the following commands are used only for articles and codesnippets
-
-\author{Matthieu Stigler\\Affiliation IHEID}
-\title{\pkg{RDDtools}: an overview }
-
-% the same as above, without any formatting
-\Plainauthor{Matthieu Stigler}
-\Plaintitle{\pkg{RDDtools}: a toolbox to practice }
-%if necessary, provide a short title
-\Shorttitle{\pkg{RDDtools}: a toolbox to practice }
-
-\Abstract{\pkg{RDDtools} is an R package for sharp regression discontinuity design (RDD). It offers various estimators, tests and graphical procedures following the guidelines of \citet{ImbensLemieux2008} and \citet{LeeLemieux2010}. This note illustrates how to use the package, using the well-known dataset of \citet{Lee2008}.
-
-
-NOTE THAT this is a preliminary note, on a preliminary package still under development. Changes to the function names, arguments and output are to be expected, as well as possible mistakes and inconsistencies. Please report any mistakes or suggestions to \email{Matthieu.Stigler@iheid.ch}}
-%at least one keyword is needed
-\Keywords{Regression discontinuity design, non-parametric analysis, \pkg{RDDtools}, \proglang{R}}
-%the same as above, without any formatting
-\Plainkeywords{Regression discontinuity design, non-parametric analysis,RDDtools, R}
-
-%the following commands are used only for book or software reviews
-
-%\Reviewer{Some Author\\University of Somewhere}
-%\Plainreviewer{Some Author}
-
-
-%without any formatting
-%\Plaintitle{LyX and R: Secrets of the LyX Master}
-%\Shorttitle{LyX and R}
-
-
-
-%The address of at least one author should be given in the following format
-\Address{
- Matthieu Stigler\\
- Centre for Finance and development\\
- IHEID\\
- Geneva\\
- E-mail: \email{Matthieu.Stigler@iheid.ch}
-}
-%you can add a telephone and fax number before the e-mail in the format
-%Telephone: +12/3/4567-89
-%Fax: +12/3/4567-89
-
-%if you use Sweave, include the following line (with % symbols):
-%% need no \usepackage{Sweave.sty}
-
-%% Arg min operator:
-\DeclareMathOperator*{\argmi}{arg\,min}
-\newcommand{\argmin}[1]{\underset{#1}{\argmi}}
-
-\DeclareMathOperator*{\Ker}{\mathcal{K}}
-\end_preamble
-\options nojss
-\use_default_options false
-\begin_modules
-knitr
-\end_modules
-\maintain_unincluded_children false
-\language english
-\language_package default
-\inputencoding auto
-\fontencoding global
-\font_roman default
-\font_sans default
-\font_typewriter default
-\font_math auto
-\font_default_family default
-\use_non_tex_fonts false
-\font_sc false
-\font_osf false
-\font_sf_scale 100
-\font_tt_scale 100
-\graphics default
-\default_output_format default
-\output_sync 0
-\bibtex_command default
-\index_command default
-\paperfontsize default
-\spacing single
-\use_hyperref false
-\papersize default
-\use_geometry false
-\use_package amsmath 0
-\use_package amssymb 2
-\use_package cancel 0
-\use_package esint 0
-\use_package mathdots 1
-\use_package mathtools 0
-\use_package mhchem 1
-\use_package stackrel 0
-\use_package stmaryrd 0
-\use_package undertilde 0
-\cite_engine natbib
-\cite_engine_type authoryear
-\biblio_style plainnat
-\use_bibtopic false
-\use_indices false
-\paperorientation portrait
-\suppress_date false
-\justification true
-\use_refstyle 0
-\index Index
-\shortcut idx
-\color #008000
-\end_index
-\secnumdepth 3
-\tocdepth 3
-\paragraph_separation indent
-\paragraph_indentation default
-\quotes_language english
-\papercolumns 1
-\papersides 1
-\paperpagestyle default
-\tracking_changes false
-\output_changes false
-\html_math_output 0
-\html_css_as_file 0
-\html_be_strict false
-\end_header
-
-\begin_body
-
-\begin_layout Standard
-\begin_inset CommandInset toc
-LatexCommand tableofcontents
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Section
-Introduction
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-addcontentsline{toc}{section}{Introduction}
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Subsection
-Introduction to RDD
-\end_layout
-
-\begin_layout Standard
-\begin_inset Note Note
-status open
-
-\begin_layout Plain Layout
-The Regression Discontinuity Design (RDD) is a method for impact
- evaluation in situations where attribution of the programme cannot be assumed
- to be random, yet is based on a known selection rule.
- Examples of such situations are scholarships attributed based on a score
- (the seminal example due to
-\begin_inset CommandInset citation
-LatexCommand citealp
-key "ThistlewaiteCampbell1960"
-
-\end_inset
-
-), a maximum number of children in a classroom
-\begin_inset CommandInset citation
-LatexCommand citep
-key "AngristLavy1999"
-
-\end_inset
-
-, majority rules for election
-\begin_inset CommandInset citation
-LatexCommand citep
-key "Lee2008"
-
-\end_inset
-
- or the choice of an HIV training programme targeting small schools
-\begin_inset CommandInset citation
-LatexCommand citep
-key "ArcandWouabe2010"
-
-\end_inset
-
-.
- The underlying idea is that, although the overall attribution of the programme is not random, one can
-\end_layout
-
-\begin_layout Plain Layout
-exploit the discontinuities in the programme attribution introduced by
- the rule, assuming that assignment is as good as random around the discontinuity point.
-
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Subsection
-Introduction to RDDtools
-\end_layout
-
-\begin_layout Standard
-The R package
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-pkg{RDDtools}
-\end_layout
-
-\end_inset
-
- aims at offering a complete toolbox for regression discontinuity design,
- following the step-by-step recommendations of
-\begin_inset CommandInset citation
-LatexCommand citet
-key "ImbensLemieux2008"
-
-\end_inset
-
- and
-\begin_inset CommandInset citation
-LatexCommand citet
-key "LeeLemieux2010"
-
-\end_inset
-
-.
- Summarising the approaches advocated in the two papers, an RDD analysis
- comprises the following steps:
-\end_layout
-
-\begin_layout Enumerate
-Graphical representation of the data
-\end_layout
-
-\begin_layout Enumerate
-Estimation
-\end_layout
-
-\begin_layout Enumerate
-Validity tests
-\end_layout
-
-\begin_layout Standard
-We add to this list a step that is too often forgotten, yet can be very
- burdensome: data preparation.
- Hence, this list is extended with the fundamental step 0, which involves
- preparing the data in the right way.
-
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-pkg{RDDtools}
-\end_layout
-
-\end_inset
-
- offers an object-oriented approach to the analysis, building on the R mechanism
- of S3 methods and classes.
- Concretely, this implies that the user has to specify the input data only
- once, and that most of the functions can be called directly on the new
- object of class
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDdata}
-\end_layout
-
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Section
-Step 0: data input
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-addcontentsline{toc}{section}{Step 0: data input}
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-As the first step of the analysis, the user has to pass the input data to
- the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDdata}
-\end_layout
-
-\end_inset
-
- function, which takes the following arguments:
-\end_layout
-
-\begin_layout Description
-y The outcome variable
-\end_layout
-
-\begin_layout Description
-x The forcing variable
-\end_layout
-
-\begin_layout Description
-cutpoint The cutpoint/threshold (note that only one cutpoint can be given)
-\end_layout
-
-\begin_layout Description
-covar Optional covariates
-\end_layout
-
-\begin_layout Standard
-The RDDdata function returns an object of class
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDdata}
-\end_layout
-
-\end_inset
-
-, as well as of the usual
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-proglang{R}
-\end_layout
-
-\end_inset
-
- class
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{data.frame}
-\end_layout
-
-\end_inset
-
-.
-
-\end_layout
-
-\begin_layout Standard
-To illustrate this, we show how to use the function with the benchmark dataset of
-
-\begin_inset CommandInset citation
-LatexCommand citet
-key "Lee2008"
-
-\end_inset
-
-, adding randomly generated covariates for the sake of illustration.
- The dataset is shipped with the package, and is available under the name
-
-\emph on
-Lee2008.
-
-\emph default
-Using the R
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{head}
-\end_layout
-
-\end_inset
-
- function, we look at the first rows of the dataset:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-library(RDDtools)
-\end_layout
-
-\begin_layout Plain Layout
-
-data(Lee2008)
-\end_layout
-
-\begin_layout Plain Layout
-
-head(Lee2008)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-The data is already clean, so the only step required is to feed it into the
- RDDdata function, adding the information on the cutpoint.
- For illustration purposes, we also add some random covariates gathered in a data frame
- Z:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-n_Lee <- nrow(Lee2008)
-\end_layout
-
-\begin_layout Plain Layout
-
-Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2),
-\end_layout
-
-\begin_layout Plain Layout
-
-z3=sample(letters[1:3], size=n_Lee, replace=TRUE))
-\end_layout
-
-\begin_layout Plain Layout
-
-Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,cutpoint=0)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-We now have an object
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{Lee2008_rdd}
-\end_layout
-
-\end_inset
-
- of class
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDdata}
-\end_layout
-
-\end_inset
-
- (and
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{data.frame}
-\end_layout
-
-\end_inset
-
-).
- It has a specific
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{summary}
-\end_layout
-
-\end_inset
-
- method, which gives some summary information about the dataset:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-summary(Lee2008_rdd)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-Another function for
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDdata}
-\end_layout
-
-\end_inset
-
- objects is the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{plot()}
-\end_layout
-
-\end_inset
-
- function, discussed in the next section.
-
-\end_layout
-
-\begin_layout Section
-Step 1: Graphical representation
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-addcontentsline{toc}{section}{Step 1: Graphical representation}
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-Once the dataset has been formatted with the RDDdata function, it can be
- used directly for simple illustration.
- Indeed, as recommended by
-\begin_inset CommandInset citation
-LatexCommand citet
-key "LeeLemieux2010"
-
-\end_inset
-
-, it is always good to show the raw data first, if one wishes to convince the reader
- that there is a discontinuity.
- This is simply done using the standard R plot() function, which has been
- customised for RDDdata objects.
- The function shows a scatter plot of the outcome variable against the forcing
- variable.
- Following
-\begin_inset CommandInset citation
-LatexCommand citet
-key "LeeLemieux2010"
-
-\end_inset
-
-, not all single datapoints are shown: instead, a
-\begin_inset Quotes eld
-\end_inset
-
-binned
-\begin_inset Quotes erd
-\end_inset
-
- scatterplot is shown, using non-overlapping averages:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-plot(Lee2008_rdd)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-The bandwidth for the bins (also called binwidth) can be set by the user
- with the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{h}
-\end_layout
-
-\end_inset
-
- argument.
- If it is not provided by the user, the function uses by default the
- global bandwidth selector of
-\begin_inset CommandInset citation
-LatexCommand citet
-key "RuppertSheatherEtAl1995"
-
-\end_inset
-
-, implemented in the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDbw_RSW()}
-\end_layout
-
-\end_inset
-
- function.
-
-\end_layout
-
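-\begin_layout Standard
-The binning bandwidth can also be computed explicitly and passed back through
- the h argument.
- The following is only a minimal sketch: it assumes that RDDbw_RSW() accepts
- the RDDdata object directly, in the same way as RDDbw_IK() does later on:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-## assumption: RDDbw_RSW() takes the RDDdata object as its argument
-\end_layout
-
-\begin_layout Plain Layout
-
-bw_bins <- RDDbw_RSW(Lee2008_rdd)
-\end_layout
-
-\begin_layout Plain Layout
-
-plot(Lee2008_rdd, h=bw_bins)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-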
-\begin_layout Standard
-Another argument that might be useful for the user is the option
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{nplot}
-\end_layout
-
-\end_inset
-
-, which allows plotting several panels with different bandwidths:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-plot(Lee2008_rdd, nplot=3, h=c(0.02, 0.03, 0.04))
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-Note however that experience shows that displaying multiple plots tends to
- shrink the y axis considerably, reducing the visual impression
- of a discontinuity.
-
-\end_layout
-
-\begin_layout Section
-Step 2: Estimation
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-addcontentsline{toc}{section}{Step 2: Estimation}
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-RDDtools offers currently two estimators:
-\end_layout
-
-\begin_layout Itemize
-the simple parametric estimator: function
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDreg_lm()}
-\end_layout
-
-\end_inset
-
-.
-
-\end_layout
-
-\begin_layout Itemize
-the non-parametric local-linear estimator: function
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDreg_np()}
-\end_layout
-
-\end_inset
-
-.
-
-\end_layout
-
-\begin_layout Standard
-These two functions share some common arguments, which are:
-\end_layout
-
-\begin_layout Description
-RDDobject: the input data as obtained with the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDdata()}
-\end_layout
-
-\end_inset
-
- function
-\end_layout
-
-\begin_layout Description
-bw: the bandwidth.
-
-\end_layout
-
-\begin_layout Description
-covariates: this will allow adding covariates to the analysis.
- Note that it is presently NOT used.
-
-\end_layout
-
-\begin_layout Standard
-The bandwidth argument has a different behaviour in the parametric and non-param
-etric cases: while the parametric estimation can be done without a bandwidth,
- the non-parametric estimator is by definition based on a bandwidth.
- This means that the default behaviours differ: if no bandwidth is
- given for the parametric model, the model will simply be estimated without a
- bandwidth, that is covering the full sample on both sides of the cutpoint.
- On the other hand, if no bandwidth is provided in the non-parametric case,
- a bandwidth will still be computed automatically using the method advocated
- by
-\begin_inset CommandInset citation
-LatexCommand citet
-key "ImbensKalyanaraman2012"
-
-\end_inset
-
-.
-
-\end_layout
-
-\begin_layout Subsection
-Parametric
-\end_layout
-
-\begin_layout Standard
-The parametric estimator simply estimates a function over the whole sample
- (hence called
-\emph on
-pooled regression
-\emph default
- by
-\begin_inset CommandInset citation
-LatexCommand citealp
-key "LeeLemieux2010"
-
-\end_inset
-
-):
-\end_layout
-
-\begin_layout Standard
-\begin_inset Formula
-\begin{equation}
-Y=\alpha+\tau D+\beta(X-c)+\epsilon\label{eq:ParamStandard}
-\end{equation}
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-where D is a dummy variable, indicating whether the observations are above
- (or equal to) the cutoff point, i.e.
-
-\begin_inset Formula $D=I(X\geq c)$
-\end_inset
-
-.
- The parameter of interest is
-\begin_inset Formula $\tau$
-\end_inset
-
-, which represents the difference in intercepts
-\begin_inset Formula $\alpha_{r}-\alpha_{l}$
-\end_inset
-
-, i.e.
- the discontinuity.
- Note that equation
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "eq:ParamStandard"
-
-\end_inset
-
- constrains the slope to be equal on both sides of the cutoff point.
- While such a restriction should hold locally around the threshold (due to
- the assumption of random assignment around the cutoff point), the parametric
- regression is done by default using the whole sample, so the restriction
- is unlikely to hold.
- In this case, one should rather estimate:
-\end_layout
-
-\begin_layout Standard
-\begin_inset Formula
-\begin{equation}
-Y=\alpha+\tau D+\beta_{1}(X-c)+\beta_{2}D(X-c)+\epsilon\label{eq:Param2slopes}
-\end{equation}
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-so that
-\begin_inset Formula $\beta_{1}=\beta_{l}$
-\end_inset
-
-, and
-\begin_inset Formula $\beta_{2}=\beta_{r}-\beta_{l}$
-\end_inset
-
-.
-
-\end_layout
-
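-\begin_layout Standard
-As a quick cross-check, the separate-slopes specification above can also be
- fitted by hand with the base R lm() function, here on the raw Lee2008 data
- (columns x and y, cutpoint 0); the variable names below are purely illustrative:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-## hand-rolled version of the separate-slopes model, for comparison with RDDreg_lm()
-\end_layout
-
-\begin_layout Plain Layout
-
-D <- as.numeric(Lee2008$x >= 0)
-\end_layout
-
-\begin_layout Plain Layout
-
-Xc <- Lee2008$x - 0
-\end_layout
-
-\begin_layout Plain Layout
-
-manual_fit <- lm(Lee2008$y ~ D * Xc)
-\end_layout
-
-\begin_layout Plain Layout
-
-coef(manual_fit)["D"]
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-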
-\begin_layout Standard
-The two estimators are available with the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDreg_lm()}
-\end_layout
-
-\end_inset
-
- function, the choice between the specifications being made through the
-
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{slope=c("separate", "same")}
-\end_layout
-
-\end_inset
-
- argument:
-\end_layout
-
-\begin_layout Description
-separate: the default, estimates different slopes, i.e.
- equation
-\begin_inset space ~
-\end_inset
-
-
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "eq:Param2slopes"
-
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Description
-same: Estimates a common slope, i.e.
- equation
-\begin_inset space ~
-\end_inset
-
-
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "eq:ParamStandard"
-
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-Note that the order of X has been set to 1 in both cases.
- If the function shows moderate non-linearity, this can potentially be captured
- by adding further powers of X, leading to (for the separate-slopes equation):
-\end_layout
-
-\begin_layout Standard
-\begin_inset Formula
-\begin{equation}
-Y=\alpha+\tau D+\beta_{1}^{1}(X-c)+\beta_{2}^{1}D(X-c)+\ldots+\beta_{1}^{p}(X-c)^{p}+\beta_{2}^{p}D(X-c)^{p}+\epsilon\label{eq:ParamSlopesPowers}
-\end{equation}
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-The order of the polynomial can be adjusted with the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{order}
-\end_layout
-
-\end_inset
-
- argument.
-
-\end_layout
-
-\begin_layout Standard
-Finally, the estimator can be restricted to a (symmetric) window around
- the cutoff point, as is done usually in practice.
- This is done using the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{bw}
-\end_layout
-
-\end_inset
-
- option.
-
-\end_layout
-
-\begin_layout Standard
-In summary, the function
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDreg_lm()}
-\end_layout
-
-\end_inset
-
- has three main options:
-\end_layout
-
-\begin_layout Description
-slope: Whether to use different slopes on each side of the cutoff (default)
- or not.
-\end_layout
-
-\begin_layout Description
-order: Order of the polynomial in X.
- Defaults to 1.
-\end_layout
-
-\begin_layout Description
-bw: Optional window around the cutoff within which to estimate the model.
- Defaults to the full data.
-
-\end_layout
-
-\begin_layout Standard
-We now show the different options, still using the Lee dataset:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-reg_linear_1 <- RDDreg_lm(Lee2008_rdd)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-We now estimate different versions, first restricting the slope to be the
- same, then changing the order, and finally using a smaller window:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-reg_linear_2 <- RDDreg_lm(Lee2008_rdd, slope="same")
-\end_layout
-
-\begin_layout Plain Layout
-
-reg_linear_3 <- RDDreg_lm(Lee2008_rdd, order=3)
-\end_layout
-
-\begin_layout Plain Layout
-
-reg_linear_4 <- RDDreg_lm(Lee2008_rdd, bw=0.4)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-The model's output is shown with the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{print()}
-\end_layout
-
-\end_inset
-
- and
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{summary()}
-\end_layout
-
-\end_inset
-
- function: while the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{print()}
-\end_layout
-
-\end_inset
-
- function shows only minimal information and the LATE estimate, the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{summary()}
-\end_layout
-
-\end_inset
-
- function shows the full output of the underlying regression model:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-reg_linear_1
-\end_layout
-
-\begin_layout Plain Layout
-
-summary(reg_linear_1)
-\end_layout
-
-\begin_layout Plain Layout
-
-reg_linear_2
-\end_layout
-
-\begin_layout Plain Layout
-
-reg_linear_3
-\end_layout
-
-\begin_layout Plain Layout
-
-reg_linear_4
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-Finally, a
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{plot()}
-\end_layout
-
-\end_inset
-
- function adds the estimated curve to the binned plot.
- Here we show the difference between the model estimated with polynomial
- of order 1 and order 3:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-par(mfrow=c(2,1))
-\end_layout
-
-\begin_layout Plain Layout
-
-plot(reg_linear_1)
-\end_layout
-
-\begin_layout Plain Layout
-
-plot(reg_linear_3)
-\end_layout
-
-\begin_layout Plain Layout
-
-par(mfrow=c(1,1))
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Subsection
-Non-parametric
-\end_layout
-
-\begin_layout Standard
-Although the parametric estimator is often used in practice, another estimator
- has important appeal in this context, where one is interested in estimating
- a regression just around a cutoff.
- In this case, non-parametric estimators such as the local-linear kernel
- regression of
-\begin_inset CommandInset citation
-LatexCommand citet
-key "FanGijbels1992,FanGijbels1996"
-
-\end_inset
-
-, which aim at estimating a regression locally at each point, have interesting
- features, as advocated by
-\begin_inset CommandInset citation
-LatexCommand citet
-key "Porter2003"
-
-\end_inset
-
-.
- A local linear regression amounts to a simple weighted linear regression,
- where the weights are given by a kernel function.
- Formally, the local-linear estimator (LLE) is given by its estimating equation:
-\end_layout
-
-\begin_layout Standard
-\begin_inset Note Note
-status open
-
-\begin_layout Plain Layout
-
-\backslash
-hat{
-\backslash
-alpha(c)},
-\backslash
-hat{
-\backslash
-beta(c)},
-\backslash
-hat{
-\backslash
-tau(c)} =
-\backslash
-argmin{
-\backslash
-alpha,
-\backslash
-beta,
-\backslash
-tau}
-\backslash
-sum_{i=1}^n
-\backslash
-left(Y_i -
-\backslash
-alpha -
-\backslash
-tau D -
-\backslash
-beta (X_i-c)
-\backslash
-right )^2 K(
-\backslash
-frac{X_i-c}{h})
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-
-\family roman
-\series medium
-\shape up
-\size normal
-\emph off
-\bar no
-\strikeout off
-\uuline off
-\uwave off
-\noun off
-\color none
-\begin_inset Formula
-\begin{equation}
-\hat{\alpha}(c),\hat{\beta}(c),\hat{\tau}(c)=\argmin{\alpha,\beta,\tau}\sum_{i=1}^{n}\left(Y_{i}-\alpha-\tau D-\beta(X_{i}-c)\right)^{2}\mathcal{K}\left(\frac{X_{i}-c}{h}\right)\label{eq:LLEform}
-\end{equation}
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-where
-\begin_inset Formula $\mathcal{K}(\cdot)$
-\end_inset
-
- is a kernel function attributing weights to each point according to their
- distance to the point c.
- Note that the parameters
-\begin_inset Formula $\alpha$
-\end_inset
-
-,
-\begin_inset Formula $\beta$
-\end_inset
-
- and
-\begin_inset Formula $\tau$
-\end_inset
-
- are written as functions of 
-\begin_inset Formula $c$
-\end_inset
-
- to emphasize the fact that these are
-\emph on
-local
-\emph default
- estimates, unlike in the parametric case.
- The kernel used in RDDtools here is the triangular kernel (also called
-
-\emph on
-edge
-\emph default
- function sometimes):
-\begin_inset Formula $K(x)=I(|x|\leq1)(1-|x|)$
-\end_inset
-
-.
- This choice, which departs from the suggestion of
-\begin_inset CommandInset citation
-LatexCommand citet
-key "LeeLemieux2010"
-
-\end_inset
-
-, is driven by the fact that the triangular kernel was shown to be optimal
- when one estimates a parameter at a boundary, which is precisely our case
- here
-\begin_inset CommandInset citation
-LatexCommand citep
-key "ChengFanEtAl1997"
-
-\end_inset
-
-.
- Unlike the package
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-pkg{rdd}
-\end_layout
-
-\end_inset
-
-, we do not offer other kernels in
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-pkg{RDDtools}
-\end_layout
-
-\end_inset
-
-, since the kernel selected is optimal, and changing the kernel is found
- to have little impact compared to changing the bandwidths.
-\end_layout
-
-\begin_layout Standard
-Note that the LLE estimator reduces to a weighted OLS (WOLS) at
- each point
-\begin_inset Foot
-status open
-
-\begin_layout Plain Layout
-See
-\begin_inset CommandInset citation
-LatexCommand citep
-after "equ. 3.4, page 58"
-key "FanGijbels1996"
-
-\end_inset
-
-.
-
-\end_layout
-
-\end_inset
-
-, which allows using the usual regression function
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{lm()}
-\end_layout
-
-\end_inset
-
- in R, specifying the weights as given by the kernel.
- However, although this is a WOLS, the variance of the LLE is not the same
- as that of the WOLS, unless one is ready to assume that the bandwidth used
- is the true
-\emph on
-bandwidth
-\emph default
-
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-A second option is to use a smaller bandwidth, in which case standard inference
- can be applied.
- This has however the drawback of using a sub-optimal bandwidth, with a
- slower rate of convergence.
-
-\end_layout
-
-\end_inset
-
-.
- However, most, if not all, papers in the literature do use the standard
- WOLS inference, possibly adjusted for heteroskedasticity.
- This is also what is currently done in the RDDtools package, although we intend
- to implement the corrected inference following the work of
-\begin_inset CommandInset citation
-LatexCommand citet
-key "CalonicoCattaneoEtAl2012"
-
-\end_inset
-
-.
-
-\end_layout
-
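-\begin_layout Standard
-To make the weighted-OLS representation concrete, the following sketch re-estimates
- the discontinuity with base R only, following the estimating equation above,
- with triangular kernel weights and an arbitrary illustrative bandwidth of 0.3:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-## local linear fit at the cutoff as a weighted OLS with triangular kernel weights
-\end_layout
-
-\begin_layout Plain Layout
-
-h <- 0.3  ## illustrative bandwidth, not an optimal choice
-\end_layout
-
-\begin_layout Plain Layout
-
-w <- pmax(0, 1 - abs(Lee2008$x - 0)/h)
-\end_layout
-
-\begin_layout Plain Layout
-
-D <- as.numeric(Lee2008$x >= 0)
-\end_layout
-
-\begin_layout Plain Layout
-
-lle_wols <- lm(y ~ D + I(x - 0), data=Lee2008, weights=w)
-\end_layout
-
-\begin_layout Plain Layout
-
-coef(lle_wols)["D"]  ## the local estimate of the discontinuity
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-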
-\begin_layout Standard
-Another question that arises is the choice of the bandwidth, a crucial
- question since this choice has a huge impact on the estimation.
- Typically, decreasing the bandwidth will reduce the bias of the estimator,
- but increase its variance.
- One way of choosing the bandwidth is then to try to minimise the mean-squared
- error (MSE) of the estimator, which trades off bias and variance.
- This approach is pursued by
-\begin_inset CommandInset citation
-LatexCommand citet
-key "ImbensKalyanaraman2012"
-
-\end_inset
-
-, and is available in
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-pkg{RDDtools}
-\end_layout
-
-\end_inset
-
- with the function
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDbw_IK()}
-\end_layout
-
-\end_inset
-
-.
- This function simply takes an RDDdata object as input, and returns the optimal
- bandwidth according to the MSE criterion.
-
-\end_layout
-
-\begin_layout Standard
-As an illustration, we now apply the non-parametric estimator to the Lee
- dataset, estimating first the bandwidth and then the discontinuity with
-
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDreg_np()}
-\end_layout
-
-\end_inset
-
-:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-bw_IK <- RDDbw_IK(Lee2008_rdd)
-\end_layout
-
-\begin_layout Plain Layout
-
-bw_IK
-\end_layout
-
-\begin_layout Plain Layout
-
-reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd, bw=bw_IK)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-The output, of class
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDreg_np}
-\end_layout
-
-\end_inset
-
-, has the usual
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{print()}
-\end_layout
-
-\end_inset
-
-,
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{summary()}
-\end_layout
-
-\end_inset
-
- and
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{plot()}
-\end_layout
-
-\end_inset
-
- functions:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-reg_nonpara
-\end_layout
-
-\begin_layout Plain Layout
-
-summary(reg_nonpara)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-The
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{plot()}
-\end_layout
-
-\end_inset
-
- function shows the point estimates
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-Note that the estimates are obtained with the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{locpoly()}
-\end_layout
-
-\end_inset
-
- function from package
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-pkg{KernSmooth}
-\end_layout
-
-\end_inset
-
-.
- This has however the disadvantage that the kernel is not the same as
- the one used previously, since the locpoly function uses a Gaussian kernel, while
- we use a triangular one.
- Since this is only for visual purposes, the difference should however not
- be perceptible.
- Furthermore, using the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{locpoly()}
-\end_layout
-
-\end_inset
-
- function has the advantage that the algorithm is considerably faster, since the
- authors implemented a fast binned version, see
-\begin_inset CommandInset citation
-LatexCommand citet
-after "section 3.6"
-key "FanGijbels1996"
-
-\end_inset
-
-.
-
-\end_layout
-
-\end_inset
-
- over a grid defined within the bandwidth range, i.e.
- the parameter
-\begin_inset Formula $\alpha(x)$
-\end_inset
-
- from equation
-\begin_inset space ~
-\end_inset
-
-
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "eq:LLEform"
-
-\end_inset
-
- for every
-\begin_inset Formula $x\in[c-bw;c+bw]$
-\end_inset
-
-.
- This should not be confused with the line drawn in the parametric plots,
- which show the curve
-\begin_inset Formula $y=f(x)=\hat{\alpha}+\hat{\beta}(x-c)+\hat{\tau}D$
-\end_inset
-
-.
-
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-plot(reg_nonpara)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Subsection
-Assessing the sensitivity of the estimator
-\end_layout
-
-\begin_layout Standard
-Both the parametric and non-parametric estimators are dependent on the choice
- of extra parameters such as the polynomial order or the bandwidth.
- It is however known that this choice can have a big impact, especially
- for the bandwidth in the non-parametric case.
- A simple way to assess the sensitivity of the results is to plot the value
- of the estimate against multiple bandwidths.
- This is the purpose of the function
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{plotSensi()}
-\end_layout
-
-\end_inset
-
-, which works both on
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDreg_lm()}
-\end_layout
-
-\end_inset
-
- as well as
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDreg_np()}
-\end_layout
-
-\end_inset
-
-.
- In the former case, the function will assess the sensitivity against the
- polynomial order (and the bandwidth, if one was specified), while
- in the latter case against the bandwidth.
-
-\end_layout
-
-\begin_layout Standard
-We illustrate this on the previous non-parametric estimator:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-plotSensi(reg_nonpara, device="ggplot")
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-and we illustrate it also on the parametric estimator where a bandwidth
- was specified:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-plotSensi(reg_linear_4, device="ggplot")
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Section
-Step 3: Validity tests
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-addcontentsline{toc}{section}{Step 3: Validity tests}
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-Once the discontinuity has been estimated and its sensitivity to the bandwidth
- choice assessed, the last step in the analysis is to run a few validity
- tests.
-
-\end_layout
-
-\begin_layout Subsection
-Placebo tests
-\end_layout
-
-\begin_layout Standard
-A way to convince readers that the discontinuity one has found is a
- true one is to show that it is not a spurious result one could have
- obtained at a random cutoff.
- Hence, as advocated by
-\begin_inset CommandInset citation
-LatexCommand citet
-key "ImbensLemieux2008"
-
-\end_inset
-
-, one can run placebo tests, where one estimates a discontinuity but at
- a different point than the true cutoff.
- This is available through the
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{plotPlacebo()}
-\end_layout
-
-\end_inset
-
- function, which works on
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDreg_lm}
-\end_layout
-
-\end_inset
-
- or
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-code{RDDreg_np}
-\end_layout
-
-\end_inset
-
- objects.
- An important question is at which points this should be tested.
- The testing sample should not contain the true cutoff point (so that
- the presence of a discontinuity at that point does not impact the estimates
- at other points), and the placebo cutoffs should be far away from that cutoff (as well
- as from the min and max of the whole distribution) so that a fair amount
- of points is available on both sides for estimation.
- The default is then to use points on the left lying within the first and
- last quartiles of the left sample, and likewise on the right.
-\end_layout
-
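-\begin_layout Standard
-The region over which the default placebo cutoffs are taken can be inspected
- directly on the forcing variable; a small base R sketch, which assumes that the
- first and last quartiles correspond to the 25% and 75% quantiles of each side:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-## quantiles of the forcing variable on each side of the true cutoff (0)
-\end_layout
-
-\begin_layout Plain Layout
-
-quantile(Lee2008$x[Lee2008$x < 0], probs=c(0.25, 0.75))
-\end_layout
-
-\begin_layout Plain Layout
-
-quantile(Lee2008$x[Lee2008$x >= 0], probs=c(0.25, 0.75))
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-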
-\begin_layout Standard
-We illustrate this on the non-parametric estimator:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-plotPlacebo(reg_nonpara, device="ggplot")
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Subsection
-Forcing variable
-\end_layout
-
-\begin_layout Standard
-One of the cases where the assumptions underlying the RDD analysis might
- be incorrect is when participants are allowed to manipulate the variable
- that leads to treatment, i.e.
- are able to affect whether they are treated or not.
- This question is usually answered factually, looking at the context of
- the experiment.
- One can however also test whether the forcing variable itself shows a trace
- of manipulation, which would result in a discontinuity of its density,
- as suggested by
-\begin_inset CommandInset citation
-LatexCommand citet
-key "McCrary2008"
-
-\end_inset
-
-.
-
-\end_layout
-
-\begin_layout Standard
-The test was implemented by D. Dimmery in the package
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-pkg{rdd}
-\end_layout
-
-\end_inset
-
-, and is simply wrapped by the function dens_test(), so that it works directly
- on an RDDdata object:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-dens_test(Lee2008_rdd)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-The test automatically returns a plot, showing the density estimates at
- the left and right of the cutoff, together with the confidence intervals
- of these estimates.
- One rejects the null hypothesis of no discontinuity if visually the confidence
- intervals do not overlap.
-
-\end_layout
-
-\begin_layout Subsection
-Baseline Covariates
-\end_layout
-
-\begin_layout Standard
-Another crucial assumption in RDD is that treatment is randomly distributed
- around the cutoff, so that individuals close to it are similar.
- This can be easily tested, as is done in the Randomised Control Trial (RCT)
- case, by running tests for balanced covariates.
- Two kinds of tests have been implemented, allowing one to test equality of
- means (t-test) or of distributions (Kolmogorov-Smirnov).
- As this is a typical case of multiple testing, both functions offer the
- possibility to adjust the p-values with procedures such as the
- Bonferroni, Holm or the more recent Benjamini-Hochberg procedure.
-
-\end_layout
-
-\begin_layout Standard
-We run here the equality in means test:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-covarTest_mean(Lee2008_rdd)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-as well as the equality in distribution test:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-covarTest_dis(Lee2008_rdd)
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
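-\begin_layout Standard
-As an illustration of the p-value adjustment mentioned above, and independently
- of the package's own interface, the base R function p.adjust() can be applied
- to a vector of raw p-values; the values below are made up for illustration:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-<<>>=
-\end_layout
-
-\begin_layout Plain Layout
-
-## illustrative raw p-values, not taken from the tests above
-\end_layout
-
-\begin_layout Plain Layout
-
-pvals <- c(0.012, 0.20, 0.35, 0.04)
-\end_layout
-
-\begin_layout Plain Layout
-
-p.adjust(pvals, method="holm")
-\end_layout
-
-\begin_layout Plain Layout
-
-p.adjust(pvals, method="BH")
-\end_layout
-
-\begin_layout Plain Layout
-
-@
-\end_layout
-
-\end_inset
-
-
-\end_layout
-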
-\begin_layout Standard
-Since the covariates were generated randomly, independently of the forcing variable, we
- would expect none of the equality tests to reject.
-
-\end_layout
-
-\begin_layout Section
-Conclusion
-\end_layout
-
-\begin_layout Standard
-\begin_inset CommandInset bibtex
-LatexCommand bibtex
-bibfiles "RDD_refs"
-options "bibtotoc,econometrica"
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-%
-\backslash
-addcontentsline{toc}{section}{
-\backslash
-refname}
-\end_layout
-
-\begin_layout Plain Layout
-
-%
-\backslash
-bibliography{./RDDrefs}
-\end_layout
-
-\begin_layout Plain Layout
-
-%
-\backslash
-bibliography{/home/mat/Dropbox/Documents/Ordi/Bibtex/GeneralBiblio,/home/mat/Dro
-pbox/Documents/Ordi/Bibtex/biblioFAO_mat}
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\end_body
-\end_document
diff --git a/RDDtools/vignettes/RDDtools.pdf b/RDDtools/vignettes/RDDtools.pdf
deleted file mode 100644
index c52136a..0000000
Binary files a/RDDtools/vignettes/RDDtools.pdf and /dev/null differ
diff --git a/RDDtools/vignettes/RDDtools.tex b/RDDtools/vignettes/RDDtools.tex
deleted file mode 100644
index 6a7a261..0000000
--- a/RDDtools/vignettes/RDDtools.tex
+++ /dev/null
@@ -1,887 +0,0 @@
-%% LyX 2.1.0 created this file. For more info, see http://www.lyx.org/.
-%% Do not edit unless you really know what you are doing.
-\documentclass[english,nojss]{jss}\usepackage[]{graphicx}\usepackage[]{color}
-%% maxwidth is the original width if it is less than linewidth
-%% otherwise use linewidth (to make sure the graphics do not exceed the margin)
-\makeatletter
-\def\maxwidth{ %
- \ifdim\Gin@nat@width>\linewidth
- \linewidth
- \else
- \Gin@nat@width
- \fi
-}
-\makeatother
-
-\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
-\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
-\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
-\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
-\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
-\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
-\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
-\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
-\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
-\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
-
-\usepackage{framed}
-\makeatletter
-\newenvironment{kframe}{%
- \def\at@end@of@kframe{}%
- \ifinner\ifhmode%
- \def\at@end@of@kframe{\end{minipage}}%
- \begin{minipage}{\columnwidth}%
- \fi\fi%
- \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
- \colorbox{shadecolor}{##1}\hskip-\fboxsep
- % There is no \\@totalrightmargin, so:
- \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
- \MakeFramed {\advance\hsize-\width
- \@totalleftmargin\z@ \linewidth\hsize
- \@setminipage}}%
- {\par\unskip\endMakeFramed%
- \at@end@of@kframe}
-\makeatother
-
-\definecolor{shadecolor}{rgb}{.97, .97, .97}
-\definecolor{messagecolor}{rgb}{0, 0, 0}
-\definecolor{warningcolor}{rgb}{1, 0, 1}
-\definecolor{errorcolor}{rgb}{1, 0, 0}
-\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
-
-\usepackage{alltt}
-\usepackage[T1]{fontenc}
-\usepackage[latin9]{inputenc}
-\usepackage{amssymb}
-\usepackage[authoryear]{natbib}
-
-\makeatletter
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Textclass specific LaTeX commands.
- %\usepackage{Sweave}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands.
-
-\usepackage{amsmath}
-\usepackage{nameref}
-
-%the following commands are used only for articles and codesnippets
-
-\author{Matthieu Stigler\\Affiliation IHEID}
-\title{\pkg{RDDtools}: an overview }
-
-% the same as above, without any formatting
-\Plainauthor{Matthieu Stigler}
-\Plaintitle{\pkg{RDDtools}: a toolbox to practice }
-%if necessary, provide a short title
-\Shorttitle{\pkg{RDDtools}: a toolbox to practice }
-
-\Abstract{\pkg{RDDtools} is an R package for sharp regression discontinuity design (RDD). It offers various estimators, tests and graphical procedures following the guidelines of \citet{ImbensLemieux2008} and \citet{LeeLemieux2010}. This note illustrates how to use the package, using the well-known dataset of \citet{Lee2008}.
-
-
-NOTE THAT this is a preliminary note, on a preliminary package still under development. Changes to the function names, arguments and output are to be expected, as well as possible mistakes and inconsistencies. Please report any mistakes or suggestions to \email{Matthieu.Stigler@iheid.ch}}
-%at least one keyword is needed
-\Keywords{Regression discontinuity design, non-parametric analysis, \pkg{RDDtools}, \proglang{R}}
-%the same as above, without any formatting
-\Plainkeywords{Regression discontinuity design, non-parametric analysis,RDDtools, R}
-
-%the following commands are used only for book or software reviews
-
-%\Reviewer{Some Author\\University of Somewhere}
-%\Plainreviewer{Some Author}
-
-%the following commands are used only for book reviews
-%\Booktitle{LyX and \proglang{R}: Secrets of the LyX Master}
-%\Bookauthor{Book Author}
-%\Pubyear{2008}
-%\ISBN{0-12345-678-9}
-%\Pages{500}
-
-%the following command is used only for software reviews
-%\Softwaretitle{\proglang{gretl 1.7.4}}
-
-%the following commands are used only for book or software reviews
-%\Publisher{LyX Publishing Inc.}
-%\Pubaddress{LyX City}
-%\Price{USD 59.95 (P), USD 99.95 (H)}
-%\URL{http://www.lyx.org/}
-
-%without any formatting
-%\Plaintitle{LyX and R: Secrets of the LyX Master}
-%\Shorttitle{LyX and R}
-
-%the following commands are used for articles, codesnippets, book reviews and software reviews
-
-%publication information
-%do not use these commands before the article has been accepted
-%\Volume{00}
-%\Issue{0}
-%\Month{Month}
-%\Year{2000}
-%\Submitdate{2000-00-00}
-%\Acceptdate{2000-00-00}
-
-%The address of at least one author should be given in the following format
-\Address{
- Matthieu Stigler\\
- Centre for Finance and development\\
- IHEID\\
- Geneva\\
- E-mail: \email{Matthieu.Stigler@iheid.ch}
-}
-%you can add a telephone and fax number before the e-mail in the format
-%Telephone: +12/3/4567-89
-%Fax: +12/3/4567-89
-
-%if you use Sweave, include the following line (with % symbols):
-%% need no \usepackage{Sweave.sty}
-
-%% Arg min operator:
-\DeclareMathOperator*{\argmi}{arg\,min}
-\newcommand{\argmin}[1]{\underset{#1}{\argmi}}
-
-\DeclareMathOperator*{\Ker}{\mathcal{K}}
-
-\makeatother
-
-\usepackage{babel}
-\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
-\begin{document}
-\tableofcontents{}
-
-
-\section{Introduction}
-
-\addcontentsline{toc}{section}{Introduction}
-
-
-\subsection{Introduction to RDD}
-
-
-
-
-\subsection{Introduction to RDDtools}
-
-The R package \pkg{RDDtools} aims at offering a complete toolbox
-for regression discontinuity design, following the step-by-step recommendations
-of \citet{ImbensLemieux2008} and \citet{LeeLemieux2010}. Summarising
-the approaches advocated in the two papers, an RDD analysis comprises
-the following steps:
-\begin{enumerate}
-\item Graphical representation of the data
-\item Estimation
-\item Validity tests
-\end{enumerate}
-We add to this list a step that is too often forgotten, yet can be
-very burdensome: data preparation. Hence, this list is extended with
-the fundamental step 0, which involves preparing the data in the right
-way.
-
-\pkg{RDDtools} offers an object-oriented approach to the analysis, building
-on the R mechanism of S3 methods and classes. Concretely, this implies
-that the user has to specify the input data only once, and that most
-of the functions can be called directly on the new object of class
-\code{RDDdata}.
-
-
-\section{Step 0: data input}
-
-\addcontentsline{toc}{section}{Step 0: data input}
-
-As the first step of the analysis, the user has to pass the input data
-to the \code{RDDdata} function, which takes the following arguments:
-\begin{description}
-\item [{y}] The outcome variable
-\item [{x}] The forcing variable
-\item [{cutpoint}] The cutpoint/threshold (note that only one cutpoint can be
-given)
-\item [{covar}] Optional covariates
-\end{description}
-The RDDdata function returns an object of class \code{RDDdata}, as
-well as of the usual \proglang{R} class \code{data.frame}.
-
-To illustrate this, we show how to use the function with the benchmark dataset
-of \citet{Lee2008}, adding randomly generated covariates for the
-sake of illustration. The dataset is shipped with the package, and
-is available under the name \emph{Lee2008. }Using the R \code{head}
-function, we look at the first rows of the dataset:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlkwd{library}\hlstd{(RDDtools)}
-\hlkwd{data}\hlstd{(Lee2008)}
-\hlkwd{head}\hlstd{(Lee2008)}
-\end{alltt}
-\begin{verbatim}
-## x y
-## 1 0.1049 0.5810
-## 2 0.1393 0.4611
-## 3 -0.0736 0.5434
-## 4 0.0868 0.5846
-## 5 0.3994 0.5803
-## 6 0.1681 0.6244
-\end{verbatim}
-\end{kframe}
-\end{knitrout}
-
-
-The data is already clean, so the only step required is to feed it
-into the RDDdata function, adding the information on the cutpoint.
-For illustration purposes, we also add some random covariates gathered
-in a data frame Z:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlstd{n_Lee} \hlkwb{<-} \hlkwd{nrow}\hlstd{(Lee2008)}
-\hlstd{Z} \hlkwb{<-} \hlkwd{data.frame}\hlstd{(}\hlkwc{z1} \hlstd{=} \hlkwd{rnorm}\hlstd{(n_Lee),} \hlkwc{z2} \hlstd{=} \hlkwd{rnorm}\hlstd{(n_Lee,} \hlkwc{mean} \hlstd{=} \hlnum{20}\hlstd{,} \hlkwc{sd} \hlstd{=} \hlnum{2}\hlstd{),} \hlkwc{z3} \hlstd{=} \hlkwd{sample}\hlstd{(letters[}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{],}
- \hlkwc{size} \hlstd{= n_Lee,} \hlkwc{replace} \hlstd{=} \hlnum{TRUE}\hlstd{))}
-\hlstd{Lee2008_rdd} \hlkwb{<-} \hlkwd{RDDdata}\hlstd{(}\hlkwc{y} \hlstd{= Lee2008}\hlopt{$}\hlstd{y,} \hlkwc{x} \hlstd{= Lee2008}\hlopt{$}\hlstd{x,} \hlkwc{covar} \hlstd{= Z,} \hlkwc{cutpoint} \hlstd{=} \hlnum{0}\hlstd{)}
-\end{alltt}
-\end{kframe}
-\end{knitrout}
-
-
-We now have an object \code{Lee2008_rdd} of class \code{RDDdata}
-(and \code{data.frame}). It has a specific \code{summary} method,
-which gives some summary information about the dataset:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlkwd{summary}\hlstd{(Lee2008_rdd)}
-\end{alltt}
-\begin{verbatim}
-## ### RDDdata object ###
-##
-## Cutpoint: 0
-## Sample size:
-## -Full : 6558
-## -Left : 2740
-## -Right: 3818
-## Covariates: yes
-\end{verbatim}
-\end{kframe}
-\end{knitrout}
-
-
-Another function for \code{RDDdata} objects is the \code{plot()}
-function, discussed in the next section.
-
-
-\section{Step 1: Graphical representation}
-
-\addcontentsline{toc}{section}{Step 1: Graphical representation}
-
-Once the dataset has been formatted with the RDDdata function, it
-can be used directly for simple illustration. Indeed, as recommended
-by \citet{LeeLemieux2010}, it is always good to show the raw data
-first, if one wishes to convince the reader that there is a discontinuity. This
-is simply done using the standard R plot() function, which has been
-customised for RDDdata objects. The function shows a scatter plot
-of the outcome variable against the forcing variable. Following \citet{LeeLemieux2010},
-not all single datapoints are shown: instead, a ``binned'' scatterplot
-is shown, using non-overlapping averages:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlkwd{plot}\hlstd{(Lee2008_rdd)}
-\end{alltt}
-\end{kframe}
-\includegraphics[width=\maxwidth]{figure/unnamed-chunk-4}
-
-\end{knitrout}
-
-
-The bandwidth for the bins (also called binwidth) can be set by the
-user with the \code{h} argument. If it is not provided by the
-user, the function uses by default the global bandwidth of \citet{RuppertSheatherEtAl1995},
-implemented in the \code{RDDbw_RSW()} function.
-
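-The binning bandwidth can also be computed explicitly and passed back through
-the \code{h} argument; the following is only a minimal sketch, assuming that
-\code{RDDbw_RSW()} accepts the \code{RDDdata} object directly, in the same way
-as \code{RDDbw_IK()} does later on:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlcom{## assumption: RDDbw_RSW() takes the RDDdata object as its argument}
-\hlstd{bw_bins} \hlkwb{<-} \hlkwd{RDDbw_RSW}\hlstd{(Lee2008_rdd)}
-\hlkwd{plot}\hlstd{(Lee2008_rdd,} \hlkwc{h} \hlstd{= bw_bins)}
-\end{alltt}
-\end{kframe}
-\end{knitrout}
-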
-Another argument that might be useful for the user is the option \code{nplot},
-which allows plotting several panels with different bandwidths:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlkwd{plot}\hlstd{(Lee2008_rdd,} \hlkwc{nplot} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{h} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{0.02}\hlstd{,} \hlnum{0.03}\hlstd{,} \hlnum{0.04}\hlstd{))}
-\end{alltt}
-\end{kframe}
-\includegraphics[width=\maxwidth]{figure/unnamed-chunk-5}
-
-\end{knitrout}
-
-
-Note however that experience shows that displaying multiple plots tends
-to shrink the y axis considerably, reducing the visual
-impression of a discontinuity.
-
-
-\section{Step 2: Estimation}
-
-\addcontentsline{toc}{section}{Step 2: Estimation}
-
-RDDtools offers currently two estimators:
-\begin{itemize}
-\item the simple parametric estimator: function \code{RDDreg_lm()}.
-\item the non-parametric local-linear estimator: function \code{RDDreg_np()}.
-\end{itemize}
-These two functions share some common arguments, which are:
-\begin{description}
-\item [{RDDobject:}] the input data as obtained with the \code{RDDdata()}
-function
-\item [{bw:}] the bandwidth.
-\item [{covariates:}] this will allow adding covariates to the analysis.
-Note that it is presently NOT used.
-\end{description}
-The bandwidth argument has a different behaviour in the parametric
-and non-parametric cases: while the parametric estimation can be done
-without a bandwidth, the non-parametric estimator is by definition based
-on a bandwidth. This means that the default behaviours differ:
-if no bandwidth is given for the parametric model, the model will
-simply be estimated without a bandwidth, that is covering the full sample
-on both sides of the cutpoint. On the other hand, if no bandwidth
-is provided in the non-parametric case, a bandwidth will still be
-computed automatically using the method advocated by \citet{ImbensKalyanaraman2012}.
-
-
-\subsection{Parametric}
-
-The parametric estimator simply estimates a function over the whole
-sample (hence called \emph{pooled regression} by \citealp{LeeLemieux2010}):
-
-\begin{equation}
-Y=\alpha+\tau D+\beta(X-c)+\epsilon\label{eq:ParamStandard}
-\end{equation}
-
-
-where $D$ is a dummy variable indicating whether the observations are
-above (or equal to) the cutoff point, i.e. $D=I(X\geq c)$. The parameter
-of interest is $\tau$, which represents the difference in intercepts
-$\alpha_{r}-\alpha_{l}$, i.e. the discontinuity. Note that equation
-\ref{eq:ParamStandard} constrains the slope to be equal on both sides
-of the cutoff point. While such a restriction should hold locally around
-the threshold (due to the assumption of random assignment around the
-cutoff point), the parametric regression is by default done using
-the whole sample, so the restriction is unlikely to hold. In this
-case, one should rather estimate:
-
-\begin{equation}
-Y=\alpha+\tau D+\beta_{1}(X-c)+\beta_{2}D(X-c)+\epsilon\label{eq:Param2slopes}
-\end{equation}
-
-
-so that $\beta_{1}=\beta_{l}$, and $\beta_{2}=\beta_{r}-\beta_{l}$.
-
-Both specifications are available with the \code{RDDreg_lm()} function,
-the choice between them being made through the \code{slope=c("separate", "same")}
-argument:
-\begin{description}
-\item [{separate:}] the default, estimates different slopes, i.e. equation~\ref{eq:Param2slopes}.
-\item [{same:}] Estimates a common slope, i.e. equation~\ref{eq:ParamStandard}.
-\end{description}
-Note that the order of X has been set to 1 in both cases. If the function
-shows moderate non-linearity, this can potentially be captured by
-adding further powers of X, leading to (for the separate-slope equation):
-
-\begin{equation}
-Y=\alpha+\tau D+\beta_{1}^{1}(X-c)+\beta_{2}^{1}D(X-c)+\ldots+\beta_{1}^{p}(X-c)^{p}+\beta_{2}^{p}D(X-c)^{p}+\epsilon\label{eq:ParamSlopesPowers}
-\end{equation}
-
-
-The order of the polynomial can be adjusted with the \code{order}
-argument.
-
-Finally, the estimator can be restricted to a (symmetric) window around
-the cutoff point, as is usually done in practice. This is done using
-the \code{bw} option.
-
-In summary, the function \code{RDDreg_lm()} has three main options:
-\begin{description}
-\item [{slope:}] Whether to use different slopes on each side of the cutoff
-(default) or not.
-\item [{order:}] Order of the polynomial in X. Defaults to 1.
-\item [{bw:}] Optional window around the cutoff within which to estimate. Defaults to the full data.
-\end{description}
-We now show the different applications, still using the Lee dataset:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlstd{reg_linear_1} \hlkwb{<-} \hlkwd{RDDreg_lm}\hlstd{(Lee2008_rdd)}
-\end{alltt}
-\end{kframe}
-\end{knitrout}
-
-
-We now estimate different versions, first restricting the slope to
-be the same, then changing the order, and finally using a smaller
-window:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlstd{reg_linear_2} \hlkwb{<-} \hlkwd{RDDreg_lm}\hlstd{(Lee2008_rdd,} \hlkwc{slope} \hlstd{=} \hlstr{"same"}\hlstd{)}
-\hlstd{reg_linear_3} \hlkwb{<-} \hlkwd{RDDreg_lm}\hlstd{(Lee2008_rdd,} \hlkwc{order} \hlstd{=} \hlnum{3}\hlstd{)}
-\hlstd{reg_linear_4} \hlkwb{<-} \hlkwd{RDDreg_lm}\hlstd{(Lee2008_rdd,} \hlkwc{bw} \hlstd{=} \hlnum{0.4}\hlstd{)}
-\end{alltt}
-\end{kframe}
-\end{knitrout}
-
-
-The model's output is shown with the \code{print()} and \code{summary()}
-functions: while \code{print()} shows only a few pieces of information
-and the LATE estimate, \code{summary()} shows the full
-output of the underlying regression model:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlstd{reg_linear_1}
-\end{alltt}
-\begin{verbatim}
-## ### RDD regression: parametric ###
-## Polynomial order: 1
-## Slopes: separate
-## Number of obs: 6558 (left: 2740, right: 3818)
-##
-## Coefficient:
-## Estimate Std. Error t value Pr(>|t|)
-## D 0.11823 0.00568 20.8 <2e-16 ***
-## ---
-## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-\end{verbatim}
-\begin{alltt}
-\hlkwd{summary}\hlstd{(reg_linear_1)}
-\end{alltt}
-\begin{verbatim}
-##
-## Call:
-## lm(formula = y ~ ., data = dat_step1, weights = weights)
-##
-## Residuals:
-## Min 1Q Median 3Q Max
-## -0.8941 -0.0619 0.0023 0.0713 0.8640
-##
-## Coefficients:
-## Estimate Std. Error t value Pr(>|t|)
-## (Intercept) 0.43295 0.00428 101.25 < 2e-16 ***
-## D 0.11823 0.00568 20.82 < 2e-16 ***
-## x 0.29691 0.01155 25.71 < 2e-16 ***
-## x_right 0.04598 0.01350 3.41 0.00066 ***
-## ---
-## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-##
-## Residual standard error: 0.138 on 6554 degrees of freedom
-## Multiple R-squared: 0.671, Adjusted R-squared: 0.671
-## F-statistic: 4.45e+03 on 3 and 6554 DF, p-value: <2e-16
-\end{verbatim}
-\begin{alltt}
-\hlstd{reg_linear_2}
-\end{alltt}
-\begin{verbatim}
-## ### RDD regression: parametric ###
-## Polynomial order: 1
-## Slopes: same
-## Number of obs: 6558 (left: 2740, right: 3818)
-##
-## Coefficient:
-## Estimate Std. Error t value Pr(>|t|)
-## D 0.11373 0.00553 20.6 <2e-16 ***
-## ---
-## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-\end{verbatim}
-\begin{alltt}
-\hlstd{reg_linear_3}
-\end{alltt}
-\begin{verbatim}
-## ### RDD regression: parametric ###
-## Polynomial order: 3
-## Slopes: separate
-## Number of obs: 6558 (left: 2740, right: 3818)
-##
-## Coefficient:
-## Estimate Std. Error t value Pr(>|t|)
-## D 0.1115 0.0107 10.5 <2e-16 ***
-## ---
-## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-\end{verbatim}
-\begin{alltt}
-\hlstd{reg_linear_4}
-\end{alltt}
-\begin{verbatim}
-## ### RDD regression: parametric ###
-## Polynomial order: 1
-## Slopes: separate
-## Bandwidth: 0.4
-## Number of obs: 4169 (left: 2043, right: 2126)
-##
-## Coefficient:
-## Estimate Std. Error t value Pr(>|t|)
-## D 0.08863 0.00727 12.2 <2e-16 ***
-## ---
-## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-\end{verbatim}
-\end{kframe}
-\end{knitrout}
-
-
-Finally, a \code{plot()} function adds the estimated curve to the
-binned plot. Here we show the difference between the models estimated
-with a polynomial of order 1 and of order 3:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlkwd{par}\hlstd{(}\hlkwc{mfrow} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,} \hlnum{1}\hlstd{))}
-\hlkwd{plot}\hlstd{(reg_linear_1)}
-\hlkwd{plot}\hlstd{(reg_linear_3)}
-\end{alltt}
-\end{kframe}
-\includegraphics[width=\maxwidth]{figure/unnamed-chunk-9}
-\begin{kframe}\begin{alltt}
-\hlkwd{par}\hlstd{(}\hlkwc{mfrow} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,} \hlnum{1}\hlstd{))}
-\end{alltt}
-\end{kframe}
-\end{knitrout}
-
-
-
-\subsection{Non-parametric}
-
-Although the parametric estimator is often used in practice, another
-estimator has important appeal in this context, where one is interested
-in estimating a regression just around a cutoff. In this case, non-parametric
-estimators such as the local-linear kernel regression of \citet{FanGijbels1992,FanGijbels1996},
-which aim at estimating a regression locally at each point, have interesting
-features, as advocated by \citet{Porter2003}. A local linear regression
-amounts to running a simple weighted linear regression, where the weights
-are given by a kernel function. Formally, the local-linear estimator
-(LLE) is given by its estimating equation:
-
-
-
-\begin{equation}
-\hat{\alpha}(c),\hat{\beta}(c),\hat{\tau}(c)=\underset{\alpha,\beta,\tau}{\arg\min}\sum_{i=1}^{n}\left(Y_{i}-\alpha-\tau D_{i}-\beta(X_{i}-c)\right)^{2}\mathcal{K}\left(\frac{X_{i}-c}{h}\right)\label{eq:LLEform}
-\end{equation}
-
-
-where $\mathcal{K}(\cdot)$ is a kernel function attributing weights
-to each point according to its distance to the point $c$. Note that
-the parameters $\alpha$, $\beta$ and $\tau$ are written as functions
-of $c$ to emphasise that these are \emph{local} estimates,
-unlike in the parametric case. The kernel used in RDDtools is
-the triangular kernel (also sometimes called the \emph{edge} kernel):
-$K(x)=I(|x|\leq1)(1-|x|)$. This choice, which departs from the
-suggestion of \citet{LeeLemieux2010}, is driven by the fact that
-the triangular kernel was shown to be optimal when one estimates a
-parameter at a boundary, which is precisely our case here \citep{ChengFanEtAl1997}.
-Unlike the package \pkg{rdd}, we do not offer other kernels in \pkg{RDDtools},
-since the kernel selected is optimal, and changing the kernel is found
-to have little impact compared to changing the bandwidth.
-
-Note that computing the LLE reduces to running a weighted OLS (WOLS)
-at each point%
-\footnote{See \citep[equ. 3.4, page 58]{FanGijbels1996}. %
-}, which allows one to use the usual regression function \code{lm()} in
-R, specifying the weights as given by the kernel. However, although
-this is a WOLS, the variance of the LLE is not the same as that of
-the WOLS, unless one is ready to assume that the bandwidth used is
-the true \emph{bandwidth}%
-\footnote{A second option is to use a smaller bandwidth, in which case standard
-inference can be applied. This has however the drawback of using a
-sub-optimal bandwidth, with a slower rate of convergence. %
-}. However, most, if not all, papers in the literature do use the standard
-WOLS inference, possibly adjusted for heteroskedasticity. This is
-also what is currently done in the RDDtools package, although we intend
-to follow the work of \citet{CalonicoCattaneoEtAl2012}.
-
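-To fix ideas, this WOLS logic can be sketched directly with \code{lm()}
-(a simplified illustration only, not the actual RDDtools code; the
-bandwidth value is arbitrary and the Lee2008 data is assumed to be loaded):
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-h   <- 0.3                                   # arbitrary bandwidth
-dat <- data.frame(y = Lee2008$y, x = Lee2008$x)   # cutpoint is 0
-dat$D <- as.numeric(dat$x >= 0)              # treatment dummy D = I(X >= c)
-dat$w <- pmax(0, 1 - abs(dat$x / h))         # triangular kernel weights
-lm(y ~ D * x, data = dat, weights = w)       # coefficient on D: the discontinuity
-\end{alltt}
-\end{kframe}
-\end{knitrout}
-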
-Another question that arises is the choice of the bandwidth, which is
-crucial since this choice has a huge impact on the estimation.
-Typically, decreasing the bandwidth will reduce the bias of the estimator,
-but increase its variance. One way of choosing the bandwidth is then
-to try to minimise the mean-squared error (MSE) of the estimator,
-which trades off bias and variance. This approach is pursued
-by \citet{ImbensKalyanaraman2012}, and is available in \pkg{RDDtools}
-with the function \code{RDDbw_IK()}. This function simply takes an
-RDDdata object as input, and returns the optimal value according to
-the MSE criterion.
-
-As an illustration, we now use the non-parametric estimator on the
-Lee dataset, first estimating the bandwidth and then the discontinuity
-with \code{RDDreg_np()}:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlstd{bw_IK} \hlkwb{<-} \hlkwd{RDDbw_IK}\hlstd{(Lee2008_rdd)}
-\hlstd{bw_IK}
-\end{alltt}
-\begin{verbatim}
-## h_opt
-## 0.2939
-\end{verbatim}
-\begin{alltt}
-\hlstd{reg_nonpara} \hlkwb{<-} \hlkwd{RDDreg_np}\hlstd{(}\hlkwc{RDDobject} \hlstd{= Lee2008_rdd,} \hlkwc{bw} \hlstd{= bw_IK)}
-\end{alltt}
-\end{kframe}
-\end{knitrout}
-
-
-The output, of class \code{RDDreg_np}, has the usual \code{print()},
-\code{summary()} and \code{plot()} functions:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlstd{reg_nonpara}
-\end{alltt}
-\begin{verbatim}
-## ### RDD regression: nonparametric local linear###
-## Bandwidth: 0.2939
-## Number of obs: 3200 (left: 1594, right: 1606)
-##
-## Coefficient:
-## Estimate Std. Error z value Pr(>|z|)
-## D 0.07992 0.00946 8.44 <2e-16 ***
-## ---
-## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-\end{verbatim}
-\begin{alltt}
-\hlkwd{summary}\hlstd{(reg_nonpara)}
-\end{alltt}
-\begin{verbatim}
-## ### RDD regression: nonparametric local linear###
-## Bandwidth: 0.2939
-## Number of obs: 3200 (left: 1594, right: 1606)
-##
-## Weighted Residuals:
-## Min 1Q Median 3Q Max
-## -0.9775 -0.0672 -0.0050 0.0450 0.9376
-##
-## Coefficient:
-## Estimate Std. Error z value Pr(>|z|)
-## D 0.07992 0.00946 8.44 <2e-16 ***
-## ---
-## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-##
-## Local R squared: 0.356
-\end{verbatim}
-\end{kframe}
-\end{knitrout}
-
-
-The \code{plot()} function shows the point estimates%
-\footnote{Note that the estimates are obtained with the \code{locpoly()} function
-from package \pkg{KernSmooth}. This has the disadvantage
-that it does not use the same kernel as previously, since the
-\code{locpoly()} function uses a Gaussian kernel, while we use a triangular
-one. Since this is only for visual purposes, the difference should
-however not be perceptible. Furthermore, using the \code{locpoly()}
-function has the advantage that the algorithm is much faster, since
-the authors implemented a fast binned algorithm, see \citet[section 3.6]{FanGijbels1996}. %
-} over a grid defined within the bandwidth range, i.e. the parameter
-$\hat{\alpha}(x)$ from equation~\ref{eq:LLEform}, for all $x\in[c-bw;c+bw]$.
-This should not be confused with the line drawn in
-the parametric plots, which shows the curve $y=f(x)=\hat{\alpha}+\hat{\beta}(x-c)+\hat{\tau}D$.
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlkwd{plot}\hlstd{(reg_nonpara)}
-\end{alltt}
-\end{kframe}
-\includegraphics[width=\maxwidth]{figure/unnamed-chunk-12}
-
-\end{knitrout}
-
-
-
-\subsection{Assessing the sensitivity of the estimator}
-
-Both the parametric and non-parametric estimators depend on
-the choice of extra parameters such as the polynomial order or the
-bandwidth. It is however known that this choice can have a big impact,
-especially in the case of the bandwidth choice for the non-parametric
-estimator. A simple way to assess the sensitivity of the results is to
-plot the value of the estimate against multiple bandwidths. This is
-the purpose of the function \code{plotSensi()}, which works both on
-objects produced by \code{RDDreg_lm()} and by \code{RDDreg_np()}. In the former
-case, the function will assess the sensitivity against the polynomial
-order (and possibly the bandwidth, if one was specified), while in the
-latter case against the bandwidth.
-
-We illustrate this on the previous non-parametric estimator:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlkwd{plotSensi}\hlstd{(reg_nonpara,} \hlkwc{device} \hlstd{=} \hlstr{"ggplot"}\hlstd{)}
-\end{alltt}
-\end{kframe}
-\includegraphics[width=\maxwidth]{figure/unnamed-chunk-13}
-
-\end{knitrout}
-
-
-and we also illustrate it on the parametric estimator for which a bandwidth
-was specified:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlkwd{plotSensi}\hlstd{(reg_linear_4,} \hlkwc{device} \hlstd{=} \hlstr{"ggplot"}\hlstd{)}
-\end{alltt}
-\end{kframe}
-\includegraphics[width=\maxwidth]{figure/unnamed-chunk-14}
-
-\end{knitrout}
-
-
-
-\section{Step 3: Validity tests}
-
-\addcontentsline{toc}{section}{Step 3: Validity tests}
-
-Once the discontinuity has been estimated and its sensitivity to the
-bandwidth choice assessed, the last step in the analysis is to run a
-few validity tests.
-
-
-\subsection{Placebo tests}
-
-A way to convince one's readers that the discontinuity found
-is a true one is to show that it is not a spurious result that
-could have been obtained at a random cutoff. Hence, as advocated by \citet{ImbensLemieux2008},
-one can run placebo tests, where one estimates a discontinuity
-at a point other than the true cutoff. This is available through
-the \code{plotPlacebo()} function, which works on \code{RDDreg_lm}
-or \code{RDDreg_np} objects. An important question is at which points
-this should be tested. The sample used should not contain
-the true cutoff point (so that the presence of a discontinuity at that point
-does not affect the estimates at other points), and the placebo points should
-be far away from that cutoff (as well as from the min and max of the whole
-distribution), so that a fair number of points is available on both sides
-for estimation. The default is therefore to use points on the left lying
-between the first and last quartiles of the left sample, and likewise on the right.
-
-We illustrate this on the non-parametric estimator:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlkwd{plotPlacebo}\hlstd{(reg_nonpara,} \hlkwc{device} \hlstd{=} \hlstr{"ggplot"}\hlstd{)}
-\end{alltt}
-\end{kframe}
-\includegraphics[width=\maxwidth]{figure/unnamed-chunk-15}
-
-\end{knitrout}
-
-
-
-\subsection{Forcing variable}
-
-One of the cases where the assumptions underlying the RDD analysis
-might be incorrect is when participants are able to manipulate
-the variable that leads to treatment, i.e. are able to affect whether
-they are treated or not. This question is usually answered factually,
-by looking at the context of the experiment. One can however also test
-whether the forcing variable itself shows traces of manipulation,
-which would result in a discontinuity of its density, as suggested
-by \citet{McCrary2008}.
-
-The test was implemented by D. Dimmery in the package \pkg{rdd}, and is
-simply wrapped by the function \code{dens_test()}, so that it works directly
-on an RDDdata object:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlkwd{dens_test}\hlstd{(Lee2008_rdd)}
-\end{alltt}
-\end{kframe}
-\includegraphics[width=\maxwidth]{figure/unnamed-chunk-16}
-\begin{kframe}\begin{verbatim}
-##
-## McCrary Test for no discontinuity of density around cutpoint
-##
-## data: Lee2008_rdd
-## z-val = 1.295, p-value = 0.1952
-## alternative hypothesis: Density is discontinuous around cutpoint
-## sample estimates:
-## Discontinuity
-## 0.1035
-\end{verbatim}
-\end{kframe}
-\end{knitrout}
-
-
-The test automatically returns a plot showing the density estimates
-to the left and right of the cutoff, together with the confidence
-intervals of these estimates. One rejects the null hypothesis of no
-discontinuity if, visually, the confidence intervals do not overlap.
-
-
-\subsection{Baseline Covariates}
-
-Another crucial assumption in RDD is that treatment is randomly distributed
-around the cutoff, so that individuals on both sides are similar. This can
-easily be tested, as is done in the Randomised Control Trial (RCT)
-case, by running tests for balanced covariates. Two kinds of tests
-have been implemented, testing equality in means (t-test)
-or in distribution (Kolmogorov-Smirnov). As this is a typical case
-of multiple testing, both functions offer the possibility of adjusting
-the p-values with various procedures such as the Bonferroni, Holm
-or the more recent Benjamini-Hochberg procedures (an illustration is
-given at the end of this subsection).
-
-We run here the equality in means test:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlkwd{covarTest_mean}\hlstd{(Lee2008_rdd)}
-\end{alltt}
-\begin{verbatim}
-## mean of x mean of y Difference statistic p.value
-## z1 0.03658 0.01154 -0.02504 1.019 0.3082
-## z2 20.02 20 -0.02255 0.4549 0.6492
-## z3 2.008 2.009 0.001503 -0.07364 0.9413
-\end{verbatim}
-\end{kframe}
-\end{knitrout}
-
-
-as well as the equality in distribution test:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-\hlkwd{covarTest_dis}\hlstd{(Lee2008_rdd)}
-\end{alltt}
-
-
-{\ttfamily\noindent\color{warningcolor}{\#\# Warning: p-value will be approximate in the presence of ties}}\begin{verbatim}
-## statistic p.value
-## z1 0.02406 0.3145
-## z2 0.0157 0.8263
-## z3 0.004626 1
-\end{verbatim}
-\end{kframe}
-\end{knitrout}
-
-
-Since the covariates were generated randomly with a single parameter,
-we would expect that no equality test is rejected.
-
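-As an illustration of the p-value adjustment mentioned above (a sketch
-only; the method names are those accepted by the \code{p.adjust} argument
-of both test functions), one could run:
-
-\begin{knitrout}
-\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
-\begin{alltt}
-## adjust the p-values for multiple testing (Benjamini-Hochberg):
-covarTest_mean(Lee2008_rdd, p.adjust = "BH")
-covarTest_dis(Lee2008_rdd, p.adjust = "BH")
-\end{alltt}
-\end{kframe}
-\end{knitrout}
-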
-
-\section{Conclusion}
-
-\bibliographystyle{econometrica}
-\addcontentsline{toc}{section}{\refname}\bibliography{RDD_refs}
-
-
-%\addcontentsline{toc}{section}{\refname}
-%\bibliography{./RDDrefs}
-%\bibliography{/home/mat/Dropbox/Documents/Ordi/Bibtex/GeneralBiblio,/home/mat/Dropbox/Documents/Ordi/Bibtex/biblioFAO_mat}
-\end{document}
diff --git a/README.Rmd b/README.Rmd
deleted file mode 100644
index 3981194..0000000
--- a/README.Rmd
+++ /dev/null
@@ -1,173 +0,0 @@
-RDDtools: an R package for Regression Discontinuity Design
-========================================================
-
-**RDDtools** is a new R package under development, designed to offer a set of tools to run all the steps required for a Regression Discontinuity Design (RDD) Analysis, from primary data visualisation to discontinuity estimation, sensitivity and placebo testing.
-
-
-Installing **RDDtools**
------------------------
-
-This github website hosts the source code. One of the easiest ways to install the package from github is by using the R package **devtools**:
-
-```{r eval=FALSE}
-library(devtools)
-install_github(repo="RDDtools", username="MatthieuStigler", subdir="RDDtools")
-```
-
-Note however that the latest version of RDDtools only works with R 3.0, and that you might need to install [Rtools](http://stat.ethz.ch/CRAN/bin/windows/Rtools/) if on Windows.
-
-
-Documentation
------------------------
-The (preliminary) documentation is available in the help files directly, as well as in the *vignette*. The vignette can be accessed from R with vignette("RDDtools"), or by accessing the [pdf](https://github.com/MatthieuStigler/RDDtools/raw/master/RDDtools/inst/doc/RDDtools.pdf) stored on this github.
-
-RDDtools: main features
------------------------
-
-
-+ Simple visualisation of the data using binned-plot: **plot()**
-
-+ Bandwidth selection:
- + MSE-RDD bandwidth procedure of [Imbens and Kalyanaraman 2012]: **RDDbw_IK()**
- + MSE global bandwidth procedure of [Ruppert et al 1995]: **RDDbw_RSW()**
-+ Estimation:
- + RDD parametric estimation: **RDDreg_lm()**. This includes specifying the polynomial order and including covariates with various specifications, as advocated in [Imbens and Lemieux 2008].
- + RDD local non-parametric estimation: **RDDreg_np()**. Can also include covariates, and allows different types of inference (fully non-parametric, or parametric approximation).
- + RDD generalised estimation: allows the use of custom estimating functions to obtain the RDD coefficient. This could allow, for example, a probit RDD or a quantile regression.
-+ Post-Estimation tools:
- + Various tools, to obtain predictions at given covariate values ( **RDDpred()** ), or to convert to other classes, to lm ( **as.lm()** ), or to the package *np* ( **as.npreg()** ).
- + Function to do inference with clustered data: **clusterInf()** either using a cluster covariance matrix ( **vcovCluster()** ) or by a degrees of freedom correction (as in [Cameron et al. 2008]).
-+ Regression sensitivity analysis:
- + Plot the sensitivity of the coefficient with respect to the bandwith: **plotSensi()**
- + *Placebo plot* using different cutpoints: **plotPlacebo()**
-+ Design sensitivity analysis:
- + McCrary test of manipulation of the forcing variable: wrapper **dens_test()** to the function **DCdensity()** from package **rdd**.
- + Test of equal means of covariates: **covarTest_mean()**
- + Test of equal density of covariates: **covarTest_dens()**
-+ Datasets
- + Contains the seminal dataset of [Lee 2008]: **Lee2008**
- + Contains functions to replicate the Monte-Carlo simulations of [Imbens and Kalyanaraman 2012]: **gen_MC_IK()**
-
-Using RDDtools: a quick example
------------------------
-**RDDtools** works in an object-oriented way: the user defines the characteristics of the data once, creating a *RDDdata* object, on which the different analysis tools can then be applied.
-
-### Data preparation and visualisation
-Load the package, and load the built-in dataset from [Lee 2008]:
-
-```{r options, echo=FALSE}
-opts_chunk$set(warning= FALSE, message=FALSE, fig.align="center", fig.path='figuresREADME/')
-```
-
-
-```{r}
-library(RDDtools)
-data(Lee2008)
-```
-
-Declare the data to be a *RDDdata* object:
-
-```{r}
-Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
-```
-
-
-You can now directly summarise and visualise this data:
-
-```{r dataPlot}
-summary(Lee2008_rdd)
-plot(Lee2008_rdd)
-```
-
-### Estimation
-
-#### Parametric
-
-Estimate parametrically, by fitting a 4th order polynomial:
-```{r reg_para}
-reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4)
-reg_para
-
-plot(reg_para)
-```
-
-
-#### Non-parametric
-As well as run a simple local regression, using the [Imbens and Kalyanaraman 2012] bandwidth:
-```{r RegPlot}
-bw_ik <- RDDbw_IK(Lee2008_rdd)
-reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd, bw=bw_ik)
-print(reg_nonpara)
-plot(x=reg_nonpara)
-
-```
-
-### Regression Sensitivity tests:
-
-One can easily check the sensitivity of the estimate to different bandwidths:
-```{r SensiPlot}
-plotSensi(reg_nonpara, from=0.05, to=1, by=0.1)
-```
-
-Or run the Placebo test, estimating the RDD effect based on fake cutpoints:
-```{r placeboPlot}
-plotPlacebo(reg_nonpara)
-```
-
-### Design Sensitivity tests:
-
-Design sensitivity tests check whether the discontinuity found can actually be attributed to other causes. Two types of tests are available:
-
-+ Discontinuity comes from manipulation: test whether there is possible manipulation around the cutoff, using the McCrary 2008 test: **dens_test()**
-+ Discontinuity comes from other variables: one should test whether a discontinuity also arises in the covariates. Currently, only simple tests of equality of covariates around the threshold are available:
-
-#### Discontinuity comes from manipulation: McCrary test
-
-Simply use the function **dens_test()** on either the raw data or the regression output:
-```{r DensPlot}
-dens_test(reg_nonpara)
-```
-
-#### Discontinuity comes from covariates: covariates balance tests
-
-Two tests are available:
-+ equal means of covariates: **covarTest_mean()**
-+ equal density of covariates: **covarTest_dens()**
-
-
-We need to simulate some covariate data here, given that the Lee (2008) dataset contains no covariates.
-We simulate three variables, the second having a different mean on the left and on the right of the cutoff.
-
-```{r}
-set.seed(123)
-n_Lee <- nrow(Lee2008)
-Z <- data.frame(z1 = rnorm(n_Lee, sd=2),
- z2 = rnorm(n_Lee, mean = ifelse(Lee2008$x<0, 5, 8)),
- z3 = sample(letters, size = n_Lee, replace = TRUE))
-Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0)
-```
-
-
-Run the tests:
-```{r}
-## test for equality of means around cutoff:
-covarTest_mean(Lee2008_rdd_Z, bw=0.3)
-
-## Can also use function covarTest_dis() for Kolmogorov-Smirnov test:
-covarTest_dis(Lee2008_rdd_Z, bw=0.3)
-```
-
-Tests correctly reject equality of the second, and correctly do not reject equality for the first and third.
-
- [Imbens and Kalyanaraman 2012]: http://ideas.repec.org/a/oup/restud/v79y2012i3p933-959.html "Imbens, G. & Kalyanaraman, K. (2012) Optimal Bandwidth Choice for the Regression Discontinuity Estimator, Review of Economic Studies, 79, 933-959"
-
- [Lee 2008]: http://ideas.repec.org/a/eee/econom/v142y2008i2p675-697.html "Lee, D. S. (2008) Randomized experiments from non-random selection in U.S. House elections, Journal of Econometrics, 142, 675-697"
-
- [Imbens and Lemieux 2008]: http://ideas.repec.org/a/eee/econom/v142y2008i2p615-635.html "Imbens, G. & Lemieux, T. (2008) Regression discontinuity designs: A guide to practice, Journal of Econometrics, Vol. 142(2), pages 615-635"
-
- [Cameron et al. 2008]: http://ideas.repec.org/a/tpr/restat/v90y2008i3p414-427.html "Cameron, Gelbach and Miller (2008) Bootstrap-Based Improvements for Inference with Clustered Errors, The Review of Economics and Statistics, Vol. 90(3), pages 414-427"
-
- [Ruppert et al 1995]: http://www.jstor.org/stable/2291516 "Ruppert, D., Sheather, S. J. and Wand, M. P. (1995). An effective bandwidth selector for local least squares regression. Journal of the American Statistical Association, 90, 1257–1270."
-
-
-
\ No newline at end of file
diff --git a/README.md b/README.md
index 81662c0..37b57a6 100644
--- a/README.md
+++ b/README.md
@@ -1,278 +1,70 @@
-RDDtools: an R package for Regression Discontinuity Design
-========================================================
+rddtools
+========
-**RDDtools** is a new R package under development, designed to offer a set of tools to run all the steps required for a Regression Discontinuity Design (RDD) Analysis, from primary data visualisation to discontinuity estimation, sensitivity and placebo testing.
+[](https://www.gnu.org/licenses/gpl-3.0.html)
+[](https://cran.r-project.org/package=rddtools)
+[](https://github.com/bquast/rddtools/actions?workflow=R-CMD-check)
+[](https://cran.r-project.org/package=rddtools)
+[](https://cran.r-project.org/package=rddtools)
+**rddtools** is an R package designed to offer a set of tools to run all the steps required for a Regression Discontinuity Design (RDD) Analysis, from primary data visualisation to discontinuity estimation, sensitivity and placebo testing.
-Installing **RDDtools**
+
+Installing **rddtools**
-----------------------
This github website hosts the source code. One of the easiest ways to install the package from github is by using the R package **remotes**:
-
```r
-library(devtools)
-install_github(repo = "RDDtools", username = "MatthieuStigler", subdir = "RDDtools")
+if (!require('remotes')) install.packages('remotes')
+remotes::install_github('bquast/rddtools')
```
-
-Note however the latest version of RDDtools only works with R 3.0, and that you might need to install [Rtools](http://stat.ethz.ch/CRAN/bin/windows/Rtools/) if on Windows.
+Note however that the latest version of rddtools only works with R 3.0, and that you might need to install [Rtools](https://cran.r-project.org/bin/windows/Rtools/) if on Windows.
Documentation
-----------------------
-The (preliminary) documentation is available in the help files directly, as well as in the *vignette*. The vignette can be accessed from R with vignette("RDDtools"), or by accessing the [pdf](https://github.com/MatthieuStigler/RDDtools/raw/master/RDDtools/inst/doc/RDDtools.pdf) stored on this github.
+The (preliminary) documentation is available in the help files directly, as well as in the *vignettes*. The vignettes can be accessed from R.
-RDDtools: main features
------------------------
+```r
+vignette('rddtools')
+```
+rddtools: main features
+-----------------------
-+ Simple visualisation of the data using binned-plot: **plot()**
++ Simple visualisation of the data using binned-plot: `plot()`
+ Bandwidth selection:
- + MSE-RDD bandwidth procedure of [Imbens and Kalyanaraman 2012]: **RDDbw_IK()**
- + MSE global bandwidth procedure of [Ruppert et al 1995]: **RDDbw_RSW()**
+ + MSE-RDD bandwidth procedure of [Imbens and Kalyanaraman 2012]: `rdd_bw_ik()`
+ + MSE global bandwidth procedure of [Ruppert et al 1995]: `rdd_bw_rsw()`
+ Estimation:
- + RDD parametric estimation: **RDDreg_lm()** This includes specifying the polynomial order, including covariates with various specifications as advocated in [Imbens and Lemieux 2008].
- + RDD local non-parametric estimation: **RDDreg_np()**. Can also include covariates, and allows different types of inference (fully non-parametric, or parametric approximation).
+ + RDD parametric estimation: `rdd_reg_lm()`. This includes specifying the polynomial order and including covariates with various specifications, as advocated in [Imbens and Lemieux 2008].
+ + RDD local non-parametric estimation: `rdd_reg_np()`. Can also include covariates, and allows different types of inference (fully non-parametric, or parametric approximation).
+ RDD generalised estimation: allows to use custom estimating functions to get the RDD coefficient. Could allow for example a probit RDD, or quantile regression.
+ Post-Estimation tools:
- + Various tools, to obtain predictions at given covariate values ( **RDDpred()** ), or to convert to other classes, to lm ( **as.lm()** ), or to the package *np* ( **as.npreg()** ).
- + Function to do inference with clustered data: **clusterInf()** either using a cluster covariance matrix ( **vcovCluster()** ) or by a degrees of freedom correction (as in [Cameron et al. 2008]).
+ + Various tools to obtain predictions at given covariate values ( `rdd_pred()` ), or to convert to other classes: to lm ( `as.lm()` ) or to the package `np` ( `as.npreg()` ).
+ + Function to do inference with clustered data: `clusterInf()`, either using a cluster covariance matrix ( `vcovCluster()` ) or a degrees-of-freedom correction (as in [Cameron et al. 2008]).
+ Regression sensitivity analysis:
- + Plot the sensitivity of the coefficient with respect to the bandwith: **plotSensi()**
- + *Placebo plot* using different cutpoints: **plotPlacebo()**
+ + Plot the sensitivity of the coefficient with respect to the bandwith: `plotSensi()`
+ + *Placebo plot* using different cutpoints: `plotPlacebo()`
+ Design sensitivity analysis:
- + McCrary test of manipulation of the forcing variable: wrapper **dens_test()** to the function **DCdensity()** from package **rdd**.
- + Test of equal means of covariates: **covarTest_mean()**
- + Test of equal density of covariates: **covarTest_dens()**
+ + McCrary test of manipulation of the forcing variable: wrapper `dens_test()` to the function `DCdensity()` from package `rdd`.
+ + Test of equal means of covariates: `covarTest_mean()`
+ + Test of equal density of covariates: `covarTest_dens()`
+ Datasets
- + Contains the seminal dataset of [Lee 2008]: **Lee2008**
- + Contains functions to replicate the Monte-Carlo simulations of [Imbens and Kalyanaraman 2012]: **gen_MC_IK()**
+ + Contains the seminal dataset of [Lee 2008]: `house`
+ + Contains functions to replicate the Monte-Carlo simulations of [Imbens and Kalyanaraman 2012]: `gen_mc_ik()`
-Using RDDtools: a quick example
+References
-----------------------
-**RDDtools** works in an object-oriented way: the user has to define once the characteristic of the data, creating a *RDDdata* object, on which different anaylsis tools can be applied.
-
-### Data preparation and visualisation
-Load the package, and load the built-in dataset from [Lee 2008]:
-
-
-
-
-
-
-```r
-library(RDDtools)
-data(Lee2008)
-```
-
-
-Declare the data to be a *RDDdata* object:
-
-
-```r
-Lee2008_rdd <- RDDdata(y = Lee2008$y, x = Lee2008$x, cutpoint = 0)
-```
-
-
-
-You can now directly summarise and visualise this data:
-
-
-```r
-summary(Lee2008_rdd)
-```
-
-```
-## ### RDDdata object ###
-##
-## Cutpoint: 0
-## Sample size:
-## -Full : 6558
-## -Left : 2740
-## -Right: 3818
-## Covariates: no
-```
-
-```r
-plot(Lee2008_rdd)
-```
-
-
-
-
-### Estimation
-
-#### Parametric
-
-Estimate parametrically, by fitting a 4th order polynomial:
-
-```r
-reg_para <- RDDreg_lm(RDDobject = Lee2008_rdd, order = 4)
-reg_para
-```
-
-```
-## ### RDD regression: parametric ###
-## Polynomial order: 4
-## Slopes: separate
-## Number of obs: 6558 (left: 2740, right: 3818)
-##
-## Coefficient:
-## Estimate Std. Error t value Pr(>|t|)
-## D 0.0766 0.0132 5.79 7.6e-09 ***
-## ---
-## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-```
-
-```r
-
-plot(reg_para)
-```
-
-
-
-
-
-#### Non-parametric
-As well as run a simple local regression, using the [Imbens and Kalyanaraman 2012] bandwidth:
-
-```r
-bw_ik <- RDDbw_IK(Lee2008_rdd)
-reg_nonpara <- RDDreg_np(RDDobject = Lee2008_rdd, bw = bw_ik)
-print(reg_nonpara)
-```
-
-```
-## ### RDD regression: nonparametric local linear###
-## Bandwidth: 0.2939
-## Number of obs: 3200 (left: 1594, right: 1606)
-##
-## Coefficient:
-## Estimate Std. Error z value Pr(>|z|)
-## D 0.07992 0.00946 8.44 <2e-16 ***
-## ---
-## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-```
-
-```r
-plot(x = reg_nonpara)
-```
-
-
-
-
-### Regression Sensitivity tests:
-
-One can easily check the sensitivity of the estimate to different bandwidths:
-
-```r
-plotSensi(reg_nonpara, from = 0.05, to = 1, by = 0.1)
-```
-
-
-
-
-Or run the Placebo test, estimating the RDD effect based on fake cutpoints:
-
-```r
-plotPlacebo(reg_nonpara)
-```
-
-
-
-
-### Design Sensitivity tests:
-
-Design sensitivity tests check whether the discontinuity found can actually be attributed ot other causes. Two types of tests are available:
-
-+ Discontinuity comes from manipulation: test whether there is possible manipulation around the cutoff, McCrary 2008 test: **dens_test()**
-+ Discontinuity comes from other variables: should test whether discontinuity arises with covariates. Currently, only simple tests of equality of covariates around the threshold are available:
-
-#### Discontinuity comes from manipulation: McCrary test
-
-use simply the function **dens_test()**, on either the raw data, or the regression output:
-
-```r
-dens_test(reg_nonpara)
-```
-
-
-
-```
-##
-## McCrary Test for no discontinuity of density around cutpoint
-##
-## data: reg_nonpara
-## z-val = 1.295, p-value = 0.1952
-## alternative hypothesis: Density is discontinuous around cutpoint
-## sample estimates:
-## Discontinuity
-## 0.1035
-```
-
-
-#### Discontinuity comes from covariates: covariates balance tests
-
-Two tests available:
-+ equal means of covariates: **covarTest_mean()**
-+ equal density of covariates: **covarTest_dens()**
-
-
-We need here to simulate some data, given that the Lee (2008) dataset contains no covariates.
-We here simulate three variables, with the second having a different mean on the left and the right.
-
-
-```r
-set.seed(123)
-n_Lee <- nrow(Lee2008)
-Z <- data.frame(z1 = rnorm(n_Lee, sd = 2), z2 = rnorm(n_Lee, mean = ifelse(Lee2008 <
- 0, 5, 8)), z3 = sample(letters, size = n_Lee, replace = TRUE))
-Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0)
-```
-
-
-
-Run the tests:
-
-```r
-## test for equality of means around cutoff:
-covarTest_mean(Lee2008_rdd_Z, bw = 0.3)
-```
-
-```
-## mean of x mean of y Difference statistic p.value
-## z1 0.004268 0.02186 0.01759 -0.2539 0.7996
-## z2 5.006 7.985 2.979 -84.85 0
-## z3 13.19 13.44 0.2465 -0.941 0.3468
-```
-
-```r
-
-## Can also use function covarTest_dis() for Kolmogorov-Smirnov test:
-covarTest_dis(Lee2008_rdd_Z, bw = 0.3)
-```
-
-```
-## statistic p.value
-## z1 0.03482 0.2727
-## z2 0.8648 0
-## z3 0.03009 0.4474
-```
-
-
-Tests correctly reject equality of the second, and correctly do not reject equality for the first and third.
-
- [Imbens and Kalyanaraman 2012]: http://ideas.repec.org/a/oup/restud/v79y2012i3p933-959.html "Imbens, G. & Kalyanaraman, K. (2012) Optimal Bandwidth Choice for the Regression Discontinuity Estimator, Review of Economic Studies, 79, 933-959"
+ [Imbens and Kalyanaraman 2012]: https://ideas.repec.org/a/oup/restud/v79y2012i3p933-959.html "Imbens, G. & Kalyanaraman, K. (2012) Optimal Bandwidth Choice for the Regression Discontinuity Estimator, Review of Economic Studies, 79, 933-959"
- [Lee 2008]: http://ideas.repec.org/a/eee/econom/v142y2008i2p675-697.html "Lee, D. S. (2008) Randomized experiments from non-random selection in U.S. House elections, Journal of Econometrics, 142, 675-697"
+ [Lee 2008]: https://ideas.repec.org/a/eee/econom/v142y2008i2p675-697.html "Lee, D. S. (2008) Randomized experiments from non-random selection in U.S. House elections, Journal of Econometrics, 142, 675-697"
- [Imbens and Lemieux 2008]: http://ideas.repec.org/a/eee/econom/v142y2008i2p615-635.html "Imbens, G. & Lemieux, T. (2008) Regression discontinuity designs: A guide to practice, Journal of Econometrics, Vol. 142(2), pages 615-635"
+ [Imbens and Lemieux 2008]: https://ideas.repec.org/a/eee/econom/v142y2008i2p615-635.html "Imbens, G. & Lemieux, T. (2008) Regression discontinuity designs: A guide to practice, Journal of Econometrics, Vol. 142(2), pages 615-635"
- [Cameron et al. 2008]: http://ideas.repec.org/a/tpr/restat/v90y2008i3p414-427.html "Cameron, Gelbach and Miller (2008) Bootstrap-Based Improvements for Inference with Clustered Errors, The Review of Economics and Statistics, Vol. 90(3), pages 414-427"
-
- [Ruppert et al 1995]: http://www.jstor.org/stable/2291516 "Ruppert, D., Sheather, S. J. and Wand, M. P. (1995). An effective bandwidth selector for local least squares regression. Journal of the American Statistical Association, 90, 1257–1270."
-
-
+ [Cameron et al. 2008]: https://ideas.repec.org/a/tpr/restat/v90y2008i3p414-427.html "Cameron, Gelbach and Miller (2008) Bootstrap-Based Improvements for Inference with Clustered Errors, The Review of Economics and Statistics, Vol. 90(3), pages 414-427"
+ [Ruppert et al 1995]: https://www.jstor.org/stable/2291516 "Ruppert, D., Sheather, S. J. and Wand, M. P. (1995). An effective bandwidth selector for local least squares regression. Journal of the American Statistical Association, 90, 1257–1270."
diff --git a/cran-comments.md b/cran-comments.md
new file mode 100644
index 0000000..52a2f35
--- /dev/null
+++ b/cran-comments.md
@@ -0,0 +1,3 @@
+# Test environments
+
+This is a re-submission of package rddtools, which was archived following the archival of package rdd. The dependence on archived rdd has been removed.
\ No newline at end of file
diff --git a/RDDtools/data/STAR_MHE.rda b/data/STAR_MHE.rda
similarity index 100%
rename from RDDtools/data/STAR_MHE.rda
rename to data/STAR_MHE.rda
diff --git a/data/house.rda b/data/house.rda
new file mode 100644
index 0000000..6a157b9
Binary files /dev/null and b/data/house.rda differ
diff --git a/data/indh.rda b/data/indh.rda
new file mode 100644
index 0000000..54949c1
Binary files /dev/null and b/data/indh.rda differ
diff --git a/figuresREADME/DensPlot.png b/figuresREADME/DensPlot.png
deleted file mode 100644
index ef7498d..0000000
Binary files a/figuresREADME/DensPlot.png and /dev/null differ
diff --git a/figuresREADME/RegPlot.png b/figuresREADME/RegPlot.png
deleted file mode 100644
index ba49f78..0000000
Binary files a/figuresREADME/RegPlot.png and /dev/null differ
diff --git a/figuresREADME/SensiPlot.png b/figuresREADME/SensiPlot.png
deleted file mode 100644
index b709e71..0000000
Binary files a/figuresREADME/SensiPlot.png and /dev/null differ
diff --git a/figuresREADME/dataPlot.png b/figuresREADME/dataPlot.png
deleted file mode 100644
index ec610f7..0000000
Binary files a/figuresREADME/dataPlot.png and /dev/null differ
diff --git a/figuresREADME/placeboPlot.png b/figuresREADME/placeboPlot.png
deleted file mode 100644
index 2a028ac..0000000
Binary files a/figuresREADME/placeboPlot.png and /dev/null differ
diff --git a/figuresREADME/reg_para.png b/figuresREADME/reg_para.png
deleted file mode 100644
index 7e78537..0000000
Binary files a/figuresREADME/reg_para.png and /dev/null differ
diff --git a/inst/CITATION b/inst/CITATION
new file mode 100644
index 0000000..87f48da
--- /dev/null
+++ b/inst/CITATION
@@ -0,0 +1,16 @@
+bibentry(bibtype = "TechReport",
+ title = "rddtools: A toolbox for regression discontinuity in R",
+ author = c(person("Matthieu", "Stigler"),
+ person("Bastiaan", "Quast") ),
+ institution = "The Graduate Institute",
+ address = "Maison de la paix, Geneva, Switzerland",
+ year = "2016",
+ url = "https://bastiaanquast.com/rddtools/",
+ textVersion = "Stigler, M. and Quast, B. (2016). rddtools: A toolbox for regression discontinuity in R.",
+
+
+ mheader = "To cite rddtools in publications please use:",
+
+ mfooter = "We have invested a lot of time and effort in creating rddtools, please cite it when using it for data analysis. See also 'citation()' for citing R."
+
+)
diff --git a/inst/ChangeLog b/inst/ChangeLog
new file mode 100644
index 0000000..a5052e6
--- /dev/null
+++ b/inst/ChangeLog
@@ -0,0 +1,6 @@
+Version 0.5.0: Matthieu Stigler (2018-01-29)
+ -new: plotBin allows for separate bins on each side
+ -new: wrapper for CCT plots
+ -fix issues with new output from rdtable
+ -add test file
+ -bw_ik works on regression output objects
diff --git a/inst/devtools_internal_tests.R b/inst/devtools_internal_tests.R
new file mode 100644
index 0000000..67aa2d5
--- /dev/null
+++ b/inst/devtools_internal_tests.R
@@ -0,0 +1,13 @@
+devtools::check_rhub(email="Matthieu.Stigler@gmail.com", interactive=FALSE)
+
+devtools::check_win_devel()
+devtools::check_win_release()
+devtools::check_win_oldrelease()
+
+devtools::build()
+usethis::use_gpl_license(version = 3, include_future = TRUE)
+
+## then
+# direct: devtools::submit_cran()
+
+curl::curl_fetch_memory("ftp://win-builder.r-project.org")
diff --git a/RDDtools/man/STAR_MHE.Rd b/man/STAR_MHE.Rd
similarity index 66%
rename from RDDtools/man/STAR_MHE.Rd
rename to man/STAR_MHE.Rd
index 36a1a38..cf50faf 100644
--- a/RDDtools/man/STAR_MHE.Rd
+++ b/man/STAR_MHE.Rd
@@ -1,9 +1,11 @@
-% Generated by roxygen2 (4.0.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rddtools.R
\docType{data}
\name{STAR_MHE}
\alias{STAR_MHE}
\title{Transformation of the STAR dataset as used in Angrist and Pischke (2008)}
-\format{A data frame containing 5743 observations and 6 variables. The first variable is from the original dataset,
+\format{
+A data frame containing 5743 observations and 6 variables. The first variable is from the original dataset,
all other are created by Angrist and Pischke STAT code.
\describe{
\item{schidkn}{School ID in kindergarden (original variable, schoolidk in \code{\link[AER]{STAR}})}
@@ -11,41 +13,37 @@ all other are created by Angrist and Pischke STAT code.
\item{classid}{The id of the class (computed by A & P)}
\item{cs}{Class size (computed by A & P)}
\item{female, nwhite}{Various covariates (computed by A & P)}
-}}
+}
+}
\source{
Data obtained using the script krueger.do on data webstar.rda, found on J. Angrist website
-\url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}, retrieved on 26 November 2012.
-}
-\usage{
-STAR_MHE
}
\description{
Transformation of the STAR dataset as used in Table 8.2.1 of Angrist and Pischke (2008)
}
\details{
-). This is a transformation of the dataset from the project STAR (Student/Teacher Achievement Ratio.
-The full dataset is described and available in package AER, \code{\link[AER]{STAR}}.
-The transformed data was obtained using the STATA script krueger.do, obtained from Joshua Angrist website
-(\url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}), on the webstar.dta.
+This is a transformation of the dataset from the project STAR (Student/Teacher Achievement Ratio).
+The full dataset is described and available in package AER, \code{\link[AER]{STAR}}.
+The transformed data was obtained by running the STATA script krueger.do, obtained from Joshua Angrist's website, on the webstar.dta file.
}
\examples{
data(STAR_MHE)
# Compute the group means:
-STAR_MHE_means <- aggregate(STAR_MHE[, c("classid", "pscore", "cs")], by=list(STAR_MHE$classid), mean)
+STAR_MHE_means <- aggregate(STAR_MHE[, c('classid', 'pscore', 'cs')],
+ by=list(STAR_MHE$classid), mean)
# Regression of means, with weighted average:
reg_krug_gls <- lm(pscore~cs, data=STAR_MHE_means, weights=cs)
coef(summary(reg_krug_gls))[2,2]
}
\references{
-Krueger, A. (1999) "Experimental Estimates Of Education Production Functions,"
+Krueger, A. (1999) 'Experimental Estimates Of Education Production Functions,'
\emph{The Quarterly Journal of Economics}, Vol. 114(2), pages 497-532, May.
-Angrist, A. ad Pischke J-S (2008) \emph{Mostly Harmless Econometrics: An Empiricist's Companion},
+Angrist, J. D. and Pischke, J.-S. (2008) \emph{Mostly Harmless Econometrics: An Empiricist's Companion},
Princeton University press
}
\seealso{
\code{\link[AER]{STAR}} for the original dataset.
}
-
diff --git a/man/as.lm.Rd b/man/as.lm.Rd
new file mode 100644
index 0000000..ec2b19d
--- /dev/null
+++ b/man/as.lm.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rdd_data_methods.R
+\name{as.lm}
+\alias{as.lm}
+\title{Convert a rdd object to lm}
+\usage{
+as.lm(x)
+}
+\arguments{
+\item{x}{An object to convert to lm}
+}
+\value{
+An object of class \code{lm}
+}
+\description{
+Convert a rdd object to lm
+}
+\examples{
+data(house)
+house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0)
+reg_para <- rdd_reg_lm(rdd_object=house_rdd)
+reg_para_lm <- as.lm(reg_para)
+reg_para_lm
+plot(reg_para_lm, which=4)
+}
+\seealso{
+\code{\link{as.npreg}} which converts \code{rdd_reg} objects into \code{npreg} from package \code{np}.
+}
diff --git a/man/as.npregbw.Rd b/man/as.npregbw.Rd
new file mode 100644
index 0000000..ca74ea0
--- /dev/null
+++ b/man/as.npregbw.Rd
@@ -0,0 +1,48 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/as.npreg.R
+\name{as.npregbw}
+\alias{as.npregbw}
+\alias{as.npreg}
+\title{Convert an rdd_reg object to a \code{npreg} object}
+\usage{
+as.npregbw(x, ...)
+
+as.npreg(x, ...)
+}
+\arguments{
+\item{x}{Object of class \code{rdd_reg} created by \code{\link{rdd_reg_np}} or \code{\link{rdd_reg_lm}}}
+
+\item{\ldots}{Further arguments passed to the \code{\link[np]{npregbw}} or \code{\link[np]{npreg}}}
+}
+\value{
+An object of class \code{npreg} or \code{npregbw}
+}
+\description{
+Convert an rdd_object to a non-parametric regression \code{npreg} from package \code{np}
+}
+\details{
+This function converts an rdd_reg object into an \code{npreg} object from package \code{np}
+Note that the output won't be the same, since \code{npreg} does not offer a triangular kernel, but a Gaussian or Epanechnikov one.
+Another reason why estimates might differ slightly is that \code{npreg} implements a multivariate kernel, while rdd_reg
+proceeds as if the kernel were univariate. A simple solution to make the multivariate kernel similar to the univariate one
+is to set the bandwidths for x and Dx to a large number, so that these kernels converge towards a constant and one recovers the univariate kernel.
+}
+\examples{
+# Estimate the usual rdd_reg:
+ data(house)
+ house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0)
+ reg_nonpara <- rdd_reg_np(rdd_object=house_rdd)
+
+## Convert to npreg:
+ reg_nonpara_np <- as.npreg(reg_nonpara)
+ reg_nonpara_np
+ rdd_coef(reg_nonpara_np, allCo=TRUE, allInfo=TRUE)
+
+## Compare with result obtained with a Gaussian kernel:
+ bw_lm <- dnorm(house_rdd$x, sd=rddtools:::getBW(reg_nonpara))
+ reg_nonpara_gaus <- rdd_reg_lm(rdd_object=house_rdd, w=bw_lm)
+ all.equal(rdd_coef(reg_nonpara_gaus),rdd_coef(reg_nonpara_np))
+}
+\seealso{
+\code{\link{as.lm}} which converts \code{rdd_reg} objects into \code{lm}.
+}
diff --git a/RDDtools/man/clusterInf.Rd b/man/clusterInf.Rd
similarity index 72%
rename from RDDtools/man/clusterInf.Rd
rename to man/clusterInf.Rd
index afe70c7..7a730e2 100644
--- a/RDDtools/man/clusterInf.Rd
+++ b/man/clusterInf.Rd
@@ -1,4 +1,5 @@
-% Generated by roxygen2 (4.0.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clusterInf.R
\name{clusterInf}
\alias{clusterInf}
\title{Post-inference for clustered data}
@@ -6,7 +7,7 @@
clusterInf(object, clusterVar, vcov. = NULL, type = c("df-adj", "HC"), ...)
}
\arguments{
-\item{object}{Object of class lm, from which RDDreg also inherits.}
+\item{object}{Object of class lm, from which rdd_reg also inherits.}
\item{clusterVar}{The variable containing the cluster attributions.}
@@ -24,23 +25,22 @@ Correct standard-errors to account for clustered data, doing either a degrees of
possibly on the range specified by bandwidth
}
\examples{
-data(Lee2008)
-Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
-reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
+data(house)
+house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0)
+reg_para <- rdd_reg_lm(rdd_object=house_rdd)
# here we just generate randomly a cluster variable:
-nlet <- sort(c(outer(letters, letters, paste, sep="")))
-clusRandom <- sample(nlet[1:60], size=nrow(Lee2008_rdd), replace=TRUE)
+nlet <- sort(c(outer(letters, letters, paste, sep='')))
+clusRandom <- sample(nlet[1:60], size=nrow(house_rdd), replace=TRUE)
# now do post-inference:
clusterInf(reg_para, clusterVar=clusRandom)
-clusterInf(reg_para, clusterVar=clusRandom, type="HC")
+clusterInf(reg_para, clusterVar=clusRandom, type='HC')
}
\references{
-Wooldridge (2003) Cluster-sample methods in applied econometrics.
+Wooldridge (2003) Cluster-sample methods in applied econometrics.
\emph{AmericanEconomic Review}, 93, p. 133-138
}
\seealso{
\code{\link{vcovCluster}}, which implements the cluster-robust covariance matrix estimator used by \code{clusterInf}
}
-
diff --git a/RDDtools/man/covarTest_dis.Rd b/man/covarTest_dis.Rd
similarity index 61%
rename from RDDtools/man/covarTest_dis.Rd
rename to man/covarTest_dis.Rd
index 4d97d44..4de36e2 100644
--- a/RDDtools/man/covarTest_dis.Rd
+++ b/man/covarTest_dis.Rd
@@ -1,31 +1,40 @@
-% Generated by roxygen2 (4.0.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/covarTests.R
\name{covarTest_dis}
\alias{covarTest_dis}
-\alias{covarTest_dis.RDDdata}
-\alias{covarTest_dis.RDDreg}
+\alias{covarTest_dis.rdd_data}
+\alias{covarTest_dis.rdd_reg}
\title{Testing for balanced covariates: equality of distribution}
\usage{
-covarTest_dis(object, bw, exact = NULL, p.adjust = c("none", "holm", "BH",
- "BY", "hochberg", "hommel", "bonferroni"))
+covarTest_dis(
+ object,
+ bw,
+ exact = NULL,
+ p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni")
+)
-\method{covarTest_dis}{RDDdata}(object, bw = NULL, exact = FALSE,
- p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel",
- "bonferroni"))
+\method{covarTest_dis}{rdd_data}(
+ object,
+ bw = NULL,
+ exact = FALSE,
+ p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni")
+)
-\method{covarTest_dis}{RDDreg}(object, bw = NULL, exact = FALSE,
- p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel",
- "bonferroni"))
+\method{covarTest_dis}{rdd_reg}(
+ object,
+ bw = NULL,
+ exact = FALSE,
+ p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni")
+)
}
\arguments{
-\item{object}{object of class RDDdata}
+\item{object}{object of class rdd_data}
\item{bw}{a bandwidth}
\item{exact}{Argument of the \code{\link{ks.test}} function: NULL or a logical indicating whether an exact p-value should be computed.}
\item{p.adjust}{Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function}
-
-\item{\ldots}{currently not used}
}
\value{
A data frame with, for each covariate, the K-S statistic and its p-value.
@@ -34,29 +43,28 @@ A data frame with, for each covariate, the K-S statistic and its p-value.
Tests equality of distribution with a Kolmogorov-Smirnov test for each covariate, between the two full groups or around the discontinuity threshold
}
\examples{
-data(Lee2008)
+data(house)
## Add randomly generated covariates
set.seed(123)
-n_Lee <- nrow(Lee2008)
-Z <- data.frame(z1 = rnorm(n_Lee, sd=2),
- z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)),
+n_Lee <- nrow(house)
+Z <- data.frame(z1 = rnorm(n_Lee, sd=2),
+ z2 = rnorm(n_Lee, mean = ifelse(house$x<0, 5, 8)),
z3 = sample(letters, size = n_Lee, replace = TRUE))
-Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0)
+house_rdd_Z <- rdd_data(y = house$y, x = house$x, covar = Z, cutpoint = 0)
## Kolmogorov-Smirnov test of equality in distribution:
-covarTest_dis(Lee2008_rdd_Z, bw=0.3)
+covarTest_dis(house_rdd_Z, bw=0.3)
## Can also use function covarTest_mean() for a t-test for equality of means around cutoff:
-covarTest_mean(Lee2008_rdd_Z, bw=0.3)
+covarTest_mean(house_rdd_Z, bw=0.3)
## covarTest_dis works also on regression outputs (bw will be taken from the model)
-reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z)
+reg_nonpara <- rdd_reg_np(rdd_object=house_rdd_Z)
covarTest_dis(reg_nonpara)
}
-\author{
-Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
-}
\seealso{
\code{\link{covarTest_mean}} for the t-test of equality of means
}
-
+\author{
+Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
+}
diff --git a/RDDtools/man/covarTest_mean.Rd b/man/covarTest_mean.Rd
similarity index 58%
rename from RDDtools/man/covarTest_mean.Rd
rename to man/covarTest_mean.Rd
index 983b84a..84dcba4 100644
--- a/RDDtools/man/covarTest_mean.Rd
+++ b/man/covarTest_mean.Rd
@@ -1,24 +1,37 @@
-% Generated by roxygen2 (4.0.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/covarTests.R
\name{covarTest_mean}
\alias{covarTest_mean}
-\alias{covarTest_mean.RDDdata}
-\alias{covarTest_mean.RDDreg}
+\alias{covarTest_mean.rdd_data}
+\alias{covarTest_mean.rdd_reg}
\title{Testing for balanced covariates: equality of means with t-test}
\usage{
-covarTest_mean(object, bw = NULL, paired = FALSE, var.equal = FALSE,
- p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel",
- "bonferroni"))
+covarTest_mean(
+ object,
+ bw = NULL,
+ paired = FALSE,
+ var.equal = FALSE,
+ p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni")
+)
-\method{covarTest_mean}{RDDdata}(object, bw = NULL, paired = FALSE,
- var.equal = FALSE, p.adjust = c("none", "holm", "BH", "BY", "hochberg",
- "hommel", "bonferroni"))
+\method{covarTest_mean}{rdd_data}(
+ object,
+ bw = NULL,
+ paired = FALSE,
+ var.equal = FALSE,
+ p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni")
+)
-\method{covarTest_mean}{RDDreg}(object, bw = NULL, paired = FALSE,
- var.equal = FALSE, p.adjust = c("none", "holm", "BH", "BY", "hochberg",
- "hommel", "bonferroni"))
+\method{covarTest_mean}{rdd_reg}(
+ object,
+ bw = NULL,
+ paired = FALSE,
+ var.equal = FALSE,
+ p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni")
+)
}
\arguments{
-\item{object}{object of class RDDdata}
+\item{object}{object of class rdd_data}
\item{bw}{a bandwidth}
@@ -27,8 +40,6 @@ covarTest_mean(object, bw = NULL, paired = FALSE, var.equal = FALSE,
\item{var.equal}{Argument of the \code{\link{t.test}} function: logical variable indicating whether to treat the two variances as being equal}
\item{p.adjust}{Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function}
-
-\item{\ldots}{currently not used}
}
\value{
A data frame with, for each covariate, the mean on each side, the difference, the t-statistic and its p-value.
@@ -37,30 +48,29 @@ A data frame with, for each covariate, the mean on each size, the difference, t-
Tests equality of means by a t-test for each covariate, between the two full groups or around the discontinuity threshold
}
\examples{
-data(Lee2008)
+data(house)
## Add randomly generated covariates
set.seed(123)
-n_Lee <- nrow(Lee2008)
-Z <- data.frame(z1 = rnorm(n_Lee, sd=2),
- z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)),
+n_Lee <- nrow(house)
+Z <- data.frame(z1 = rnorm(n_Lee, sd=2),
+ z2 = rnorm(n_Lee, mean = ifelse(house<0, 5, 8)),
z3 = sample(letters, size = n_Lee, replace = TRUE))
-Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0)
+house_rdd_Z <- rdd_data(y = house$y, x = house$x, covar = Z, cutpoint = 0)
## test for equality of means around cutoff:
-covarTest_mean(Lee2008_rdd_Z, bw=0.3)
+covarTest_mean(house_rdd_Z, bw=0.3)
## Can also use function covarTest_dis() for Kolmogorov-Smirnov test:
-covarTest_dis(Lee2008_rdd_Z, bw=0.3)
+covarTest_dis(house_rdd_Z, bw=0.3)
## covarTest_mean works also on regression outputs (bw will be taken from the model)
-reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z)
+reg_nonpara <- rdd_reg_np(rdd_object=house_rdd_Z)
covarTest_mean(reg_nonpara)
}
-\author{
-Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
-}
\seealso{
\code{\link{covarTest_dis}} for the Kolmogorov-Smirnov test of equality of distribution
}
-
+\author{
+Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
+}
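Similarly, a hedged sketch of covarTest_mean() combining the documented var.equal and p.adjust arguments, again reusing house_rdd_Z from the example above:

## t-tests assuming equal variances, with Holm-adjusted p-values
covarTest_mean(house_rdd_Z, bw = 0.3, var.equal = TRUE, p.adjust = "holm")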
diff --git a/man/dens_test.Rd b/man/dens_test.Rd
new file mode 100644
index 0000000..1f11abf
--- /dev/null
+++ b/man/dens_test.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dens_test.R
+\name{dens_test}
+\alias{dens_test}
+\title{McCrary Sorting Test}
+\usage{
+dens_test(rdd_object, bin = NULL, bw = NULL, plot = TRUE, ...)
+}
+\arguments{
+\item{rdd_object}{object of class rdd_data}
+
+\item{bin}{the binwidth (defaults to \code{2*sd(runvar)*length(runvar)^(-.5)})}
+
+\item{bw}{the bandwidth to use (by default uses bandwidth selection calculation from McCrary (2008))}
+
+\item{plot}{Whether to return a plot. Logical, defaults to TRUE.}
+
+\item{\ldots}{Further arguments passed to the unexported \code{DCdensity} function.}
+}
+\description{
+This calls the original \code{DCdensity} function which was in the package \code{rdd} by Drew Dimmery,
+which has been archived and is now internally stored in the Rddtools package.
+}
+\details{
+Run the McCrary test for manipulation of the forcing variable.
+}
+\examples{
+data(house)
+house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0)
+dens_test(house_rdd)
+}
+\references{
+McCrary, Justin. (2008) "Manipulation of the running variable in the regression discontinuity design: A density test," \emph{Journal of Econometrics}. 142(2): 698-714. \doi{http://dx.doi.org/10.1016/j.jeconom.2007.05.005}
+}
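A small sketch of the documented bin and plot arguments of dens_test(), assuming the house_rdd object from the example above; the binwidth value is illustrative only:

## Run the McCrary test without drawing the plot
dens_test(house_rdd, plot = FALSE)
## Supply a binwidth manually instead of the default 2*sd(runvar)*length(runvar)^(-.5)
dens_test(house_rdd, bin = 0.05)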
diff --git a/RDDtools/man/gen_MC_IK.Rd b/man/gen_mc_ik.Rd
similarity index 55%
rename from RDDtools/man/gen_MC_IK.Rd
rename to man/gen_mc_ik.Rd
index 74af276..70826f4 100644
--- a/RDDtools/man/gen_MC_IK.Rd
+++ b/man/gen_mc_ik.Rd
@@ -1,10 +1,16 @@
-% Generated by roxygen2 (4.0.1): do not edit by hand
-\name{gen_MC_IK}
-\alias{gen_MC_IK}
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/gen_mc_ik.R
+\name{gen_mc_ik}
+\alias{gen_mc_ik}
\title{Generate Monte Carlo simulations of Imbens and Kalyanaraman}
\usage{
-gen_MC_IK(n = 200, version = 1, sd = 0.1295, output = c("data.frame",
- "RDDdata"), size)
+gen_mc_ik(
+ n = 200,
+ version = 1,
+ sd = 0.1295,
+ output = c("data.frame", "rdd_data"),
+ size
+)
}
\arguments{
\item{n}{The size of the sample to generate}
@@ -13,9 +19,9 @@ gen_MC_IK(n = 200, version = 1, sd = 0.1295, output = c("data.frame",
\item{sd}{The standard deviation of the error term.}
-\item{output}{Whether to return a data-frame, or already a RDDdata}
+\item{output}{Whether to return a data-frame, or already a rdd_data}
-\item{size}{The size of the effect, this depends on the specific version, defaults are as in IK: 0.04, NULL, 0.1, 0.1}
+\item{size}{The size of the effect; this depends on the specific version, defaults are as in IK: 0.04, NULL, 0.1, 0.1}
}
\value{
A data frame with x and y variables.
@@ -24,19 +30,19 @@ An data frame with x and y variables.
Generate the simulations reported in Imbens and Kalyanaraman (2012)
}
\examples{
-MC1_dat <- gen_MC_IK()
-MC1_rdd <- RDDdata(y=MC1_dat$y, x=MC1_dat$x, cutpoint=0)
+mc1_dat <- gen_mc_ik()
+MC1_rdd <- rdd_data(y=mc1_dat$y, x=mc1_dat$x, cutpoint=0)
## Use np regression:
-reg_nonpara <- RDDreg_np(RDDobject=MC1_rdd)
+reg_nonpara <- rdd_reg_np(rdd_object=MC1_rdd)
reg_nonpara
# Represent the curves:
plotCu <- function(version=1, xlim=c(-0.1,0.1)){
- res <- gen_MC_IK(sd=0.0000001, n=1000, version=version)
+ res <- gen_mc_ik(sd=0.0000001, n=1000, version=version)
res <- res[order(res$x),]
- ylim <- range(subset(res, x>=min(xlim) & x<=max(xlim), "y"))
- plot(res, type="l", xlim=xlim, ylim=ylim, main=paste("DGP", version))
+ ylim <- range(subset(res, x>=min(xlim) & x<=max(xlim), 'y'))
+ plot(res, type='l', xlim=xlim, ylim=ylim, main=paste('DGP', version))
abline(v=0)
xCut <- res[which(res$x==min(res$x[res$x>=0]))+c(0,-1),]
points(xCut, col=2)
@@ -48,7 +54,3 @@ plotCu(version=3)
plotCu(version=4)
layout(matrix(1))
}
-\references{
-TODO
-}
-
diff --git a/man/house.Rd b/man/house.Rd
new file mode 100644
index 0000000..cdbe867
--- /dev/null
+++ b/man/house.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rddtools.R
+\docType{data}
+\name{house}
+\alias{house}
+\title{Dataset used in Lee (2008)}
+\format{
+A data frame with 6558 observations and two variables:
+\describe{
+\item{x}{Vote at election t-1}
+\item{y}{Vote at election t}
+}
+}
+\source{
+Guido Imbens' webpage: \url{https://scholar.harvard.edu/imbens/scholar_software/regression-discontinuity}
+}
+\description{
+Randomized experiments from non-random selection in U.S. House elections
+
+Dataset described and used in Imbens and Kalyanaraman (2012), and probably the same dataset as used in Lee (2008).
+}
+\examples{
+data(house)
+rdd_house <- rdd_data(x=x, y=y, data=house, cutpoint=0)
+summary(rdd_house)
+plot(rdd_house)
+}
+\references{
+Imbens, Guido and Karthik Kalyanaraman. (2012) 'Optimal Bandwidth Choice for the regression discontinuity estimator,'
+Review of Economic Studies (2012) 79, 933-959
+
+Lee, D. (2008) Randomized experiments from non-random selection in U.S. House elections,
+\emph{Journal of Econometrics}, 142, 675-697
+}
diff --git a/man/indh.Rd b/man/indh.Rd
new file mode 100644
index 0000000..5b568a6
--- /dev/null
+++ b/man/indh.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rddtools.R
+\docType{data}
+\name{indh}
+\alias{indh}
+\title{INDH data set}
+\format{
+A data frame with two variables with 720 observations each
+}
+\description{
+Data from the Initiative Nationale du Development Humaine, collected as part of the SNSF project "Development Aid and Social Dynamics"
+}
+\examples{
+# load the data
+data(indh)
+
+# construct rdd_data frame
+rdd_dat_indh <- rdd_data(y=choice_pg, x=poverty, data=indh, cutpoint=30)
+
+# inspect data frame
+summary(rdd_dat_indh)
+
+# perform non-parametric regression
+( reg_np_indh <- rdd_reg_np(rdd_dat_indh) )
+}
+\references{
+Arcand, Rieger, and Nguyen (2015) 'Development Aid and Social Dynamics Data Set'
+}
diff --git a/RDDtools/man/plot.RDDdata.Rd b/man/plot.rdd_data.Rd
similarity index 63%
rename from RDDtools/man/plot.RDDdata.Rd
rename to man/plot.rdd_data.Rd
index a684d24..155a8cd 100644
--- a/RDDtools/man/plot.RDDdata.Rd
+++ b/man/plot.rdd_data.Rd
@@ -1,13 +1,22 @@
-% Generated by roxygen2 (4.0.1): do not edit by hand
-\name{plot.RDDdata}
-\alias{plot.RDDdata}
-\title{Plot RDDdata}
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rdd_data_methods.R
+\name{plot.rdd_data}
+\alias{plot.rdd_data}
+\title{Plot rdd_data}
\usage{
-\method{plot}{RDDdata}(x, h, nbins = NULL, xlim = range(object$x, na.rm =
- TRUE), cex = 0.7, nplot = 1, device = c("base", "ggplot"), ...)
+\method{plot}{rdd_data}(
+ x,
+ h = NULL,
+ nbins = NULL,
+ xlim = range(object$x, na.rm = TRUE),
+ cex = 0.7,
+ nplot = 1,
+ device = c("base", "ggplot"),
+ ...
+)
}
\arguments{
-\item{x}{Object of class RDDdata}
+\item{x}{Object of class rdd_data}
\item{h}{The binwidth parameter (note this differs from the bandwidth parameter!)}
@@ -32,27 +41,26 @@ Binned plot of the forcing and outcome variable
\details{
Produces a simple binned plot averaging values within each interval. The length of the intervals
is specified with the argument \code{h}, specifying the whole binwidth (contrary to the usual bandwidth
-argument, that gives half of the length of the kernel window.
-When no bandwidth is given, the bandwidth of Ruppert et al is used, see \code{\link{RDDbw_RSW}}.
+argument, which gives half of the length of the kernel window).
+When no bandwidth is given, the bandwidth of Ruppert et al. is used; see \code{\link{rdd_bw_rsw}}.
}
\examples{
-data(Lee2008)
-Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
-plot(Lee2008_rdd)
+data(house)
+house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0)
+plot(house_rdd)
## Specify manually the bandwidth:
-plot(Lee2008_rdd, h=0.2)
+plot(house_rdd, h=0.2)
## Show three plots with different bandwidth:
-plot(Lee2008_rdd, h=c(0.2,0.3,0.4), nplot=3)
+plot(house_rdd, h=c(0.2,0.3,0.4), nplot=3)
## Specify instead of the bandwidth, the final number of bins:
-plot(Lee2008_rdd, nbins=22)
+plot(house_rdd, nbins=22)
## If the specified number of bins is odd, the larger number is given to the side with the largest range
-plot(Lee2008_rdd, nbins=21)
+plot(house_rdd, nbins=21)
}
\author{
Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
}
-
diff --git a/RDDtools/man/plotBin.Rd b/man/plotBin.Rd
similarity index 53%
rename from RDDtools/man/plotBin.Rd
rename to man/plotBin.Rd
index 24ae2c1..7f911c6 100644
--- a/RDDtools/man/plotBin.Rd
+++ b/man/plotBin.Rd
@@ -1,11 +1,24 @@
-% Generated by roxygen2 (4.0.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plotBin.R
\name{plotBin}
\alias{plotBin}
\title{Bin plotting}
\usage{
-plotBin(x, y, h = 0.05, nbins = NULL, cutpoint = 0, plot = TRUE,
- type = c("value", "number"), xlim = range(x, na.rm = TRUE), cex = 0.9,
- main = NULL, xlab, ylab, ...)
+plotBin(
+ x,
+ y,
+ h = NULL,
+ nbins = NULL,
+ cutpoint = 0,
+ plot = TRUE,
+ type = c("value", "number"),
+ xlim = range(x, na.rm = TRUE),
+ cex = 0.9,
+ main = NULL,
+ xlab,
+ ylab,
+ ...
+)
}
\arguments{
\item{x}{Forcing variable}
@@ -14,13 +27,15 @@ plotBin(x, y, h = 0.05, nbins = NULL, cutpoint = 0, plot = TRUE,
\item{h}{the bandwidth (defaults to \code{2*sd(runvar)*length(runvar)^(-.5)})}
+\item{nbins}{Number of bins}
+
\item{cutpoint}{Cutpoint}
\item{plot}{Logical. Whether to plot or only returned silently}
\item{type}{Whether returns the y averages, or the x frequencies}
-\item{xlim,cex,main,xlab,ylab}{Usual parameters passed to plot(), see \code{\link{par}}}
+\item{xlim, cex, main, xlab, ylab}{Usual parameters passed to plot(), see \code{\link{par}}}
\item{\ldots}{further arguments passed to plot.}
}
@@ -28,13 +43,8 @@ plotBin(x, y, h = 0.05, nbins = NULL, cutpoint = 0, plot = TRUE,
Returns silently values
}
\description{
-Do a "scatterplot bin smoothing"
-}
-\author{
-Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
+Do a 'scatterplot bin smoothing'
}
\references{
McCrary, Justin. (2008) "Manipulation of the running variable in the regression discontinuity design: A density test," \emph{Journal of Econometrics}. 142(2): 698-714.
}
-\keyword{internal}
-
diff --git a/RDDtools/man/plotPlacebo.Rd b/man/plotPlacebo.Rd
similarity index 58%
rename from RDDtools/man/plotPlacebo.Rd
rename to man/plotPlacebo.Rd
index f1f58ca..63a16c4 100644
--- a/RDDtools/man/plotPlacebo.Rd
+++ b/man/plotPlacebo.Rd
@@ -1,38 +1,69 @@
-% Generated by roxygen2 (4.0.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/placebo.R
\name{plotPlacebo}
-\alias{computePlacebo}
\alias{plotPlacebo}
-\alias{plotPlacebo.RDDreg}
+\alias{plotPlacebo.rdd_reg}
\alias{plotPlaceboDens}
-\alias{plotPlaceboDens.RDDreg}
+\alias{plotPlaceboDens.rdd_reg}
+\alias{computePlacebo}
\title{Draw a (density) plot of placebo tests}
\usage{
-plotPlacebo(object, device = c("ggplot", "base"), ...)
-
-\method{plotPlacebo}{RDDreg}(object, device = c("ggplot", "base"),
- from = 0.25, to = 0.75, by = 0.1, level = 0.95, same_bw = FALSE,
- vcov. = NULL, plot = TRUE, output = c("data", "ggplot"), ...)
-
-plotPlaceboDens(object, device = c("ggplot", "base"), ...)
-
-\method{plotPlaceboDens}{RDDreg}(object, device = c("ggplot", "base"),
- from = 0.25, to = 0.75, by = 0.1, level = 0.95, same_bw = FALSE,
- vcov. = NULL, ...)
-
-computePlacebo(object, from = 0.25, to = 0.75, by = 0.1, level = 0.95,
- same_bw = FALSE, vcov. = NULL)
+plotPlacebo(
+ object,
+ device = c("ggplot", "base"),
+ output = c("data", "ggplot"),
+ ...
+)
+
+\method{plotPlacebo}{rdd_reg}(
+ object,
+ device = c("ggplot", "base"),
+ output = c("data", "ggplot"),
+ from = 0.25,
+ to = 0.75,
+ by = 0.1,
+ level = 0.95,
+ same_bw = FALSE,
+ vcov. = NULL,
+ plot = TRUE,
+ ...
+)
+
+plotPlaceboDens(
+ object,
+ device = c("ggplot", "base"),
+ output = c("data", "ggplot"),
+ ...
+)
+
+\method{plotPlaceboDens}{rdd_reg}(
+ object,
+ device = c("ggplot", "base"),
+ output = c("data", "ggplot"),
+ from = 0.25,
+ to = 0.75,
+ by = 0.1,
+ level = 0.95,
+ same_bw = FALSE,
+ vcov. = NULL,
+ ...
+)
+
+computePlacebo(
+ object,
+ from = 0.25,
+ to = 0.75,
+ by = 0.1,
+ level = 0.95,
+ same_bw = FALSE,
+ vcov. = NULL
+)
}
\arguments{
\item{object}{the output of an RDD regression}
\item{device}{Whether to draw a base or a ggplot graph.}
-\item{\ldots}{Further arguments passed to specific methods.}
-
-\item{vcov.}{Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}.}
-
-\item{plot}{Whether to actually plot the data.}
-
\item{output}{Whether to return (invisibly) the data frame containing the fake cutpoints and corresponding estimates, or the ggplot object}
\item{from}{Starting point of the fake cutpoints sequence. Refers ot the quantile of each side of the true cutpoint}
@@ -44,6 +75,12 @@ computePlacebo(object, from = 0.25, to = 0.75, by = 0.1, level = 0.95,
\item{level}{Level of the confidence interval shown}
\item{same_bw}{Whether to re-estimate the bandwidth at each point}
+
+\item{vcov.}{Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}.}
+
+\item{plot}{Whether to actually plot the data.}
+
+\item{\ldots}{Further arguments passed to specific methods.}
}
\value{
A data frame containing the cutpoints, their corresponding estimates and confidence intervals.
@@ -52,13 +89,13 @@ A data frame containing the cutpoints, their corresponding estimates and confide
Draw a plot of placebo tests, estimating the impact on fake cutpoints
}
\examples{
-data(Lee2008)
-Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
-reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd)
+data(house)
+house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0)
+reg_nonpara <- rdd_reg_np(rdd_object=house_rdd)
plotPlacebo(reg_nonpara)
# Use with another vcov function; cluster case
-reg_nonpara_lminf <- RDDreg_np(RDDobject=Lee2008_rdd, inference="lm")
+reg_nonpara_lminf <- rdd_reg_np(rdd_object=house_rdd, inference='lm')
# need to be a function applied to updated object!
vc <- function(x) vcovCluster(x, clusterVar=model.frame(x)$x)
plotPlacebo(reg_nonpara_lminf, vcov. = vc)
@@ -66,4 +103,3 @@ plotPlacebo(reg_nonpara_lminf, vcov. = vc)
\author{
Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
}
-
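As a sketch of the computePlacebo() entry point documented in the usage above (assuming reg_nonpara from the example), the underlying data frame of fake-cutpoint estimates can be retrieved and inspected directly:

## Estimates and confidence intervals at fake cutpoints between the 25% and 75% quantiles
placebos <- computePlacebo(reg_nonpara, from = 0.25, to = 0.75, by = 0.1)
head(placebos)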
diff --git a/man/plotSensi.Rd b/man/plotSensi.Rd
new file mode 100644
index 0000000..da793f1
--- /dev/null
+++ b/man/plotSensi.Rd
@@ -0,0 +1,94 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plotSensi.R
+\name{plotSensi}
+\alias{plotSensi}
+\alias{plotSensi.rdd_reg_np}
+\alias{plotSensi.rdd_reg_lm}
+\title{Plot the sensitivity to the bandwidth}
+\usage{
+plotSensi(
+ rdd_regobject,
+ from,
+ to,
+ by = 0.01,
+ level = 0.95,
+ output = c("data", "ggplot"),
+ plot = TRUE,
+ ...
+)
+
+\method{plotSensi}{rdd_reg_np}(
+ rdd_regobject,
+ from,
+ to,
+ by = 0.05,
+ level = 0.95,
+ output = c("data", "ggplot"),
+ plot = TRUE,
+ device = c("ggplot", "base"),
+ vcov. = NULL,
+ ...
+)
+
+\method{plotSensi}{rdd_reg_lm}(
+ rdd_regobject,
+ from,
+ to,
+ by = 0.05,
+ level = 0.95,
+ output = c("data", "ggplot"),
+ plot = TRUE,
+ order,
+ type = c("colour", "facet"),
+ ...
+)
+}
+\arguments{
+\item{rdd_regobject}{object of a RDD regression, from either \code{\link{rdd_reg_lm}} or \code{\link{rdd_reg_np}}}
+
+\item{from}{First bandwidth point. Default value is max(1e-3, bw-0.1)}
+
+\item{to}{Last bandwidth point. Default value is bw+0.1}
+
+\item{by}{Increments in the \code{from} \code{to} sequence}
+
+\item{level}{Level of the confidence interval}
+
+\item{output}{Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object}
+
+\item{plot}{Whether to actually plot the data.}
+
+\item{device}{Whether to draw a base or a ggplot graph.}
+
+\item{vcov.}{Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}}
+
+\item{order}{For parametric models (from \code{\link{rdd_reg_lm}}), the order of the polynomial.}
+
+\item{type}{For parametric models (from \code{\link{rdd_reg_lm}}) whether different orders are represented as different colour or as different facets.}
+
+\item{\ldots}{Further arguments passed to specific methods}
+}
+\value{
+A data frame containing the bandwidths and corresponding estimates and confidence intervals.
+}
+\description{
+Draw a plot showing the LATE estimates depending on multiple bandwidths
+}
+\examples{
+data(house)
+house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0)
+
+#Non-parametric estimate
+bw_ik <- rdd_bw_ik(house_rdd)
+reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)
+plotSensi(reg_nonpara)
+plotSensi(reg_nonpara, device='base')
+
+#Parametric estimate:
+reg_para_ik <- rdd_reg_lm(rdd_object=house_rdd, order=4, bw=bw_ik)
+plotSensi(reg_para_ik)
+plotSensi(reg_para_ik, type='facet')
+}
+\author{
+Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
+}
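A sketch of an explicit bandwidth grid for plotSensi(), assuming reg_nonpara and bw_ik from the example above; output = "data" and plot = FALSE keep only the returned data frame:

## Sensitivity of the LATE estimate over bandwidths around the IK value
sensi <- plotSensi(reg_nonpara, from = bw_ik - 0.05, to = bw_ik + 0.05, by = 0.01,
                   output = "data", plot = FALSE)
head(sensi)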
diff --git a/man/rdd_bw_cct_estim.Rd b/man/rdd_bw_cct_estim.Rd
new file mode 100644
index 0000000..1e22ad1
--- /dev/null
+++ b/man/rdd_bw_cct_estim.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/bw_cct_estim.R
+\name{rdd_bw_cct_estim}
+\alias{rdd_bw_cct_estim}
+\title{Bandwidth selection for Regression Discontinuity estimators, CCT 2014}
+\usage{
+rdd_bw_cct_estim(
+ rdd_object,
+ method = c("mserd", "msetwo", "msesum", "msecomb1", "msecomb2", "cerrd", "certwo",
+ "cersum", "cercomb1"),
+ kernel = c("Triangular", "Uniform", "Epanechnikov"),
+ ...
+)
+}
+\arguments{
+\item{rdd_object}{of class rdd_data created by \code{\link{rdd_data}}}
+
+\item{method}{The type of method used. See \code{\link[rdrobust]{rdbwselect}}.}
+
+\item{kernel}{The type of kernel used: either \code{Triangular}, \code{Uniform} or \code{Epanechnikov}.}
+
+\item{\ldots}{further arguments passed to \code{\link[rdrobust]{rdbwselect}}.}
+}
+\value{
+See documentation of \code{\link[rdrobust]{rdbwselect}}
+}
+\description{
+Simple wrapper of the Calonico-Cattaneo-Titiunik (2014) bandwidth selection procedures
+for RDD estimators \code{\link[rdrobust]{rdbwselect}}.
+}
+\examples{
+data(house)
+rd<- rdd_data(x=house$x, y=house$y, cutpoint=0)
+rdd_bw_cct_estim(rd)
+
+}
+\references{
+Calonico, S., M. D. Cattaneo, and R. Titiunik. 2014a. Robust Nonparametric Confidence Intervals for Regression-Discontinuity Designs. Econometrica 82(6): 2295-2326.
+\url{https://www.tandfonline.com/doi/abs/10.1080/01621459.2015.1017578}.
+}
+\seealso{
+\code{\link{rdd_bw_ik}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012)
+}
+\author{
+Original code written by Calonico, Cattaneo, Farrell and Titiunik, see \code{\link[rdrobust]{rdbwselect}}
+}
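A sketch of a non-default selection criterion and kernel, both taken from the documented argument choices; rd is the object built in the example above:

## Two-sided MSE-optimal bandwidths with a uniform kernel
rdd_bw_cct_estim(rd, method = "msetwo", kernel = "Uniform")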
diff --git a/man/rdd_bw_cct_plot.Rd b/man/rdd_bw_cct_plot.Rd
new file mode 100644
index 0000000..5eff1df
--- /dev/null
+++ b/man/rdd_bw_cct_plot.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/bw_cct_plot.R
+\name{rdd_bw_cct_plot}
+\alias{rdd_bw_cct_plot}
+\title{Bandwidth selection for Regression Discontinuity visualisation, CCT 2015}
+\usage{
+rdd_bw_cct_plot(
+ rdd_object,
+ method = c("esmv", "es", "espr", "esmvpr", "qs", "qspr", "qsmv", "qsmvpr"),
+ ...
+)
+}
+\arguments{
+\item{rdd_object}{of class rdd_data created by \code{\link{rdd_data}}}
+
+\item{method}{The type of method used. See \code{\link[rdrobust]{rdplot}}.
+Default is \code{esmv}, the variance mimicking evenly-spaced method.}
+
+\item{\ldots}{further arguments passed to \code{\link[rdrobust]{rdplot}}.}
+}
+\value{
+See documentation of \code{\link[rdrobust]{rdplot}}
+}
+\description{
+Simple wrapper of the Calonico-Cattaneo-Titiunik (2015) bandwidth selection procedures
+for RDD visualisation \code{\link[rdrobust]{rdplot}}.
+}
+\examples{
+data(house)
+rd<- rdd_data(x=house$x, y=house$y, cutpoint=0)
+rdd_bw_cct_plot(rd)
+
+}
+\references{
+Calonico, S., M. D. Cattaneo, and R. Titiunik. 2015a. Optimal Data-Driven Regression Discontinuity Plots. Journal of the American Statistical Association 110(512): 1753-1769.
+\url{https://www.tandfonline.com/doi/abs/10.1080/01621459.2015.1017578}.
+}
+\seealso{
+\code{\link{rdd_bw_ik}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012)
+}
+\author{
+Original code written by Calonico, Cattaneo, Farrell and Titiunik, see \code{\link[rdrobust]{rdplot}}
+}
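Likewise, a sketch using the quantile-spaced binning method instead of the default esmv, with rd from the example above:

## Quantile-spaced bins instead of evenly-spaced ones
rdd_bw_cct_plot(rd, method = "qs")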
diff --git a/man/rdd_bw_ik.Rd b/man/rdd_bw_ik.Rd
new file mode 100644
index 0000000..3847201
--- /dev/null
+++ b/man/rdd_bw_ik.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/bw_ik.R
+\name{rdd_bw_ik}
+\alias{rdd_bw_ik}
+\title{Imbens-Kalyanaraman Optimal Bandwidth Calculation}
+\usage{
+rdd_bw_ik(rdd_object, kernel = c("Triangular", "Uniform", "Normal"))
+}
+\arguments{
+\item{rdd_object}{of class rdd_data created by \code{\link{rdd_data}}}
+
+\item{kernel}{The type of kernel used: either \code{Triangular}, \code{Uniform} or \code{Normal}.}
+}
+\value{
+The optimal bandwidth
+}
+\description{
+Imbens-Kalyanaraman optimal bandwidth
+for local linear regression in Regression discontinuity designs.
+}
+\examples{
+data(house)
+rd<- rdd_data(x=house$x, y=house$y, cutpoint=0)
+rdd_bw_ik(rd)
+}
+\references{
+Imbens, Guido and Karthik Kalyanaraman. (2012) 'Optimal Bandwidth Choice for the regression discontinuity estimator,'
+Review of Economic Studies (2012) 79, 933-959
+}
+\seealso{
+\code{\link{rdd_bw_rsw}} Global bandwidth selector of Ruppert, Sheather and Wand (1995)
+}
+\author{
+Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
+}
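A sketch of how the IK bandwidth typically feeds the non-parametric estimator (as in the plotSensi() example earlier in this patch), reusing rd from the example above; the kernel choice is illustrative:

## Select the bandwidth, then estimate the LATE with a local linear regression
bw_ik <- rdd_bw_ik(rd, kernel = "Uniform")
rdd_reg_np(rdd_object = rd, bw = bw_ik)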
diff --git a/RDDtools/man/RDDbw_RSW.Rd b/man/rdd_bw_rsw.Rd
similarity index 52%
rename from RDDtools/man/RDDbw_RSW.Rd
rename to man/rdd_bw_rsw.Rd
index 671c63f..feb71f5 100644
--- a/RDDtools/man/RDDbw_RSW.Rd
+++ b/man/rdd_bw_rsw.Rd
@@ -1,32 +1,32 @@
-% Generated by roxygen2 (4.0.1): do not edit by hand
-\name{RDDbw_RSW}
-\alias{RDDbw_RSW}
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/bw_rot.R
+\name{rdd_bw_rsw}
+\alias{rdd_bw_rsw}
\title{Global bandwidth selector of Ruppert, Sheather and Wand (1995) from package \pkg{KernSmooth}}
\usage{
-RDDbw_RSW(object, type = c("global", "sided"))
+rdd_bw_rsw(object, type = c("global", "sided"))
}
\arguments{
-\item{object}{object of class RDDdata created by \code{\link{RDDdata}}}
+\item{object}{object of class rdd_data created by \code{\link{rdd_data}}}
-\item{type}{Whether to choose a global bandwidth for the whole function (\code{global})
+\item{type}{Whether to choose a global bandwidth for the whole function (\code{global})
or for each side (\code{sided})}
}
\value{
One (or two for \code{sided}) bandwidth value.
}
\description{
-Uses the global bandwidth selector of Ruppert, Sheather and Wand (1995)
+Uses the global bandwidth selector of Ruppert, Sheather and Wand (1995)
either to the whole function, or to the functions below and above the cutpoint.
}
\examples{
-data(Lee2008)
-rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0)
-RDDbw_RSW(rd)
+data(house)
+rd<- rdd_data(x=house$x, y=house$y, cutpoint=0)
+rdd_bw_rsw(rd)
}
\references{
See \code{\link[KernSmooth]{dpill}}
}
\seealso{
-\code{\link{RDDbw_IK}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012)
+\code{\link{rdd_bw_ik}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012)
}
-
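A sketch of the documented type = "sided" option, which returns one bandwidth for each side of the cutpoint; rd is the object from the example above:

## Separate Ruppert-Sheather-Wand bandwidths below and above the cutpoint
rdd_bw_rsw(rd, type = "sided")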
diff --git a/RDDtools/man/RDDcoef.Rd b/man/rdd_coef.Rd
similarity index 57%
rename from RDDtools/man/RDDcoef.Rd
rename to man/rdd_coef.Rd
index ec712a3..e7a0e9c 100644
--- a/RDDtools/man/RDDcoef.Rd
+++ b/man/rdd_coef.Rd
@@ -1,15 +1,16 @@
-% Generated by roxygen2 (4.0.1): do not edit by hand
-\name{RDDcoef}
-\alias{RDDcoef}
-\alias{RDDcoef.RDDreg_np}
-\alias{RDDcoef.default}
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rdd_coef.R
+\name{rdd_coef}
+\alias{rdd_coef}
+\alias{rdd_coef.default}
+\alias{rdd_coef.rdd_reg_np}
\title{RDD coefficient}
\usage{
-RDDcoef(object, allInfo = FALSE, allCo = FALSE, ...)
+rdd_coef(object, allInfo = FALSE, allCo = FALSE, ...)
-\method{RDDcoef}{default}(object, allInfo = FALSE, allCo = FALSE, ...)
+\method{rdd_coef}{default}(object, allInfo = FALSE, allCo = FALSE, ...)
-\method{RDDcoef}{RDDreg_np}(object, allInfo = FALSE, allCo = FALSE, ...)
+\method{rdd_coef}{rdd_reg_np}(object, allInfo = FALSE, allCo = FALSE, ...)
}
\arguments{
\item{object}{A RDD regression object}
@@ -21,10 +22,9 @@ RDDcoef(object, allInfo = FALSE, allCo = FALSE, ...)
\item{\ldots}{Further arguments passed to/from specific methods}
}
\value{
-Either a numeric value of the RDD coefficient estimate, or a data frame with the estimate,
+Either a numeric value of the RDD coefficient estimate, or a data frame with the estimate,
its standard error, t-test and p-value.
}
\description{
Function to access the RDD coefficient in the various regressions
}
-
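A sketch of the two documented extraction modes, assuming a fitted regression such as reg_nonpara from the earlier examples in this patch:

## Point estimate only
rdd_coef(reg_nonpara)
## Full row: estimate, standard error, t-test and p-value
rdd_coef(reg_nonpara, allInfo = TRUE)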
diff --git a/man/rdd_data.Rd b/man/rdd_data.Rd
new file mode 100644
index 0000000..03b1855
--- /dev/null
+++ b/man/rdd_data.Rd
@@ -0,0 +1,59 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rdd_data.R
+\name{rdd_data}
+\alias{rdd_data}
+\title{Construct rdd_data}
+\usage{
+rdd_data(y, x, covar, cutpoint, z, labels, data)
+}
+\arguments{
+\item{y}{Output}
+
+\item{x}{Forcing variable}
+
+\item{covar}{Exogenous variables}
+
+\item{cutpoint}{Cutpoint}
+
+\item{z}{Assignment variable for the fuzzy case. Should be 0/1 or TRUE/FALSE variable.}
+
+\item{labels}{Additional labels to provide as a list (with entries \code{x}, \code{y}, and optionally a vector \code{covar}). Currently unused.}
+
+\item{data}{A data-frame for the \code{x} and \code{y} variables. If this is provided,
+the column names can be entered directly for argument \code{x}, \code{y} and \code{covar}.
+For \code{covar}, should be a character vector.}
+}
+\value{
+Object of class \code{rdd_data}, inheriting from \code{data.frame}
+}
+\description{
+Construct the base RDD object, containing x, y, the cutpoint and, optionally, covariates.
+}
+\details{
+Arguments \code{x}, \code{y} (and optionally \code{covar}) can be given either as:
+\itemize{
+\item vectors (or a data frame for \code{covar})
+\item quotes/character names when \code{data} is also provided. For multiple \code{covar}, use a character vector
+}
+}
+\examples{
+data(house)
+rd <- rdd_data(x=house$x, y=house$y, cutpoint=0)
+rd2 <- rdd_data(x=x, y=y, data=house, cutpoint=0)
+
+# The print() function is the same as the print.data.frame:
+rd
+
+# The summary() and plot() function are specific to rdd_data
+summary(rd)
+plot(rd)
+
+# for the fuzzy case, you need to specify the assignment variable z:
+rd_dat_fakefuzzy <- rdd_data(x=house$x, y=house$y,
+ z=ifelse(house$x>0+rnorm(nrow(house), sd=0.05),1,0),
+ cutpoint=0)
+summary(rd_dat_fakefuzzy)
+}
+\author{
+Matthieu Stigler \email{Matthieu.Stigler@gmail.com}
+}
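A sketch of the column-name interface described in the Details section above; z_fake is a hypothetical covariate added here purely for illustration, and covar is passed as a character vector as documented:

## Covariates by name through the data argument
house2 <- house
house2$z_fake <- rnorm(nrow(house2))
rd_cov <- rdd_data(x = x, y = y, covar = "z_fake", data = house2, cutpoint = 0)
summary(rd_cov)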
diff --git a/RDDtools/man/RDDgenreg.Rd b/man/rdd_gen_reg.Rd
similarity index 64%
rename from RDDtools/man/RDDgenreg.Rd
rename to man/rdd_gen_reg.Rd
index 6c3750e..c396f77 100644
--- a/RDDtools/man/RDDgenreg.Rd
+++ b/man/rdd_gen_reg.Rd
@@ -1,14 +1,26 @@
-% Generated by roxygen2 (4.0.1): do not edit by hand
-\name{RDDgenreg}
-\alias{RDDgenreg}
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reg_gen.R
+\name{rdd_gen_reg}
+\alias{rdd_gen_reg}
\title{General polynomial estimator of the regression discontinuity}
\usage{
-RDDgenreg(RDDobject, fun = glm, covariates = NULL, order = 1, bw = NULL,
- slope = c("separate", "same"), covar.opt = list(strategy = c("include",
- "residual"), slope = c("same", "separate"), bw = NULL), weights, ...)
+rdd_gen_reg(
+ rdd_object,
+ fun = glm,
+ covariates = NULL,
+ order = 1,
+ bw = NULL,
+ slope = c("separate", "same"),
+ covar.opt = list(strategy = c("include", "residual"), slope = c("same", "separate"), bw
+ = NULL),
+ weights,
+ ...
+)
}
\arguments{
-\item{RDDobject}{Object of class RDDdata created by \code{\link{RDDdata}}}
+\item{rdd_object}{Object of class rdd_data created by \code{\link{rdd_data}}}
+
+\item{fun}{The function to estimate the parameters}
\item{covariates}{Formula to include covariates}
@@ -16,46 +28,44 @@ RDDgenreg(RDDobject, fun = glm, covariates = NULL, order = 1, bw = NULL,
\item{bw}{A bandwidth to specify the subset on which the kernel weighted regression is estimated}
-\item{weights}{Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw}}
-
\item{slope}{Whether slopes should be different on left or right (separate), or the same.}
\item{covar.opt}{Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}).}
-\item{fun}{The function to estimate the parameters}
+\item{weights}{Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw}}
\item{\ldots}{Further arguments passed to fun. See the example.}
}
\value{
-An object of class RDDreg_lm and class lm, with specific print and plot methods
+An object of class rdd_reg_lm and class lm, with specific print and plot methods
}
\description{
Compute RDD estimate allowing a locally kernel weighted version of any estimation function
possibly on the range specified by bandwidth
}
\details{
-This function allows the user to use a custom estimating function, instead of the traditional \code{lm()}.
+This function allows the user to use a custom estimating function, instead of the traditional \code{lm()}.
It is assumed that the custom function has the following behaviour:
\enumerate{
\item A formula interface, together with a \code{data} argument
\item A \code{weight} argument
\item A coef(summary(x)) returning a data-frame containing a column Estimate
}
-Note that for the last requirement, this can be accomodated by writing a specific \code{\link{RDDcoef}}
+Note that for the last requirement, this can be accommodated by writing a specific \code{\link{rdd_coef}}
function for the class of the object returned by \code{fun}.
}
\examples{
## Step 0: prepare data
-data(Lee2008)
-Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
+data(house)
+house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0)
## Estimate a local probit:
-Lee2008_rdd$y <- with(Lee2008_rdd, ifelse(y