|
| 1 | +\documentclass{article} |
| 2 | + |
| 3 | +\usepackage[utf8]{inputenc} |
| 4 | +\usepackage[english]{babel} |
| 5 | + |
| 6 | +\usepackage[T1]{fontenc} |
| 7 | +\usepackage{lmodern} |
| 8 | + |
| 9 | +\usepackage{hyperref} |
| 10 | + |
| 11 | +\usepackage[backend=biber]{biblatex} |
| 12 | +\addbibresource{references.bib} |
| 13 | + |
| 14 | +\usepackage{changepage} |
| 15 | + |
| 16 | +\usepackage{minted} |
| 17 | +\usemintedstyle{vs} |
| 18 | +\usepackage{upquote} |
| 19 | + |
| 20 | +\hyphenation{ECMAScript} |
| 21 | + |
| 22 | +\newenvironment{gramprod}[2]{% Grammar production: #1 = nonterminal name, #2 = separator (":" or "::") |
| 23 | + \begin{samepage}% |
| 24 | + \begin{adjustwidth}{0.5cm}{0.5cm}% indent the whole production on both sides |
| 25 | + \emph{#1} \textbf{#2}% heading line: name followed by the separator |
| 26 | + \begin{adjustwidth}{0.5cm}{0.5cm}% indent the alternatives a further step |
| 27 | + }{% comment character stops this line end from becoming a space token after the body |
| 28 | + \medskip |
| 29 | + \end{adjustwidth}% |
| 30 | + \end{adjustwidth}% |
| 31 | + \end{samepage}% |
| 32 | +} |
| 33 | + |
| 34 | +\newenvironment{lexprod}[1]{% Lexical production: gramprod with the "::" separator; #1 = nonterminal name |
| 35 | + \begin{gramprod}{#1}{::}% |
| 36 | + }{% comment character stops this line end from becoming a space token after the body |
| 37 | + \end{gramprod}% |
| 38 | +} |
| 39 | + |
| 40 | +\newenvironment{synprod}[1]{% Syntactic production: gramprod with the ":" separator; #1 = nonterminal name |
| 41 | + \begin{gramprod}{#1}{:}% |
| 42 | + }{% comment character stops this line end from becoming a space token after the body |
| 43 | + \end{gramprod}% |
| 44 | +} |
| 45 | + |
| 46 | +\newcommand{\token}[1]{\emph{#1}} % grammar symbol name (#1), set with \emph |
| 47 | + |
| 48 | +\newcommand{\lit}[1]{\textbf{\texttt{#1}}} % literal terminal text (#1), set in bold typewriter |
| 49 | + |
| 50 | +\newcommand{\opt}[1]{#1\textsubscript{opt}} % appends the subscript "opt" to #1 to mark it as optional |
| 51 | + |
| 52 | +\newcommand{\esref}[2]{\href{https://es5.github.io/\#x#2}{#1}} % #1 = link text, #2 = section number appended to the es5.github.io "#x" anchor |
| 53 | + |
| 54 | +\title{The JSON5 Data Interchange Format} |
| 55 | + |
| 56 | +\author{ |
| 57 | + Jordan Tucker |
| 58 | + \thanks{ |
| 59 | + In collaboration with Aseem Kishore |
| 60 | + (\href{https://github.com/aseemk}{github.com/aseemk}) |
| 61 | + }\\ |
| 62 | + \href{https://github.com/jordanbtucker}{github.com/jordanbtucker} |
| 63 | +} |
| 64 | +\date{March, 2016} |
| 65 | + |
| 66 | +\begin{document} |
| 67 | + |
| 68 | +\maketitle |
| 69 | + |
| 70 | +\begin{abstract} |
| 71 | +The JSON5 Data Interchange Format is a proposed extension to JSON that aims to make it easier for humans to write and maintain by hand. It does this by adding some minimal syntax features directly from ECMAScript 5.1. |
| 72 | +\end{abstract} |
| 73 | + |
| 74 | +\tableofcontents |
| 75 | + |
| 76 | +\section{Introduction} |
| 77 | + |
| 78 | +JSON~\cite{json,rfc7159} is an excellent data interchange format, but it has limitations that can cause problems when written by hand. The JSON5 Data Interchange Format (JSON5) is a proposed extension to JSON that aims to alleviate some of the limitations of JSON by expanding its syntax to include some productions from ECMAScript 5.1~\cite{es5}. |
| 79 | + |
| 80 | +JSON5 is a subset of ECMAScript 5.1 that adds no new data types, and is backwards compatible with JSON documents. |
| 81 | + |
| 82 | +Similar to JSON, JSON5 can represent four primitive types (strings, numbers, booleans, and null) and two structured types (objects and arrays). |
| 83 | + |
| 84 | +A string is a sequence of zero or more Unicode characters \cite{unicode}. Note that this citation references the latest version of Unicode rather than a specific release. It is not expected that future changes in the Unicode specification will impact the syntax of JSON5. |
| 85 | + |
| 86 | +An object is an unordered collection of zero or more name/value pairs, where a name is a string or identifier and a value is a string, number, boolean, null, object, or array. |
| 87 | + |
| 88 | +An array is an ordered sequence of zero or more values. |
| 89 | + |
| 90 | +\subsection{Summary of Features} |
| 91 | + |
| 92 | +The following ECMAScript 5.1 features, which are not supported in JSON, have been extended to JSON5. |
| 93 | + |
| 94 | +\subsubsection*{Objects} |
| 95 | + |
| 96 | +\begin{itemize} |
| 97 | + \item Object keys may be an ECMAScript 5.1 \esref{\token{IdentifierName}}{7.6}. |
| 98 | + \item Objects may have a single trailing comma. |
| 99 | +\end{itemize} |
| 100 | + |
| 101 | +\subsubsection*{Arrays} |
| 102 | + |
| 103 | +\begin{itemize} |
| 104 | + \item Arrays may have a single trailing comma. |
| 105 | +\end{itemize} |
| 106 | + |
| 107 | +\subsubsection*{Strings} |
| 108 | + |
| 109 | +\begin{itemize} |
| 110 | + \item Strings may be single quoted. |
| 111 | + \item Strings may span multiple lines by escaping newline characters. |
| 112 | + \item Strings may include character escapes. |
| 113 | +\end{itemize} |
| 114 | + |
| 115 | +\subsubsection*{Numbers} |
| 116 | + |
| 117 | +\begin{itemize} |
| 118 | + \item Numbers may be hexadecimal (base 16). |
| 119 | + \item Numbers may have a leading or trailing decimal point. |
| 120 | + \item Numbers may be Infinity, -Infinity, NaN, or -NaN \cite{float}. |
| 121 | + \item Numbers may begin with an explicit plus sign. |
| 122 | +\end{itemize} |
| 123 | + |
| 124 | +\subsubsection*{Comments} |
| 125 | + |
| 126 | +\begin{itemize} |
| 127 | + \item Inline and block comments are allowed. |
| 128 | +\end{itemize} |
| 129 | + |
| 130 | +\subsubsection*{Whitespace} |
| 131 | + |
| 132 | +\begin{itemize} |
| 133 | + \item Additional whitespace characters are allowed. |
| 134 | +\end{itemize} |
| 135 | + |
| 136 | +\subsection{Short Example} |
| 137 | + |
| 138 | +The following example illustrates many of the features of JSON5. |
| 139 | + |
| 140 | +\begin{minted}{js} |
| 141 | +{ |
| 142 | + foo: 'bar', |
| 143 | + while: true, |
| 144 | + |
| 145 | + this: 'is a \ |
| 146 | +multi-line string', |
| 147 | + |
| 148 | + // this is an inline comment |
| 149 | + here: 'is another', // inline comment |
| 150 | + |
| 151 | + /* this is a block comment |
| 152 | + that continues on another line */ |
| 153 | + |
| 154 | + movie: 'The Great Escape\x21', |
| 155 | + |
| 156 | + hex: 0xDEADbeef, |
| 157 | + half: .5, |
| 158 | + delta: +10, |
| 159 | + to: Infinity, // and beyond! |
| 160 | + |
| 161 | + finally: 'a trailing comma', |
| 162 | + oh: [ |
| 163 | + "we shouldn't forget", |
| 164 | + 'arrays can have', |
| 165 | + 'trailing commas too', |
| 166 | + ], |
| 167 | +} |
| 168 | +\end{minted} |
| 169 | + |
| 170 | +\section{Grammar} |
| 171 | + |
| 172 | +JSON5 is defined by a lexical grammar and a syntactic grammar. The lexical grammar defines productions that translate text into tokens, and the syntactic grammar defines productions that translate tokens into a JSON5 value. |
| 173 | + |
| 174 | +All productions that do not begin with the characters ``JSON5'' are defined by productions of the \esref{ECMAScript 5.1 Lexical Grammar}{5.1.2} \cite{es5}. |
| 175 | + |
| 176 | +\subsection{Lexical Grammar} |
| 177 | + |
| 178 | +The lexical grammar for JSON5 has as its terminal symbols characters (Unicode code units \cite{unicode}) that conform to the rules for \token{JSON5SourceCharacter}. It defines a set of productions, starting from the goal symbol \token{JSON5InputElement}, that describe how sequences of such characters are translated into a sequence of input elements. |
| 179 | + |
| 180 | +Input elements other than whitespace and comments form the terminal symbols for the syntactic grammar for JSON5 and are called tokens. These tokens are the identifiers, literals, and punctuators of the JSON5 language. Simple whitespace and comments are discarded and do not appear in the stream of input elements for the syntactic grammar. |
| 181 | + |
| 182 | +Productions of the lexical grammar are distinguished by having two colons ``::'' as separating punctuation. |
| 183 | + |
| 184 | +\subsubsection*{Syntax} |
| 185 | + |
| 186 | +\begin{lexprod}{JSON5SourceCharacter} |
| 187 | + \esref{\token{SourceCharacter}}{6} |
| 188 | +\end{lexprod} |
| 189 | + |
| 190 | +\begin{lexprod}{JSON5InputElement} |
| 191 | + \token{JSON5WhiteSpace}\\ |
| 192 | + \token{JSON5LineTerminator}\\ |
| 193 | + \token{JSON5Comment}\\ |
| 194 | + \token{JSON5Token} |
| 195 | +\end{lexprod} |
| 196 | + |
| 197 | +\begin{lexprod}{JSON5Token} |
| 198 | + \token{JSON5Identifier}\\ |
| 199 | + \token{JSON5NullLiteral}\\ |
| 200 | + \token{JSON5BooleanLiteral}\\ |
| 201 | + \token{JSON5Number}\\ |
| 202 | + \token{JSON5String} |
| 203 | +\end{lexprod} |
| 204 | + |
| 205 | +\begin{lexprod}{JSON5WhiteSpace} |
| 206 | + \esref{\token{WhiteSpace}}{7.2} |
| 207 | +\end{lexprod} |
| 208 | + |
| 209 | +\begin{lexprod}{JSON5LineTerminator} |
| 210 | + \esref{\token{LineTerminator}}{7.3} |
| 211 | +\end{lexprod} |
| 212 | + |
| 213 | +\begin{lexprod}{JSON5LineTerminatorSequence} |
| 214 | + \esref{\token{LineTerminatorSequence}}{7.3} |
| 215 | +\end{lexprod} |
| 216 | + |
| 217 | +\begin{lexprod}{JSON5Comment} |
| 218 | + \esref{\token{Comment}}{7.4} |
| 219 | +\end{lexprod} |
| 220 | + |
| 221 | +\begin{lexprod}{JSON5Identifier} |
| 222 | + \esref{\token{IdentifierName}}{7.6} |
| 223 | +\end{lexprod} |
| 224 | + |
| 225 | +\begin{lexprod}{JSON5NullLiteral} |
| 226 | + \esref{\token{NullLiteral}}{7.8.1} |
| 227 | +\end{lexprod} |
| 228 | + |
| 229 | +\begin{lexprod}{JSON5BooleanLiteral} |
| 230 | + \esref{\token{BooleanLiteral}}{7.8.2} |
| 231 | +\end{lexprod} |
| 232 | + |
| 233 | +\begin{lexprod}{JSON5NumericLiteral} |
| 234 | + \esref{\token{NumericLiteral}}{7.8.3}\\ |
| 235 | + \lit{Infinity}\\ |
| 236 | + \lit{NaN} |
| 237 | +\end{lexprod} |
| 238 | + |
| 239 | +\begin{lexprod}{JSON5Number} |
| 240 | + \token{JSON5NumericLiteral}\\ |
| 241 | + \lit{+} \token{JSON5NumericLiteral}\\ |
| 242 | + \lit{-} \token{JSON5NumericLiteral} |
| 243 | +\end{lexprod} |
| 244 | + |
| 245 | +\begin{lexprod}{JSON5String} |
| 246 | + \esref{\token{StringLiteral}}{7.8.4} |
| 247 | +\end{lexprod} |
| 248 | + |
| 249 | +\subsection{Syntactic Grammar} |
| 250 | + |
| 251 | +The syntactic grammar for JSON5 has tokens defined by the lexical grammar as its terminal symbols. It defines a set of productions, starting from the goal symbol \token{JSON5Text}, that describe how sequences of tokens can form syntactically correct JSON5 values. |
| 252 | + |
| 253 | +When a stream of characters is to be parsed as a JSON5 value, it is first converted to a stream of input elements by repeated application of the lexical grammar; this stream of input elements is then parsed by a single application of the syntactic grammar. The input is syntactically in error if the tokens in the stream of input elements cannot be parsed as a single instance of the goal nonterminal \token{JSON5Text}, with no tokens left over. |
| 254 | + |
| 255 | +Productions of the syntactic grammar are distinguished by having just one colon ``:'' as punctuation. |
| 256 | + |
| 257 | +\subsubsection*{Syntax} |
| 258 | + |
| 259 | +\begin{synprod}{JSON5Text} |
| 260 | + \token{JSON5Value} |
| 261 | +\end{synprod} |
| 262 | + |
| 263 | +\begin{synprod}{JSON5Value} |
| 264 | + \token{JSON5NullLiteral}\\ |
| 265 | + \token{JSON5BooleanLiteral}\\ |
| 266 | + \token{JSON5Number}\\ |
| 267 | + \token{JSON5String}\\ |
| 268 | + \token{JSON5Object}\\ |
| 269 | + \token{JSON5Array} |
| 270 | +\end{synprod} |
| 271 | + |
| 272 | +\begin{synprod}{JSON5Object} |
| 273 | + \lit{\{} \lit{\}}\\ |
| 274 | + \lit{\{} \token{JSON5MemberList} \opt{\lit{,}} \lit{\}} |
| 275 | +\end{synprod} |
| 276 | + |
| 277 | +\begin{synprod}{JSON5MemberList} |
| 278 | + \token{JSON5Member}\\ |
| 279 | + \token{JSON5MemberList} \lit{,} \token{JSON5Member} |
| 280 | +\end{synprod} |
| 281 | + |
| 282 | +\begin{synprod}{JSON5Member} |
| 283 | + \token{JSON5MemberName} \lit{:} \token{JSON5Value} |
| 284 | +\end{synprod} |
| 285 | + |
| 286 | +\begin{synprod}{JSON5MemberName} |
| 287 | + \token{JSON5Identifier}\\ |
| 288 | + \token{JSON5String} |
| 289 | +\end{synprod} |
| 290 | + |
| 291 | +\begin{synprod}{JSON5Array} |
| 292 | + \lit{[} \lit{]}\\ |
| 293 | + \lit{[} \token{JSON5ElementList} \opt{\lit{,}} \lit{]} |
| 294 | +\end{synprod} |
| 295 | + |
| 296 | +\begin{synprod}{JSON5ElementList} |
| 297 | + \token{JSON5Value}\\ |
| 298 | + \token{JSON5ElementList} \lit{,} \token{JSON5Value} |
| 299 | +\end{synprod} |
| 300 | + |
| 301 | +\printbibliography |
| 302 | + |
| 303 | +\end{document} |
0 commit comments