From cec376529c4ad8cd15d1798afede91fe85c98d2b Mon Sep 17 00:00:00 2001 From: Yuki Izumi Date: Mon, 26 Sep 2016 17:09:51 +1000 Subject: [PATCH 001/218] More runs in benchmark; .gitignore update --- .gitignore | 6 ++++ Makefile | 2 +- src/inlines.c | 91 ++++++++++++++++++++++++--------------------------- 3 files changed, 50 insertions(+), 49 deletions(-) diff --git a/.gitignore b/.gitignore index f56da3c14..c2ee13f57 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,9 @@ bstrlib.txt build cmark.dSYM/* cmark + +# Testing and benchmark +alltests.md +progit/ +bench/benchinput.md +test/afl_results/ diff --git a/Makefile b/Makefile index 47ac06f3f..e8630e994 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ BENCHDIR=bench BENCHSAMPLES=$(wildcard $(BENCHDIR)/samples/*.md) BENCHFILE=$(BENCHDIR)/benchinput.md ALLTESTS=alltests.md -NUMRUNS?=10 +NUMRUNS?=20 CMARK=$(BUILDDIR)/src/cmark CMARK_FUZZ=$(BUILDDIR)/src/cmark-fuzz PROG?=$(CMARK) diff --git a/src/inlines.c b/src/inlines.c index f5b062182..86d3e0018 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -515,9 +515,17 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) { delimiter *opener; delimiter *old_closer; bool opener_found; - int openers_bottom_index; - delimiter *openers_bottom[6] = {stack_bottom, stack_bottom, stack_bottom, - stack_bottom, stack_bottom, stack_bottom}; + bool odd_match; + delimiter *openers_bottom[3][128]; + int i; + + // initialize openers_bottom: + for (i=0; i < 3; i++) { + openers_bottom[i]['*'] = stack_bottom; + openers_bottom[i]['_'] = stack_bottom; + openers_bottom[i]['\''] = stack_bottom; + openers_bottom[i]['"'] = stack_bottom; + } // move back to first relevant delim. 
while (closer != NULL && closer->previous != stack_bottom) { @@ -527,36 +535,22 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) { // now move forward, looking for closers, and handling each while (closer != NULL) { if (closer->can_close) { - switch (closer->delim_char) { - case '"': - openers_bottom_index = 0; - break; - case '\'': - openers_bottom_index = 1; - break; - case '_': - openers_bottom_index = 2; - break; - case '*': - openers_bottom_index = 3 + (closer->length % 3); - break; - default: - assert(false); - } - // Now look backwards for first matching opener: opener = closer->previous; opener_found = false; - while (opener != NULL && opener != openers_bottom[openers_bottom_index]) { - if (opener->can_open && opener->delim_char == closer->delim_char) { + odd_match = false; + while (opener != NULL && opener != stack_bottom && + opener != openers_bottom[closer->length % 3][closer->delim_char]) { + if (opener->can_open && opener->delim_char == closer->delim_char) { // interior closer of size 2 can't match opener of size 1 // or of size 1 can't match 2 - if (!(closer->can_open || opener->can_close) || - ((opener->length + closer->length) % 3) != 0) { + odd_match = (closer->can_open || opener->can_close) && + ((opener->length + closer->length) % 3 == 0); + if (!odd_match) { opener_found = true; break; } - } + } opener = opener->previous; } old_closer = closer; @@ -585,7 +579,8 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) { } if (!opener_found) { // set lower bound for future searches for openers - openers_bottom[openers_bottom_index] = old_closer->previous; + openers_bottom[old_closer->length % 3][old_closer->delim_char] = + old_closer->previous; if (!old_closer->can_open) { // we can remove a closer that can't be an // opener, once we've seen there's no @@ -1037,31 +1032,31 @@ static cmark_node *handle_newline(subject *subj) { static bufsize_t subject_find_special_char(subject *subj, int options) { // 
"\r\n\\`&_*[]pos + 1; From f30d3c619d9c94c6379d9603769e2246c47b894a Mon Sep 17 00:00:00 2001 From: Yuki Izumi Date: Tue, 27 Sep 2016 16:02:41 +1000 Subject: [PATCH 002/218] Arena allocator This allocator allocates a 4MiB arena into which all allocations are made, and then increasingly larger arenas as earlier ones are used up. Freeing memory in the arena is a no-op: clean all memory with cmark_arena_reset(). In order to support realloc, we store the size of each allocation in a size_t before the returned pointer. The speedup is over 25% on large (benchmark-sized) inputs -- we pay a small increase in maximum RSS (~10%) for this. --- man/man3/cmark.3 | 56 ++++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 3 +- src/arena.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++ src/blocks.c | 4 +-- src/cmark.c | 6 +++- src/cmark.h | 48 +++++++++++++++++++++++++++++++ src/commonmark.c | 6 +++- src/html.c | 6 +++- src/latex.c | 6 +++- src/main.c | 65 +++++++++++++++++++++++++++-------------- src/man.c | 6 +++- src/node.c | 4 +-- src/render.c | 3 +- src/render.h | 2 +- src/xml.c | 6 +++- 15 files changed, 257 insertions(+), 36 deletions(-) create mode 100644 src/arena.c diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 index 4cb20d6f5..f32644a06 100644 --- a/man/man3/cmark.3 +++ b/man/man3/cmark.3 @@ -115,6 +115,27 @@ typedef struct cmark_mem { Defines the memory allocation functions to be used by CMark when parsing and allocating a document tree +.PP +\fIcmark_mem *\f[] \fBcmark_get_default_mem_allocator\f[](\fI\f[]) + +.PP +The default memory allocator; uses the system's calloc, realloc and +free. + +.PP +\fIcmark_mem *\f[] \fBcmark_get_arena_mem_allocator\f[](\fI\f[]) + +.PP +An arena allocator; uses system calloc to allocate large slabs of +memory. Memory in these slabs is not reused at all. + +.PP +\fIvoid\f[] \fBcmark_arena_reset\f[](\fIvoid\f[]) + +.PP +Resets the arena allocator, quickly returning all used memory to the +operating system. 
+ .SS Creating and Destroying Nodes @@ -646,6 +667,13 @@ Rendering Render a \f[I]node\f[] tree as XML. It is the caller's responsibility to free the returned buffer. +.PP +\fIchar *\f[] \fBcmark_render_xml_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_xml\f[], but specifying the allocator to use +for the resulting string. + .PP \fIchar *\f[] \fBcmark_render_html\f[](\fIcmark_node *root\f[], \fIint options\f[]) @@ -654,6 +682,13 @@ Render a \f[I]node\f[] tree as an HTML fragment. It is up to the user to add an appropriate header and footer. It is the caller's responsibility to free the returned buffer. +.PP +\fIchar *\f[] \fBcmark_render_html_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_html\f[], but specifying the allocator to use +for the resulting string. + .PP \fIchar *\f[] \fBcmark_render_man\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) @@ -661,6 +696,13 @@ to free the returned buffer. Render a \f[I]node\f[] tree as a groff man page, without the header. It is the caller's responsibility to free the returned buffer. +.PP +\fIchar *\f[] \fBcmark_render_man_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_man\f[], but specifying the allocator to use +for the resulting string. + .PP \fIchar *\f[] \fBcmark_render_commonmark\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) @@ -668,6 +710,13 @@ is the caller's responsibility to free the returned buffer. Render a \f[I]node\f[] tree as a commonmark document. It is the caller's responsibility to free the returned buffer. 
+.PP +\fIchar *\f[] \fBcmark_render_commonmark_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_commonmark\f[], but specifying the allocator to +use for the resulting string. + .PP \fIchar *\f[] \fBcmark_render_latex\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) @@ -675,6 +724,13 @@ responsibility to free the returned buffer. Render a \f[I]node\f[] tree as a LaTeX document. It is the caller's responsibility to free the returned buffer. +.PP +\fIchar *\f[] \fBcmark_render_latex_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_latex\f[], but specifying the allocator to use +for the resulting string. + .SS Options diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 319719607..89b23ac97 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,6 +40,7 @@ set(LIBRARY_SOURCES houdini_html_e.c houdini_html_u.c cmark_ctype.c + arena.c ${HEADERS} ) @@ -64,7 +65,7 @@ set_target_properties(${PROGRAM} PROPERTIES COMPILE_FLAGS -DCMARK_STATIC_DEFINE) # Check integrity of node structure when compiled as debug: -set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES") +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES -DDEBUG") set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG}") set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") diff --git a/src/arena.c b/src/arena.c new file mode 100644 index 000000000..1a30a97c0 --- /dev/null +++ b/src/arena.c @@ -0,0 +1,72 @@ +#include +#include +#include +#include "cmark.h" + +static struct arena_chunk { + size_t sz, used; + void *ptr; + struct arena_chunk *prev; +} *A = NULL; + +static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) { + struct arena_chunk *c = calloc(1, sizeof(*c)); + if (!c) + abort(); + c->sz = sz; + c->ptr = calloc(1, sz); + c->prev = prev; + return c; +} + 
+static void init_arena(void) { + A = alloc_arena_chunk(4 * 1048576, NULL); +} + +void cmark_arena_reset(void) { + while (A) { + free(A->ptr); + struct arena_chunk *n = A->prev; + free(A); + A = n; + } +} + +static void *arena_calloc(size_t nmem, size_t size) { + if (!A) + init_arena(); + + size_t sz = nmem * size + sizeof(size_t); + if (sz > A->sz) { + A->prev = alloc_arena_chunk(sz, A->prev); + return (uint8_t *) A->prev->ptr + sizeof(size_t); + } + if (sz > A->sz - A->used) { + A = alloc_arena_chunk(A->sz + A->sz / 2, A); + } + void *ptr = (uint8_t *) A->ptr + A->used; + A->used += sz; + *((size_t *) ptr) = nmem * size; + return (uint8_t *) ptr + sizeof(size_t); +} + +static void *arena_realloc(void *ptr, size_t size) { + if (!A) + init_arena(); + + void *new_ptr = arena_calloc(1, size); + if (ptr) + memcpy(new_ptr, ptr, ((size_t *) ptr)[-1]); + return new_ptr; +} + +static void arena_free(void *ptr) { + (void) ptr; + /* no-op */ +} + +cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free}; + +cmark_mem *cmark_get_arena_mem_allocator() { + return &CMARK_ARENA_MEM_ALLOCATOR; +} diff --git a/src/blocks.c b/src/blocks.c index 5a293b2e0..5f321c48e 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -108,8 +108,8 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { } cmark_parser *cmark_parser_new(int options) { - extern cmark_mem DEFAULT_MEM_ALLOCATOR; - return cmark_parser_new_with_mem(options, &DEFAULT_MEM_ALLOCATOR); + extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; + return cmark_parser_new_with_mem(options, &CMARK_DEFAULT_MEM_ALLOCATOR); } void cmark_parser_free(cmark_parser *parser) { diff --git a/src/cmark.c b/src/cmark.c index d64237f24..c2d0a33cb 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -28,7 +28,11 @@ static void *xrealloc(void *ptr, size_t size) { return new_ptr; } -cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free}; +cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free}; + +cmark_mem 
*cmark_get_default_mem_allocator() { + return &CMARK_DEFAULT_MEM_ALLOCATOR; +} char *cmark_markdown_to_html(const char *text, size_t len, int options) { cmark_node *doc; diff --git a/src/cmark.h b/src/cmark.h index d1a65aa88..6b29da3c4 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -100,6 +100,24 @@ typedef struct cmark_mem { void (*free)(void *); } cmark_mem; +/** The default memory allocator; uses the system's calloc, + * realloc and free. + */ +CMARK_EXPORT +cmark_mem *cmark_get_default_mem_allocator(); + +/** An arena allocator; uses system calloc to allocate large + * slabs of memory. Memory in these slabs is not reused at all. + */ +CMARK_EXPORT +cmark_mem *cmark_get_arena_mem_allocator(); + +/** Resets the arena allocator, quickly returning all used memory + * to the operating system. + */ +CMARK_EXPORT +void cmark_arena_reset(void); + /** * ## Creating and Destroying Nodes */ @@ -507,6 +525,12 @@ cmark_node *cmark_parse_file(FILE *f, int options); CMARK_EXPORT char *cmark_render_xml(cmark_node *root, int options); +/** As for 'cmark_render_xml', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem); + /** Render a 'node' tree as an HTML fragment. It is up to the user * to add an appropriate header and footer. It is the caller's * responsibility to free the returned buffer. @@ -514,24 +538,48 @@ char *cmark_render_xml(cmark_node *root, int options); CMARK_EXPORT char *cmark_render_html(cmark_node *root, int options); +/** As for 'cmark_render_html', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem); + /** Render a 'node' tree as a groff man page, without the header. * It is the caller's responsibility to free the returned buffer. 
*/ CMARK_EXPORT char *cmark_render_man(cmark_node *root, int options, int width); +/** As for 'cmark_render_man', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); + /** Render a 'node' tree as a commonmark document. * It is the caller's responsibility to free the returned buffer. */ CMARK_EXPORT char *cmark_render_commonmark(cmark_node *root, int options, int width); +/** As for 'cmark_render_commonmark', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); + /** Render a 'node' tree as a LaTeX document. * It is the caller's responsibility to free the returned buffer. */ CMARK_EXPORT char *cmark_render_latex(cmark_node *root, int options, int width); +/** As for 'cmark_render_latex', but specifying the allocator to use for + * the resulting string. 
+ */ +CMARK_EXPORT +char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); + /** * ## Options */ diff --git a/src/commonmark.c b/src/commonmark.c index a9ba56633..dd696ae9b 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -466,10 +466,14 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } char *cmark_render_commonmark(cmark_node *root, int options, int width) { + return cmark_render_commonmark_with_mem(root, options, width, cmark_node_mem(root)); +} + +char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) { if (options & CMARK_OPT_HARDBREAKS) { // disable breaking on width, since it has // a different meaning with OPT_HARDBREAKS width = 0; } - return cmark_render(root, options, width, outc, S_render_node); + return cmark_render(mem, root, options, width, outc, S_render_node); } diff --git a/src/html.c b/src/html.c index a680e4a50..d58596cd0 100644 --- a/src/html.c +++ b/src/html.c @@ -323,8 +323,12 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } char *cmark_render_html(cmark_node *root, int options) { + return cmark_render_html_with_mem(root, options, cmark_node_mem(root)); +} + +char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem) { char *result; - cmark_strbuf html = CMARK_BUF_INIT(cmark_node_mem(root)); + cmark_strbuf html = CMARK_BUF_INIT(mem); cmark_event_type ev_type; cmark_node *cur; struct render_state state = {&html, NULL}; diff --git a/src/latex.c b/src/latex.c index f372a132a..68961e11f 100644 --- a/src/latex.c +++ b/src/latex.c @@ -449,5 +449,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } char *cmark_render_latex(cmark_node *root, int options, int width) { - return cmark_render(root, options, width, outc, S_render_node); + return cmark_render_latex_with_mem(root, options, width, cmark_node_mem(root)); +} + +char *cmark_render_latex_with_mem(cmark_node *root, int 
options, int width, cmark_mem *mem) { + return cmark_render(mem, root, options, width, outc, S_render_node); } diff --git a/src/main.c b/src/main.c index 9482f6869..d1e263790 100644 --- a/src/main.c +++ b/src/main.c @@ -36,45 +36,50 @@ void print_usage() { printf(" --version Print version\n"); } -static void print_document(cmark_node *document, writer_format writer, +static bool print_document(cmark_node *document, writer_format writer, int options, int width) { char *result; + cmark_mem *mem = cmark_get_default_mem_allocator(); + switch (writer) { case FORMAT_HTML: - result = cmark_render_html(document, options); + result = cmark_render_html_with_mem(document, options, mem); break; case FORMAT_XML: - result = cmark_render_xml(document, options); + result = cmark_render_xml_with_mem(document, options, mem); break; case FORMAT_MAN: - result = cmark_render_man(document, options, width); + result = cmark_render_man_with_mem(document, options, width, mem); break; case FORMAT_COMMONMARK: - result = cmark_render_commonmark(document, options, width); + result = cmark_render_commonmark_with_mem(document, options, width, mem); break; case FORMAT_LATEX: - result = cmark_render_latex(document, options, width); + result = cmark_render_latex_with_mem(document, options, width, mem); break; default: fprintf(stderr, "Unknown format %d\n", writer); - exit(1); + return false; } printf("%s", result); - cmark_node_mem(document)->free(result); + mem->free(result); + + return true; } int main(int argc, char *argv[]) { int i, numfps = 0; int *files; char buffer[4096]; - cmark_parser *parser; + cmark_parser *parser = NULL; size_t bytes; - cmark_node *document; + cmark_node *document = NULL; int width = 0; char *unparsed; writer_format writer = FORMAT_HTML; int options = CMARK_OPT_DEFAULT; + int res = 1; #if defined(_WIN32) && !defined(__CYGWIN__) _setmode(_fileno(stdin), _O_BINARY); @@ -87,7 +92,7 @@ int main(int argc, char *argv[]) { if (strcmp(argv[i], "--version") == 0) { printf("cmark 
%s", CMARK_VERSION_STRING); printf(" - CommonMark converter\n(C) 2014-2016 John MacFarlane\n"); - exit(0); + goto success; } else if (strcmp(argv[i], "--sourcepos") == 0) { options |= CMARK_OPT_SOURCEPOS; } else if (strcmp(argv[i], "--hardbreaks") == 0) { @@ -103,7 +108,7 @@ int main(int argc, char *argv[]) { } else if ((strcmp(argv[i], "--help") == 0) || (strcmp(argv[i], "-h") == 0)) { print_usage(); - exit(0); + goto success; } else if (strcmp(argv[i], "--width") == 0) { i += 1; if (i < argc) { @@ -111,11 +116,11 @@ int main(int argc, char *argv[]) { if (unparsed && strlen(unparsed) > 0) { fprintf(stderr, "failed parsing width '%s' at '%s'\n", argv[i], unparsed); - exit(1); + goto failure; } } else { fprintf(stderr, "--width requires an argument\n"); - exit(1); + goto failure; } } else if ((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--to") == 0)) { i += 1; @@ -132,27 +137,32 @@ int main(int argc, char *argv[]) { writer = FORMAT_LATEX; } else { fprintf(stderr, "Unknown format %s\n", argv[i]); - exit(1); + goto failure; } } else { fprintf(stderr, "No argument provided for %s\n", argv[i - 1]); - exit(1); + goto failure; } } else if (*argv[i] == '-') { print_usage(); - exit(1); + goto failure; } else { // treat as file argument files[numfps++] = i; } } +#if DEBUG parser = cmark_parser_new(options); +#else + parser = cmark_parser_new_with_mem(options, cmark_get_arena_mem_allocator()); +#endif + for (i = 0; i < numfps; i++) { FILE *fp = fopen(argv[files[i]], "rb"); if (fp == NULL) { fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]], strerror(errno)); - exit(1); + goto failure; } while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { @@ -166,7 +176,6 @@ int main(int argc, char *argv[]) { } if (numfps == 0) { - while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) { cmark_parser_feed(parser, buffer, bytes); if (bytes < sizeof(buffer)) { @@ -176,13 +185,25 @@ int main(int argc, char *argv[]) { } document = cmark_parser_finish(parser); - 
cmark_parser_free(parser); - print_document(document, writer, options, width); + if (!print_document(document, writer, options, width)) + goto failure; + +success: + res = 0; + +failure: + +#if DEBUG + if (parser) + cmark_parser_free(parser); cmark_node_free(document); +#else + cmark_arena_reset(); +#endif free(files); - return 0; + return res; } diff --git a/src/man.c b/src/man.c index 1c76f68bb..f3980275d 100644 --- a/src/man.c +++ b/src/man.c @@ -248,5 +248,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } char *cmark_render_man(cmark_node *root, int options, int width) { - return cmark_render(root, options, width, S_outc, S_render_node); + return cmark_render_man_with_mem(root, options, width, cmark_node_mem(root)); +} + +char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) { + return cmark_render(mem, root, options, width, S_outc, S_render_node); } diff --git a/src/node.c b/src/node.c index c6c29028e..208621f4c 100644 --- a/src/node.c +++ b/src/node.c @@ -98,8 +98,8 @@ cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) { } cmark_node *cmark_node_new(cmark_node_type type) { - extern cmark_mem DEFAULT_MEM_ALLOCATOR; - return cmark_node_new_with_mem(type, &DEFAULT_MEM_ALLOCATOR); + extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; + return cmark_node_new_with_mem(type, &CMARK_DEFAULT_MEM_ALLOCATOR); } // Free a cmark_node list and any children. 
diff --git a/src/render.c b/src/render.c index 20dca5ff8..b7eabcb53 100644 --- a/src/render.c +++ b/src/render.c @@ -142,13 +142,12 @@ void cmark_render_code_point(cmark_renderer *renderer, uint32_t c) { renderer->column += 1; } -char *cmark_render(cmark_node *root, int options, int width, +char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, void (*outc)(cmark_renderer *, cmark_escaping, int32_t, unsigned char), int (*render_node)(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options)) { - cmark_mem *mem = cmark_node_mem(root); cmark_strbuf pref = CMARK_BUF_INIT(mem); cmark_strbuf buf = CMARK_BUF_INIT(mem); cmark_node *cur; diff --git a/src/render.h b/src/render.h index 35eb0a65d..b73ace464 100644 --- a/src/render.h +++ b/src/render.h @@ -36,7 +36,7 @@ void cmark_render_ascii(cmark_renderer *renderer, const char *s); void cmark_render_code_point(cmark_renderer *renderer, uint32_t c); -char *cmark_render(cmark_node *root, int options, int width, +char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, void (*outc)(cmark_renderer *, cmark_escaping, int32_t, unsigned char), int (*render_node)(cmark_renderer *renderer, diff --git a/src/xml.c b/src/xml.c index 4898cd2e8..ea53b99c6 100644 --- a/src/xml.c +++ b/src/xml.c @@ -148,8 +148,12 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } char *cmark_render_xml(cmark_node *root, int options) { + return cmark_render_xml_with_mem(root, options, cmark_node_mem(root)); +} + +char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem) { char *result; - cmark_strbuf xml = CMARK_BUF_INIT(cmark_node_mem(root)); + cmark_strbuf xml = CMARK_BUF_INIT(mem); cmark_event_type ev_type; cmark_node *cur; struct render_state state = {&xml, 0}; From c8960d74de2e990c30234d27d9c80bfbf37442b9 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Thu, 1 Dec 2016 14:14:27 +1100 Subject: [PATCH 003/218] Extensions API 
(https://github.com/jgm/cmark/pull/123) --- CMakeLists.txt | 1 + Makefile | 14 + api_test/CMakeLists.txt | 2 +- extensions/CMakeLists.txt | 32 ++ extensions/core-extensions.c | 325 +++++++++++++++++++ extensions/ext_scanners.c | 585 +++++++++++++++++++++++++++++++++++ extensions/ext_scanners.h | 20 ++ extensions/ext_scanners.re | 65 ++++ src/CMakeLists.txt | 55 +++- src/blocks.c | 240 ++++++++++++-- src/cmark.c | 11 + src/cmark.h | 73 ++++- src/cmark_extension_api.h | 546 ++++++++++++++++++++++++++++++++ src/commonmark.c | 31 ++ src/config.h.in | 2 + src/html.c | 71 ++++- src/inlines.c | 276 +++++++++++++---- src/inlines.h | 9 +- src/latex.c | 47 +++ src/libcmark.pc.in | 2 +- src/linked_list.c | 37 +++ src/main.c | 46 ++- src/man.c | 57 ++++ src/node.c | 233 ++++++++++++-- src/node.h | 14 + src/parser.h | 17 + src/plugin.c | 33 ++ src/plugin.h | 34 ++ src/registry.c | 141 +++++++++ src/registry.h | 18 ++ src/syntax_extension.c | 54 ++++ src/syntax_extension.h | 18 ++ 32 files changed, 2977 insertions(+), 132 deletions(-) create mode 100644 extensions/CMakeLists.txt create mode 100644 extensions/core-extensions.c create mode 100644 extensions/ext_scanners.c create mode 100644 extensions/ext_scanners.h create mode 100644 extensions/ext_scanners.re create mode 100644 src/cmark_extension_api.h create mode 100644 src/linked_list.c create mode 100644 src/plugin.c create mode 100644 src/plugin.h create mode 100644 src/registry.c create mode 100644 src/registry.h create mode 100644 src/syntax_extension.c create mode 100644 src/syntax_extension.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 33180e535..49eba20aa 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ option(CMARK_SHARED "Build shared libcmark library" ON) option(CMARK_LIB_FUZZER "Build libFuzzer fuzzing harness" OFF) add_subdirectory(src) +add_subdirectory(extensions) if(CMARK_TESTS AND CMARK_SHARED) add_subdirectory(api_test) endif() diff --git a/Makefile b/Makefile index 
e8630e994..6484ee00f 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,5 @@ SRCDIR=src +EXTDIR=extensions DATADIR=data BUILDDIR?=build GENERATOR?=Unix Makefiles @@ -126,6 +127,19 @@ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re --encoding-policy substitute -o $@ $< $(CLANG_FORMAT) $@ +# We include scanners.c in the repository, so this shouldn't +# normally need to be generated. +$(EXTDIR)/ext_scanners.c: $(EXTDIR)/ext_scanners.re + @case "$$(re2c -v)" in \ + *\ 0.13.*|*\ 0.14|*\ 0.14.1) \ + echo "re2c >= 0.14.2 is required"; \ + false; \ + ;; \ + esac + re2c --case-insensitive -b -i --no-generation-date -8 \ + --encoding-policy substitute -o $@ $< + clang-format -style llvm -i $@ + # We include entities.inc in the repository, so normally this # doesn't need to be regenerated: $(SRCDIR)/entities.inc: tools/make_entities_inc.py diff --git a/api_test/CMakeLists.txt b/api_test/CMakeLists.txt index 3151ccccb..5c247aba0 100644 --- a/api_test/CMakeLists.txt +++ b/api_test/CMakeLists.txt @@ -8,7 +8,7 @@ include_directories( ${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src ) -target_link_libraries(api_test libcmark) +target_link_libraries(api_test libcmark ${CMAKE_DL_LIBS}) # Compiler flags if(MSVC) diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt new file mode 100644 index 000000000..85d9e4450 --- /dev/null +++ b/extensions/CMakeLists.txt @@ -0,0 +1,32 @@ +cmake_minimum_required(VERSION 2.8) +set(LIBRARY "cmarkextensions") +set(LIBRARY_SOURCES + ${PROJECT_SOURCE_DIR}/src/buffer.c + ${PROJECT_SOURCE_DIR}/src/cmark_ctype.c + core-extensions.c + ext_scanners.c + ext_scanners.h + ) + +include_directories( + ${PROJECT_SOURCE_DIR}/src + ${PROJECT_BINARY_DIR}/src +) + +# We make LIB_INSTALL_DIR configurable rather than +# hard-coding lib, because on some OSes different locations +# are used for different architectures (e.g. /usr/lib64 on +# 64-bit Fedora). 
+if(NOT LIB_INSTALL_DIR) + set(LIB_INSTALL_DIR "lib" CACHE STRING + "Set the installation directory for libraries." FORCE) +endif(NOT LIB_INSTALL_DIR) + +include_directories(. ${CMAKE_CURRENT_BINARY_DIR}) + +set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") +set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") + +add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) + +target_link_libraries(cmarkextensions libcmark) diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c new file mode 100644 index 000000000..ad9715636 --- /dev/null +++ b/extensions/core-extensions.c @@ -0,0 +1,325 @@ +#include +#include + +#include +#include + +#include "parser.h" +#include "buffer.h" +#include "ext_scanners.h" + +typedef struct { + int n_columns; + cmark_llist *cells; +} table_row; + +static void free_table_cell(void *data) { + cmark_strbuf_free((cmark_strbuf *) data); + free(data); +} + +static void free_table_row(table_row *row) { + + if (!row) + return; + + cmark_llist_free_full(row->cells, (cmark_free_func) free_table_cell); + + free(row); +} + +static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsize_t len) +{ + cmark_strbuf *res = (cmark_strbuf *)malloc(sizeof(cmark_strbuf)); + bufsize_t r, w; + + cmark_strbuf_init(mem, res, len + 1); + cmark_strbuf_put(res, string, len); + cmark_strbuf_putc(res, '\0'); + + for (r = 0, w = 0; r < len; ++r) { + if (res->ptr[r] == '\\' && res->ptr[r + 1] == '|') + r++; + + res->ptr[w++] = res->ptr[r]; + } + + cmark_strbuf_truncate(res, w); + + return res; +} + +static table_row *row_from_string(cmark_mem *mem, unsigned char *string, int len) { + table_row *row = NULL; + bufsize_t cell_matched = 0; + bufsize_t cell_offset = 0; + + row = malloc(sizeof(table_row)); + row->n_columns = 0; + row->cells = NULL; + + do { + cell_matched = scan_table_cell(string, len, cell_offset); + if (cell_matched) { + cmark_strbuf *cell_buf = unescape_pipes(mem, string + cell_offset + 1, + cell_matched - 1); + 
row->n_columns += 1; + row->cells = cmark_llist_append(row->cells, cell_buf); + } + cell_offset += cell_matched; + } while (cell_matched); + + cell_matched = scan_table_row_end(string, len, cell_offset); + cell_offset += cell_matched; + + if (!cell_matched || cell_offset != len) { + free_table_row(row); + row = NULL; + } + + return row; +} + +static cmark_node *try_opening_table_header(cmark_syntax_extension *self, + cmark_parser * parser, + cmark_node * parent_container, + unsigned char * input, + int len) { + bufsize_t matched = scan_table_start(input, len, cmark_parser_get_first_nonspace(parser)); + cmark_node *table_header; + table_row *header_row = NULL; + table_row *marker_row = NULL; + const char *parent_string; + + if (!matched) + goto done; + + parent_string = cmark_node_get_string_content(parent_container); + + header_row = row_from_string(parser->mem, (unsigned char *) parent_string, strlen(parent_string)); + + if (!header_row) { + goto done; + } + + marker_row = row_from_string(parser->mem, input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); + + assert(marker_row); + + if (header_row->n_columns != marker_row->n_columns) { + goto done; + } + + if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) { + goto done; + } + + cmark_node_set_syntax_extension(parent_container, self); + cmark_node_set_n_table_columns(parent_container, header_row->n_columns); + + table_header = cmark_parser_add_child(parser, parent_container, + CMARK_NODE_TABLE_ROW, cmark_parser_get_offset(parser)); + cmark_node_set_syntax_extension(table_header, self); + cmark_node_set_is_table_header(table_header, true); + + { + cmark_llist *tmp; + + for (tmp = header_row->cells; tmp; tmp = tmp->next) { + cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data; + cmark_node *header_cell = cmark_parser_add_child(parser, table_header, + CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); + cmark_node_set_string_content(header_cell, (char *) 
cell_buf->ptr); + cmark_node_set_syntax_extension(header_cell, self); + } + } + + cmark_parser_advance_offset(parser, input, + strlen(input) - 1 - cmark_parser_get_offset(parser), + false); +done: + free_table_row(header_row); + free_table_row(marker_row); + return parent_container; +} + +static cmark_node *try_opening_table_row(cmark_syntax_extension *self, + cmark_parser * parser, + cmark_node * parent_container, + unsigned char * input, + int len) { + cmark_node *table_row_block; + table_row *row; + + if (cmark_parser_is_blank(parser)) + return NULL; + + table_row_block = cmark_parser_add_child(parser, parent_container, + CMARK_NODE_TABLE_ROW, cmark_parser_get_offset(parser)); + + cmark_node_set_syntax_extension(table_row_block, self); + + /* We don't advance the offset here */ + + row = row_from_string(parser->mem, input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); + + { + cmark_llist *tmp; + + for (tmp = row->cells; tmp; tmp = tmp->next) { + cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data; + cmark_node *cell = cmark_parser_add_child(parser, table_row_block, + CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); + cmark_node_set_string_content(cell, (char *) cell_buf->ptr); + cmark_node_set_syntax_extension(cell, self); + } + } + + free_table_row(row); + + cmark_parser_advance_offset(parser, input, + len - 1 - cmark_parser_get_offset(parser), + false); + + return table_row_block; +} + +static cmark_node *try_opening_table_block(cmark_syntax_extension * syntax_extension, + int indented, + cmark_parser * parser, + cmark_node * parent_container, + unsigned char * input, + int len) { + cmark_node_type parent_type = cmark_node_get_type(parent_container); + + if (!indented && parent_type == CMARK_NODE_PARAGRAPH) { + return try_opening_table_header(syntax_extension, parser, parent_container, input, len); + } else if (!indented && parent_type == CMARK_NODE_TABLE) { + return try_opening_table_row(syntax_extension, 
parser, parent_container, input, len); + } + + return NULL; +} + +static int table_matches(cmark_syntax_extension *self, + cmark_parser * parser, + unsigned char * input, + int len, + cmark_node * parent_container) { + int res = 0; + + if (cmark_node_get_type(parent_container) == CMARK_NODE_TABLE) { + table_row *new_row = row_from_string(parser->mem, input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); + if (new_row) { + if (new_row->n_columns == cmark_node_get_n_table_columns(parent_container)) + res = 1; + } + free_table_row(new_row); + } + + return res; +} + +static cmark_syntax_extension *register_table_syntax_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("piped-tables"); + + cmark_syntax_extension_set_match_block_func(ext, table_matches); + cmark_syntax_extension_set_open_block_func(ext, try_opening_table_block); + + return ext; +} + +static cmark_node *strikethrough_match(cmark_syntax_extension *self, + cmark_parser *parser, + cmark_node *parent, + unsigned char character, + cmark_inline_parser *inline_parser) +{ + cmark_node *res = NULL; + int left_flanking, right_flanking, punct_before, punct_after; + int num_delims; + + /* Exit early */ + if (character != '~') + return NULL; + + num_delims = cmark_inline_parser_scan_delimiters(inline_parser, 1, '~', + &left_flanking, &right_flanking, &punct_before, &punct_after); + + if (num_delims > 0) { /* Should not be needed */ + int can_open, can_close; + + res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + cmark_node_set_literal(res, "~"); + + can_open = left_flanking; + can_close = right_flanking; + if (can_open || can_close) + cmark_inline_parser_push_delimiter(inline_parser, character, can_open, can_close, res); + } + + return res; +} + +static delimiter *strikethrough_insert(cmark_syntax_extension *self, + cmark_parser *parser, + cmark_inline_parser *inline_parser, + delimiter *opener, + delimiter *closer) +{ + cmark_node 
*strikethrough; + cmark_node *tmp, *next; + delimiter *delim, *tmp_delim; + delimiter *res = closer->next; + + strikethrough = opener->inl_text; + + if (!cmark_node_set_type(strikethrough, CMARK_NODE_STRIKETHROUGH)) + goto done; + + cmark_node_set_string_content(strikethrough, "~"); + tmp = cmark_node_next(opener->inl_text); + + while (tmp) { + if (tmp == closer->inl_text) + break; + next = cmark_node_next(tmp); + cmark_node_append_child(strikethrough, tmp); + tmp = next; + } + + cmark_node_free(closer->inl_text); + + delim = closer; + while (delim != NULL && delim != opener) { + tmp_delim = delim->previous; + cmark_inline_parser_remove_delimiter(inline_parser, delim); + delim = tmp_delim; + } + + cmark_inline_parser_remove_delimiter(inline_parser, opener); + +done: + return res; +} + +static cmark_syntax_extension *create_strikethrough_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("tilde_strikethrough"); + cmark_llist *special_chars = NULL; + + cmark_syntax_extension_set_match_inline_func(ext, strikethrough_match); + cmark_syntax_extension_set_inline_from_delim_func(ext, strikethrough_insert); + special_chars = cmark_llist_append(special_chars, (void *) '~'); + cmark_syntax_extension_set_special_inline_chars(ext, special_chars); + + return ext; +} + +int init_libcmarkextensions(cmark_plugin *plugin) { + cmark_plugin_register_syntax_extension(plugin, register_table_syntax_extension()); + cmark_plugin_register_syntax_extension(plugin, create_strikethrough_extension()); + return 1; +} diff --git a/extensions/ext_scanners.c b/extensions/ext_scanners.c new file mode 100644 index 000000000..78df8d242 --- /dev/null +++ b/extensions/ext_scanners.c @@ -0,0 +1,585 @@ +/* Generated by re2c 0.16 */ +#include +#include "ext_scanners.h" + +bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), + unsigned char *ptr, int len, bufsize_t offset) { + bufsize_t res; + + if (ptr == NULL || offset > len) { + return 0; + } else { + unsigned 
char lim = ptr[len]; + + ptr[len] = '\0'; + res = scanner(ptr + offset); + ptr[len] = lim; + } + + return res; +} + +bufsize_t _scan_table_cell(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 0, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, + 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *(marker = p); + if (yych <= 0xDF) { + if (yych <= '{') { + if (yych != '\n') + goto yy3; + } else { + if (yych <= '|') + goto yy4; + if (yych <= 0x7F) + goto yy3; + if (yych >= 0xC2) + goto yy5; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy7; + if (yych == 0xED) + goto yy9; + goto yy8; + } else { + if (yych <= 0xF0) + goto yy10; + if (yych <= 0xF3) + goto yy11; + if (yych <= 0xF4) + goto yy12; + } + } + yy2 : { return 0; } + yy3: + yych = *++p; + goto yy2; + yy4: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '{') { + if (yych <= '\n') { + if (yych <= '\t') + goto yy14; + goto yy2; + } else { + if (yych == '\r') + goto yy2; + goto yy14; + } + } else { + if (yych <= 0x7F) { + if 
(yych <= '|') + goto yy2; + goto yy14; + } else { + if (yych <= 0xC1) + goto yy2; + if (yych <= 0xF4) + goto yy14; + goto yy2; + } + } + yy5: + yych = *++p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0xBF) + goto yy3; + yy6: + p = marker; + if (yyaccept == 0) { + goto yy2; + } else { + goto yy15; + } + yy7: + yych = *++p; + if (yych <= 0x9F) + goto yy6; + if (yych <= 0xBF) + goto yy5; + goto yy6; + yy8: + yych = *++p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0xBF) + goto yy5; + goto yy6; + yy9: + yych = *++p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0x9F) + goto yy5; + goto yy6; + yy10: + yych = *++p; + if (yych <= 0x8F) + goto yy6; + if (yych <= 0xBF) + goto yy8; + goto yy6; + yy11: + yych = *++p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0xBF) + goto yy8; + goto yy6; + yy12: + yych = *++p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0x8F) + goto yy8; + goto yy6; + yy13: + yyaccept = 1; + marker = ++p; + yych = *p; + yy14: + if (yybm[0 + yych] & 64) { + goto yy13; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\r') + goto yy15; + if (yych <= '\\') + goto yy16; + } else { + if (yych <= 0xDF) + goto yy18; + if (yych <= 0xE0) + goto yy19; + goto yy20; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy21; + if (yych <= 0xEF) + goto yy20; + goto yy22; + } else { + if (yych <= 0xF3) + goto yy23; + if (yych <= 0xF4) + goto yy24; + } + } + yy15 : { return (bufsize_t)(p - start); } + yy16: + yyaccept = 1; + marker = ++p; + yych = *p; + if (yybm[0 + yych] & 128) { + goto yy16; + } + if (yych <= 0xDF) { + if (yych <= '\f') { + if (yych == '\n') + goto yy15; + goto yy13; + } else { + if (yych <= '\r') + goto yy15; + if (yych <= 0x7F) + goto yy13; + if (yych <= 0xC1) + goto yy15; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy19; + if (yych == 0xED) + goto yy21; + goto yy20; + } else { + if (yych <= 0xF0) + goto yy22; + if (yych <= 0xF3) + goto yy23; + if (yych <= 0xF4) + goto yy24; + goto yy15; + } + } + 
yy18: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0xBF) + goto yy13; + goto yy6; + yy19: + ++p; + yych = *p; + if (yych <= 0x9F) + goto yy6; + if (yych <= 0xBF) + goto yy18; + goto yy6; + yy20: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0xBF) + goto yy18; + goto yy6; + yy21: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0x9F) + goto yy18; + goto yy6; + yy22: + ++p; + yych = *p; + if (yych <= 0x8F) + goto yy6; + if (yych <= 0xBF) + goto yy20; + goto yy6; + yy23: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0xBF) + goto yy20; + goto yy6; + yy24: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0x8F) + goto yy20; + goto yy6; + } +} + +bufsize_t _scan_table_row_end(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + yych = *(marker = p); + if (yych <= 0xDF) { + if (yych <= '{') { + if (yych != '\n') + goto yy28; + } else { + if (yych <= '|') + goto yy29; + if (yych <= 0x7F) + goto yy28; + if (yych >= 0xC2) + goto yy30; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy32; + if (yych == 0xED) + goto yy34; + goto yy33; + } else { + if (yych <= 0xF0) + goto yy35; + if (yych <= 0xF3) + goto yy36; + if (yych <= 0xF4) + goto yy37; + } + } + yy27 : { return 0; } + yy28: + yych = *++p; + goto yy27; + yy29: + yych = *(marker = ++p); + if (yych == '\n') + goto yy38; + if (yych == '\r') + goto yy40; + goto yy27; + yy30: + yych = *++p; + if (yych <= 0x7F) + goto yy31; + if (yych <= 0xBF) + goto yy28; + yy31: + p = marker; + goto yy27; + yy32: + yych = *++p; + if (yych <= 0x9F) + goto yy31; + if (yych <= 0xBF) + goto yy30; + goto yy31; + yy33: + yych = *++p; + if (yych <= 0x7F) + goto yy31; + if (yych <= 0xBF) + goto yy30; + goto yy31; + yy34: + yych = *++p; + if (yych <= 0x7F) + goto yy31; + if (yych <= 0x9F) + goto yy30; + goto yy31; + yy35: + yych = *++p; + if (yych <= 0x8F) + goto yy31; 
+ if (yych <= 0xBF) + goto yy33; + goto yy31; + yy36: + yych = *++p; + if (yych <= 0x7F) + goto yy31; + if (yych <= 0xBF) + goto yy33; + goto yy31; + yy37: + yych = *++p; + if (yych <= 0x7F) + goto yy31; + if (yych <= 0x8F) + goto yy33; + goto yy31; + yy38: + ++p; + { return (bufsize_t)(p - start); } + yy40: + ++p; + if ((yych = *p) == '\n') + goto yy38; + goto yy31; + } +} + +bufsize_t _scan_table_start(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *(marker = p); + if (yych <= 0xDF) { + if (yych <= '{') { + if (yych != '\n') + goto yy44; + } else { + if (yych <= '|') + goto yy45; + if (yych <= 0x7F) + goto yy44; + if (yych >= 0xC2) + goto yy46; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy48; + if (yych == 0xED) + goto yy50; + goto yy49; + } else { + if (yych <= 0xF0) + goto yy51; + if (yych <= 0xF3) + goto yy52; + if (yych <= 0xF4) + goto yy53; + } + } + yy43 : { return 0; } + yy44: + yych = *++p; + goto yy43; + yy45: + yych = *(marker = ++p); + if (yybm[0 + yych] & 64) { + goto yy54; + } + if (yych == '-') + goto yy56; + goto 
yy43; + yy46: + yych = *++p; + if (yych <= 0x7F) + goto yy47; + if (yych <= 0xBF) + goto yy44; + yy47: + p = marker; + goto yy43; + yy48: + yych = *++p; + if (yych <= 0x9F) + goto yy47; + if (yych <= 0xBF) + goto yy46; + goto yy47; + yy49: + yych = *++p; + if (yych <= 0x7F) + goto yy47; + if (yych <= 0xBF) + goto yy46; + goto yy47; + yy50: + yych = *++p; + if (yych <= 0x7F) + goto yy47; + if (yych <= 0x9F) + goto yy46; + goto yy47; + yy51: + yych = *++p; + if (yych <= 0x8F) + goto yy47; + if (yych <= 0xBF) + goto yy49; + goto yy47; + yy52: + yych = *++p; + if (yych <= 0x7F) + goto yy47; + if (yych <= 0xBF) + goto yy49; + goto yy47; + yy53: + yych = *++p; + if (yych <= 0x7F) + goto yy47; + if (yych <= 0x8F) + goto yy49; + goto yy47; + yy54: + ++p; + yych = *p; + if (yybm[0 + yych] & 64) { + goto yy54; + } + if (yych != '-') + goto yy47; + yy56: + ++p; + yych = *p; + if (yybm[0 + yych] & 128) { + goto yy56; + } + if (yych <= '\f') { + if (yych == '\t') + goto yy58; + if (yych <= '\n') + goto yy47; + } else { + if (yych <= ' ') { + if (yych <= 0x1F) + goto yy47; + } else { + if (yych == '|') + goto yy60; + goto yy47; + } + } + yy58: + ++p; + yych = *p; + if (yych <= '\f') { + if (yych == '\t') + goto yy58; + if (yych <= '\n') + goto yy47; + goto yy58; + } else { + if (yych <= ' ') { + if (yych <= 0x1F) + goto yy47; + goto yy58; + } else { + if (yych != '|') + goto yy47; + } + } + yy60: + ++p; + yych = *p; + if (yybm[0 + yych] & 64) { + goto yy54; + } + if (yych <= '\r') { + if (yych <= 0x08) + goto yy47; + if (yych >= '\v') + goto yy63; + } else { + if (yych == '-') + goto yy56; + goto yy47; + } + yy61: + ++p; + { return (bufsize_t)(p - start); } + yy63: + ++p; + if ((yych = *p) == '\n') + goto yy61; + goto yy47; + } +} diff --git a/extensions/ext_scanners.h b/extensions/ext_scanners.h new file mode 100644 index 000000000..c96b18490 --- /dev/null +++ b/extensions/ext_scanners.h @@ -0,0 +1,20 @@ +#include "cmark.h" +#include "chunk.h" + +#ifdef __cplusplus +extern "C" 
{ +#endif + +bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, + int len, bufsize_t offset); +bufsize_t _scan_table_start(const unsigned char *p); +bufsize_t _scan_table_cell(const unsigned char *p); +bufsize_t _scan_table_row_end(const unsigned char *p); + +#define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n) +#define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n) +#define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n) + +#ifdef __cplusplus +} +#endif diff --git a/extensions/ext_scanners.re b/extensions/ext_scanners.re new file mode 100644 index 000000000..7ad561f51 --- /dev/null +++ b/extensions/ext_scanners.re @@ -0,0 +1,65 @@ +#include +#include "ext_scanners.h" + +bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, int len, bufsize_t offset) +{ + bufsize_t res; + + if (ptr == NULL || offset > len) { + return 0; + } else { + unsigned char lim = ptr[len]; + + ptr[len] = '\0'; + res = scanner(ptr + offset); + ptr[len] = lim; + } + + return res; +} + +/*!re2c + re2c:define:YYCTYPE = "unsigned char"; + re2c:define:YYCURSOR = p; + re2c:define:YYMARKER = marker; + re2c:define:YYCTXMARKER = marker; + re2c:yyfill:enable = 0; + + spacechar = [ \t\v\f]; + newline = [\r]?[\n]; + + escaped_char = [\\][|!"#$%&'()*+,./:;<=>?@[\\\]^_`{}~-]; + + table_marker = [|](spacechar*[-]+spacechar*); + table_cell = [|](escaped_char|[^|\r\n])+; +*/ + +bufsize_t _scan_table_cell(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + table_cell { return (bufsize_t)(p - start); } + .? { return 0; } +*/ +} + +bufsize_t _scan_table_row_end(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + [|]newline { return (bufsize_t)(p - start); } + .? 
{ return 0; } +*/ +} + +bufsize_t _scan_table_start(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + (table_marker)+ [|]newline { return (bufsize_t)(p - start); } + .? { return 0; } +*/ +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 89b23ac97..c5d1c0efb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,6 +6,7 @@ set(LIBRARY "libcmark") set(STATICLIBRARY "libcmark_static") set(HEADERS cmark.h + cmark_extension_api.h parser.h buffer.h node.h @@ -18,6 +19,9 @@ set(HEADERS houdini.h cmark_ctype.h render.h + registry.h + syntax_extension.h + plugin.h ) set(LIBRARY_SOURCES cmark.c @@ -41,6 +45,10 @@ set(LIBRARY_SOURCES houdini_html_u.c cmark_ctype.c arena.c + linked_list.c + syntax_extension.c + registry.c + plugin.c ${HEADERS} ) @@ -71,6 +79,8 @@ set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG}") set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") +add_definitions(-DLIBDIR=\"${CMAKE_BINARY_DIR}\") + if (${CMAKE_VERSION} VERSION_GREATER "1.8") set(CMAKE_C_VISIBILITY_PRESET hidden) set(CMAKE_VISIBILITY_INLINES_HIDDEN 1) @@ -82,9 +92,9 @@ if (CMARK_SHARED) add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) # Include minor version and patch level in soname for now. 
set_target_properties(${LIBRARY} PROPERTIES - OUTPUT_NAME "cmark" - SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} - VERSION ${PROJECT_VERSION}) + OUTPUT_NAME "cmark" + SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} + VERSION ${PROJECT_VERSION}) set_property(TARGET ${LIBRARY} APPEND PROPERTY MACOSX_RPATH true) @@ -101,21 +111,31 @@ endif() if (CMARK_STATIC) add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES}) set_target_properties(${STATICLIBRARY} PROPERTIES - COMPILE_FLAGS -DCMARK_STATIC_DEFINE - POSITION_INDEPENDENT_CODE ON) + COMPILE_FLAGS -DCMARK_STATIC_DEFINE + POSITION_INDEPENDENT_CODE ON) if (MSVC) - set_target_properties(${STATICLIBRARY} PROPERTIES - OUTPUT_NAME "cmark_static" - VERSION ${PROJECT_VERSION}) + set_target_properties(${STATICLIBRARY} PROPERTIES + OUTPUT_NAME "cmark_static" + VERSION ${PROJECT_VERSION}) else() - set_target_properties(${STATICLIBRARY} PROPERTIES - OUTPUT_NAME "cmark" - VERSION ${PROJECT_VERSION}) - endif(MSVC) + set_target_properties(${STATICLIBRARY} PROPERTIES + OUTPUT_NAME "cmark" + VERSION ${PROJECT_VERSION}) +endif(MSVC) + +target_link_libraries(cmark ${CMAKE_DL_LIBS}) list(APPEND CMARK_INSTALL ${STATICLIBRARY}) endif() +set_property(TARGET ${LIBRARY} + APPEND PROPERTY MACOSX_RPATH true) + +# Avoid name clash between PROGRAM and LIBRARY pdb files. 
+set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark_dll) + +generate_export_header(${LIBRARY} + BASE_NAME ${PROJECT_NAME}) if (MSVC) set_property(TARGET ${PROGRAM} @@ -139,11 +159,12 @@ if(CMARK_SHARED OR CMARK_STATIC) DESTINATION lib${LIB_SUFFIX}/pkgconfig) install(FILES - cmark.h - ${CMAKE_CURRENT_BINARY_DIR}/cmark_export.h - ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h - DESTINATION include - ) + cmark.h + cmark_extension_api.h + ${CMAKE_CURRENT_BINARY_DIR}/cmark_export.h + ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h + DESTINATION include + ) install(EXPORT cmark DESTINATION lib${LIB_SUFFIX}/cmake) endif() diff --git a/src/blocks.c b/src/blocks.c index 5f321c48e..9f93c06fa 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -10,6 +10,7 @@ #include #include "cmark_ctype.h" +#include "syntax_extension.h" #include "config.h" #include "parser.h" #include "cmark.h" @@ -80,30 +81,57 @@ static cmark_node *make_document(cmark_mem *mem) { return e; } -cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { - cmark_parser *parser = (cmark_parser *)mem->calloc(1, sizeof(cmark_parser)); - parser->mem = mem; +int cmark_parser_attach_syntax_extension(cmark_parser *parser, + cmark_syntax_extension *extension) { + parser->syntax_extensions = cmark_llist_append(parser->syntax_extensions, extension); + if (extension->match_inline && extension->insert_inline_from_delim) { + parser->inline_syntax_extensions = cmark_llist_append( + parser->inline_syntax_extensions, extension); + } + + return 1; +} + +static void cmark_parser_dispose(cmark_parser *parser) { + if (parser->root) + cmark_node_free(parser->root); + + if (parser->refmap) + cmark_reference_map_free(parser->refmap); +} + +static void cmark_parser_reset(cmark_parser *parser) { + cmark_llist *saved_exts = parser->syntax_extensions; + cmark_llist *saved_inline_exts = parser->inline_syntax_extensions; + int saved_options = parser->options; + cmark_mem *saved_mem = parser->mem; + + cmark_parser_dispose(parser); 
+ + memset(parser, 0, sizeof(cmark_parser)); + parser->mem = saved_mem; - cmark_node *document = make_document(mem); + cmark_strbuf_init(parser->mem, &parser->curline, 256); + cmark_strbuf_init(parser->mem, &parser->linebuf, 0); - cmark_strbuf_init(mem, &parser->curline, 256); - cmark_strbuf_init(mem, &parser->linebuf, 0); + cmark_node *document = make_document(parser->mem); - parser->refmap = cmark_reference_map_new(mem); + parser->refmap = cmark_reference_map_new(parser->mem); parser->root = document; parser->current = document; - parser->line_number = 0; - parser->offset = 0; - parser->column = 0; - parser->first_nonspace = 0; - parser->first_nonspace_column = 0; - parser->indent = 0; - parser->blank = false; - parser->partially_consumed_tab = false; - parser->last_line_length = 0; - parser->options = options; + parser->last_buffer_ended_with_cr = false; + parser->syntax_extensions = saved_exts; + parser->inline_syntax_extensions = saved_inline_exts; + parser->options = saved_options; +} + +cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { + cmark_parser *parser = (cmark_parser *)mem->calloc(1, sizeof(cmark_parser)); + parser->mem = mem; + parser->options = options; + cmark_parser_reset(parser); return parser; } @@ -114,9 +142,11 @@ cmark_parser *cmark_parser_new(int options) { void cmark_parser_free(cmark_parser *parser) { cmark_mem *mem = parser->mem; + cmark_parser_dispose(parser); cmark_strbuf_free(&parser->curline); cmark_strbuf_free(&parser->linebuf); - cmark_reference_map_free(parser->refmap); + cmark_llist_free(parser->syntax_extensions); + cmark_llist_free(parser->inline_syntax_extensions); mem->free(parser); } @@ -144,7 +174,14 @@ static bool is_blank(cmark_strbuf *s, bufsize_t offset) { } static CMARK_INLINE bool can_contain(cmark_node_type parent_type, - cmark_node_type child_type) { + cmark_node_type child_type) { + if (parent_type == CMARK_NODE_TABLE) { + return child_type == CMARK_NODE_TABLE_ROW; + } + + if (parent_type == 
CMARK_NODE_TABLE_ROW) + return child_type == CMARK_NODE_TABLE_CELL; + return (parent_type == CMARK_NODE_DOCUMENT || parent_type == CMARK_NODE_BLOCK_QUOTE || parent_type == CMARK_NODE_ITEM || @@ -159,7 +196,8 @@ static CMARK_INLINE bool accepts_lines(cmark_node_type block_type) { static CMARK_INLINE bool contains_inlines(cmark_node_type block_type) { return (block_type == CMARK_NODE_PARAGRAPH || - block_type == CMARK_NODE_HEADING); + block_type == CMARK_NODE_HEADING || + block_type == CMARK_NODE_TABLE_CELL); } static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) { @@ -359,23 +397,43 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent, return child; } +static void manage_extensions_special_characters(cmark_parser *parser, bool add) { + cmark_llist *tmp_ext; + + for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data; + cmark_llist *tmp_char; + for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) { + unsigned char c = (unsigned char) (unsigned long) tmp_char->data; + if (add) + cmark_inlines_add_special_character(c); + else + cmark_inlines_remove_special_character(c); + } + } +} + // Walk through node and all children, recursively, parsing // string content into inline content where appropriate. 
-static void process_inlines(cmark_mem *mem, cmark_node *root, +static void process_inlines(cmark_parser *parser, cmark_reference_map *refmap, int options) { - cmark_iter *iter = cmark_iter_new(root); + cmark_iter *iter = cmark_iter_new(parser->root); cmark_node *cur; cmark_event_type ev_type; + manage_extensions_special_characters(parser, true); + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (ev_type == CMARK_EVENT_ENTER) { - if (contains_inlines(S_type(cur))) { - cmark_parse_inlines(mem, cur, refmap, options); + if (contains_inlines(cur->type)) { + cmark_parse_inlines(parser, cur, refmap, options); } } } + manage_extensions_special_characters(parser, false); + cmark_iter_free(iter); } @@ -482,7 +540,7 @@ static cmark_node *finalize_document(cmark_parser *parser) { } finalize(parser, parser->root); - process_inlines(parser->mem, parser->root, parser->refmap, parser->options); + process_inlines(parser, parser->refmap, parser->options); return parser->root; } @@ -521,6 +579,19 @@ void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) { S_parser_feed(parser, (const unsigned char *)buffer, len, false); } +void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len) { + cmark_strbuf saved_linebuf; + + cmark_strbuf_init(parser->mem, &saved_linebuf, 0); + cmark_strbuf_puts(&saved_linebuf, cmark_strbuf_cstr(&parser->linebuf)); + cmark_strbuf_clear(&parser->linebuf); + + S_parser_feed(parser, (const unsigned char *)buffer, len, true); + + cmark_strbuf_sets(&parser->linebuf, cmark_strbuf_cstr(&saved_linebuf)); + cmark_strbuf_free(&saved_linebuf); +} + static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, bool eof) { const unsigned char *end = buffer + len; @@ -784,6 +855,21 @@ static bool parse_html_block_prefix(cmark_parser *parser, return res; } +static bool parse_extension_block(cmark_parser *parser, + cmark_node *container, + 
cmark_chunk *input) +{ + bool res = false; + + if (container->extension->last_block_matches) { + if (container->extension->last_block_matches( + container->extension, parser, input->data, input->len, container)) + res = true; + } + + return res; +} + /** * For each containing node, try to parse the associated line start. * @@ -805,6 +891,12 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input, S_find_first_nonspace(parser, input); + if (container->extension) { + if (!parse_extension_block(parser, container, input)) + goto done; + continue; + } + switch (cont_type) { case CMARK_NODE_BLOCK_QUOTE: if (!parse_block_quote_prefix(parser, input)) @@ -1005,9 +1097,27 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->as.code.fence_length = 0; (*container)->as.code.fence_offset = 0; (*container)->as.code.info = cmark_chunk_literal(""); - } else { - break; + cmark_llist *tmp; + cmark_node *new_container = NULL; + + for (tmp = parser->syntax_extensions; tmp; tmp=tmp->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; + + if (ext->try_opening_block) { + new_container = ext->try_opening_block( + ext, indented, parser, *container, input->data, input->len); + + if (new_container) { + *container = new_container; + break; + } + } + } + + if (!new_container) { + break; + } } if (accepts_lines(S_type(*container))) { @@ -1140,6 +1250,9 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, bool all_matched = true; cmark_node *container; cmark_chunk input; + cmark_node *current; + + cmark_strbuf_clear(&parser->curline); if (parser->options & CMARK_OPT_VALIDATE_UTF8) cmark_utf8proc_check(&parser->curline, buffer, bytes); @@ -1170,9 +1283,13 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, container = last_matched_container; + current = parser->current; + open_new_blocks(parser, &container, &input, all_matched); - 
add_text_to_container(parser, container, last_matched_container, &input); + /* parser->current might have changed if feed_reentrant was called */ + if (current == parser->current) + add_text_to_container(parser, container, last_matched_container, &input); finished: parser->last_line_length = input.len; @@ -1187,6 +1304,12 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, } cmark_node *cmark_parser_finish(cmark_parser *parser) { + cmark_node *res; + + /* Parser was already finished once */ + if (parser->root == NULL) + return NULL; + if (parser->linebuf.size) { S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size); cmark_strbuf_clear(&parser->linebuf); @@ -1197,11 +1320,70 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) { cmark_consolidate_text_nodes(parser->root); cmark_strbuf_free(&parser->curline); + cmark_strbuf_free(&parser->linebuf); #if CMARK_DEBUG_NODES if (cmark_node_check(parser->root, stderr)) { abort(); } #endif - return parser->root; + + res = parser->root; + parser->root = NULL; + + cmark_parser_reset(parser); + + return res; +} + +int cmark_parser_get_line_number(cmark_parser *parser) { + return parser->line_number; +} + +bufsize_t cmark_parser_get_offset(cmark_parser *parser) { + return parser->offset; +} + +bufsize_t cmark_parser_get_column(cmark_parser *parser) { + return parser->column; +} + +int cmark_parser_get_first_nonspace(cmark_parser *parser) { + return parser->first_nonspace; +} + +int cmark_parser_get_first_nonspace_column(cmark_parser *parser) { + return parser->first_nonspace_column; +} + +int cmark_parser_get_indent(cmark_parser *parser) { + return parser->indent; +} + +int cmark_parser_is_blank(cmark_parser *parser) { + return parser->blank; +} + +int cmark_parser_has_partially_consumed_tab(cmark_parser *parser) { + return parser->partially_consumed_tab; +} + +int cmark_parser_get_last_line_length(cmark_parser *parser) { + return parser->last_line_length; +} + +cmark_node 
*cmark_parser_add_child(cmark_parser *parser, + cmark_node *parent, + cmark_node_type block_type, + int start_column) { + return add_child(parser, parent, block_type, start_column); +} + +void cmark_parser_advance_offset(cmark_parser *parser, + const char *input, + int count, + int columns) { + cmark_chunk input_chunk = cmark_chunk_literal(input); + + S_advance_offset(parser, &input_chunk, count, columns); } diff --git a/src/cmark.c b/src/cmark.c index c2d0a33cb..c9d450fdb 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -1,6 +1,7 @@ #include #include #include +#include "registry.h" #include "node.h" #include "houdini.h" #include "cmark.h" @@ -45,3 +46,13 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options) { return result; } + +int cmark_init(void) { + cmark_discover_plugins(); + return 1; +} + +int cmark_deinit(void) { + cmark_release_plugins(); + return 1; +} diff --git a/src/cmark.h b/src/cmark.h index 6b29da3c4..fbede2731 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -46,8 +46,13 @@ typedef enum { CMARK_NODE_HEADING, CMARK_NODE_THEMATIC_BREAK, + /* blocks with no syntax rules in the current specification */ + CMARK_NODE_TABLE, + CMARK_NODE_TABLE_ROW, + CMARK_NODE_TABLE_CELL, + CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT, - CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK, + CMARK_NODE_LAST_BLOCK = CMARK_NODE_TABLE_CELL, /* Inline */ CMARK_NODE_TEXT, @@ -61,8 +66,11 @@ typedef enum { CMARK_NODE_LINK, CMARK_NODE_IMAGE, + /* inlines with no syntax rules in the current specification */ + CMARK_NODE_STRIKETHROUGH, + CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT, - CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE, + CMARK_NODE_LAST_INLINE = CMARK_NODE_STRIKETHROUGH, } cmark_node_type; /* For backwards compatibility: */ @@ -87,6 +95,8 @@ typedef struct cmark_node cmark_node; typedef struct cmark_parser cmark_parser; typedef struct cmark_iter cmark_iter; +typedef void (*cmark_free_func) (void *user_data); + /** * ## Custom memory allocator support */ @@ 
-118,6 +128,46 @@ cmark_mem *cmark_get_arena_mem_allocator(); CMARK_EXPORT void cmark_arena_reset(void); + +/* + * ## Basic data structures + * + * To keep dependencies to the strict minimum, libcmark implements + * its own versions of "classic" data structures. + */ + +/** + * ### Linked list + */ + +/** A generic singly linked list. + */ +typedef struct _cmark_llist +{ + struct _cmark_llist *next; + void *data; +} cmark_llist; + +/** Append an element to the linked list, return the possibly modified + * head of the list. + */ +CMARK_EXPORT +cmark_llist * cmark_llist_append (cmark_llist * head, + void * data); + +/** Free the list starting with 'head', calling 'free_func' with the + * data pointer of each of its elements + */ +CMARK_EXPORT +void cmark_llist_free_full (cmark_llist * head, + cmark_free_func free_func); + +/** Free the list starting with 'head' + */ +CMARK_EXPORT +void cmark_llist_free (cmark_llist * head); + + /** * ## Creating and Destroying Nodes */ @@ -272,6 +322,11 @@ CMARK_EXPORT void *cmark_node_get_user_data(cmark_node *node); */ CMARK_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data); +/** Set free function for user data */ +CMARK_EXPORT +int cmark_node_set_user_data_free_func(cmark_node *node, + cmark_free_func free_func); + /** Returns the type of 'node', or `CMARK_NODE_NONE` on error. */ CMARK_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node); @@ -352,6 +407,15 @@ CMARK_EXPORT const char *cmark_node_get_fence_info(cmark_node *node); */ CMARK_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info); +/** Sets code blocks fencing details + */ +CMARK_EXPORT int cmark_node_set_fenced(cmark_node * node, int fenced, + int length, int offset, char character); + +/** Returns code blocks fencing details + */ +CMARK_EXPORT int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character); + /** Returns the URL of a link or image 'node', or an empty string if no URL is set. 
Returns NULL if called on a node that is not a link or image. @@ -415,6 +479,11 @@ CMARK_EXPORT int cmark_node_get_end_line(cmark_node *node); */ CMARK_EXPORT int cmark_node_get_end_column(cmark_node *node); +CMARK_EXPORT int cmark_node_get_n_table_columns(cmark_node *node); +CMARK_EXPORT int cmark_node_set_n_table_columns(cmark_node *node, int n_columns); +CMARK_EXPORT int cmark_node_is_table_header(cmark_node *node); +CMARK_EXPORT int cmark_node_set_is_table_header(cmark_node *node, int is_table_header); + /** * ## Tree Manipulation */ diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h new file mode 100644 index 000000000..bae8310de --- /dev/null +++ b/src/cmark_extension_api.h @@ -0,0 +1,546 @@ +#ifndef CMARK_EXTENSION_API_H +#define CMARK_EXTENSION_API_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "buffer.h" + +/** + * ## Extension Support + * + * While the "core" of libcmark is strictly compliant with the + * specification, an API is provided for extension writers to + * hook into the parsing process. + * + * It should be noted that the cmark_node API already offers + * room for customization, with methods offered to traverse and + * modify the AST, and even define custom blocks. + * When the desired customization is achievable in an error-proof + * way using that API, it should be the preferred method. + * + * The following API requires a more in-depth understanding + * of libcmark's parsing strategy, which is exposed + * [here](http://spec.commonmark.org/0.24/#appendix-a-parsing-strategy). + * + * It should be used when "a posteriori" modification of the AST + * proves to be too difficult / impossible to implement correctly. + * + * It can also serve as an intermediary step before extending + * the specification, as an extension implemented using this API + * will be trivially integrated in the core if it proves to be + * desirable. 
+ */ + +typedef struct cmark_plugin cmark_plugin; + +/** A syntax extension that can be attached to a cmark_parser + * with cmark_parser_attach_syntax_extension(). + * + * Extension writers should assign functions matching + * the signature of the following 'virtual methods' to + * implement new functionality. + * + * Their calling order and expected behaviour match the procedure outlined + * at <http://spec.commonmark.org/0.24/#phase-1-block-structure>: + * + * During step 1, cmark will call the function provided through + * 'cmark_syntax_extension_set_match_block_func' when it + * iterates over an open block created by this extension, + * to determine whether it could contain the new line. + * If no function was provided, cmark will close the block. + * + * During step 2, if and only if the new line doesn't match any + * of the standard syntax rules, cmark will call the function + * provided through 'cmark_syntax_extension_set_open_block_func' + * to let the extension determine whether that new line matches + * one of its syntax rules. + * It is the responsibility of the parser to create and add the + * new block with cmark_parser_make_block and cmark_parser_add_child. + * If no function was provided, the extension will have + * no effect at all on the final block structure of the AST. + * + * #### Inline parsing phase hooks + * + * For each character provided by the extension through + * 'cmark_syntax_extension_set_special_inline_chars', + * the function provided by the extension through + * 'cmark_syntax_extension_set_match_inline_func' + * will get called; it is the responsibility of the extension + * to scan the characters located at the current inline parsing offset + * with the cmark_inline_parser API. + * + * Depending on the type of the extension, it can either: + * + * * Scan forward, determine that the syntax matches and return + * a newly-created inline node with the appropriate type. + * This is the technique that would be used if inline code + * (with backticks) was implemented as an extension.
+ * * Scan only the character(s) that its syntax rules require + * for opening and closing nodes, push a delimiter on the + * delimiter stack, and return a simple text node with its + * contents set to the character(s) consumed. + * This is the technique that would be used if emphasis + * inlines were implemented as an extension. + * + * When an extension has pushed delimiters on the stack, + * the function provided through + * 'cmark_syntax_extension_set_inline_from_delim_func' + * will get called in a later phase, + * when the inline parser has matched opener and closer delimiters + * created by the extension together. + * + * It is then the responsibility of the extension to modify + * and populate the opener inline text node, and to remove + * the necessary delimiters from the delimiter stack. + * + * Finally, the extension should return NULL if its scan didn't + * match its syntax rules. + * + * The extension can store whatever private data it might need + * with 'cmark_syntax_extension_set_private', + * and optionally define a free function for this data. + */ +typedef struct cmark_syntax_extension cmark_syntax_extension; + +typedef struct subject cmark_inline_parser; + +/** Exposed raw for now */ + +typedef struct delimiter { + struct delimiter *previous; + struct delimiter *next; + cmark_node *inl_text; + bufsize_t length; + int position; + unsigned char delim_char; + int can_open; + int can_close; + int active; +} delimiter; + +/** + * ### Plugin API. + * + * Extensions should be distributed as dynamic libraries, + * with a single exported function named after the distributed + * filename. + * + * When discovering extensions (see cmark_init), cmark will + * try to load a symbol named "init_{{filename}}" in all the + * dynamic libraries it encounters. + * + * For example, given a dynamic library named myextension.so + * (or myextension.dll), cmark will try to load the symbol + * named "init_myextension".
This means that the filename + * must lend itself to forming a valid C identifier, with + * the notable exception of dashes, which will be translated + * to underscores, which means cmark will look for a function + * named "init_my_extension" if it encounters a dynamic library + * named "my-extension.so". + * + * See the 'cmark_plugin_init_func' typedef for the exact prototype + * this function should follow. + * + * For now the extensibility of cmark is not complete, as + * it only offers API to hook into the block parsing phase + * (). + * + * See 'cmark_plugin_register_syntax_extension' for more information. + */ + +/** The prototype plugins' init function should follow. + */ +typedef int (*cmark_plugin_init_func)(cmark_plugin *plugin); + +/** Register a syntax 'extension' with the 'plugin', it will be made + * available as an extension and, if attached to a cmark_parser + * with 'cmark_parser_attach_syntax_extension', it will contribute + * to the block parsing process. + * + * See the documentation for 'cmark_syntax_extension' for information + * on how to implement one. + * + * This function will typically be called from the init function + * of external modules. + * + * This takes ownership of 'extension', one should not call + * 'cmark_syntax_extension_free' on a registered extension. + */ +CMARK_EXPORT +int cmark_plugin_register_syntax_extension(cmark_plugin *plugin, + cmark_syntax_extension *extension); + +/** This will search for the syntax extension named 'name' among the + * registered syntax extensions. + * + * It can then be attached to a cmark_parser + * with the cmark_parser_attach_syntax_extension method. + */ +CMARK_EXPORT +cmark_syntax_extension *cmark_find_syntax_extension(const char *name); + +/** Should create and add a new open block to 'parent_container' if + * 'input' matches a syntax rule for that block type. It is allowed + * to modify the type of 'parent_container'. 
+ * + * Should return the newly created block if there is one, or + * 'parent_container' if its type was modified, or NULL. + */ +typedef cmark_node * (*cmark_open_block_func) (cmark_syntax_extension *extension, + int indented, + cmark_parser *parser, + cmark_node *parent_container, + unsigned char *input, + int len); + +typedef cmark_node *(*cmark_match_inline_func)(cmark_syntax_extension *extension, + cmark_parser *parser, + cmark_node *parent, + unsigned char character, + cmark_inline_parser *inline_parser); + +typedef delimiter *(*cmark_inline_from_delim_func)(cmark_syntax_extension *extension, + cmark_parser *parser, + cmark_inline_parser *inline_parser, + delimiter *opener, + delimiter *closer); + +/** Should return 'true' if 'input' can be contained in 'container', + * 'false' otherwise. + */ +typedef int (*cmark_match_block_func) (cmark_syntax_extension *extension, + cmark_parser *parser, + unsigned char *input, + int len, + cmark_node *container); + +/** Free a cmark_syntax_extension. + */ +CMARK_EXPORT +void cmark_syntax_extension_free (cmark_syntax_extension *extension); + +/** Return a newly-constructed cmark_syntax_extension, named 'name'. 
+ */ +CMARK_EXPORT +cmark_syntax_extension *cmark_syntax_extension_new (const char *name); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension, + cmark_open_block_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension, + cmark_match_block_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension, + cmark_match_inline_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension, + cmark_inline_from_delim_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension, + cmark_llist *special_chars); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, + void *priv, + cmark_free_func free_func); + +/** Return the index of the line currently being parsed, starting with 1. + */ +CMARK_EXPORT +int cmark_parser_get_line_number(cmark_parser *parser); + +/** Return the offset in bytes in the line being processed. + * + * Example: + * + * ### foo + * + * Here, offset will first be 0, then 5 (the index of the 'f' character). + */ +CMARK_EXPORT +int cmark_parser_get_offset(cmark_parser *parser); + +/** + * Return the offset in 'columns' in the line being processed. + * + * This value may differ from the value returned by + * cmark_parser_get_offset() in that it accounts for tabs, + * and as such should not be used as an index in the current line's + * buffer. 
+ * + * Example: + * + * cmark_parser_advance_offset() can be called to advance the + * offset by a number of columns, instead of a number of bytes. + * + * In that case, if offset falls "in the middle" of a tab + * character, 'column' and offset will differ. + * + * ``` + * foo \t bar + * ^ ^^ + * offset (0) 20 + * ``` + * + * If cmark_parser_advance_offset is called here with 'columns' + * set to 'true' and 'offset' set to 22, cmark_parser_get_offset() + * will return 20, whereas cmark_parser_get_column() will return + * 22. + * + * Additionally, as tabs expand to the next multiple of 4 column, + * cmark_parser_has_partially_consumed_tab() will now return + * 'true'. + */ +CMARK_EXPORT +int cmark_parser_get_column(cmark_parser *parser); + +/** Return the absolute index in bytes of the first nonspace + * character coming after the offset as returned by + * cmark_parser_get_offset() in the line currently being processed. + * + * Example: + * + * ``` + * foo bar baz \n + * ^ ^ ^ + * 0 offset (16) first_nonspace (28) + * ``` + */ +CMARK_EXPORT +int cmark_parser_get_first_nonspace(cmark_parser *parser); + +/** Return the absolute index of the first nonspace column coming after 'offset' + * in the line currently being processed, counting tabs as multiple + * columns as appropriate. + * + * See the documentation for cmark_parser_get_first_nonspace() and + * cmark_parser_get_column() for more information. + */ +CMARK_EXPORT +int cmark_parser_get_first_nonspace_column(cmark_parser *parser); + +/** Return the difference between the values returned by + * cmark_parser_get_first_nonspace_column() and + * cmark_parser_get_column(). + * + * This is not a byte offset, as it can count one tab as multiple + * characters. + */ +CMARK_EXPORT +int cmark_parser_get_indent(cmark_parser *parser); + +/** Return 'true' if the line currently being processed has been entirely + * consumed, 'false' otherwise. 
+ * + * Example: + * + * ``` + * foo bar baz \n + * ^ + * offset + * ``` + * + * This function will return 'false' here. + * + * ``` + * foo bar baz \n + * ^ + * offset + * ``` + * This function will still return 'false'. + * + * ``` + * foo bar baz \n + * ^ + * offset + * ``` + * + * At this point, this function will now return 'true'. + */ +CMARK_EXPORT +int cmark_parser_is_blank(cmark_parser *parser); + +/** Return 'true' if the value returned by cmark_parser_get_offset() + * is 'inside' an expanded tab. + * + * See the documentation for cmark_parser_get_column() for more + * information. + */ +CMARK_EXPORT +int cmark_parser_has_partially_consumed_tab(cmark_parser *parser); + +/** Return the length in bytes of the previously processed line, excluding potential + * newline (\n) and carriage return (\r) trailing characters. + */ +CMARK_EXPORT +int cmark_parser_get_last_line_length(cmark_parser *parser); + +/** Add a child to 'parent' during the parsing process. + * + * If 'parent' isn't the kind of node that can accept this child, + * this function will back up till it hits a node that can, closing + * blocks as appropriate. + */ +CMARK_EXPORT +cmark_node*cmark_parser_add_child(cmark_parser *parser, + cmark_node *parent, + cmark_node_type block_type, + int start_column); + +/** Advance the 'offset' of the parser in the current line. + * + * See the documentation of cmark_parser_get_offset() and + * cmark_parser_get_column() for more information. + */ +CMARK_EXPORT +void cmark_parser_advance_offset(cmark_parser *parser, + const char *input, + int count, + int columns); + + +CMARK_EXPORT +void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len); + +/** Attach the syntax 'extension' to the 'parser', to provide extra syntax + * rules. + * See the documentation for cmark_syntax_extension for more information. + * + * Returns 'true' if the 'extension' was successfully attached, + * 'false' otherwise. 
+ */ +CMARK_EXPORT +int cmark_parser_attach_syntax_extension(cmark_parser *parser, cmark_syntax_extension *extension); + +/** Change the type of 'node'. + * + * Return 0 if the type could be changed, 1 otherwise. + */ +CMARK_EXPORT int cmark_node_set_type(cmark_node *node, cmark_node_type type); + +/** Return the string content for all types of 'node'. + * The pointer stays valid as long as 'node' isn't freed. + */ +CMARK_EXPORT const char *cmark_node_get_string_content(cmark_node *node); + +/** Set the string 'content' for all types of 'node'. + * Copies 'content'. + */ +CMARK_EXPORT int cmark_node_set_string_content(cmark_node *node, const char *content); + +/** Get the syntax extension responsible for the creation of 'node'. + * Return NULL if 'node' was created because it matched standard syntax rules. + */ +CMARK_EXPORT cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node); + +/** Set the syntax extension responsible for creating 'node'. + */ +CMARK_EXPORT int cmark_node_set_syntax_extension(cmark_node *node, + cmark_syntax_extension *extension); + +/** + * ## Inline syntax extension helpers + * + * The inline parsing process is described in detail at + * + */ + +/** Should return 'true' if the predicate matches 'c', 'false' otherwise + */ +typedef int (*cmark_inline_predicate)(int c); + +/** Advance the current inline parsing offset */ +CMARK_EXPORT +void cmark_inline_parser_advance_offset(cmark_inline_parser *parser); + +/** Get the current inline parsing offset */ +CMARK_EXPORT +int cmark_inline_parser_get_offset(cmark_inline_parser *parser); + +/** Get the character located at the current inline parsing offset + */ +CMARK_EXPORT +unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser); + +/** Get the character located 'pos' bytes in the current line. 
+ */ +CMARK_EXPORT +unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, int pos); + +/** Whether the inline parser has reached the end of the current line + */ +CMARK_EXPORT +int cmark_inline_parser_is_eof(cmark_inline_parser *parser); + +/** Get the characters located after the current inline parsing offset + * while 'pred' matches. Free after usage. + */ +CMARK_EXPORT +char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred); + +/** Push a delimiter on the delimiter stack. + * See < for + * more information on the parameters + */ +CMARK_EXPORT +void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser, + unsigned char c, + int can_open, + int can_close, + cmark_node *inl_text); + +/** Remove 'delim' from the delimiter stack + */ +CMARK_EXPORT +void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim); + +CMARK_EXPORT +delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser); + +/** Convenience function to scan a given delimiter. + * + * 'left_flanking' and 'right_flanking' will be set to true if they + * respectively precede and follow a non-space, non-punctuation + * character. + * + * Additionally, 'punct_before' and 'punct_after' will respectively be set + * if the preceding or following character is a punctuation character. + * + * Note that 'left_flanking' and 'right_flanking' can both be 'true'. + * + * Returns the number of delimiters encountered, in the limit + * of 'max_delims', and advances the inline parsing offset. 
+ */ +CMARK_EXPORT +int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, + int max_delims, + unsigned char c, + int *left_flanking, + int *right_flanking, + int *punct_before, + int *punct_after); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/commonmark.c b/src/commonmark.c index dd696ae9b..e41e4ee3a 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -335,6 +335,33 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_TABLE: + BLANKLINE(); + break; + + case CMARK_NODE_TABLE_ROW: + if (entering) { + CR(); + LIT("|"); + } + break; + case CMARK_NODE_TABLE_CELL: + if (entering) { + } else { + LIT(" |"); + if (node->parent->as.table_row.is_header && !node->next) { + int i; + int n_cols = node->parent->parent->as.table.n_columns; + CR(); + LIT("|"); + for (i = 0; i < n_cols; i++) { + LIT(" --- |"); + } + CR(); + } + } + break; + case CMARK_NODE_TEXT: OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); break; @@ -457,6 +484,10 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_STRIKETHROUGH: + OUT(cmark_node_get_string_content(node), false, LITERAL); + break; + default: assert(false); break; diff --git a/src/config.h.in b/src/config.h.in index de1a4dd49..e14bf73ff 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -69,6 +69,8 @@ CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ... 
#endif +#define EXTENSION_DIR LIBDIR "/extensions" + #ifdef __cplusplus } #endif diff --git a/src/html.c b/src/html.c index d58596cd0..600d98326 100644 --- a/src/html.c +++ b/src/html.c @@ -27,6 +27,8 @@ static CMARK_INLINE void cr(cmark_strbuf *html) { struct render_state { cmark_strbuf *html; cmark_node *plain; + bool need_closing_table_body; + bool in_table_header; }; static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html, @@ -217,6 +219,65 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } break; + case CMARK_NODE_TABLE: + if (entering) { + cr(html); + cmark_strbuf_puts(html, "'); + state->need_closing_table_body = false; + } else { + if (state->need_closing_table_body) + cmark_strbuf_puts(html, ""); + state->need_closing_table_body = false; + cmark_strbuf_puts(html, ""); + } + break; + + case CMARK_NODE_TABLE_ROW: + if (entering) { + cr(html); + if (node->as.table_row.is_header) { + state->in_table_header = true; + cmark_strbuf_puts(html, ""); + cr(html); + } + cmark_strbuf_puts(html, "'); + } else { + cr(html); + cmark_strbuf_puts(html, ""); + if (node->as.table_row.is_header) { + cr(html); + cmark_strbuf_puts(html, ""); + cr(html); + cmark_strbuf_puts(html, ""); + state->need_closing_table_body = true; + state->in_table_header = false; + } + } + break; + + case CMARK_NODE_TABLE_CELL: + if (entering) { + cr(html); + if (state->in_table_header) { + cmark_strbuf_puts(html, "'); + } else { + if (state->in_table_header) { + cmark_strbuf_puts(html, ""); + } else { + cmark_strbuf_puts(html, ""); + } + } + break; + case CMARK_NODE_TEXT: escape_html(html, node->as.literal.data, node->as.literal.len); break; @@ -313,6 +374,14 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } break; + case CMARK_NODE_STRIKETHROUGH: + if (entering) { + cmark_strbuf_puts(html, ""); + } else { + cmark_strbuf_puts(html, ""); + } + break; + default: assert(false); break; @@ -331,7 +400,7 @@ char 
*cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem) cmark_strbuf html = CMARK_BUF_INIT(mem); cmark_event_type ev_type; cmark_node *cur; - struct render_state state = {&html, NULL}; + struct render_state state = {&html, NULL, false, false}; cmark_iter *iter = cmark_iter_new(root); while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { diff --git a/src/inlines.c b/src/inlines.c index 86d3e0018..dec4860ba 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -12,6 +12,7 @@ #include "utf8.h" #include "scanners.h" #include "inlines.h" +#include "syntax_extension.h" static const char *EMDASH = "\xE2\x80\x94"; static const char *ENDASH = "\xE2\x80\x93"; @@ -32,16 +33,6 @@ static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; #define MAXBACKTICKS 1000 -typedef struct delimiter { - struct delimiter *previous; - struct delimiter *next; - cmark_node *inl_text; - bufsize_t length; - unsigned char delim_char; - bool can_open; - bool can_close; -} delimiter; - typedef struct bracket { struct bracket *previous; struct delimiter *previous_delimiter; @@ -52,7 +43,7 @@ typedef struct bracket { bool bracket_after; } bracket; -typedef struct { +typedef struct subject{ cmark_mem *mem; cmark_chunk input; bufsize_t pos; @@ -70,7 +61,7 @@ static CMARK_INLINE bool S_is_line_end_char(char c) { static delimiter *S_insert_emph(subject *subj, delimiter *opener, delimiter *closer); -static int parse_inline(subject *subj, cmark_node *parent, int options); +static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options); static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, cmark_reference_map *refmap); @@ -510,7 +501,25 @@ static cmark_node *handle_period(subject *subj, bool smart) { } } -static void process_emphasis(subject *subj, delimiter *stack_bottom) { +static cmark_syntax_extension *get_extension_for_special_char(cmark_parser *parser, unsigned char c) { + cmark_llist *tmp_ext; + + for (tmp_ext = 
parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data; + cmark_llist *tmp_char; + for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) { + unsigned char tmp_c = (unsigned char) (unsigned long) tmp_char->data; + + if (tmp_c == c) { + return ext; + } + } + } + + return NULL; +} + +static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *stack_bottom) { delimiter *closer = subj->last_delim; delimiter *opener; delimiter *old_closer; @@ -534,6 +543,7 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) { // now move forward, looking for closers, and handling each while (closer != NULL) { + cmark_syntax_extension *extension = get_extension_for_special_char(parser, closer->delim_char); if (closer->can_close) { // Now look backwards for first matching opener: opener = closer->previous; @@ -541,7 +551,7 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) { odd_match = false; while (opener != NULL && opener != stack_bottom && opener != openers_bottom[closer->length % 3][closer->delim_char]) { - if (opener->can_open && opener->delim_char == closer->delim_char) { + if (opener->can_open && opener->delim_char == closer->delim_char) { // interior closer of size 2 can't match opener of size 1 // or of size 1 can't match 2 odd_match = (closer->can_open || opener->can_close) && @@ -550,11 +560,17 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) { opener_found = true; break; } - } + } opener = opener->previous; } old_closer = closer; - if (closer->delim_char == '*' || closer->delim_char == '_') { + + if (extension) { + if (opener_found) + closer = extension->insert_inline_from_delim(extension, parser, subj, opener, closer); + else + closer = closer->next; + } else if (closer->delim_char == '*' || closer->delim_char == '_') { if (opener_found) { closer = S_insert_emph(subj, opener, closer); } 
else { @@ -861,7 +877,7 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) { return i - offset; } // Return a link, an image, or a literal close bracket. -static cmark_node *handle_close_bracket(subject *subj) { +static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { bufsize_t initial_pos, after_link_text_pos; bufsize_t starturl, endurl, starttitle, endtitle, endall; bufsize_t n; @@ -985,7 +1001,7 @@ static cmark_node *handle_close_bracket(subject *subj) { // Free the bracket [: cmark_node_free(opener->inl_text); - process_emphasis(subj, opener->previous_delimiter); + process_emphasis(parser, subj, opener->previous_delimiter); pop_bracket(subj); // Now, if we have a link, we also want to deactivate earlier link @@ -1029,36 +1045,36 @@ static cmark_node *handle_newline(subject *subj) { } } -static bufsize_t subject_find_special_char(subject *subj, int options) { - // "\r\n\\`&_*[]pos + 1; while (n < subj->input.len) { @@ -1072,9 +1088,36 @@ static bufsize_t subject_find_special_char(subject *subj, int options) { return subj->input.len; } +void cmark_inlines_add_special_character(unsigned char c) { + SPECIAL_CHARS[c] = 1; +} + +void cmark_inlines_remove_special_character(unsigned char c) { + SPECIAL_CHARS[c] = 0; +} + +static cmark_node *try_extensions(cmark_parser *parser, + cmark_node *parent, + unsigned char c, + subject *subj) { + cmark_node *res = NULL; + cmark_llist *tmp; + + for (tmp = parser->inline_syntax_extensions; tmp; tmp = tmp->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; + + res = ext->match_inline(ext, parser, parent, c, subj); + + if (res) + break; + } + + return res; +} + // Parse an inline, advancing subject, and add it as a child of parent. // Return 0 if no inline can be parsed, 1 otherwise. 
-static int parse_inline(subject *subj, cmark_node *parent, int options) { +static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options) { cmark_node *new_inl = NULL; cmark_chunk contents; unsigned char c; @@ -1118,7 +1161,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { push_bracket(subj, false, new_inl); break; case ']': - new_inl = handle_close_bracket(subj); + new_inl = handle_close_bracket(parser, subj); break; case '!': advance(subj); @@ -1131,6 +1174,10 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { } break; default: + new_inl = try_extensions(parser, parent, c, subj); + if (new_inl != NULL) + break; + endpos = subject_find_special_char(subj, options); contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); subj->pos = endpos; @@ -1150,16 +1197,18 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { } // Parse inlines from parent's string_content, adding as children of parent. 
// Duplicate at most 'n' bytes of 's' into a newly malloc'ed, NUL-terminated
// string.
//
// Copying stops at the first NUL byte found within the first 'n' bytes, or
// after 'n' bytes if there is none. 's' therefore does not have to be
// NUL-terminated as long as at least 'n' bytes are readable: the scan is
// bounded with memchr() rather than an unbounded strlen(), which would read
// past the slice (and rescan the whole remaining buffer on every call).
//
// Returns NULL if the allocation fails. The caller owns the result and must
// release it with free().
static char *
my_strndup (const char *s, size_t n)
{
  const char *nul = memchr (s, '\0', n);
  size_t len = nul ? (size_t) (nul - s) : n;
  char *result = malloc (len + 1);

  if (!result)
    return NULL;

  memcpy (result, s, len);
  result[len] = '\0';
  return result;
}
c, can_open, can_close, inl_text); +} + +void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim) { + remove_delimiter(parser, delim); +} + +int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, + int max_delims, + unsigned char c, + int *left_flanking, + int *right_flanking, + int *punct_before, + int *punct_after) { + int numdelims = 0; + bufsize_t before_char_pos; + int32_t after_char = 0; + int32_t before_char = 0; + int len; + bool space_before, space_after; + + if (parser->pos == 0) { + before_char = 10; + } else { + before_char_pos = parser->pos - 1; + // walk back to the beginning of the UTF_8 sequence: + while (peek_at(parser, before_char_pos) >> 6 == 2 && before_char_pos > 0) { + before_char_pos -= 1; + } + len = cmark_utf8proc_iterate(parser->input.data + before_char_pos, + parser->pos - before_char_pos, &before_char); + if (len == -1) { + before_char = 10; + } + } + + while (peek_char(parser) == c && numdelims <= max_delims) { + numdelims++; + advance(parser); + } + + len = cmark_utf8proc_iterate(parser->input.data + parser->pos, + parser->input.len - parser->pos, &after_char); + if (len == -1) { + after_char = 10; + } + + *punct_before = cmark_utf8proc_is_punctuation(before_char); + *punct_after = cmark_utf8proc_is_punctuation(after_char); + space_before = cmark_utf8proc_is_space(before_char); + space_after = cmark_utf8proc_is_space(after_char); + + *left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) && + !(*punct_after && !space_before && !*punct_before); + *right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) && + !(*punct_before && !space_after && !*punct_after); + + return numdelims; +} + +void cmark_inline_parser_advance_offset(cmark_inline_parser *parser) { + advance(parser); +} + +int cmark_inline_parser_get_offset(cmark_inline_parser *parser) { + return parser->pos; +} + +delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser) { + return 
parser->last_delim; +} diff --git a/src/inlines.h b/src/inlines.h index 52be76820..586b53fa7 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -8,12 +8,17 @@ extern "C" { cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url); cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); -void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, - cmark_reference_map *refmap, int options); +void cmark_parse_inlines(cmark_parser *parser, + cmark_node *parent, + cmark_reference_map *refmap, + int options); bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, cmark_reference_map *refmap); +void cmark_inlines_add_special_character(unsigned char c); +void cmark_inlines_remove_special_character(unsigned char c); + #ifdef __cplusplus } #endif diff --git a/src/latex.c b/src/latex.c index 68961e11f..a8d485c63 100644 --- a/src/latex.c +++ b/src/latex.c @@ -346,6 +346,44 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_TABLE: + if (entering) { + int i, n_cols; + CR(); + LIT("\\begin{table}"); + CR(); + LIT("\\begin{tabular}{"); + + n_cols = node->as.table.n_columns; + for (i = 0; i < n_cols; i++) { + LIT("l"); + } + LIT("}"); + CR(); + } else { + LIT("\\end{tabular}"); + CR(); + LIT("\\end{table}"); + CR(); + } + break; + + case CMARK_NODE_TABLE_ROW: + if (!entering) { + CR(); + } + break; + + case CMARK_NODE_TABLE_CELL: + if (!entering) { + if (node->next) { + LIT(" & "); + } else { + LIT(" \\\\"); + } + } + break; + case CMARK_NODE_TEXT: OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); break; @@ -440,6 +478,15 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_STRIKETHROUGH: + /* requires \usepackage{ulem} */ + if (entering) { + LIT("\\sout{"); + } else { + LIT("}"); + } + break; + default: assert(false); break; diff --git a/src/libcmark.pc.in b/src/libcmark.pc.in index 024ae4832..c3153ba2e 100644 --- a/src/libcmark.pc.in +++ 
b/src/libcmark.pc.in @@ -6,5 +6,5 @@ includedir=@CMAKE_INSTALL_PREFIX@/include Name: libcmark Description: CommonMark parsing, rendering, and manipulation Version: @PROJECT_VERSION@ -Libs: -L${libdir} -lcmark +Libs: -L${libdir} -lcmark -ldl Cflags: -I${includedir} diff --git a/src/linked_list.c b/src/linked_list.c new file mode 100644 index 000000000..7d6690dae --- /dev/null +++ b/src/linked_list.c @@ -0,0 +1,37 @@ +#include + +#include "cmark.h" + +cmark_llist *cmark_llist_append(cmark_llist *head, void *data) { + cmark_llist *tmp; + cmark_llist *new_node = (cmark_llist *) malloc(sizeof(cmark_llist)); + + new_node->data = data; + new_node->next = NULL; + + if (!head) + return new_node; + + for (tmp = head; tmp->next; tmp=tmp->next); + + tmp->next = new_node; + + return head; +} + +void cmark_llist_free_full(cmark_llist *head, cmark_free_func free_func) { + cmark_llist *tmp, *prev; + + for (tmp = head; tmp;) { + if (free_func) + free_func(tmp->data); + + prev = tmp; + tmp = tmp->next; + free(prev); + } +} + +void cmark_llist_free(cmark_llist *head) { + cmark_llist_free_full(head, NULL); +} diff --git a/src/main.c b/src/main.c index d1e263790..a1cca572f 100644 --- a/src/main.c +++ b/src/main.c @@ -6,6 +6,9 @@ #include "memory.h" #include "cmark.h" #include "node.h" +#include "cmark_extension_api.h" +#include "syntax_extension.h" +#include "registry.h" #if defined(_WIN32) && !defined(__CYGWIN__) #include @@ -32,6 +35,8 @@ void print_usage() { printf(" --nobreaks Render soft line breaks as spaces\n"); printf(" --safe Suppress raw HTML and dangerous URLs\n"); printf(" --smart Use smart punctuation\n"); + printf(" -e, --extension EXTENSION_NAME Specify an extension name to use\n"); + printf(" --list-extensions List available extensions and quit\n"); printf(" --help, -h Print usage information\n"); printf(" --version Print version\n"); } @@ -63,11 +68,26 @@ static bool print_document(cmark_node *document, writer_format writer, return false; } printf("%s", result); - 
mem->free(result); + cmark_node_mem(document)->free(result); return true; } +static void print_extensions(void) { + cmark_llist *syntax_extensions; + cmark_llist *tmp; + + printf ("Available extensions:\n"); + + syntax_extensions = cmark_list_syntax_extensions(); + for (tmp = syntax_extensions; tmp; tmp=tmp->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; + printf("%s\n", ext->name); + } + + cmark_llist_free(syntax_extensions); +} + int main(int argc, char *argv[]) { int i, numfps = 0; int *files; @@ -93,6 +113,9 @@ int main(int argc, char *argv[]) { printf("cmark %s", CMARK_VERSION_STRING); printf(" - CommonMark converter\n(C) 2014-2016 John MacFarlane\n"); goto success; + } else if (strcmp(argv[i], "--list-extensions") == 0) { + print_extensions(); + goto success; } else if (strcmp(argv[i], "--sourcepos") == 0) { options |= CMARK_OPT_SOURCEPOS; } else if (strcmp(argv[i], "--hardbreaks") == 0) { @@ -143,6 +166,9 @@ int main(int argc, char *argv[]) { fprintf(stderr, "No argument provided for %s\n", argv[i - 1]); goto failure; } + } else if ((strcmp(argv[i], "-e") == 0) || (strcmp(argv[i], "--extension") == 0)) { + i += 1; // Simpler to handle extensions in a second pass, as we can directly register + // them with the parser. 
} else if (*argv[i] == '-') { print_usage(); goto failure; @@ -157,6 +183,23 @@ int main(int argc, char *argv[]) { parser = cmark_parser_new_with_mem(options, cmark_get_arena_mem_allocator()); #endif + for (i = 1; i < argc; i++) { + if ((strcmp(argv[i], "-e") == 0) || (strcmp(argv[i], "--extension") == 0)) { + i += 1; + if (i < argc) { + cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(argv[i]); + if (!syntax_extension) { + fprintf(stderr, "Unknown extension %s\n", argv[i]); + goto failure; + } + cmark_parser_attach_syntax_extension(parser, syntax_extension); + } else { + fprintf(stderr, "No argument provided for %s\n", argv[i - 1]); + goto failure; + } + } + } + for (i = 0; i < numfps; i++) { FILE *fp = fopen(argv[files[i]], "rb"); if (fp == NULL) { @@ -189,6 +232,7 @@ int main(int argc, char *argv[]) { if (!print_document(document, writer, options, width)) goto failure; + success: res = 0; diff --git a/src/man.c b/src/man.c index f3980275d..205a07cb5 100644 --- a/src/man.c +++ b/src/man.c @@ -82,6 +82,19 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, switch (node->type) { case CMARK_NODE_DOCUMENT: + if (entering) { + /* Define a strikethrough macro */ + /* Commenting out because this makes tests fail + LIT(".de ST"); + CR(); + LIT(".nr ww \\w'\\\\$1'"); + CR(); + LIT("\\Z@\\v'-.25m'\\l'\\\\n[ww]u'@\\\\$1"); + CR(); + LIT(".."); + CR(); + */ + } break; case CMARK_NODE_BLOCK_QUOTE: @@ -173,6 +186,40 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_TABLE: + if (entering) { + int i, n_cols; + CR(); + LIT(".TS"); + CR(); + LIT("tab(@);"); + CR(); + + n_cols = node->as.table.n_columns; + + for (i = 0; i < n_cols; i++) { + LIT("c"); + } + + if (n_cols) { + LIT("."); + CR(); + } + } else { + LIT(".TE"); + CR(); + } + break; + case CMARK_NODE_TABLE_ROW: + if (!entering) { + CR(); + } + break; + case CMARK_NODE_TABLE_CELL: + if (!entering && node->next) { + LIT("@"); + } + 
break; case CMARK_NODE_TEXT: OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); break; @@ -239,6 +286,16 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_STRIKETHROUGH: + if (entering) { + CR(); + LIT(".ST \""); + } else { + LIT("\""); + CR(); + } + break; + default: assert(false); break; diff --git a/src/node.c b/src/node.c index 208621f4c..2fd674f37 100644 --- a/src/node.c +++ b/src/node.c @@ -62,8 +62,21 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) { case CMARK_NODE_STRONG: case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: + case CMARK_NODE_STRIKETHROUGH: case CMARK_NODE_CUSTOM_INLINE: return S_is_inline(child); + case CMARK_NODE_TABLE: + return child->type == CMARK_NODE_TABLE_ROW; + case CMARK_NODE_TABLE_ROW: + return child->type == CMARK_NODE_TABLE_CELL; + case CMARK_NODE_TABLE_CELL: + return child->type == CMARK_NODE_TEXT || + child->type == CMARK_NODE_CODE || + child->type == CMARK_NODE_EMPH || + child->type == CMARK_NODE_STRONG || + child->type == CMARK_NODE_LINK || + child->type == CMARK_NODE_IMAGE || + child->type == CMARK_NODE_STRIKETHROUGH; default: break; @@ -102,35 +115,44 @@ cmark_node *cmark_node_new(cmark_node_type type) { return cmark_node_new_with_mem(type, &CMARK_DEFAULT_MEM_ALLOCATOR); } +static void free_node_as(cmark_node *node) { + switch (node->type) { + case CMARK_NODE_CODE_BLOCK: + cmark_chunk_free(NODE_MEM(node), &node->as.code.info); + cmark_chunk_free(NODE_MEM(node), &node->as.code.literal); + break; + case CMARK_NODE_TEXT: + case CMARK_NODE_HTML_INLINE: + case CMARK_NODE_CODE: + case CMARK_NODE_HTML_BLOCK: + cmark_chunk_free(NODE_MEM(node), &node->as.literal); + break; + case CMARK_NODE_LINK: + case CMARK_NODE_IMAGE: + cmark_chunk_free(NODE_MEM(node), &node->as.link.url); + cmark_chunk_free(NODE_MEM(node), &node->as.link.title); + break; + case CMARK_NODE_CUSTOM_BLOCK: + case CMARK_NODE_CUSTOM_INLINE: + cmark_chunk_free(NODE_MEM(node), &node->as.custom.on_enter); + 
cmark_chunk_free(NODE_MEM(node), &node->as.custom.on_exit); + break; + default: + break; + } +} + // Free a cmark_node list and any children. static void S_free_nodes(cmark_node *e) { cmark_node *next; while (e != NULL) { cmark_strbuf_free(&e->content); - switch (e->type) { - case CMARK_NODE_CODE_BLOCK: - cmark_chunk_free(NODE_MEM(e), &e->as.code.info); - cmark_chunk_free(NODE_MEM(e), &e->as.code.literal); - break; - case CMARK_NODE_TEXT: - case CMARK_NODE_HTML_INLINE: - case CMARK_NODE_CODE: - case CMARK_NODE_HTML_BLOCK: - cmark_chunk_free(NODE_MEM(e), &e->as.literal); - break; - case CMARK_NODE_LINK: - case CMARK_NODE_IMAGE: - cmark_chunk_free(NODE_MEM(e), &e->as.link.url); - cmark_chunk_free(NODE_MEM(e), &e->as.link.title); - break; - case CMARK_NODE_CUSTOM_BLOCK: - case CMARK_NODE_CUSTOM_INLINE: - cmark_chunk_free(NODE_MEM(e), &e->as.custom.on_enter); - cmark_chunk_free(NODE_MEM(e), &e->as.custom.on_exit); - break; - default: - break; - } + + if (e->user_data && e->user_data_free_func) + e->user_data_free_func(e->user_data); + + free_node_as(e); + if (e->last_child) { // Splice children into list e->last_child->next = e->next; @@ -156,6 +178,29 @@ cmark_node_type cmark_node_get_type(cmark_node *node) { } } +int cmark_node_set_type(cmark_node * node, cmark_node_type type) { + cmark_node_type initial_type; + + if (type == node->type) + return 1; + + initial_type = node->type; + node->type = type; + + if (!S_can_contain(node->parent, node)) { + node->type = initial_type; + return 0; + } + + /* We rollback the type to free the union members appropriately */ + node->type = initial_type; + free_node_as(node); + + node->type = type; + + return 1; +} + const char *cmark_node_get_type_string(cmark_node *node) { if (node == NULL) { return "NONE"; @@ -178,6 +223,15 @@ const char *cmark_node_get_type_string(cmark_node *node) { return "html_block"; case CMARK_NODE_CUSTOM_BLOCK: return "custom_block"; + case CMARK_NODE_TABLE: + return "table"; + case CMARK_NODE_TABLE_ROW: + 
if (node->as.table_row.is_header) + return "table_header"; + else + return "table_row"; + case CMARK_NODE_TABLE_CELL: + return "table_cell"; case CMARK_NODE_PARAGRAPH: return "paragraph"; case CMARK_NODE_HEADING: @@ -204,6 +258,8 @@ const char *cmark_node_get_type_string(cmark_node *node) { return "link"; case CMARK_NODE_IMAGE: return "image"; + case CMARK_NODE_STRIKETHROUGH: + return "strikethrough"; } return ""; @@ -265,6 +321,15 @@ int cmark_node_set_user_data(cmark_node *node, void *user_data) { return 1; } +int cmark_node_set_user_data_free_func(cmark_node *node, + cmark_free_func free_func) { + if (node == NULL) { + return 0; + } + node->user_data_free_func = free_func; + return 1; +} + const char *cmark_node_get_literal(cmark_node *node) { if (node == NULL) { return NULL; @@ -311,6 +376,15 @@ int cmark_node_set_literal(cmark_node *node, const char *content) { return 0; } +const char *cmark_node_get_string_content(cmark_node *node) { + return (char *) node->content.ptr; +} + +int cmark_node_set_string_content(cmark_node *node, const char *content) { + cmark_strbuf_sets(&node->content, content); + return true; +} + int cmark_node_get_heading_level(cmark_node *node) { if (node == NULL) { return 0; @@ -477,6 +551,38 @@ int cmark_node_set_fence_info(cmark_node *node, const char *info) { } } +int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character) { + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_CODE_BLOCK) { + *length = node->as.code.fence_length; + *offset = node->as.code.fence_offset; + *character = node->as.code.fence_char; + return node->as.code.fenced; + } else { + return 0; + } +} + +int cmark_node_set_fenced(cmark_node * node, int fenced, + int length, int offset, char character) { + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_CODE_BLOCK) { + node->as.code.fenced = fenced; + node->as.code.fence_length = length; + node->as.code.fence_offset = offset; + node->as.code.fence_char = 
character; + return 1; + } else { + return 0; + } +} + const char *cmark_node_get_url(cmark_node *node) { if (node == NULL) { return NULL; @@ -609,6 +715,23 @@ int cmark_node_set_on_exit(cmark_node *node, const char *on_exit) { return 0; } +cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node) { + if (node == NULL) { + return NULL; + } + + return node->extension; +} + +int cmark_node_set_syntax_extension(cmark_node *node, cmark_syntax_extension *extension) { + if (node == NULL) { + return 0; + } + + node->extension = extension; + return 1; +} + int cmark_node_get_start_line(cmark_node *node) { if (node == NULL) { return 0; @@ -637,6 +760,68 @@ int cmark_node_get_end_column(cmark_node *node) { return node->end_column; } +int cmark_node_get_n_table_columns(cmark_node *node) { + if (node == NULL) { + return -1; + } + + switch (node->type) { + case CMARK_NODE_TABLE: + return node->as.table.n_columns; + default: + break; + } + + return -1; +} + +int cmark_node_set_n_table_columns(cmark_node *node, int n_columns) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_TABLE: + node->as.table.n_columns = n_columns; + return 1; + default: + break; + } + + return 0; +} + +int cmark_node_is_table_header(cmark_node *node) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_TABLE_ROW: + return node->as.table_row.is_header; + default: + break; + } + + return 1; +} + +int cmark_node_set_is_table_header(cmark_node *node, int is_table_header) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_TABLE_ROW: + node->as.table_row.is_header = is_table_header; + return 1; + default: + break; + } + + return 0; +} + // Unlink a node without adjusting its next, prev, and parent pointers. 
static void S_node_unlink(cmark_node *node) { if (node == NULL) { diff --git a/src/node.h b/src/node.h index 65d857f0b..cbb0e551b 100644 --- a/src/node.h +++ b/src/node.h @@ -9,6 +9,7 @@ extern "C" { #include #include "cmark.h" +#include "cmark_extension_api.h" #include "buffer.h" #include "chunk.h" @@ -46,6 +47,14 @@ typedef struct { cmark_chunk on_exit; } cmark_custom; +typedef struct { + int n_columns; +} cmark_table; + +typedef struct { + bool is_header; +} cmark_table_row; + enum cmark_node__internal_flags { CMARK_NODE__OPEN = (1 << 0), CMARK_NODE__LAST_LINE_BLANK = (1 << 1), @@ -61,6 +70,7 @@ struct cmark_node { struct cmark_node *last_child; void *user_data; + cmark_free_func user_data_free_func; int start_line; int start_column; @@ -69,6 +79,8 @@ struct cmark_node { uint16_t type; uint16_t flags; + cmark_syntax_extension *extension; + union { cmark_chunk literal; cmark_list list; @@ -76,6 +88,8 @@ struct cmark_node { cmark_heading heading; cmark_link link; cmark_custom custom; + cmark_table table; + cmark_table_row table_row; int html_block_type; } as; }; diff --git a/src/parser.h b/src/parser.h index 0c5033bd2..247423a76 100644 --- a/src/parser.h +++ b/src/parser.h @@ -14,22 +14,39 @@ extern "C" { struct cmark_parser { struct cmark_mem *mem; + /* A hashtable of urls in the current document for cross-references */ struct cmark_reference_map *refmap; + /* The root node of the parser, always a CMARK_NODE_DOCUMENT */ struct cmark_node *root; + /* The last open block after a line is fully processed */ struct cmark_node *current; + /* See the documentation for cmark_parser_get_line_number() in cmark.h */ int line_number; + /* See the documentation for cmark_parser_get_offset() in cmark.h */ bufsize_t offset; + /* See the documentation for cmark_parser_get_column() in cmark.h */ bufsize_t column; + /* See the documentation for cmark_parser_get_first_nonspace() in cmark.h */ bufsize_t first_nonspace; + /* See the documentation for 
cmark_parser_get_first_nonspace_column() in cmark.h */ bufsize_t first_nonspace_column; + /* See the documentation for cmark_parser_get_indent() in cmark.h */ int indent; + /* See the documentation for cmark_parser_is_blank() in cmark.h */ bool blank; + /* See the documentation for cmark_parser_has_partially_consumed_tab() in cmark.h */ bool partially_consumed_tab; + /* Contains the currently processed line */ cmark_strbuf curline; + /* See the documentation for cmark_parser_get_last_line_length() in cmark.h */ bufsize_t last_line_length; + /* FIXME: not sure about the difference with curline */ cmark_strbuf linebuf; + /* Options set by the user, see the Options section in cmark.h */ int options; bool last_buffer_ended_with_cr; + cmark_llist *syntax_extensions; + cmark_llist *inline_syntax_extensions; }; #ifdef __cplusplus diff --git a/src/plugin.c b/src/plugin.c new file mode 100644 index 000000000..39c361ac7 --- /dev/null +++ b/src/plugin.c @@ -0,0 +1,33 @@ +#include + +#include "plugin.h" + +int cmark_plugin_register_syntax_extension(cmark_plugin * plugin, + cmark_syntax_extension * extension) { + plugin->syntax_extensions = cmark_llist_append(plugin->syntax_extensions, extension); + return 1; +} + +cmark_plugin * +cmark_plugin_new(void) { + cmark_plugin *res = malloc(sizeof(cmark_plugin)); + + res->syntax_extensions = NULL; + + return res; +} + +void +cmark_plugin_free(cmark_plugin *plugin) { + cmark_llist_free_full(plugin->syntax_extensions, + (cmark_free_func) cmark_syntax_extension_free); + free(plugin); +} + +cmark_llist * +cmark_plugin_steal_syntax_extensions(cmark_plugin *plugin) { + cmark_llist *res = plugin->syntax_extensions; + + plugin->syntax_extensions = NULL; + return res; +} diff --git a/src/plugin.h b/src/plugin.h new file mode 100644 index 000000000..b9e9d2994 --- /dev/null +++ b/src/plugin.h @@ -0,0 +1,34 @@ +#ifndef CMARK_PLUGIN_H +#define CMARK_PLUGIN_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "cmark.h" +#include 
"cmark_extension_api.h" + +/** + * cmark_plugin: + * + * A plugin structure, which should be filled by plugin's + * init functions. + */ +struct cmark_plugin { + cmark_llist *syntax_extensions; +}; + +cmark_llist * +cmark_plugin_steal_syntax_extensions(cmark_plugin *plugin); + +cmark_plugin * +cmark_plugin_new(void); + +void +cmark_plugin_free(cmark_plugin *plugin); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/registry.c b/src/registry.c new file mode 100644 index 000000000..8f7b9c4e1 --- /dev/null +++ b/src/registry.c @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include + +#include "config.h" +#include "cmark.h" +#include "syntax_extension.h" +#include "registry.h" +#include "plugin.h" + + +static cmark_llist *syntax_extensions = NULL; +static cmark_llist *plugin_handles = NULL; + +static cmark_plugin *scan_file(char* filename) { + char* last_slash = strrchr(filename, '/'); + char* name_start = last_slash ? last_slash + 1 : filename; + char* last_dot = strrchr(filename, '.'); + cmark_plugin *plugin = NULL; + char *init_func_name = NULL; + int i; + void *libhandle; + char *libname = NULL; + + if (!last_dot || strcmp(last_dot, ".so")) + goto done; + + libname = malloc(sizeof(char) * (strlen(EXTENSION_DIR) + strlen(filename) + 2)); + snprintf(libname, strlen(EXTENSION_DIR) + strlen(filename) + 2, "%s/%s", + EXTENSION_DIR, filename); + libhandle = dlopen(libname, RTLD_NOW); + free(libname); + + if (!libhandle) { + printf("Error loading DSO: %s\n", dlerror()); + goto done; + } + + name_start[last_dot - name_start] = '\0'; + + for (i = 0; name_start[i]; i++) { + if (name_start[i] == '-') + name_start[i] = '_'; + } + + init_func_name = malloc(sizeof(char) * (strlen(name_start) + 6)); + + snprintf(init_func_name, strlen(name_start) + 6, "init_%s", name_start); + + cmark_plugin_init_func initfunc = (cmark_plugin_init_func) + (intptr_t) dlsym(libhandle, init_func_name); + free(init_func_name); + + plugin = cmark_plugin_new(); + + if (initfunc) 
{ + if (initfunc(plugin)) { + plugin_handles = cmark_llist_append(plugin_handles, libhandle); + } else { + cmark_plugin_free(plugin); + printf("Error Initializing plugin %s\n", name_start); + plugin = NULL; + dlclose(libhandle); + } + } else { + printf("Error loading init function: %s\n", dlerror()); + dlclose(libhandle); + } + +done: + return plugin; +} + +static void scan_path(char *path) { + DIR *dir = opendir(path); + struct dirent* direntry; + + if (!dir) + return; + + while ((direntry = readdir(dir))) { + cmark_plugin *plugin = scan_file(direntry->d_name); + if (plugin) { + cmark_llist *syntax_extensions_list = cmark_plugin_steal_syntax_extensions(plugin); + cmark_llist *tmp; + + for (tmp = syntax_extensions_list; tmp; tmp=tmp->next) { + syntax_extensions = cmark_llist_append(syntax_extensions, tmp->data); + } + + cmark_llist_free(syntax_extensions_list); + cmark_plugin_free(plugin); + } + } + + closedir(dir); +} + +void cmark_discover_plugins(void) { + cmark_release_plugins(); + scan_path(EXTENSION_DIR); +} + +static void +release_plugin_handle(void *libhandle) { + dlclose(libhandle); +} + +void cmark_release_plugins(void) { + if (syntax_extensions) { + cmark_llist_free_full(syntax_extensions, + (cmark_free_func) cmark_syntax_extension_free); + syntax_extensions = NULL; + } + + cmark_llist_free_full(plugin_handles, release_plugin_handle); + plugin_handles = NULL; +} + +cmark_llist *cmark_list_syntax_extensions(void) { + cmark_llist *tmp; + cmark_llist *res = NULL; + + for (tmp = syntax_extensions; tmp; tmp = tmp->next) { + res = cmark_llist_append(res, tmp->data); + } + return res; +} + +cmark_syntax_extension *cmark_find_syntax_extension(const char *name) { + cmark_llist *tmp; + + for (tmp = syntax_extensions; tmp; tmp = tmp->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; + if (!strcmp(ext->name, name)) + return ext; + } + return NULL; +} diff --git a/src/registry.h b/src/registry.h new file mode 100644 index 
000000000..bc566e010 --- /dev/null +++ b/src/registry.h @@ -0,0 +1,18 @@ +#ifndef CMARK_REGISTRY_H +#define CMARK_REGISTRY_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "cmark.h" + +void cmark_discover_plugins(void); +void cmark_release_plugins(void); +cmark_llist *cmark_list_syntax_extensions(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/syntax_extension.c b/src/syntax_extension.c new file mode 100644 index 000000000..d8c4459d9 --- /dev/null +++ b/src/syntax_extension.c @@ -0,0 +1,54 @@ +#include + +#include "cmark.h" +#include "syntax_extension.h" +#include "buffer.h" + +void cmark_syntax_extension_free(cmark_syntax_extension *extension) { + if (extension->free_function && extension->priv) { + extension->free_function(extension->priv); + } + + cmark_llist_free(extension->special_inline_chars); + free(extension->name); + free(extension); +} + +cmark_syntax_extension *cmark_syntax_extension_new(const char *name) { + cmark_syntax_extension *res = (cmark_syntax_extension *) calloc(1, sizeof(cmark_syntax_extension)); + res->name = (char *) malloc(sizeof(char) * (strlen(name)) + 1); + strcpy(res->name, name); + return res; +} + +void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension, + cmark_open_block_func func) { + extension->try_opening_block = func; +} + +void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension, + cmark_match_block_func func) { + extension->last_block_matches = func; +} + +void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension, + cmark_match_inline_func func) { + extension->match_inline = func; +} + +void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension, + cmark_inline_from_delim_func func) { + extension->insert_inline_from_delim = func; +} + +void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension, + cmark_llist *special_chars) { + extension->special_inline_chars = 
special_chars; +} + +void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, + void *priv, + cmark_free_func free_func) { + extension->priv = priv; + extension->free_function = free_func; +} diff --git a/src/syntax_extension.h b/src/syntax_extension.h new file mode 100644 index 000000000..f46a7d2fe --- /dev/null +++ b/src/syntax_extension.h @@ -0,0 +1,18 @@ +#ifndef SYNTAX_EXTENSION_H +#define SYNTAX_EXTENSION_H + +#include "cmark.h" +#include "cmark_extension_api.h" + +struct cmark_syntax_extension { + cmark_match_block_func last_block_matches; + cmark_open_block_func try_opening_block; + cmark_match_inline_func match_inline; + cmark_inline_from_delim_func insert_inline_from_delim; + cmark_llist * special_inline_chars; + char * name; + void * priv; + cmark_free_func free_function; +}; + +#endif From 3e3761a26e68b2319738bbdd49a303a1d6098533 Mon Sep 17 00:00:00 2001 From: Yuki Izumi Date: Thu, 1 Dec 2016 14:16:12 +1100 Subject: [PATCH 004/218] Strip extensions API down and separate from core --- Makefile | 17 +- api_test/main.c | 53 ++-- extensions/CMakeLists.txt | 58 +++- extensions/core-extensions.c | 326 +------------------ extensions/core-extensions.h | 16 + extensions/ext_scanners.c | 585 ----------------------------------- extensions/ext_scanners.h | 20 -- extensions/ext_scanners.re | 65 ---- man/man3/cmark.3 | 48 ++- src/CMakeLists.txt | 8 +- src/blocks.c | 60 ++-- src/buffer.h | 36 +++ src/cmark.c | 15 +- src/cmark.h | 102 +++--- src/cmark_ctype.h | 7 + src/cmark_extension_api.h | 123 +++++++- src/commonmark.c | 40 +-- src/config.h.in | 2 - src/houdini.h | 18 +- src/houdini_html_e.c | 2 +- src/html.c | 230 +++++++------- src/html.h | 27 ++ src/inlines.c | 38 ++- src/inlines.h | 3 + src/iterator.c | 52 +++- src/latex.c | 54 +--- src/libcmark.pc.in | 2 +- src/linked_list.c | 14 +- src/main.c | 21 +- src/man.c | 51 +-- src/node.c | 190 ++++-------- src/node.h | 29 +- src/parser.h | 4 +- src/plugin.c | 11 +- src/registry.c | 116 ++----- 
src/registry.h | 10 +- src/render.h | 9 + src/scanners.h | 5 + src/syntax_extension.c | 74 ++++- src/syntax_extension.h | 13 +- test/entity_tests.py | 1 - toolchain-mingw32.cmake | 2 +- 42 files changed, 835 insertions(+), 1722 deletions(-) create mode 100644 extensions/core-extensions.h delete mode 100644 extensions/ext_scanners.c delete mode 100644 extensions/ext_scanners.h delete mode 100644 extensions/ext_scanners.re create mode 100644 src/html.h diff --git a/Makefile b/Makefile index 6484ee00f..e6a889560 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,4 @@ SRCDIR=src -EXTDIR=extensions DATADIR=data BUILDDIR?=build GENERATOR?=Unix Makefiles @@ -127,19 +126,6 @@ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re --encoding-policy substitute -o $@ $< $(CLANG_FORMAT) $@ -# We include scanners.c in the repository, so this shouldn't -# normally need to be generated. -$(EXTDIR)/ext_scanners.c: $(EXTDIR)/ext_scanners.re - @case "$$(re2c -v)" in \ - *\ 0.13.*|*\ 0.14|*\ 0.14.1) \ - echo "re2c >= 0.14.2 is required"; \ - false; \ - ;; \ - esac - re2c --case-insensitive -b -i --no-generation-date -8 \ - --encoding-policy substitute -o $@ $< - clang-format -style llvm -i $@ - # We include entities.inc in the repository, so normally this # doesn't need to be regenerated: $(SRCDIR)/entities.inc: tools/make_entities_inc.py @@ -203,6 +189,9 @@ newbench: format: $(CLANG_FORMAT) src/*.c src/*.h api_test/*.c api_test/*.h +format-extensions: + clang-format -style llvm -i extensions/*.c extensions/*.h + operf: $(CMARK) operf $< < $(BENCHFILE) > /dev/null diff --git a/api_test/main.c b/api_test/main.c index d7202343b..a95abc314 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -24,7 +24,7 @@ static void test_md_to_html(test_batch_runner *runner, const char *markdown, const char *expected_html, const char *msg); static void test_content(test_batch_runner *runner, cmark_node_type type, - int allowed_content); + unsigned int *allowed_content); static void test_char(test_batch_runner 
*runner, int valid, const char *utf8, const char *msg); @@ -177,7 +177,7 @@ static void accessors(test_batch_runner *runner) { OK(runner, cmark_node_set_literal(string, literal + sizeof("prefix")), "set_literal suffix"); - char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT); + char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL); static const char expected_html[] = "

Header

\n" "
    \n" @@ -299,7 +299,7 @@ static void iterator_delete(test_batch_runner *runner) { } } - char *html = cmark_render_html(doc, CMARK_OPT_DEFAULT); + char *html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL); static const char expected[] = "

    a c

    \n" "

    a c

    \n"; STR_EQ(runner, html, expected, "iterate and delete nodes"); @@ -339,7 +339,7 @@ static void create_tree(test_batch_runner *runner) { OK(runner, cmark_node_append_child(emph, str2), "append3"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append3 consistent"); - html = cmark_render_html(doc, CMARK_OPT_DEFAULT); + html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL); STR_EQ(runner, html, "

    Hello, world!

    \n", "render_html"); free(html); @@ -375,7 +375,7 @@ static void create_tree(test_batch_runner *runner) { cmark_node_unlink(emph); - html = cmark_render_html(doc, CMARK_OPT_DEFAULT); + html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL); STR_EQ(runner, html, "

    brzz!

    \n", "render_html after shuffling"); free(html); @@ -407,7 +407,7 @@ static void custom_nodes(test_batch_runner *runner) { STR_EQ(runner, cmark_node_get_on_exit(cb), "", "get_on_exit (empty)"); cmark_node_append_child(doc, cb); - html = cmark_render_html(doc, CMARK_OPT_DEFAULT); + html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL); STR_EQ(runner, html, "

    \n CMARK_NODE_LAST_INLINE - ? CMARK_NODE_LAST_BLOCK - : CMARK_NODE_LAST_INLINE; - OK(runner, max_node_type < 32, "all node types < 32"); - - int list_item_flag = 1 << CMARK_NODE_ITEM; - int top_level_blocks = - (1 << CMARK_NODE_BLOCK_QUOTE) | (1 << CMARK_NODE_LIST) | - (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_HTML_BLOCK) | - (1 << CMARK_NODE_PARAGRAPH) | (1 << CMARK_NODE_HEADING) | - (1 << CMARK_NODE_THEMATIC_BREAK); - int all_inlines = (1 << CMARK_NODE_TEXT) | (1 << CMARK_NODE_SOFTBREAK) | - (1 << CMARK_NODE_LINEBREAK) | (1 << CMARK_NODE_CODE) | - (1 << CMARK_NODE_HTML_INLINE) | (1 << CMARK_NODE_EMPH) | - (1 << CMARK_NODE_STRONG) | (1 << CMARK_NODE_LINK) | - (1 << CMARK_NODE_IMAGE); + unsigned int list_item_flag[] = {CMARK_NODE_ITEM, 0}; + unsigned int top_level_blocks[] = { + CMARK_NODE_BLOCK_QUOTE, CMARK_NODE_LIST, + CMARK_NODE_CODE_BLOCK, CMARK_NODE_HTML_BLOCK, + CMARK_NODE_PARAGRAPH, CMARK_NODE_HEADING, + CMARK_NODE_THEMATIC_BREAK, 0}; + unsigned int all_inlines[] = { + CMARK_NODE_TEXT, CMARK_NODE_SOFTBREAK, + CMARK_NODE_LINEBREAK, CMARK_NODE_CODE, + CMARK_NODE_HTML_INLINE, CMARK_NODE_EMPH, + CMARK_NODE_STRONG, CMARK_NODE_LINK, + CMARK_NODE_IMAGE, 0}; test_content(runner, CMARK_NODE_DOCUMENT, top_level_blocks); test_content(runner, CMARK_NODE_BLOCK_QUOTE, top_level_blocks); @@ -472,7 +468,7 @@ void hierarchy(test_batch_runner *runner) { } static void test_content(test_batch_runner *runner, cmark_node_type type, - int allowed_content) { + unsigned int *allowed_content) { cmark_node *node = cmark_node_new(type); for (int i = 0; i < num_node_types; ++i) { @@ -480,7 +476,10 @@ static void test_content(test_batch_runner *runner, cmark_node_type type, cmark_node *child = cmark_node_new(child_type); int got = cmark_node_append_child(node, child); - int expected = (allowed_content >> child_type) & 1; + int expected = 0; + if (allowed_content) + for (unsigned int *p = allowed_content; *p; ++p) + expected |= *p == child_type; INT_EQ(runner, got, expected, 
"add %d as child of %d", child_type, type); @@ -505,17 +504,17 @@ static void render_html(test_batch_runner *runner) { cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); cmark_node *paragraph = cmark_node_first_child(doc); - html = cmark_render_html(paragraph, CMARK_OPT_DEFAULT); + html = cmark_render_html(paragraph, CMARK_OPT_DEFAULT, NULL); STR_EQ(runner, html, "

    foo bar

    \n", "render single paragraph"); free(html); cmark_node *string = cmark_node_first_child(paragraph); - html = cmark_render_html(string, CMARK_OPT_DEFAULT); + html = cmark_render_html(string, CMARK_OPT_DEFAULT, NULL); STR_EQ(runner, html, "foo ", "render single inline"); free(html); cmark_node *emph = cmark_node_next(string); - html = cmark_render_html(emph, CMARK_OPT_DEFAULT); + html = cmark_render_html(emph, CMARK_OPT_DEFAULT, NULL); STR_EQ(runner, html, "bar", "render inline with children"); free(html); diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 85d9e4450..f13818a53 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -1,11 +1,7 @@ cmake_minimum_required(VERSION 2.8) -set(LIBRARY "cmarkextensions") +set(STATICLIBRARY "libcmarkextensions_static") set(LIBRARY_SOURCES - ${PROJECT_SOURCE_DIR}/src/buffer.c - ${PROJECT_SOURCE_DIR}/src/cmark_ctype.c core-extensions.c - ext_scanners.c - ext_scanners.h ) include_directories( @@ -27,6 +23,54 @@ include_directories(. 
${CMAKE_CURRENT_BINARY_DIR}) set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") -add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) +add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES}) -target_link_libraries(cmarkextensions libcmark) +set_target_properties(${STATICLIBRARY} PROPERTIES + COMPILE_FLAGS -DCMARK_STATIC_DEFINE + POSITION_INDEPENDENT_CODE ON) + +if (MSVC) + set_target_properties(${STATICLIBRARY} PROPERTIES + OUTPUT_NAME "cmarkextensions_static" + VERSION ${PROJECT_VERSION}) +else() + set_target_properties(${STATICLIBRARY} PROPERTIES + OUTPUT_NAME "cmarkextensions" + VERSION ${PROJECT_VERSION}) +endif(MSVC) + +# Feature tests +include(CheckIncludeFile) +include(CheckCSourceCompiles) +include(CheckCSourceRuns) +include(CheckSymbolExists) +CHECK_INCLUDE_FILE(stdbool.h HAVE_STDBOOL_H) +CHECK_C_SOURCE_COMPILES( + "int main() { __builtin_expect(0,0); return 0; }" + HAVE___BUILTIN_EXPECT) +CHECK_C_SOURCE_COMPILES(" + int f(void) __attribute__ (()); + int main() { return 0; } +" HAVE___ATTRIBUTE__) + +# Always compile with warnings +if(MSVC) + # Force to always compile with W4 + if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") + string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4") + endif() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4706 /D_CRT_SECURE_NO_WARNINGS") +elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-unused-parameter -std=c99 -pedantic") +endif() + +# Compile as C++ under MSVC older than 12.0 +if(MSVC AND MSVC_VERSION LESS 1800) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP") +endif() + +if(CMAKE_BUILD_TYPE STREQUAL "Ubsan") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined") +endif() diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index ad9715636..cf7f9f621 100644 --- a/extensions/core-extensions.c +++ 
b/extensions/core-extensions.c @@ -1,325 +1,3 @@ -#include -#include +#include "core-extensions.h" -#include -#include - -#include "parser.h" -#include "buffer.h" -#include "ext_scanners.h" - -typedef struct { - int n_columns; - cmark_llist *cells; -} table_row; - -static void free_table_cell(void *data) { - cmark_strbuf_free((cmark_strbuf *) data); - free(data); -} - -static void free_table_row(table_row *row) { - - if (!row) - return; - - cmark_llist_free_full(row->cells, (cmark_free_func) free_table_cell); - - free(row); -} - -static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsize_t len) -{ - cmark_strbuf *res = (cmark_strbuf *)malloc(sizeof(cmark_strbuf)); - bufsize_t r, w; - - cmark_strbuf_init(mem, res, len + 1); - cmark_strbuf_put(res, string, len); - cmark_strbuf_putc(res, '\0'); - - for (r = 0, w = 0; r < len; ++r) { - if (res->ptr[r] == '\\' && res->ptr[r + 1] == '|') - r++; - - res->ptr[w++] = res->ptr[r]; - } - - cmark_strbuf_truncate(res, w); - - return res; -} - -static table_row *row_from_string(cmark_mem *mem, unsigned char *string, int len) { - table_row *row = NULL; - bufsize_t cell_matched = 0; - bufsize_t cell_offset = 0; - - row = malloc(sizeof(table_row)); - row->n_columns = 0; - row->cells = NULL; - - do { - cell_matched = scan_table_cell(string, len, cell_offset); - if (cell_matched) { - cmark_strbuf *cell_buf = unescape_pipes(mem, string + cell_offset + 1, - cell_matched - 1); - row->n_columns += 1; - row->cells = cmark_llist_append(row->cells, cell_buf); - } - cell_offset += cell_matched; - } while (cell_matched); - - cell_matched = scan_table_row_end(string, len, cell_offset); - cell_offset += cell_matched; - - if (!cell_matched || cell_offset != len) { - free_table_row(row); - row = NULL; - } - - return row; -} - -static cmark_node *try_opening_table_header(cmark_syntax_extension *self, - cmark_parser * parser, - cmark_node * parent_container, - unsigned char * input, - int len) { - bufsize_t matched = 
scan_table_start(input, len, cmark_parser_get_first_nonspace(parser)); - cmark_node *table_header; - table_row *header_row = NULL; - table_row *marker_row = NULL; - const char *parent_string; - - if (!matched) - goto done; - - parent_string = cmark_node_get_string_content(parent_container); - - header_row = row_from_string(parser->mem, (unsigned char *) parent_string, strlen(parent_string)); - - if (!header_row) { - goto done; - } - - marker_row = row_from_string(parser->mem, input + cmark_parser_get_first_nonspace(parser), - len - cmark_parser_get_first_nonspace(parser)); - - assert(marker_row); - - if (header_row->n_columns != marker_row->n_columns) { - goto done; - } - - if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) { - goto done; - } - - cmark_node_set_syntax_extension(parent_container, self); - cmark_node_set_n_table_columns(parent_container, header_row->n_columns); - - table_header = cmark_parser_add_child(parser, parent_container, - CMARK_NODE_TABLE_ROW, cmark_parser_get_offset(parser)); - cmark_node_set_syntax_extension(table_header, self); - cmark_node_set_is_table_header(table_header, true); - - { - cmark_llist *tmp; - - for (tmp = header_row->cells; tmp; tmp = tmp->next) { - cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data; - cmark_node *header_cell = cmark_parser_add_child(parser, table_header, - CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); - cmark_node_set_string_content(header_cell, (char *) cell_buf->ptr); - cmark_node_set_syntax_extension(header_cell, self); - } - } - - cmark_parser_advance_offset(parser, input, - strlen(input) - 1 - cmark_parser_get_offset(parser), - false); -done: - free_table_row(header_row); - free_table_row(marker_row); - return parent_container; -} - -static cmark_node *try_opening_table_row(cmark_syntax_extension *self, - cmark_parser * parser, - cmark_node * parent_container, - unsigned char * input, - int len) { - cmark_node *table_row_block; - table_row *row; - - if 
(cmark_parser_is_blank(parser)) - return NULL; - - table_row_block = cmark_parser_add_child(parser, parent_container, - CMARK_NODE_TABLE_ROW, cmark_parser_get_offset(parser)); - - cmark_node_set_syntax_extension(table_row_block, self); - - /* We don't advance the offset here */ - - row = row_from_string(parser->mem, input + cmark_parser_get_first_nonspace(parser), - len - cmark_parser_get_first_nonspace(parser)); - - { - cmark_llist *tmp; - - for (tmp = row->cells; tmp; tmp = tmp->next) { - cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data; - cmark_node *cell = cmark_parser_add_child(parser, table_row_block, - CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); - cmark_node_set_string_content(cell, (char *) cell_buf->ptr); - cmark_node_set_syntax_extension(cell, self); - } - } - - free_table_row(row); - - cmark_parser_advance_offset(parser, input, - len - 1 - cmark_parser_get_offset(parser), - false); - - return table_row_block; -} - -static cmark_node *try_opening_table_block(cmark_syntax_extension * syntax_extension, - int indented, - cmark_parser * parser, - cmark_node * parent_container, - unsigned char * input, - int len) { - cmark_node_type parent_type = cmark_node_get_type(parent_container); - - if (!indented && parent_type == CMARK_NODE_PARAGRAPH) { - return try_opening_table_header(syntax_extension, parser, parent_container, input, len); - } else if (!indented && parent_type == CMARK_NODE_TABLE) { - return try_opening_table_row(syntax_extension, parser, parent_container, input, len); - } - - return NULL; -} - -static int table_matches(cmark_syntax_extension *self, - cmark_parser * parser, - unsigned char * input, - int len, - cmark_node * parent_container) { - int res = 0; - - if (cmark_node_get_type(parent_container) == CMARK_NODE_TABLE) { - table_row *new_row = row_from_string(parser->mem, input + cmark_parser_get_first_nonspace(parser), - len - cmark_parser_get_first_nonspace(parser)); - if (new_row) { - if (new_row->n_columns == 
cmark_node_get_n_table_columns(parent_container)) - res = 1; - } - free_table_row(new_row); - } - - return res; -} - -static cmark_syntax_extension *register_table_syntax_extension(void) { - cmark_syntax_extension *ext = cmark_syntax_extension_new("piped-tables"); - - cmark_syntax_extension_set_match_block_func(ext, table_matches); - cmark_syntax_extension_set_open_block_func(ext, try_opening_table_block); - - return ext; -} - -static cmark_node *strikethrough_match(cmark_syntax_extension *self, - cmark_parser *parser, - cmark_node *parent, - unsigned char character, - cmark_inline_parser *inline_parser) -{ - cmark_node *res = NULL; - int left_flanking, right_flanking, punct_before, punct_after; - int num_delims; - - /* Exit early */ - if (character != '~') - return NULL; - - num_delims = cmark_inline_parser_scan_delimiters(inline_parser, 1, '~', - &left_flanking, &right_flanking, &punct_before, &punct_after); - - if (num_delims > 0) { /* Should not be needed */ - int can_open, can_close; - - res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); - cmark_node_set_literal(res, "~"); - - can_open = left_flanking; - can_close = right_flanking; - if (can_open || can_close) - cmark_inline_parser_push_delimiter(inline_parser, character, can_open, can_close, res); - } - - return res; -} - -static delimiter *strikethrough_insert(cmark_syntax_extension *self, - cmark_parser *parser, - cmark_inline_parser *inline_parser, - delimiter *opener, - delimiter *closer) -{ - cmark_node *strikethrough; - cmark_node *tmp, *next; - delimiter *delim, *tmp_delim; - delimiter *res = closer->next; - - strikethrough = opener->inl_text; - - if (!cmark_node_set_type(strikethrough, CMARK_NODE_STRIKETHROUGH)) - goto done; - - cmark_node_set_string_content(strikethrough, "~"); - tmp = cmark_node_next(opener->inl_text); - - while (tmp) { - if (tmp == closer->inl_text) - break; - next = cmark_node_next(tmp); - cmark_node_append_child(strikethrough, tmp); - tmp = next; - } - - 
cmark_node_free(closer->inl_text); - - delim = closer; - while (delim != NULL && delim != opener) { - tmp_delim = delim->previous; - cmark_inline_parser_remove_delimiter(inline_parser, delim); - delim = tmp_delim; - } - - cmark_inline_parser_remove_delimiter(inline_parser, opener); - -done: - return res; -} - -static cmark_syntax_extension *create_strikethrough_extension(void) { - cmark_syntax_extension *ext = cmark_syntax_extension_new("tilde_strikethrough"); - cmark_llist *special_chars = NULL; - - cmark_syntax_extension_set_match_inline_func(ext, strikethrough_match); - cmark_syntax_extension_set_inline_from_delim_func(ext, strikethrough_insert); - special_chars = cmark_llist_append(special_chars, (void *) '~'); - cmark_syntax_extension_set_special_inline_chars(ext, special_chars); - - return ext; -} - -int init_libcmarkextensions(cmark_plugin *plugin) { - cmark_plugin_register_syntax_extension(plugin, register_table_syntax_extension()); - cmark_plugin_register_syntax_extension(plugin, create_strikethrough_extension()); - return 1; -} +int core_extensions_registration(cmark_plugin *plugin) { return 1; } diff --git a/extensions/core-extensions.h b/extensions/core-extensions.h new file mode 100644 index 000000000..59d8056d4 --- /dev/null +++ b/extensions/core-extensions.h @@ -0,0 +1,16 @@ +#ifndef CORE_EXTENSIONS_H +#define CORE_EXTENSIONS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +int core_extensions_registration(cmark_plugin *plugin); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/extensions/ext_scanners.c b/extensions/ext_scanners.c deleted file mode 100644 index 78df8d242..000000000 --- a/extensions/ext_scanners.c +++ /dev/null @@ -1,585 +0,0 @@ -/* Generated by re2c 0.16 */ -#include -#include "ext_scanners.h" - -bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), - unsigned char *ptr, int len, bufsize_t offset) { - bufsize_t res; - - if (ptr == NULL || offset > len) { - return 0; - } else { - unsigned char lim = 
ptr[len]; - - ptr[len] = '\0'; - res = scanner(ptr + offset); - ptr[len] = lim; - } - - return res; -} - -bufsize_t _scan_table_cell(const unsigned char *p) { - const unsigned char *marker = NULL; - const unsigned char *start = p; - - { - unsigned char yych; - unsigned int yyaccept = 0; - static const unsigned char yybm[] = { - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 0, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, - 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *(marker = p); - if (yych <= 0xDF) { - if (yych <= '{') { - if (yych != '\n') - goto yy3; - } else { - if (yych <= '|') - goto yy4; - if (yych <= 0x7F) - goto yy3; - if (yych >= 0xC2) - goto yy5; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy7; - if (yych == 0xED) - goto yy9; - goto yy8; - } else { - if (yych <= 0xF0) - goto yy10; - if (yych <= 0xF3) - goto yy11; - if (yych <= 0xF4) - goto yy12; - } - } - yy2 : { return 0; } - yy3: - yych = *++p; - goto yy2; - yy4: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= '{') { - if (yych <= '\n') { - if (yych <= '\t') - goto yy14; - goto yy2; - } else { - if (yych == '\r') - goto yy2; - goto yy14; - } - } else { - if (yych <= 0x7F) { - if (yych <= '|') - 
goto yy2; - goto yy14; - } else { - if (yych <= 0xC1) - goto yy2; - if (yych <= 0xF4) - goto yy14; - goto yy2; - } - } - yy5: - yych = *++p; - if (yych <= 0x7F) - goto yy6; - if (yych <= 0xBF) - goto yy3; - yy6: - p = marker; - if (yyaccept == 0) { - goto yy2; - } else { - goto yy15; - } - yy7: - yych = *++p; - if (yych <= 0x9F) - goto yy6; - if (yych <= 0xBF) - goto yy5; - goto yy6; - yy8: - yych = *++p; - if (yych <= 0x7F) - goto yy6; - if (yych <= 0xBF) - goto yy5; - goto yy6; - yy9: - yych = *++p; - if (yych <= 0x7F) - goto yy6; - if (yych <= 0x9F) - goto yy5; - goto yy6; - yy10: - yych = *++p; - if (yych <= 0x8F) - goto yy6; - if (yych <= 0xBF) - goto yy8; - goto yy6; - yy11: - yych = *++p; - if (yych <= 0x7F) - goto yy6; - if (yych <= 0xBF) - goto yy8; - goto yy6; - yy12: - yych = *++p; - if (yych <= 0x7F) - goto yy6; - if (yych <= 0x8F) - goto yy8; - goto yy6; - yy13: - yyaccept = 1; - marker = ++p; - yych = *p; - yy14: - if (yybm[0 + yych] & 64) { - goto yy13; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\r') - goto yy15; - if (yych <= '\\') - goto yy16; - } else { - if (yych <= 0xDF) - goto yy18; - if (yych <= 0xE0) - goto yy19; - goto yy20; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy21; - if (yych <= 0xEF) - goto yy20; - goto yy22; - } else { - if (yych <= 0xF3) - goto yy23; - if (yych <= 0xF4) - goto yy24; - } - } - yy15 : { return (bufsize_t)(p - start); } - yy16: - yyaccept = 1; - marker = ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy16; - } - if (yych <= 0xDF) { - if (yych <= '\f') { - if (yych == '\n') - goto yy15; - goto yy13; - } else { - if (yych <= '\r') - goto yy15; - if (yych <= 0x7F) - goto yy13; - if (yych <= 0xC1) - goto yy15; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy19; - if (yych == 0xED) - goto yy21; - goto yy20; - } else { - if (yych <= 0xF0) - goto yy22; - if (yych <= 0xF3) - goto yy23; - if (yych <= 0xF4) - goto yy24; - goto yy15; - } - } - yy18: - ++p; - 
yych = *p; - if (yych <= 0x7F) - goto yy6; - if (yych <= 0xBF) - goto yy13; - goto yy6; - yy19: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy6; - if (yych <= 0xBF) - goto yy18; - goto yy6; - yy20: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy6; - if (yych <= 0xBF) - goto yy18; - goto yy6; - yy21: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy6; - if (yych <= 0x9F) - goto yy18; - goto yy6; - yy22: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy6; - if (yych <= 0xBF) - goto yy20; - goto yy6; - yy23: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy6; - if (yych <= 0xBF) - goto yy20; - goto yy6; - yy24: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy6; - if (yych <= 0x8F) - goto yy20; - goto yy6; - } -} - -bufsize_t _scan_table_row_end(const unsigned char *p) { - const unsigned char *marker = NULL; - const unsigned char *start = p; - - { - unsigned char yych; - yych = *(marker = p); - if (yych <= 0xDF) { - if (yych <= '{') { - if (yych != '\n') - goto yy28; - } else { - if (yych <= '|') - goto yy29; - if (yych <= 0x7F) - goto yy28; - if (yych >= 0xC2) - goto yy30; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy32; - if (yych == 0xED) - goto yy34; - goto yy33; - } else { - if (yych <= 0xF0) - goto yy35; - if (yych <= 0xF3) - goto yy36; - if (yych <= 0xF4) - goto yy37; - } - } - yy27 : { return 0; } - yy28: - yych = *++p; - goto yy27; - yy29: - yych = *(marker = ++p); - if (yych == '\n') - goto yy38; - if (yych == '\r') - goto yy40; - goto yy27; - yy30: - yych = *++p; - if (yych <= 0x7F) - goto yy31; - if (yych <= 0xBF) - goto yy28; - yy31: - p = marker; - goto yy27; - yy32: - yych = *++p; - if (yych <= 0x9F) - goto yy31; - if (yych <= 0xBF) - goto yy30; - goto yy31; - yy33: - yych = *++p; - if (yych <= 0x7F) - goto yy31; - if (yych <= 0xBF) - goto yy30; - goto yy31; - yy34: - yych = *++p; - if (yych <= 0x7F) - goto yy31; - if (yych <= 0x9F) - goto yy30; - goto yy31; - yy35: - yych = *++p; - if (yych <= 0x8F) - goto yy31; - if (yych <= 
0xBF) - goto yy33; - goto yy31; - yy36: - yych = *++p; - if (yych <= 0x7F) - goto yy31; - if (yych <= 0xBF) - goto yy33; - goto yy31; - yy37: - yych = *++p; - if (yych <= 0x7F) - goto yy31; - if (yych <= 0x8F) - goto yy33; - goto yy31; - yy38: - ++p; - { return (bufsize_t)(p - start); } - yy40: - ++p; - if ((yych = *p) == '\n') - goto yy38; - goto yy31; - } -} - -bufsize_t _scan_table_start(const unsigned char *p) { - const unsigned char *marker = NULL; - const unsigned char *start = p; - - { - unsigned char yych; - static const unsigned char yybm[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - yych = *(marker = p); - if (yych <= 0xDF) { - if (yych <= '{') { - if (yych != '\n') - goto yy44; - } else { - if (yych <= '|') - goto yy45; - if (yych <= 0x7F) - goto yy44; - if (yych >= 0xC2) - goto yy46; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy48; - if (yych == 0xED) - goto yy50; - goto yy49; - } else { - if (yych <= 0xF0) - goto yy51; - if (yych <= 0xF3) - goto yy52; - if (yych <= 0xF4) - goto yy53; - } - } - yy43 : { return 0; } - yy44: - yych = *++p; - goto yy43; - yy45: - yych = *(marker = ++p); - if (yybm[0 + yych] & 64) { - goto yy54; - } - if (yych == '-') - goto yy56; - goto yy43; - yy46: - 
yych = *++p; - if (yych <= 0x7F) - goto yy47; - if (yych <= 0xBF) - goto yy44; - yy47: - p = marker; - goto yy43; - yy48: - yych = *++p; - if (yych <= 0x9F) - goto yy47; - if (yych <= 0xBF) - goto yy46; - goto yy47; - yy49: - yych = *++p; - if (yych <= 0x7F) - goto yy47; - if (yych <= 0xBF) - goto yy46; - goto yy47; - yy50: - yych = *++p; - if (yych <= 0x7F) - goto yy47; - if (yych <= 0x9F) - goto yy46; - goto yy47; - yy51: - yych = *++p; - if (yych <= 0x8F) - goto yy47; - if (yych <= 0xBF) - goto yy49; - goto yy47; - yy52: - yych = *++p; - if (yych <= 0x7F) - goto yy47; - if (yych <= 0xBF) - goto yy49; - goto yy47; - yy53: - yych = *++p; - if (yych <= 0x7F) - goto yy47; - if (yych <= 0x8F) - goto yy49; - goto yy47; - yy54: - ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy54; - } - if (yych != '-') - goto yy47; - yy56: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy56; - } - if (yych <= '\f') { - if (yych == '\t') - goto yy58; - if (yych <= '\n') - goto yy47; - } else { - if (yych <= ' ') { - if (yych <= 0x1F) - goto yy47; - } else { - if (yych == '|') - goto yy60; - goto yy47; - } - } - yy58: - ++p; - yych = *p; - if (yych <= '\f') { - if (yych == '\t') - goto yy58; - if (yych <= '\n') - goto yy47; - goto yy58; - } else { - if (yych <= ' ') { - if (yych <= 0x1F) - goto yy47; - goto yy58; - } else { - if (yych != '|') - goto yy47; - } - } - yy60: - ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy54; - } - if (yych <= '\r') { - if (yych <= 0x08) - goto yy47; - if (yych >= '\v') - goto yy63; - } else { - if (yych == '-') - goto yy56; - goto yy47; - } - yy61: - ++p; - { return (bufsize_t)(p - start); } - yy63: - ++p; - if ((yych = *p) == '\n') - goto yy61; - goto yy47; - } -} diff --git a/extensions/ext_scanners.h b/extensions/ext_scanners.h deleted file mode 100644 index c96b18490..000000000 --- a/extensions/ext_scanners.h +++ /dev/null @@ -1,20 +0,0 @@ -#include "cmark.h" -#include "chunk.h" - -#ifdef __cplusplus -extern "C" { -#endif - 
-bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, - int len, bufsize_t offset); -bufsize_t _scan_table_start(const unsigned char *p); -bufsize_t _scan_table_cell(const unsigned char *p); -bufsize_t _scan_table_row_end(const unsigned char *p); - -#define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n) -#define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n) -#define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n) - -#ifdef __cplusplus -} -#endif diff --git a/extensions/ext_scanners.re b/extensions/ext_scanners.re deleted file mode 100644 index 7ad561f51..000000000 --- a/extensions/ext_scanners.re +++ /dev/null @@ -1,65 +0,0 @@ -#include -#include "ext_scanners.h" - -bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, int len, bufsize_t offset) -{ - bufsize_t res; - - if (ptr == NULL || offset > len) { - return 0; - } else { - unsigned char lim = ptr[len]; - - ptr[len] = '\0'; - res = scanner(ptr + offset); - ptr[len] = lim; - } - - return res; -} - -/*!re2c - re2c:define:YYCTYPE = "unsigned char"; - re2c:define:YYCURSOR = p; - re2c:define:YYMARKER = marker; - re2c:define:YYCTXMARKER = marker; - re2c:yyfill:enable = 0; - - spacechar = [ \t\v\f]; - newline = [\r]?[\n]; - - escaped_char = [\\][|!"#$%&'()*+,./:;<=>?@[\\\]^_`{}~-]; - - table_marker = [|](spacechar*[-]+spacechar*); - table_cell = [|](escaped_char|[^|\r\n])+; -*/ - -bufsize_t _scan_table_cell(const unsigned char *p) -{ - const unsigned char *marker = NULL; - const unsigned char *start = p; -/*!re2c - table_cell { return (bufsize_t)(p - start); } - .? { return 0; } -*/ -} - -bufsize_t _scan_table_row_end(const unsigned char *p) -{ - const unsigned char *marker = NULL; - const unsigned char *start = p; -/*!re2c - [|]newline { return (bufsize_t)(p - start); } - .? 
{ return 0; } -*/ -} - -bufsize_t _scan_table_start(const unsigned char *p) -{ - const unsigned char *marker = NULL; - const unsigned char *start = p; -/*!re2c - (table_marker)+ [|]newline { return (bufsize_t)(p - start); } - .? { return 0; } -*/ -} diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 index f32644a06..692588d71 100644 --- a/man/man3/cmark.3 +++ b/man/man3/cmark.3 @@ -27,37 +27,31 @@ Node Structure .RS 0n typedef enum { /* Error status */ - CMARK_NODE_NONE, + CMARK_NODE_NONE = 0x0000, /* Block */ - CMARK_NODE_DOCUMENT, - CMARK_NODE_BLOCK_QUOTE, - CMARK_NODE_LIST, - CMARK_NODE_ITEM, - CMARK_NODE_CODE_BLOCK, - CMARK_NODE_HTML_BLOCK, - CMARK_NODE_CUSTOM_BLOCK, - CMARK_NODE_PARAGRAPH, - CMARK_NODE_HEADING, - CMARK_NODE_THEMATIC_BREAK, - - CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT, - CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK, + CMARK_NODE_DOCUMENT = CMARK_NODE_TYPE_BLOCK | 0x0001, + CMARK_NODE_BLOCK_QUOTE = CMARK_NODE_TYPE_BLOCK | 0x0002, + CMARK_NODE_LIST = CMARK_NODE_TYPE_BLOCK | 0x0003, + CMARK_NODE_ITEM = CMARK_NODE_TYPE_BLOCK | 0x0004, + CMARK_NODE_CODE_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0005, + CMARK_NODE_HTML_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0006, + CMARK_NODE_CUSTOM_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0007, + CMARK_NODE_PARAGRAPH = CMARK_NODE_TYPE_BLOCK | 0x0008, + CMARK_NODE_HEADING = CMARK_NODE_TYPE_BLOCK | 0x0009, + CMARK_NODE_THEMATIC_BREAK = CMARK_NODE_TYPE_BLOCK | 0x000a, /* Inline */ - CMARK_NODE_TEXT, - CMARK_NODE_SOFTBREAK, - CMARK_NODE_LINEBREAK, - CMARK_NODE_CODE, - CMARK_NODE_HTML_INLINE, - CMARK_NODE_CUSTOM_INLINE, - CMARK_NODE_EMPH, - CMARK_NODE_STRONG, - CMARK_NODE_LINK, - CMARK_NODE_IMAGE, - - CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT, - CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE, + CMARK_NODE_TEXT = CMARK_NODE_TYPE_INLINE | 0x0001, + CMARK_NODE_SOFTBREAK = CMARK_NODE_TYPE_INLINE | 0x0002, + CMARK_NODE_LINEBREAK = CMARK_NODE_TYPE_INLINE | 0x0003, + CMARK_NODE_CODE = CMARK_NODE_TYPE_INLINE | 0x0004, + CMARK_NODE_HTML_INLINE = 
CMARK_NODE_TYPE_INLINE | 0x0005, + CMARK_NODE_CUSTOM_INLINE = CMARK_NODE_TYPE_INLINE | 0x0006, + CMARK_NODE_EMPH = CMARK_NODE_TYPE_INLINE | 0x0007, + CMARK_NODE_STRONG = CMARK_NODE_TYPE_INLINE | 0x0008, + CMARK_NODE_LINK = CMARK_NODE_TYPE_INLINE | 0x0009, + CMARK_NODE_IMAGE = CMARK_NODE_TYPE_INLINE | 0x000a, } cmark_node_type; .RE \f[] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c5d1c0efb..6f7539553 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -54,7 +54,6 @@ set(LIBRARY_SOURCES set(PROGRAM "cmark") set(PROGRAM_SOURCES - ${LIBRARY_SOURCES} main.c ) @@ -68,6 +67,11 @@ include (GenerateExportHeader) add_executable(${PROGRAM} ${PROGRAM_SOURCES}) add_compiler_export_flags() +target_link_libraries(${PROGRAM} libcmark) + +add_dependencies(${PROGRAM} libcmarkextensions_static) +target_link_libraries(${PROGRAM} libcmarkextensions_static) + # Disable the PUBLIC declarations when compiling the executable: set_target_properties(${PROGRAM} PROPERTIES COMPILE_FLAGS -DCMARK_STATIC_DEFINE) @@ -197,7 +201,7 @@ if(MSVC) endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4706 /D_CRT_SECURE_NO_WARNINGS") elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -std=c99 -pedantic") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-unused-parameter -std=c99 -pedantic") endif() # Compile as C++ under MSVC older than 12.0 diff --git a/src/blocks.c b/src/blocks.c index 9f93c06fa..1efda3d71 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -82,11 +82,11 @@ static cmark_node *make_document(cmark_mem *mem) { } int cmark_parser_attach_syntax_extension(cmark_parser *parser, - cmark_syntax_extension *extension) { - parser->syntax_extensions = cmark_llist_append(parser->syntax_extensions, extension); - if (extension->match_inline && extension->insert_inline_from_delim) { + cmark_syntax_extension *extension) { + parser->syntax_extensions = cmark_llist_append(parser->mem, 
parser->syntax_extensions, extension); + if (extension->match_inline || extension->insert_inline_from_delim) { parser->inline_syntax_extensions = cmark_llist_append( - parser->inline_syntax_extensions, extension); + parser->mem, parser->inline_syntax_extensions, extension); } return 1; @@ -145,8 +145,8 @@ void cmark_parser_free(cmark_parser *parser) { cmark_parser_dispose(parser); cmark_strbuf_free(&parser->curline); cmark_strbuf_free(&parser->linebuf); - cmark_llist_free(parser->syntax_extensions); - cmark_llist_free(parser->inline_syntax_extensions); + cmark_llist_free(parser->mem, parser->syntax_extensions); + cmark_llist_free(parser->mem, parser->inline_syntax_extensions); mem->free(parser); } @@ -173,31 +173,19 @@ static bool is_blank(cmark_strbuf *s, bufsize_t offset) { return true; } -static CMARK_INLINE bool can_contain(cmark_node_type parent_type, - cmark_node_type child_type) { - if (parent_type == CMARK_NODE_TABLE) { - return child_type == CMARK_NODE_TABLE_ROW; - } - - if (parent_type == CMARK_NODE_TABLE_ROW) - return child_type == CMARK_NODE_TABLE_CELL; - - return (parent_type == CMARK_NODE_DOCUMENT || - parent_type == CMARK_NODE_BLOCK_QUOTE || - parent_type == CMARK_NODE_ITEM || - (parent_type == CMARK_NODE_LIST && child_type == CMARK_NODE_ITEM)); -} - static CMARK_INLINE bool accepts_lines(cmark_node_type block_type) { return (block_type == CMARK_NODE_PARAGRAPH || block_type == CMARK_NODE_HEADING || block_type == CMARK_NODE_CODE_BLOCK); } -static CMARK_INLINE bool contains_inlines(cmark_node_type block_type) { - return (block_type == CMARK_NODE_PARAGRAPH || - block_type == CMARK_NODE_HEADING || - block_type == CMARK_NODE_TABLE_CELL); +static CMARK_INLINE bool contains_inlines(cmark_node *node) { + if (node->extension && node->extension->contains_inlines_func) { + return node->extension->contains_inlines_func(node->extension, node); + } + + return (node->type == CMARK_NODE_PARAGRAPH || + node->type == CMARK_NODE_HEADING); } static void 
add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) { @@ -378,7 +366,7 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent, // if 'parent' isn't the kind of node that can accept this child, // then back up til we hit a node that can. - while (!can_contain(S_type(parent), block_type)) { + while (!cmark_node_can_contain_type(parent, block_type)) { parent = finalize(parser, parent); } @@ -397,7 +385,7 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent, return child; } -static void manage_extensions_special_characters(cmark_parser *parser, bool add) { +void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add) { cmark_llist *tmp_ext; for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) { @@ -421,18 +409,18 @@ static void process_inlines(cmark_parser *parser, cmark_node *cur; cmark_event_type ev_type; - manage_extensions_special_characters(parser, true); + cmark_manage_extensions_special_characters(parser, true); while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (ev_type == CMARK_EVENT_ENTER) { - if (contains_inlines(cur->type)) { + if (contains_inlines(cur)) { cmark_parse_inlines(parser, cur, refmap, options); } } } - manage_extensions_special_characters(parser, false); + cmark_manage_extensions_special_characters(parser, false); cmark_iter_free(iter); } @@ -1289,7 +1277,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, /* parser->current might have changed if feed_reentrant was called */ if (current == parser->current) - add_text_to_container(parser, container, last_matched_container, &input); + add_text_to_container(parser, container, last_matched_container, &input); finished: parser->last_line_length = input.len; @@ -1305,6 +1293,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, cmark_node *cmark_parser_finish(cmark_parser *parser) { cmark_node 
*res; + cmark_llist *extensions; /* Parser was already finished once */ if (parser->root == NULL) @@ -1333,6 +1322,15 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) { cmark_parser_reset(parser); + for (extensions = parser->syntax_extensions; extensions; extensions = extensions->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) extensions->data; + if (ext->postprocess_func) { + cmark_node *processed = ext->postprocess_func(ext, res); + if (processed) + res = processed; + } + } + return res; } diff --git a/src/buffer.h b/src/buffer.h index e8780753f..90fa7df71 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -32,22 +32,32 @@ extern unsigned char cmark_strbuf__initbuf[]; * For the cases where CMARK_BUF_INIT cannot be used to do static * initialization. */ +CMARK_EXPORT void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, bufsize_t initial_size); /** * Grow the buffer to hold at least `target_size` bytes. */ +CMARK_EXPORT void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); +CMARK_EXPORT void cmark_strbuf_free(cmark_strbuf *buf); + +CMARK_EXPORT void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b); +CMARK_EXPORT bufsize_t cmark_strbuf_len(const cmark_strbuf *buf); +CMARK_EXPORT int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b); +CMARK_EXPORT unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); + +CMARK_EXPORT void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf); @@ -57,22 +67,48 @@ static CMARK_INLINE const char *cmark_strbuf_cstr(const cmark_strbuf *buf) { #define cmark_strbuf_at(buf, n) ((buf)->ptr[n]) +CMARK_EXPORT void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); + +CMARK_EXPORT void cmark_strbuf_sets(cmark_strbuf *buf, const char *string); + +CMARK_EXPORT void cmark_strbuf_putc(cmark_strbuf *buf, int c); + +CMARK_EXPORT void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); + +CMARK_EXPORT void 
cmark_strbuf_puts(cmark_strbuf *buf, const char *string); + +CMARK_EXPORT void cmark_strbuf_clear(cmark_strbuf *buf); +CMARK_EXPORT bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos); + +CMARK_EXPORT bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos); + +CMARK_EXPORT void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n); + +CMARK_EXPORT void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len); + +CMARK_EXPORT void cmark_strbuf_rtrim(cmark_strbuf *buf); + +CMARK_EXPORT void cmark_strbuf_trim(cmark_strbuf *buf); + +CMARK_EXPORT void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); + +CMARK_EXPORT void cmark_strbuf_unescape(cmark_strbuf *s); #ifdef __cplusplus diff --git a/src/cmark.c b/src/cmark.c index c9d450fdb..5758da90c 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -7,6 +7,9 @@ #include "cmark.h" #include "buffer.h" +cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK; +cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE; + int cmark_version() { return CMARK_VERSION; } const char *cmark_version_string() { return CMARK_VERSION_STRING; } @@ -41,18 +44,8 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options) { doc = cmark_parse_document(text, len, options); - result = cmark_render_html(doc, options); + result = cmark_render_html(doc, options, NULL); cmark_node_free(doc); return result; } - -int cmark_init(void) { - cmark_discover_plugins(); - return 1; -} - -int cmark_deinit(void) { - cmark_release_plugins(); - return 1; -} diff --git a/src/cmark.h b/src/cmark.h index fbede2731..cc1b089c8 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -1,9 +1,9 @@ -#ifndef CMARK_H -#define CMARK_H +#ifndef CMARK_CMARK_H +#define CMARK_CMARK_H #include -#include -#include +#include "cmark_export.h" +#include "cmark_version.h" #ifdef __cplusplus extern "C" { @@ -30,49 +30,44 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options); /** ## Node Structure */ +#define 
CMARK_NODE_TYPE_PRESENT (0x8000) +#define CMARK_NODE_TYPE_BLOCK (CMARK_NODE_TYPE_PRESENT | 0x0000) +#define CMARK_NODE_TYPE_INLINE (CMARK_NODE_TYPE_PRESENT | 0x4000) +#define CMARK_NODE_TYPE_MASK (0xc000) +#define CMARK_NODE_VALUE_MASK (0x3fff) + typedef enum { /* Error status */ - CMARK_NODE_NONE, + CMARK_NODE_NONE = 0x0000, /* Block */ - CMARK_NODE_DOCUMENT, - CMARK_NODE_BLOCK_QUOTE, - CMARK_NODE_LIST, - CMARK_NODE_ITEM, - CMARK_NODE_CODE_BLOCK, - CMARK_NODE_HTML_BLOCK, - CMARK_NODE_CUSTOM_BLOCK, - CMARK_NODE_PARAGRAPH, - CMARK_NODE_HEADING, - CMARK_NODE_THEMATIC_BREAK, - - /* blocks with no syntax rules in the current specification */ - CMARK_NODE_TABLE, - CMARK_NODE_TABLE_ROW, - CMARK_NODE_TABLE_CELL, - - CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT, - CMARK_NODE_LAST_BLOCK = CMARK_NODE_TABLE_CELL, + CMARK_NODE_DOCUMENT = CMARK_NODE_TYPE_BLOCK | 0x0001, + CMARK_NODE_BLOCK_QUOTE = CMARK_NODE_TYPE_BLOCK | 0x0002, + CMARK_NODE_LIST = CMARK_NODE_TYPE_BLOCK | 0x0003, + CMARK_NODE_ITEM = CMARK_NODE_TYPE_BLOCK | 0x0004, + CMARK_NODE_CODE_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0005, + CMARK_NODE_HTML_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0006, + CMARK_NODE_CUSTOM_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0007, + CMARK_NODE_PARAGRAPH = CMARK_NODE_TYPE_BLOCK | 0x0008, + CMARK_NODE_HEADING = CMARK_NODE_TYPE_BLOCK | 0x0009, + CMARK_NODE_THEMATIC_BREAK = CMARK_NODE_TYPE_BLOCK | 0x000a, /* Inline */ - CMARK_NODE_TEXT, - CMARK_NODE_SOFTBREAK, - CMARK_NODE_LINEBREAK, - CMARK_NODE_CODE, - CMARK_NODE_HTML_INLINE, - CMARK_NODE_CUSTOM_INLINE, - CMARK_NODE_EMPH, - CMARK_NODE_STRONG, - CMARK_NODE_LINK, - CMARK_NODE_IMAGE, - - /* inlines with no syntax rules in the current specification */ - CMARK_NODE_STRIKETHROUGH, - - CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT, - CMARK_NODE_LAST_INLINE = CMARK_NODE_STRIKETHROUGH, + CMARK_NODE_TEXT = CMARK_NODE_TYPE_INLINE | 0x0001, + CMARK_NODE_SOFTBREAK = CMARK_NODE_TYPE_INLINE | 0x0002, + CMARK_NODE_LINEBREAK = CMARK_NODE_TYPE_INLINE | 0x0003, + CMARK_NODE_CODE = 
CMARK_NODE_TYPE_INLINE | 0x0004, + CMARK_NODE_HTML_INLINE = CMARK_NODE_TYPE_INLINE | 0x0005, + CMARK_NODE_CUSTOM_INLINE = CMARK_NODE_TYPE_INLINE | 0x0006, + CMARK_NODE_EMPH = CMARK_NODE_TYPE_INLINE | 0x0007, + CMARK_NODE_STRONG = CMARK_NODE_TYPE_INLINE | 0x0008, + CMARK_NODE_LINK = CMARK_NODE_TYPE_INLINE | 0x0009, + CMARK_NODE_IMAGE = CMARK_NODE_TYPE_INLINE | 0x000a, } cmark_node_type; +extern cmark_node_type CMARK_NODE_LAST_BLOCK; +extern cmark_node_type CMARK_NODE_LAST_INLINE; + /* For backwards compatibility: */ #define CMARK_NODE_HEADER CMARK_NODE_HEADING #define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK @@ -95,8 +90,6 @@ typedef struct cmark_node cmark_node; typedef struct cmark_parser cmark_parser; typedef struct cmark_iter cmark_iter; -typedef void (*cmark_free_func) (void *user_data); - /** * ## Custom memory allocator support */ @@ -128,6 +121,10 @@ cmark_mem *cmark_get_arena_mem_allocator(); CMARK_EXPORT void cmark_arena_reset(void); +/** Callback for freeing user data with a 'cmark_mem' context. + */ +typedef void (*cmark_free_func) (cmark_mem *mem, void *user_data); + /* * ## Basic data structures @@ -152,21 +149,23 @@ typedef struct _cmark_llist * head of the list. 
*/ CMARK_EXPORT -cmark_llist * cmark_llist_append (cmark_llist * head, +cmark_llist * cmark_llist_append (cmark_mem * mem, + cmark_llist * head, void * data); /** Free the list starting with 'head', calling 'free_func' with the * data pointer of each of its elements */ CMARK_EXPORT -void cmark_llist_free_full (cmark_llist * head, +void cmark_llist_free_full (cmark_mem * mem, + cmark_llist * head, cmark_free_func free_func); /** Free the list starting with 'head' */ CMARK_EXPORT -void cmark_llist_free (cmark_llist * head); - +void cmark_llist_free (cmark_mem * mem, + cmark_llist * head); /** * ## Creating and Destroying Nodes @@ -479,11 +478,6 @@ CMARK_EXPORT int cmark_node_get_end_line(cmark_node *node); */ CMARK_EXPORT int cmark_node_get_end_column(cmark_node *node); -CMARK_EXPORT int cmark_node_get_n_table_columns(cmark_node *node); -CMARK_EXPORT int cmark_node_set_n_table_columns(cmark_node *node, int n_columns); -CMARK_EXPORT int cmark_node_is_table_header(cmark_node *node); -CMARK_EXPORT int cmark_node_set_is_table_header(cmark_node *node, int is_table_header); - /** * ## Tree Manipulation */ @@ -522,6 +516,10 @@ CMARK_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child); */ CMARK_EXPORT void cmark_consolidate_text_nodes(cmark_node *root); +/** Ensures a node and all its children own their own chunk memory. + */ +CMARK_EXPORT void cmark_node_own(cmark_node *root); + /** * ## Parsing * @@ -605,13 +603,13 @@ char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem); * responsibility to free the returned buffer. */ CMARK_EXPORT -char *cmark_render_html(cmark_node *root, int options); +char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions); /** As for 'cmark_render_html', but specifying the allocator to use for * the resulting string. 
*/ CMARK_EXPORT -char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem); +char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem); /** Render a 'node' tree as a groff man page, without the header. * It is the caller's responsibility to free the returned buffer. diff --git a/src/cmark_ctype.h b/src/cmark_ctype.h index 9a0761851..4b90940a0 100644 --- a/src/cmark_ctype.h +++ b/src/cmark_ctype.h @@ -5,18 +5,25 @@ extern "C" { #endif +#include "cmark_export.h" + /** Locale-independent versions of functions from ctype.h. * We want cmark to behave the same no matter what the system locale. */ +CMARK_EXPORT int cmark_isspace(char c); +CMARK_EXPORT int cmark_ispunct(char c); +CMARK_EXPORT int cmark_isalnum(char c); +CMARK_EXPORT int cmark_isdigit(char c); +CMARK_EXPORT int cmark_isalpha(char c); #ifdef __cplusplus diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index bae8310de..e62e106f4 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -1,12 +1,13 @@ -#ifndef CMARK_EXTENSION_API_H -#define CMARK_EXTENSION_API_H +#ifndef CMARK_CMARK_EXTENSION_API_H +#define CMARK_CMARK_EXTENSION_API_H #ifdef __cplusplus extern "C" { #endif #include -#include "buffer.h" +#include +#include /** * ## Extension Support @@ -217,16 +218,48 @@ typedef int (*cmark_match_block_func) (cmark_syntax_extension *extension, int len, cmark_node *container); +typedef const char *(*cmark_get_type_string_func) (cmark_syntax_extension *extension, + cmark_node *node); + +typedef int (*cmark_can_contain_func) (cmark_syntax_extension *extension, + cmark_node *node, + cmark_node_type child); + +typedef int (*cmark_contains_inlines_func) (cmark_syntax_extension *extension, + cmark_node *node); + +typedef void (*cmark_common_render_func) (cmark_syntax_extension *extension, + cmark_renderer *renderer, + cmark_node *node, + cmark_event_type ev_type, + int options); + +typedef void 
(*cmark_html_render_func) (cmark_syntax_extension *extension, + cmark_html_renderer *renderer, + cmark_node *node, + cmark_event_type ev_type, + int options); + +typedef int (*cmark_html_filter_func) (cmark_syntax_extension *extension, + const unsigned char *tag, + size_t tag_len); + +typedef cmark_node *(*cmark_postprocess_func) (cmark_syntax_extension *extension, + cmark_node *root); + /** Free a cmark_syntax_extension. */ CMARK_EXPORT -void cmark_syntax_extension_free (cmark_syntax_extension *extension); +void cmark_syntax_extension_free (cmark_mem *mem, cmark_syntax_extension *extension); /** Return a newly-constructed cmark_syntax_extension, named 'name'. */ CMARK_EXPORT cmark_syntax_extension *cmark_syntax_extension_new (const char *name); +CMARK_EXPORT +cmark_node_type cmark_syntax_extension_add_node(int is_inline); + /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT @@ -257,6 +290,54 @@ CMARK_EXPORT void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension, cmark_llist *special_chars); +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_get_type_string_func(cmark_syntax_extension *extension, + cmark_get_type_string_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_can_contain_func(cmark_syntax_extension *extension, + cmark_can_contain_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_contains_inlines_func(cmark_syntax_extension *extension, + cmark_contains_inlines_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension, + cmark_common_render_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void 
cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension, + cmark_common_render_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension, + cmark_common_render_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_html_render_func(cmark_syntax_extension *extension, + cmark_html_render_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension, + cmark_html_filter_func func); + /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT @@ -264,6 +345,12 @@ void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, void *priv, cmark_free_func free_func); +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension, + cmark_postprocess_func func); + /** Return the index of the line currently being parsed, starting with 1. */ CMARK_EXPORT @@ -477,6 +564,30 @@ void cmark_inline_parser_advance_offset(cmark_inline_parser *parser); CMARK_EXPORT int cmark_inline_parser_get_offset(cmark_inline_parser *parser); +/** Set the offset in bytes in the chunk being processed by the given inline parser. + */ +CMARK_EXPORT +void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset); + +/** Gets the cmark_chunk being operated on by the given inline parser. + * Use cmark_inline_parser_get_offset to get our current position in the chunk. + */ +CMARK_EXPORT +cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser); + +/** Returns 1 if the inline parser is currently in a bracket; pass 1 for 'image' + * if you want to know about an image-type bracket, 0 for link-type. 
*/ +CMARK_EXPORT +int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image); + +/** Remove the last n characters from the last child of the given node. + * This only works where all n characters are in the single last child, and the last + * child is CMARK_NODE_TEXT. + */ +CMARK_EXPORT +void cmark_node_unput(cmark_node *node, int n); + + /** Get the character located at the current inline parsing offset */ CMARK_EXPORT @@ -539,6 +650,10 @@ int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, int *right_flanking, int *punct_before, int *punct_after); + +CMARK_EXPORT +void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add); + #ifdef __cplusplus } #endif diff --git a/src/commonmark.c b/src/commonmark.c index e41e4ee3a..eeaf33394 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -11,6 +11,7 @@ #include "utf8.h" #include "scanners.h" #include "render.h" +#include "syntax_extension.h" #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) #define LIT(s) renderer->out(renderer, s, false, LITERAL) @@ -151,8 +152,7 @@ static bool is_autolink(cmark_node *node) { // if there is no block-level ancestor, returns NULL. 
static cmark_node *get_containing_block(cmark_node *node) { while (node) { - if (node->type >= CMARK_NODE_FIRST_BLOCK && - node->type <= CMARK_NODE_LAST_BLOCK) { + if (CMARK_NODE_BLOCK_P(node)) { return node; } else { node = node->parent; @@ -191,6 +191,11 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, cmark_node_get_list_tight(tmp->parent->parent))); } + if (node->extension && node->extension->commonmark_render_func) { + node->extension->commonmark_render_func(node->extension, renderer, node, ev_type, options); + return 1; + } + switch (node->type) { case CMARK_NODE_DOCUMENT: break; @@ -335,33 +340,6 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; - case CMARK_NODE_TABLE: - BLANKLINE(); - break; - - case CMARK_NODE_TABLE_ROW: - if (entering) { - CR(); - LIT("|"); - } - break; - case CMARK_NODE_TABLE_CELL: - if (entering) { - } else { - LIT(" |"); - if (node->parent->as.table_row.is_header && !node->next) { - int i; - int n_cols = node->parent->parent->as.table.n_columns; - CR(); - LIT("|"); - for (i = 0; i < n_cols; i++) { - LIT(" --- |"); - } - CR(); - } - } - break; - case CMARK_NODE_TEXT: OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); break; @@ -484,10 +462,6 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; - case CMARK_NODE_STRIKETHROUGH: - OUT(cmark_node_get_string_content(node), false, LITERAL); - break; - default: assert(false); break; diff --git a/src/config.h.in b/src/config.h.in index e14bf73ff..de1a4dd49 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -69,8 +69,6 @@ CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ... 
#endif -#define EXTENSION_DIR LIBDIR "/extensions" - #ifdef __cplusplus } #endif diff --git a/src/houdini.h b/src/houdini.h index f738e8243..7852c3a83 100644 --- a/src/houdini.h +++ b/src/houdini.h @@ -31,17 +31,23 @@ extern "C" { #define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10) #define HOUDINI_UNESCAPED_SIZE(x) (x) -extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, +CMARK_EXPORT +bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); -extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, +CMARK_EXPORT +int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); -extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, +CMARK_EXPORT +int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure); -extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, +CMARK_EXPORT +int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); -extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, +CMARK_EXPORT +void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); -extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, +CMARK_EXPORT +int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); #ifdef __cplusplus diff --git a/src/houdini_html_e.c b/src/houdini_html_e.c index 0e539f037..da0b15c53 100644 --- a/src/houdini_html_e.c +++ b/src/houdini_html_e.c @@ -48,7 +48,7 @@ int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, if (unlikely(i >= size)) break; - /* The forward slash is only escaped in secure mode */ + /* The forward slash and single quote are only escaped in secure mode */ if ((src[i] == '/' || src[i] == '\'') && !secure) { cmark_strbuf_putc(ob, src[i]); } else { diff --git a/src/html.c b/src/html.c index 600d98326..f6525f73c 100644 --- a/src/html.c +++ b/src/html.c @@ -5,12 +5,10 @@ #include 
"cmark_ctype.h" #include "config.h" #include "cmark.h" -#include "node.h" -#include "buffer.h" #include "houdini.h" #include "scanners.h" - -#define BUFFER_SIZE 100 +#include "syntax_extension.h" +#include "html.h" // Functions to convert cmark_nodes to HTML strings. @@ -19,46 +17,67 @@ static void escape_html(cmark_strbuf *dest, const unsigned char *source, houdini_escape_html0(dest, source, length, 0); } -static CMARK_INLINE void cr(cmark_strbuf *html) { - if (html->size && html->ptr[html->size - 1] != '\n') - cmark_strbuf_putc(html, '\n'); -} +static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size_t len) { + cmark_strbuf *html = renderer->html; + cmark_llist *it; + cmark_syntax_extension *ext; + bool filtered; + uint8_t *match; -struct render_state { - cmark_strbuf *html; - cmark_node *plain; - bool need_closing_table_body; - bool in_table_header; -}; + while (len) { + match = (uint8_t *) memchr(data, '<', len); + if (!match) + break; -static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html, - int options) { - char buffer[BUFFER_SIZE]; - if (CMARK_OPT_SOURCEPOS & options) { - snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"", - cmark_node_get_start_line(node), cmark_node_get_start_column(node), - cmark_node_get_end_line(node), cmark_node_get_end_column(node)); - cmark_strbuf_puts(html, buffer); + if (match != data) { + cmark_strbuf_put(html, data, match - data); + len -= (match - data); + data = match; + } + + filtered = false; + for (it = renderer->filter_extensions; it; it = it->next) { + ext = ((cmark_syntax_extension *) it->data); + if (!ext->html_filter_func(ext, data, len)) { + filtered = true; + break; + } + } + + if (!filtered) { + cmark_strbuf_putc(html, '<'); + } else { + cmark_strbuf_puts(html, "<"); + } + + ++data; + --len; } + + if (len) + cmark_strbuf_put(html, data, len); } -static int S_render_node(cmark_node *node, cmark_event_type ev_type, - struct render_state *state, int options) { +static 
int S_render_node(cmark_html_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { cmark_node *parent; cmark_node *grandparent; - cmark_strbuf *html = state->html; + cmark_strbuf *html = renderer->html; + cmark_llist *it; + cmark_syntax_extension *ext; char start_heading[] = "plain == node) { // back at original node - state->plain = NULL; + if (renderer->plain == node) { // back at original node + renderer->plain = NULL; } - if (state->plain != NULL) { + if (renderer->plain != NULL) { switch (node->type) { case CMARK_NODE_TEXT: case CMARK_NODE_CODE: @@ -77,18 +96,23 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, return 1; } + if (node->extension && node->extension->html_render_func) { + node->extension->html_render_func(node->extension, renderer, node, ev_type, options); + return 1; + } + switch (node->type) { case CMARK_NODE_DOCUMENT: break; case CMARK_NODE_BLOCK_QUOTE: if (entering) { - cr(html); + cmark_html_render_cr(html); cmark_strbuf_puts(html, "\n"); } else { - cr(html); + cmark_html_render_cr(html); cmark_strbuf_puts(html, "\n"); } break; @@ -98,19 +122,19 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, int start = node->as.list.start; if (entering) { - cr(html); + cmark_html_render_cr(html); if (list_type == CMARK_BULLET_LIST) { cmark_strbuf_puts(html, "\n"); } else if (start == 1) { cmark_strbuf_puts(html, "\n"); } else { snprintf(buffer, BUFFER_SIZE, "
      \n"); } } else { @@ -122,9 +146,9 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_ITEM: if (entering) { - cr(html); + cmark_html_render_cr(html); cmark_strbuf_puts(html, "'); } else { cmark_strbuf_puts(html, "\n"); @@ -133,10 +157,10 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_HEADING: if (entering) { - cr(html); + cmark_html_render_cr(html); start_heading[2] = (char)('0' + node->as.heading.level); cmark_strbuf_puts(html, start_heading); - S_render_sourcepos(node, html, options); + cmark_html_render_sourcepos(node, html, options); cmark_strbuf_putc(html, '>'); } else { end_heading[3] = (char)('0' + node->as.heading.level); @@ -146,11 +170,11 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, break; case CMARK_NODE_CODE_BLOCK: - cr(html); + cmark_html_render_cr(html); if (node->as.code.info.len == 0) { cmark_strbuf_puts(html, ""); } else { bufsize_t first_tag = 0; @@ -160,7 +184,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } cmark_strbuf_puts(html, "as.code.info.data, first_tag); cmark_strbuf_puts(html, "\">"); @@ -171,17 +195,19 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, break; case CMARK_NODE_HTML_BLOCK: - cr(html); + cmark_html_render_cr(html); if (options & CMARK_OPT_SAFE) { cmark_strbuf_puts(html, ""); + } else if (renderer->filter_extensions) { + filter_html_block(renderer, node->as.literal.data, node->as.literal.len); } else { cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); } - cr(html); + cmark_html_render_cr(html); break; case CMARK_NODE_CUSTOM_BLOCK: - cr(html); + cmark_html_render_cr(html); if (entering) { cmark_strbuf_put(html, node->as.custom.on_enter.data, node->as.custom.on_enter.len); @@ -189,13 +215,13 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, cmark_strbuf_put(html, node->as.custom.on_exit.data, node->as.custom.on_exit.len); } - 
cr(html); + cmark_html_render_cr(html); break; case CMARK_NODE_THEMATIC_BREAK: - cr(html); + cmark_html_render_cr(html); cmark_strbuf_puts(html, "\n"); break; @@ -209,9 +235,9 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } if (!tight) { if (entering) { - cr(html); + cmark_html_render_cr(html); cmark_strbuf_puts(html, "'); } else { cmark_strbuf_puts(html, "

      \n"); @@ -219,65 +245,6 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } break; - case CMARK_NODE_TABLE: - if (entering) { - cr(html); - cmark_strbuf_puts(html, "'); - state->need_closing_table_body = false; - } else { - if (state->need_closing_table_body) - cmark_strbuf_puts(html, ""); - state->need_closing_table_body = false; - cmark_strbuf_puts(html, ""); - } - break; - - case CMARK_NODE_TABLE_ROW: - if (entering) { - cr(html); - if (node->as.table_row.is_header) { - state->in_table_header = true; - cmark_strbuf_puts(html, ""); - cr(html); - } - cmark_strbuf_puts(html, "'); - } else { - cr(html); - cmark_strbuf_puts(html, ""); - if (node->as.table_row.is_header) { - cr(html); - cmark_strbuf_puts(html, ""); - cr(html); - cmark_strbuf_puts(html, ""); - state->need_closing_table_body = true; - state->in_table_header = false; - } - } - break; - - case CMARK_NODE_TABLE_CELL: - if (entering) { - cr(html); - if (state->in_table_header) { - cmark_strbuf_puts(html, "'); - } else { - if (state->in_table_header) { - cmark_strbuf_puts(html, ""); - } else { - cmark_strbuf_puts(html, ""); - } - } - break; - case CMARK_NODE_TEXT: escape_html(html, node->as.literal.data, node->as.literal.len); break; @@ -306,7 +273,20 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, if (options & CMARK_OPT_SAFE) { cmark_strbuf_puts(html, ""); } else { - cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); + filtered = false; + for (it = renderer->filter_extensions; it; it = it->next) { + ext = (cmark_syntax_extension *) it->data; + if (!ext->html_filter_func(ext, node->as.literal.data, node->as.literal.len)) { + filtered = true; + break; + } + } + if (!filtered) { + cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); + } else { + cmark_strbuf_puts(html, "<"); + cmark_strbuf_put(html, node->as.literal.data + 1, node->as.literal.len - 1); + } } break; @@ -363,7 +343,7 @@ static int S_render_node(cmark_node 
*node, cmark_event_type ev_type, node->as.link.url.len); } cmark_strbuf_puts(html, "\" alt=\""); - state->plain = node; + renderer->plain = node; } else { if (node->as.link.title.len) { cmark_strbuf_puts(html, "\" title=\""); @@ -374,41 +354,41 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } break; - case CMARK_NODE_STRIKETHROUGH: - if (entering) { - cmark_strbuf_puts(html, ""); - } else { - cmark_strbuf_puts(html, ""); - } - break; - default: assert(false); break; } - // cmark_strbuf_putc(html, 'x'); return 1; } -char *cmark_render_html(cmark_node *root, int options) { - return cmark_render_html_with_mem(root, options, cmark_node_mem(root)); +char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions) { + return cmark_render_html_with_mem(root, options, extensions, cmark_node_mem(root)); } -char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem) { +char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem) { char *result; cmark_strbuf html = CMARK_BUF_INIT(mem); cmark_event_type ev_type; cmark_node *cur; - struct render_state state = {&html, NULL, false, false}; + cmark_html_renderer renderer = {&html, NULL, NULL, NULL}; cmark_iter *iter = cmark_iter_new(root); + for (; extensions; extensions = extensions->next) + if (((cmark_syntax_extension *) extensions->data)->html_filter_func) + renderer.filter_extensions = cmark_llist_append( + mem, + renderer.filter_extensions, + (cmark_syntax_extension *) extensions->data); + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); - S_render_node(cur, ev_type, &state, options); + S_render_node(&renderer, cur, ev_type, options); } result = (char *)cmark_strbuf_detach(&html); + cmark_llist_free(mem, renderer.filter_extensions); + cmark_iter_free(iter); return result; } diff --git a/src/html.h b/src/html.h new file mode 100644 index 000000000..aeba7bcda --- /dev/null +++ 
b/src/html.h @@ -0,0 +1,27 @@ +#ifndef CMARK_HTML_H +#define CMARK_HTML_H + +#include "buffer.h" +#include "node.h" + +CMARK_INLINE +static void cmark_html_render_cr(cmark_strbuf *html) { + if (html->size && html->ptr[html->size - 1] != '\n') + cmark_strbuf_putc(html, '\n'); +} + +#define BUFFER_SIZE 100 + +CMARK_INLINE +static void cmark_html_render_sourcepos(cmark_node *node, cmark_strbuf *html, int options) { + char buffer[BUFFER_SIZE]; + if (CMARK_OPT_SOURCEPOS & options) { + snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"", + cmark_node_get_start_line(node), cmark_node_get_start_column(node), + cmark_node_get_end_line(node), cmark_node_get_end_column(node)); + cmark_strbuf_puts(html, buffer); + } +} + + +#endif diff --git a/src/inlines.c b/src/inlines.c index dec4860ba..da6a7ef9f 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1105,7 +1105,6 @@ static cmark_node *try_extensions(cmark_parser *parser, for (tmp = parser->inline_syntax_extensions; tmp; tmp = tmp->next) { cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; - res = ext->match_inline(ext, parser, parent, c, subj); if (res) @@ -1197,10 +1196,10 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, } // Parse inlines from parent's string_content, adding as children of parent. 
-extern void cmark_parse_inlines(cmark_parser *parser, - cmark_node *parent, - cmark_reference_map *refmap, - int options) { +void cmark_parse_inlines(cmark_parser *parser, + cmark_node *parent, + cmark_reference_map *refmap, + int options) { subject subj; subject_from_buf(parser->mem, &subj, &parent->content, refmap); cmark_chunk_rtrim(&subj.input); @@ -1409,6 +1408,35 @@ int cmark_inline_parser_get_offset(cmark_inline_parser *parser) { return parser->pos; } +void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset) { + parser->pos = offset; +} + +cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser) { + return &parser->input; +} + +int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image) { + for (bracket *b = parser->last_bracket; b; b = b->previous) + if (b->active && b->image == image) + return 1; + return 0; +} + +void cmark_node_unput(cmark_node *node, int n) { + node = node->last_child; + while (n > 0 && node && node->type == CMARK_NODE_TEXT) { + if (node->as.literal.len < n) { + n -= node->as.literal.len; + node->as.literal.len = 0; + } else { + node->as.literal.len -= n; + n = 0; + } + node = node->prev; + } +} + delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser) { return parser->last_delim; } diff --git a/src/inlines.h b/src/inlines.h index 586b53fa7..0d8305c2f 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -5,9 +5,12 @@ extern "C" { #endif +#include "references.h" + cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url); cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); +CMARK_EXPORT void cmark_parse_inlines(cmark_parser *parser, cmark_node *parent, cmark_reference_map *refmap, diff --git a/src/iterator.c b/src/iterator.c index 24423a217..149a445e1 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -6,12 +6,6 @@ #include "cmark.h" #include "iterator.h" -static const int S_leaf_mask = - (1 << CMARK_NODE_HTML_BLOCK) | (1 << 
CMARK_NODE_THEMATIC_BREAK) | - (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_TEXT) | - (1 << CMARK_NODE_SOFTBREAK) | (1 << CMARK_NODE_LINEBREAK) | - (1 << CMARK_NODE_CODE) | (1 << CMARK_NODE_HTML_INLINE); - cmark_iter *cmark_iter_new(cmark_node *root) { if (root == NULL) { return NULL; @@ -30,7 +24,18 @@ cmark_iter *cmark_iter_new(cmark_node *root) { void cmark_iter_free(cmark_iter *iter) { iter->mem->free(iter); } static bool S_is_leaf(cmark_node *node) { - return ((1 << node->type) & S_leaf_mask) != 0; + switch (node->type) { + case CMARK_NODE_HTML_BLOCK: + case CMARK_NODE_THEMATIC_BREAK: + case CMARK_NODE_CODE_BLOCK: + case CMARK_NODE_TEXT: + case CMARK_NODE_SOFTBREAK: + case CMARK_NODE_LINEBREAK: + case CMARK_NODE_CODE: + case CMARK_NODE_HTML_INLINE: + return 1; + } + return 0; } cmark_event_type cmark_iter_next(cmark_iter *iter) { @@ -118,3 +123,36 @@ void cmark_consolidate_text_nodes(cmark_node *root) { cmark_strbuf_free(&buf); cmark_iter_free(iter); } + +void cmark_node_own(cmark_node *root) { + if (root == NULL) { + return; + } + cmark_iter *iter = cmark_iter_new(root); + cmark_event_type ev_type; + cmark_node *cur; + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + if (ev_type == CMARK_EVENT_ENTER) { + switch (cur->type) { + case CMARK_NODE_TEXT: + case CMARK_NODE_HTML_INLINE: + case CMARK_NODE_CODE: + case CMARK_NODE_HTML_BLOCK: + cmark_chunk_to_cstr(iter->mem, &cur->as.literal); + break; + case CMARK_NODE_LINK: + cmark_chunk_to_cstr(iter->mem, &cur->as.link.url); + cmark_chunk_to_cstr(iter->mem, &cur->as.link.title); + break; + case CMARK_NODE_CUSTOM_INLINE: + cmark_chunk_to_cstr(iter->mem, &cur->as.custom.on_enter); + cmark_chunk_to_cstr(iter->mem, &cur->as.custom.on_exit); + break; + } + } + } + + cmark_iter_free(iter); +} diff --git a/src/latex.c b/src/latex.c index a8d485c63..068dc3f61 100644 --- a/src/latex.c +++ b/src/latex.c @@ -10,6 +10,7 @@ #include "utf8.h" #include "scanners.h" #include 
"render.h" +#include "syntax_extension.h" #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) #define LIT(s) renderer->out(renderer, s, false, LITERAL) @@ -226,8 +227,10 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, cmark_list_type list_type; bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); - // avoid warning about unused parameter: - (void)(options); + if (node->extension && node->extension->latex_render_func) { + node->extension->latex_render_func(node->extension, renderer, node, ev_type, options); + return 1; + } switch (node->type) { case CMARK_NODE_DOCUMENT: @@ -346,44 +349,6 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; - case CMARK_NODE_TABLE: - if (entering) { - int i, n_cols; - CR(); - LIT("\\begin{table}"); - CR(); - LIT("\\begin{tabular}{"); - - n_cols = node->as.table.n_columns; - for (i = 0; i < n_cols; i++) { - LIT("l"); - } - LIT("}"); - CR(); - } else { - LIT("\\end{tabular}"); - CR(); - LIT("\\end{table}"); - CR(); - } - break; - - case CMARK_NODE_TABLE_ROW: - if (!entering) { - CR(); - } - break; - - case CMARK_NODE_TABLE_CELL: - if (!entering) { - if (node->next) { - LIT(" & "); - } else { - LIT(" \\\\"); - } - } - break; - case CMARK_NODE_TEXT: OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); break; @@ -478,15 +443,6 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; - case CMARK_NODE_STRIKETHROUGH: - /* requires \usepackage{ulem} */ - if (entering) { - LIT("\\sout{"); - } else { - LIT("}"); - } - break; - default: assert(false); break; diff --git a/src/libcmark.pc.in b/src/libcmark.pc.in index c3153ba2e..024ae4832 100644 --- a/src/libcmark.pc.in +++ b/src/libcmark.pc.in @@ -6,5 +6,5 @@ includedir=@CMAKE_INSTALL_PREFIX@/include Name: libcmark Description: CommonMark parsing, rendering, and manipulation Version: @PROJECT_VERSION@ -Libs: -L${libdir} -lcmark -ldl +Libs: -L${libdir} -lcmark Cflags: 
-I${includedir} diff --git a/src/linked_list.c b/src/linked_list.c index 7d6690dae..f8bc60422 100644 --- a/src/linked_list.c +++ b/src/linked_list.c @@ -2,9 +2,9 @@ #include "cmark.h" -cmark_llist *cmark_llist_append(cmark_llist *head, void *data) { +cmark_llist *cmark_llist_append(cmark_mem *mem, cmark_llist *head, void *data) { cmark_llist *tmp; - cmark_llist *new_node = (cmark_llist *) malloc(sizeof(cmark_llist)); + cmark_llist *new_node = (cmark_llist *) mem->calloc(1, sizeof(cmark_llist)); new_node->data = data; new_node->next = NULL; @@ -19,19 +19,19 @@ cmark_llist *cmark_llist_append(cmark_llist *head, void *data) { return head; } -void cmark_llist_free_full(cmark_llist *head, cmark_free_func free_func) { +void cmark_llist_free_full(cmark_mem *mem, cmark_llist *head, cmark_free_func free_func) { cmark_llist *tmp, *prev; for (tmp = head; tmp;) { if (free_func) - free_func(tmp->data); + free_func(mem, tmp->data); prev = tmp; tmp = tmp->next; - free(prev); + mem->free(prev); } } -void cmark_llist_free(cmark_llist *head) { - cmark_llist_free_full(head, NULL); +void cmark_llist_free(cmark_mem *mem, cmark_llist *head) { + cmark_llist_free_full(mem, head, NULL); } diff --git a/src/main.c b/src/main.c index a1cca572f..6fe54d102 100644 --- a/src/main.c +++ b/src/main.c @@ -8,8 +8,11 @@ #include "node.h" #include "cmark_extension_api.h" #include "syntax_extension.h" +#include "parser.h" #include "registry.h" +#include "../extensions/core-extensions.h" + #if defined(_WIN32) && !defined(__CYGWIN__) #include #include @@ -42,14 +45,14 @@ void print_usage() { } static bool print_document(cmark_node *document, writer_format writer, - int options, int width) { + int options, int width, cmark_parser *parser) { char *result; cmark_mem *mem = cmark_get_default_mem_allocator(); switch (writer) { case FORMAT_HTML: - result = cmark_render_html_with_mem(document, options, mem); + result = cmark_render_html_with_mem(document, options, parser->syntax_extensions, mem); break; case 
FORMAT_XML: result = cmark_render_xml_with_mem(document, options, mem); @@ -68,7 +71,7 @@ static bool print_document(cmark_node *document, writer_format writer, return false; } printf("%s", result); - cmark_node_mem(document)->free(result); + mem->free(result); return true; } @@ -79,13 +82,14 @@ static void print_extensions(void) { printf ("Available extensions:\n"); - syntax_extensions = cmark_list_syntax_extensions(); + cmark_mem *mem = cmark_get_default_mem_allocator(); + syntax_extensions = cmark_list_syntax_extensions(mem); for (tmp = syntax_extensions; tmp; tmp=tmp->next) { cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; printf("%s\n", ext->name); } - cmark_llist_free(syntax_extensions); + cmark_llist_free(mem, syntax_extensions); } int main(int argc, char *argv[]) { @@ -101,6 +105,8 @@ int main(int argc, char *argv[]) { int options = CMARK_OPT_DEFAULT; int res = 1; + cmark_register_plugin(core_extensions_registration); + #if defined(_WIN32) && !defined(__CYGWIN__) _setmode(_fileno(stdin), _O_BINARY); _setmode(_fileno(stdout), _O_BINARY); @@ -229,10 +235,9 @@ int main(int argc, char *argv[]) { document = cmark_parser_finish(parser); - if (!print_document(document, writer, options, width)) + if (!print_document(document, writer, options, width, parser)) goto failure; - success: res = 0; @@ -247,6 +252,8 @@ int main(int argc, char *argv[]) { cmark_arena_reset(); #endif + cmark_release_plugins(); + free(files); return res; diff --git a/src/man.c b/src/man.c index 205a07cb5..1ae1ac898 100644 --- a/src/man.c +++ b/src/man.c @@ -9,6 +9,7 @@ #include "buffer.h" #include "utf8.h" #include "render.h" +#include "syntax_extension.h" #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) #define LIT(s) renderer->out(renderer, s, false, LITERAL) @@ -77,8 +78,10 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, bool entering = (ev_type == CMARK_EVENT_ENTER); bool allow_wrap = renderer->width > 0 && 
!(CMARK_OPT_NOBREAKS & options); - // avoid unused parameter error: - (void)(options); + if (node->extension && node->extension->man_render_func) { + node->extension->man_render_func(node->extension, renderer, node, ev_type, options); + return 1; + } switch (node->type) { case CMARK_NODE_DOCUMENT: @@ -186,40 +189,6 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; - case CMARK_NODE_TABLE: - if (entering) { - int i, n_cols; - CR(); - LIT(".TS"); - CR(); - LIT("tab(@);"); - CR(); - - n_cols = node->as.table.n_columns; - - for (i = 0; i < n_cols; i++) { - LIT("c"); - } - - if (n_cols) { - LIT("."); - CR(); - } - } else { - LIT(".TE"); - CR(); - } - break; - case CMARK_NODE_TABLE_ROW: - if (!entering) { - CR(); - } - break; - case CMARK_NODE_TABLE_CELL: - if (!entering && node->next) { - LIT("@"); - } - break; case CMARK_NODE_TEXT: OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); break; @@ -286,16 +255,6 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; - case CMARK_NODE_STRIKETHROUGH: - if (entering) { - CR(); - LIT(".ST \""); - } else { - LIT("\""); - CR(); - } - break; - default: assert(false); break; diff --git a/src/node.c b/src/node.c index 2fd674f37..266cf2509 100644 --- a/src/node.c +++ b/src/node.c @@ -3,55 +3,29 @@ #include "config.h" #include "node.h" +#include "syntax_extension.h" static void S_node_unlink(cmark_node *node); #define NODE_MEM(node) cmark_node_mem(node) -static CMARK_INLINE bool S_is_block(cmark_node *node) { - if (node == NULL) { - return false; - } - return node->type >= CMARK_NODE_FIRST_BLOCK && - node->type <= CMARK_NODE_LAST_BLOCK; -} - -static CMARK_INLINE bool S_is_inline(cmark_node *node) { - if (node == NULL) { - return false; - } - return node->type >= CMARK_NODE_FIRST_INLINE && - node->type <= CMARK_NODE_LAST_INLINE; -} - -static bool S_can_contain(cmark_node *node, cmark_node *child) { - cmark_node *cur; - - if (node == NULL || child == NULL) { - return false; - } 
- - // Verify that child is not an ancestor of node or equal to node. - cur = node; - do { - if (cur == child) { +bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type) { + if (child_type == CMARK_NODE_DOCUMENT) { return false; } - cur = cur->parent; - } while (cur != NULL); - if (child->type == CMARK_NODE_DOCUMENT) { - return false; + if (node->extension && node->extension->can_contain_func) { + return node->extension->can_contain_func(node->extension, node, child_type); } switch (node->type) { case CMARK_NODE_DOCUMENT: case CMARK_NODE_BLOCK_QUOTE: case CMARK_NODE_ITEM: - return S_is_block(child) && child->type != CMARK_NODE_ITEM; + return CMARK_NODE_TYPE_BLOCK_P(child_type) && child_type != CMARK_NODE_ITEM; case CMARK_NODE_LIST: - return child->type == CMARK_NODE_ITEM; + return child_type == CMARK_NODE_ITEM; case CMARK_NODE_CUSTOM_BLOCK: return true; @@ -62,21 +36,8 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) { case CMARK_NODE_STRONG: case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: - case CMARK_NODE_STRIKETHROUGH: case CMARK_NODE_CUSTOM_INLINE: - return S_is_inline(child); - case CMARK_NODE_TABLE: - return child->type == CMARK_NODE_TABLE_ROW; - case CMARK_NODE_TABLE_ROW: - return child->type == CMARK_NODE_TABLE_CELL; - case CMARK_NODE_TABLE_CELL: - return child->type == CMARK_NODE_TEXT || - child->type == CMARK_NODE_CODE || - child->type == CMARK_NODE_EMPH || - child->type == CMARK_NODE_STRONG || - child->type == CMARK_NODE_LINK || - child->type == CMARK_NODE_IMAGE || - child->type == CMARK_NODE_STRIKETHROUGH; + return CMARK_NODE_TYPE_INLINE_P(child_type); default: break; @@ -85,6 +46,28 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) { return false; } +static bool S_can_contain(cmark_node *node, cmark_node *child) { + cmark_node *cur; + + if (node == NULL || child == NULL) { + return false; + } + if (NODE_MEM(node) != NODE_MEM(child)) { + return 0; + } + + // Verify that child is not an ancestor of 
node or equal to node. + cur = node; + do { + if (cur == child) { + return false; + } + cur = cur->parent; + } while (cur != NULL); + + return cmark_node_can_contain_type(node, (cmark_node_type) child->type); +} + cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) { cmark_node *node = (cmark_node *)mem->calloc(1, sizeof(*node)); cmark_strbuf_init(mem, &node->content, 0); @@ -117,29 +100,29 @@ cmark_node *cmark_node_new(cmark_node_type type) { static void free_node_as(cmark_node *node) { switch (node->type) { - case CMARK_NODE_CODE_BLOCK: + case CMARK_NODE_CODE_BLOCK: cmark_chunk_free(NODE_MEM(node), &node->as.code.info); cmark_chunk_free(NODE_MEM(node), &node->as.code.literal); - break; - case CMARK_NODE_TEXT: - case CMARK_NODE_HTML_INLINE: - case CMARK_NODE_CODE: - case CMARK_NODE_HTML_BLOCK: + break; + case CMARK_NODE_TEXT: + case CMARK_NODE_HTML_INLINE: + case CMARK_NODE_CODE: + case CMARK_NODE_HTML_BLOCK: cmark_chunk_free(NODE_MEM(node), &node->as.literal); - break; - case CMARK_NODE_LINK: - case CMARK_NODE_IMAGE: + break; + case CMARK_NODE_LINK: + case CMARK_NODE_IMAGE: cmark_chunk_free(NODE_MEM(node), &node->as.link.url); cmark_chunk_free(NODE_MEM(node), &node->as.link.title); - break; - case CMARK_NODE_CUSTOM_BLOCK: - case CMARK_NODE_CUSTOM_INLINE: + break; + case CMARK_NODE_CUSTOM_BLOCK: + case CMARK_NODE_CUSTOM_INLINE: cmark_chunk_free(NODE_MEM(node), &node->as.custom.on_enter); cmark_chunk_free(NODE_MEM(node), &node->as.custom.on_exit); - break; - default: - break; - } + break; + default: + break; + } } // Free a cmark_node list and any children. 
@@ -149,7 +132,7 @@ static void S_free_nodes(cmark_node *e) { cmark_strbuf_free(&e->content); if (e->user_data && e->user_data_free_func) - e->user_data_free_func(e->user_data); + e->user_data_free_func(NODE_MEM(e), e->user_data); free_node_as(e); @@ -184,7 +167,7 @@ int cmark_node_set_type(cmark_node * node, cmark_node_type type) { if (type == node->type) return 1; - initial_type = node->type; + initial_type = (cmark_node_type) node->type; node->type = type; if (!S_can_contain(node->parent, node)) { @@ -206,6 +189,10 @@ const char *cmark_node_get_type_string(cmark_node *node) { return "NONE"; } + if (node->extension && node->extension->get_type_string_func) { + return node->extension->get_type_string_func(node->extension, node); + } + switch (node->type) { case CMARK_NODE_NONE: return "none"; @@ -223,15 +210,6 @@ const char *cmark_node_get_type_string(cmark_node *node) { return "html_block"; case CMARK_NODE_CUSTOM_BLOCK: return "custom_block"; - case CMARK_NODE_TABLE: - return "table"; - case CMARK_NODE_TABLE_ROW: - if (node->as.table_row.is_header) - return "table_header"; - else - return "table_row"; - case CMARK_NODE_TABLE_CELL: - return "table_cell"; case CMARK_NODE_PARAGRAPH: return "paragraph"; case CMARK_NODE_HEADING: @@ -258,8 +236,6 @@ const char *cmark_node_get_type_string(cmark_node *node) { return "link"; case CMARK_NODE_IMAGE: return "image"; - case CMARK_NODE_STRIKETHROUGH: - return "strikethrough"; } return ""; @@ -760,68 +736,6 @@ int cmark_node_get_end_column(cmark_node *node) { return node->end_column; } -int cmark_node_get_n_table_columns(cmark_node *node) { - if (node == NULL) { - return -1; - } - - switch (node->type) { - case CMARK_NODE_TABLE: - return node->as.table.n_columns; - default: - break; - } - - return -1; -} - -int cmark_node_set_n_table_columns(cmark_node *node, int n_columns) { - if (node == NULL) { - return 0; - } - - switch (node->type) { - case CMARK_NODE_TABLE: - node->as.table.n_columns = n_columns; - return 1; - default: - 
break; - } - - return 0; -} - -int cmark_node_is_table_header(cmark_node *node) { - if (node == NULL) { - return 0; - } - - switch (node->type) { - case CMARK_NODE_TABLE_ROW: - return node->as.table_row.is_header; - default: - break; - } - - return 1; -} - -int cmark_node_set_is_table_header(cmark_node *node, int is_table_header) { - if (node == NULL) { - return 0; - } - - switch (node->type) { - case CMARK_NODE_TABLE_ROW: - node->as.table_row.is_header = is_table_header; - return 1; - default: - break; - } - - return 0; -} - // Unlink a node without adjusting its next, prev, and parent pointers. static void S_node_unlink(cmark_node *node) { if (node == NULL) { diff --git a/src/node.h b/src/node.h index cbb0e551b..e32814bcf 100644 --- a/src/node.h +++ b/src/node.h @@ -47,14 +47,6 @@ typedef struct { cmark_chunk on_exit; } cmark_custom; -typedef struct { - int n_columns; -} cmark_table; - -typedef struct { - bool is_header; -} cmark_table_row; - enum cmark_node__internal_flags { CMARK_NODE__OPEN = (1 << 0), CMARK_NODE__LAST_LINE_BLANK = (1 << 1), @@ -88,9 +80,8 @@ struct cmark_node { cmark_heading heading; cmark_link link; cmark_custom custom; - cmark_table table; - cmark_table_row table_row; int html_block_type; + void *opaque; } as; }; @@ -99,6 +90,24 @@ static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) { } CMARK_EXPORT int cmark_node_check(cmark_node *node, FILE *out); +static CMARK_INLINE bool CMARK_NODE_TYPE_BLOCK_P(cmark_node_type node_type) { + return (node_type & CMARK_NODE_TYPE_MASK) == CMARK_NODE_TYPE_BLOCK; +} + +static CMARK_INLINE bool CMARK_NODE_BLOCK_P(cmark_node *node) { + return node != NULL && CMARK_NODE_TYPE_BLOCK_P((cmark_node_type) node->type); +} + +static CMARK_INLINE bool CMARK_NODE_TYPE_INLINE_P(cmark_node_type node_type) { + return (node_type & CMARK_NODE_TYPE_MASK) == CMARK_NODE_TYPE_INLINE; +} + +static CMARK_INLINE bool CMARK_NODE_INLINE_P(cmark_node *node) { + return node != NULL && 
CMARK_NODE_TYPE_INLINE_P((cmark_node_type) node->type); +} + +CMARK_EXPORT bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type); + #ifdef __cplusplus } #endif diff --git a/src/parser.h b/src/parser.h index 247423a76..9cdb0717f 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,5 +1,5 @@ -#ifndef CMARK_AST_H -#define CMARK_AST_H +#ifndef CMARK_PARSER_H +#define CMARK_PARSER_H #include #include "node.h" diff --git a/src/plugin.c b/src/plugin.c index 39c361ac7..3992fe197 100644 --- a/src/plugin.c +++ b/src/plugin.c @@ -2,15 +2,17 @@ #include "plugin.h" +extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; + int cmark_plugin_register_syntax_extension(cmark_plugin * plugin, cmark_syntax_extension * extension) { - plugin->syntax_extensions = cmark_llist_append(plugin->syntax_extensions, extension); + plugin->syntax_extensions = cmark_llist_append(&CMARK_DEFAULT_MEM_ALLOCATOR, plugin->syntax_extensions, extension); return 1; } cmark_plugin * cmark_plugin_new(void) { - cmark_plugin *res = malloc(sizeof(cmark_plugin)); + cmark_plugin *res = (cmark_plugin *) CMARK_DEFAULT_MEM_ALLOCATOR.calloc(1, sizeof(cmark_plugin)); res->syntax_extensions = NULL; @@ -19,9 +21,10 @@ cmark_plugin_new(void) { void cmark_plugin_free(cmark_plugin *plugin) { - cmark_llist_free_full(plugin->syntax_extensions, + cmark_llist_free_full(&CMARK_DEFAULT_MEM_ALLOCATOR, + plugin->syntax_extensions, (cmark_free_func) cmark_syntax_extension_free); - free(plugin); + CMARK_DEFAULT_MEM_ALLOCATOR.free(plugin); } cmark_llist * diff --git a/src/registry.c b/src/registry.c index 8f7b9c4e1..3ff01f2ad 100644 --- a/src/registry.c +++ b/src/registry.c @@ -1,5 +1,3 @@ -#include -#include #include #include #include @@ -10,121 +8,45 @@ #include "registry.h" #include "plugin.h" +extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; static cmark_llist *syntax_extensions = NULL; -static cmark_llist *plugin_handles = NULL; -static cmark_plugin *scan_file(char* filename) { - char* last_slash = strrchr(filename, 
'/'); - char* name_start = last_slash ? last_slash + 1 : filename; - char* last_dot = strrchr(filename, '.'); - cmark_plugin *plugin = NULL; - char *init_func_name = NULL; - int i; - void *libhandle; - char *libname = NULL; +void cmark_register_plugin(cmark_plugin_init_func reg_fn) { + cmark_plugin *plugin = cmark_plugin_new(); - if (!last_dot || strcmp(last_dot, ".so")) - goto done; - - libname = malloc(sizeof(char) * (strlen(EXTENSION_DIR) + strlen(filename) + 2)); - snprintf(libname, strlen(EXTENSION_DIR) + strlen(filename) + 2, "%s/%s", - EXTENSION_DIR, filename); - libhandle = dlopen(libname, RTLD_NOW); - free(libname); - - if (!libhandle) { - printf("Error loading DSO: %s\n", dlerror()); - goto done; - } - - name_start[last_dot - name_start] = '\0'; - - for (i = 0; name_start[i]; i++) { - if (name_start[i] == '-') - name_start[i] = '_'; - } - - init_func_name = malloc(sizeof(char) * (strlen(name_start) + 6)); - - snprintf(init_func_name, strlen(name_start) + 6, "init_%s", name_start); - - cmark_plugin_init_func initfunc = (cmark_plugin_init_func) - (intptr_t) dlsym(libhandle, init_func_name); - free(init_func_name); - - plugin = cmark_plugin_new(); - - if (initfunc) { - if (initfunc(plugin)) { - plugin_handles = cmark_llist_append(plugin_handles, libhandle); - } else { - cmark_plugin_free(plugin); - printf("Error Initializing plugin %s\n", name_start); - plugin = NULL; - dlclose(libhandle); - } - } else { - printf("Error loading init function: %s\n", dlerror()); - dlclose(libhandle); - } - -done: - return plugin; -} - -static void scan_path(char *path) { - DIR *dir = opendir(path); - struct dirent* direntry; - - if (!dir) + if (!reg_fn(plugin)) { + cmark_plugin_free(plugin); return; - - while ((direntry = readdir(dir))) { - cmark_plugin *plugin = scan_file(direntry->d_name); - if (plugin) { - cmark_llist *syntax_extensions_list = cmark_plugin_steal_syntax_extensions(plugin); - cmark_llist *tmp; - - for (tmp = syntax_extensions_list; tmp; tmp=tmp->next) { - 
syntax_extensions = cmark_llist_append(syntax_extensions, tmp->data); - } - - cmark_llist_free(syntax_extensions_list); - cmark_plugin_free(plugin); - } } - closedir(dir); -} + cmark_llist *syntax_extensions_list = cmark_plugin_steal_syntax_extensions(plugin), + *it; -void cmark_discover_plugins(void) { - cmark_release_plugins(); - scan_path(EXTENSION_DIR); -} + for (it = syntax_extensions_list; it; it = it->next) { + syntax_extensions = cmark_llist_append(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions, it->data); + } -static void -release_plugin_handle(void *libhandle) { - dlclose(libhandle); + cmark_llist_free(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions_list); + cmark_plugin_free(plugin); } void cmark_release_plugins(void) { if (syntax_extensions) { - cmark_llist_free_full(syntax_extensions, + cmark_llist_free_full( + &CMARK_DEFAULT_MEM_ALLOCATOR, + syntax_extensions, (cmark_free_func) cmark_syntax_extension_free); syntax_extensions = NULL; } - - cmark_llist_free_full(plugin_handles, release_plugin_handle); - plugin_handles = NULL; } -cmark_llist *cmark_list_syntax_extensions(void) { - cmark_llist *tmp; +cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem) { + cmark_llist *it; cmark_llist *res = NULL; - for (tmp = syntax_extensions; tmp; tmp = tmp->next) { - res = cmark_llist_append(res, tmp->data); + for (it = syntax_extensions; it; it = it->next) { + res = cmark_llist_append(mem, res, it->data); } return res; } diff --git a/src/registry.h b/src/registry.h index bc566e010..0f0fbae26 100644 --- a/src/registry.h +++ b/src/registry.h @@ -6,10 +6,16 @@ extern "C" { #endif #include "cmark.h" +#include "plugin.h" -void cmark_discover_plugins(void); +CMARK_EXPORT +void cmark_register_plugin(cmark_plugin_init_func reg_fn); + +CMARK_EXPORT void cmark_release_plugins(void); -cmark_llist *cmark_list_syntax_extensions(void); + +CMARK_EXPORT +cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem); #ifdef __cplusplus } diff --git a/src/render.h b/src/render.h 
index b73ace464..d9d4f4b9f 100644 --- a/src/render.h +++ b/src/render.h @@ -32,6 +32,15 @@ struct cmark_renderer { typedef struct cmark_renderer cmark_renderer; +struct cmark_html_renderer { + cmark_strbuf *html; + cmark_node *plain; + cmark_llist *filter_extensions; + void *opaque; +}; + +typedef struct cmark_html_renderer cmark_html_renderer; + void cmark_render_ascii(cmark_renderer *renderer, const char *s); void cmark_render_code_point(cmark_renderer *renderer, uint32_t c); diff --git a/src/scanners.h b/src/scanners.h index 207f91a00..b48ca25e7 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -1,3 +1,6 @@ +#ifndef CMARK_SCANNERS_H +#define CMARK_SCANNERS_H + #include "cmark.h" #include "chunk.h" @@ -53,3 +56,5 @@ bufsize_t _scan_dangerous_url(const unsigned char *p); #ifdef __cplusplus } #endif + +#endif diff --git a/src/syntax_extension.c b/src/syntax_extension.c index d8c4459d9..c613f8d1d 100644 --- a/src/syntax_extension.c +++ b/src/syntax_extension.c @@ -4,23 +4,38 @@ #include "syntax_extension.h" #include "buffer.h" -void cmark_syntax_extension_free(cmark_syntax_extension *extension) { +extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; + +static cmark_mem *_mem = &CMARK_DEFAULT_MEM_ALLOCATOR; + +void cmark_syntax_extension_free(cmark_mem *mem, cmark_syntax_extension *extension) { if (extension->free_function && extension->priv) { - extension->free_function(extension->priv); + extension->free_function(mem, extension->priv); } - cmark_llist_free(extension->special_inline_chars); - free(extension->name); - free(extension); + cmark_llist_free(mem, extension->special_inline_chars); + mem->free(extension->name); + mem->free(extension); } cmark_syntax_extension *cmark_syntax_extension_new(const char *name) { - cmark_syntax_extension *res = (cmark_syntax_extension *) calloc(1, sizeof(cmark_syntax_extension)); - res->name = (char *) malloc(sizeof(char) * (strlen(name)) + 1); + cmark_syntax_extension *res = (cmark_syntax_extension *) _mem->calloc(1, 
sizeof(cmark_syntax_extension)); + res->name = (char *) _mem->calloc(1, sizeof(char) * (strlen(name)) + 1); strcpy(res->name, name); return res; } +cmark_node_type cmark_syntax_extension_add_node(int is_inline) { + cmark_node_type *ref = !is_inline ? &CMARK_NODE_LAST_BLOCK : &CMARK_NODE_LAST_INLINE; + + if ((*ref & CMARK_NODE_VALUE_MASK) == CMARK_NODE_VALUE_MASK) { + assert(false); + return (cmark_node_type) 0; + } + + return *ref = (cmark_node_type) ((int) *ref + 1); +} + void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension, cmark_open_block_func func) { extension->try_opening_block = func; @@ -46,6 +61,51 @@ void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *ext extension->special_inline_chars = special_chars; } +void cmark_syntax_extension_set_get_type_string_func(cmark_syntax_extension *extension, + cmark_get_type_string_func func) { + extension->get_type_string_func = func; +} + +void cmark_syntax_extension_set_can_contain_func(cmark_syntax_extension *extension, + cmark_can_contain_func func) { + extension->can_contain_func = func; +} + +void cmark_syntax_extension_set_contains_inlines_func(cmark_syntax_extension *extension, + cmark_contains_inlines_func func) { + extension->contains_inlines_func = func; +} + +void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension, + cmark_common_render_func func) { + extension->commonmark_render_func = func; +} + +void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension, + cmark_common_render_func func) { + extension->latex_render_func = func; +} + +void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension, + cmark_common_render_func func) { + extension->man_render_func = func; +} + +void cmark_syntax_extension_set_html_render_func(cmark_syntax_extension *extension, + cmark_html_render_func func) { + extension->html_render_func = func; +} + +void 
cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension, + cmark_html_filter_func func) { + extension->html_filter_func = func; +} + +void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension, + cmark_postprocess_func func) { + extension->postprocess_func = func; +} + void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, void *priv, cmark_free_func free_func) { diff --git a/src/syntax_extension.h b/src/syntax_extension.h index f46a7d2fe..8b3b407c2 100644 --- a/src/syntax_extension.h +++ b/src/syntax_extension.h @@ -1,5 +1,5 @@ -#ifndef SYNTAX_EXTENSION_H -#define SYNTAX_EXTENSION_H +#ifndef CMARK_SYNTAX_EXTENSION_H +#define CMARK_SYNTAX_EXTENSION_H #include "cmark.h" #include "cmark_extension_api.h" @@ -13,6 +13,15 @@ struct cmark_syntax_extension { char * name; void * priv; cmark_free_func free_function; + cmark_get_type_string_func get_type_string_func; + cmark_can_contain_func can_contain_func; + cmark_contains_inlines_func contains_inlines_func; + cmark_common_render_func commonmark_render_func; + cmark_common_render_func latex_render_func; + cmark_common_render_func man_render_func; + cmark_html_render_func html_render_func; + cmark_html_filter_func html_filter_func; + cmark_postprocess_func postprocess_func; }; #endif diff --git a/test/entity_tests.py b/test/entity_tests.py index 0e3daad09..3abb3b8c0 100644 --- a/test/entity_tests.py +++ b/test/entity_tests.py @@ -54,7 +54,6 @@ def get_entities(): print(entity, '[ERRORED (return code {})]'.format(rc)) print(err) elif check in actual: - print(entity, '[PASSED]') passed += 1 else: print(entity, '[FAILED]') diff --git a/toolchain-mingw32.cmake b/toolchain-mingw32.cmake index 61c62a831..c1c6971d9 100644 --- a/toolchain-mingw32.cmake +++ b/toolchain-mingw32.cmake @@ -9,7 +9,7 @@ SET(CMAKE_RC_COMPILER i586-mingw32msvc-windres) # here is the target environment located SET(CMAKE_FIND_ROOT_PATH /usr/i586-mingw32msvc "${CMAKE_SOURCE_DIR}/windows") -# 
adjust the default behaviour of the FIND_XXX() commands: +# adjust the default behaviour of the FIND_XYZ() commands: # search headers and libraries in the target environment, search # programs in the host environment set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) From 0445d10bbe8ccfb6f84991dc484b6cc28fb42bdc Mon Sep 17 00:00:00 2001 From: Yuki Izumi Date: Tue, 29 Nov 2016 14:58:18 +1100 Subject: [PATCH 005/218] Table extension from c068469 reworked Note this includes a hack to the core code to escape pipes in the 'commonmark' renderer. This is to fix test cases with the table extension; i.e. we treat pipes as special characters that need escaping. We use the cmark_mem of the parser in order to ensure we use the arena allocator when necessary. A very flexible table format is supported; see test/extensions.txt for examples. Leading and trailing pipes can be omitted, and alignment specifiers can be used in the separator between the header and body. Table bodies don't need to be a consistent width. Embedded HTML is OK. Note we reuse the inline parser from cmark to parse tables -- this is to ensure pipes e.g. in the middle of an inline code block don't prematurely terminate a table cell. 
--- Makefile | 30 +- extensions/CMakeLists.txt | 4 + extensions/core-extensions.c | 6 +- extensions/ext_scanners.c | 382 +++++++++++++++++++ extensions/ext_scanners.h | 16 + extensions/ext_scanners.re | 42 +++ extensions/table.c | 687 +++++++++++++++++++++++++++++++++++ extensions/table.h | 8 + man/man3/cmark.3 | 73 +++- src/commonmark.c | 2 +- suppressions | 10 + test/CMakeLists.txt | 19 +- test/afl_test_cases/test.md | 5 +- test/extensions.txt | 360 ++++++++++++++++++ 14 files changed, 1630 insertions(+), 14 deletions(-) create mode 100644 extensions/ext_scanners.c create mode 100644 extensions/ext_scanners.h create mode 100644 extensions/ext_scanners.re create mode 100644 extensions/table.c create mode 100644 extensions/table.h create mode 100644 suppressions create mode 100644 test/extensions.txt diff --git a/Makefile b/Makefile index e6a889560..4150d0177 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,12 @@ SRCDIR=src +EXTDIR=extensions DATADIR=data BUILDDIR?=build GENERATOR?=Unix Makefiles MINGW_BUILDDIR?=build-mingw MINGW_INSTALLDIR?=windows SPEC=test/spec.txt +EXTENSIONS_SPEC=test/extensions.txt SITE=_site SPECVERSION=$(shell perl -ne 'print $$1 if /^version: *([0-9.]+)/' $(SPEC)) FUZZCHARS?=2000000 # for fuzztest @@ -80,7 +82,7 @@ afl: -o test/afl_results \ -x test/fuzzing_dictionary \ -t 100 \ - $(CMARK) $(CMARK_OPTS) + $(CMARK) -e table $(CMARK_OPTS) libFuzzer: @[ -n "$(LIB_FUZZER_PATH)" ] || { echo '$$LIB_FUZZER_PATH not set'; false; } @@ -126,6 +128,19 @@ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re --encoding-policy substitute -o $@ $< $(CLANG_FORMAT) $@ +# We include scanners.c in the repository, so this shouldn't +# normally need to be generated. 
+$(EXTDIR)/ext_scanners.c: $(EXTDIR)/ext_scanners.re + @case "$$(re2c -v)" in \ + *\ 0.13.*|*\ 0.14|*\ 0.14.1) \ + echo "re2c >= 0.14.2 is required"; \ + false; \ + ;; \ + esac + re2c --case-insensitive -b -i --no-generation-date -8 \ + --encoding-policy substitute -o $@ $< + clang-format -style llvm -i $@ + # We include entities.inc in the repository, so normally this # doesn't need to be regenerated: $(SRCDIR)/entities.inc: tools/make_entities_inc.py @@ -138,14 +153,19 @@ update-spec: test: $(SPEC) cmake_build $(MAKE) -C $(BUILDDIR) test || (cat $(BUILDDIR)/Testing/Temporary/LastTest.log && exit 1) -$(ALLTESTS): $(SPEC) - python3 test/spec_tests.py --spec $< --dump-tests | python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); print("\n".join([test["markdown"] for test in tests]))' > $@ +$(ALLTESTS): $(SPEC) $(EXTENSIONS_SPEC) + ( \ + python3 test/spec_tests.py --spec $(SPEC) --dump-tests | \ + python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); print("\n".join([test["markdown"] for test in tests]))'; \ + python3 test/spec_tests.py --spec $(EXTENSIONS_SPEC) --dump-tests | \ + python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); print("\n".join([test["markdown"] for test in tests]))'; \ + ) > $@ leakcheck: $(ALLTESTS) for format in html man xml latex commonmark; do \ for opts in "" "--smart"; do \ - echo "cmark -t $$format $$opts" ; \ - valgrind -q --leak-check=full --dsymutil=yes --error-exitcode=1 $(PROG) -t $$format $$opts $(ALLTESTS) >/dev/null || exit 1;\ + echo "cmark -t $$format -e table $$opts" ; \ + valgrind -q --leak-check=full --dsymutil=yes --suppressions=suppressions --error-exitcode=1 $(PROG) -t $$format -e table $$opts $(ALLTESTS) >/dev/null || exit 1;\ done; \ done; diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index f13818a53..ba99cdcf5 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -2,6 +2,10 @@ cmake_minimum_required(VERSION 2.8) 
set(STATICLIBRARY "libcmarkextensions_static") set(LIBRARY_SOURCES core-extensions.c + table.c + ext_scanners.c + ext_scanners.re + ext_scanners.h ) include_directories( diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index cf7f9f621..009c69970 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -1,3 +1,7 @@ #include "core-extensions.h" +#include "table.h" -int core_extensions_registration(cmark_plugin *plugin) { return 1; } +int core_extensions_registration(cmark_plugin *plugin) { + cmark_plugin_register_syntax_extension(plugin, create_table_extension()); + return 1; +} diff --git a/extensions/ext_scanners.c b/extensions/ext_scanners.c new file mode 100644 index 000000000..708f868e8 --- /dev/null +++ b/extensions/ext_scanners.c @@ -0,0 +1,382 @@ +/* Generated by re2c 0.14.3 */ +#include "ext_scanners.h" +#include + +bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), + unsigned char *ptr, int len, bufsize_t offset) { + bufsize_t res; + + if (ptr == NULL || offset > len) { + return 0; + } else { + unsigned char lim = ptr[len]; + + ptr[len] = '\0'; + res = scanner(ptr + offset); + ptr[len] = lim; + } + + return res; +} + +bufsize_t _scan_table_start(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + yych = *(marker = p); + if (yych <= '{') { + if (yych <= 0x1F) { + if (yych <= '\t') { + if (yych <= 0x08) + goto yy6; + goto yy3; + } else { + if (yych <= '\n') + goto yy2; + if (yych <= '\f') + goto yy3; + goto yy6; + } + } else { + if (yych <= '-') { + if (yych <= ' ') + goto yy3; + if (yych <= ',') + goto yy6; + goto yy5; + } else { + if (yych == ':') + goto yy4; + goto yy6; + } + } + } else { + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '|') + goto yy3; + if (yych <= 0x7F) + goto yy6; + } else { + if (yych <= 0xDF) + goto yy7; + if (yych <= 0xE0) + goto yy9; + goto yy10; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy14; + if (yych <= 0xEF) + goto yy10; + goto yy11; + } else { + if (yych <= 0xF3) + goto yy12; + if (yych <= 0xF4) + goto yy13; + } + } + } + yy2 : { return 0; } + yy3: + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy22; + } + if (yych <= '\f') { + if (yych == '\t') + goto yy29; + if (yych <= '\n') + goto yy2; + goto yy29; + } else { + if (yych <= ' ') { + if (yych <= 0x1F) + goto yy2; + goto yy29; + } else { + if (yych == ':') + goto yy31; + goto yy2; + } + } + yy4: + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy22; + } + goto yy2; + yy5: + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy22; + } + if (yych <= ' ') { + if (yych <= 0x08) + goto yy2; + if (yych <= '\r') + goto yy16; + if (yych <= 0x1F) + goto yy2; + goto yy16; + } else { + if (yych <= ':') { + if (yych <= '9') + goto yy2; + goto yy15; + } else { + if (yych == '|') + goto yy16; + goto yy2; + } + } + yy6: + yych = *++p; + goto yy2; + yy7: + yych = *++p; + if (yych <= 0x7F) + goto yy8; + if (yych <= 0xBF) + goto yy6; + yy8: + p = marker; + goto yy2; + yy9: + yych = *++p; + if (yych <= 0x9F) + goto 
yy8; + if (yych <= 0xBF) + goto yy7; + goto yy8; + yy10: + yych = *++p; + if (yych <= 0x7F) + goto yy8; + if (yych <= 0xBF) + goto yy7; + goto yy8; + yy11: + yych = *++p; + if (yych <= 0x8F) + goto yy8; + if (yych <= 0xBF) + goto yy10; + goto yy8; + yy12: + yych = *++p; + if (yych <= 0x7F) + goto yy8; + if (yych <= 0xBF) + goto yy10; + goto yy8; + yy13: + yych = *++p; + if (yych <= 0x7F) + goto yy8; + if (yych <= 0x8F) + goto yy10; + goto yy8; + yy14: + yych = *++p; + if (yych <= 0x7F) + goto yy8; + if (yych <= 0x9F) + goto yy7; + goto yy8; + yy15: + ++p; + yych = *p; + yy16: + if (yybm[0 + yych] & 64) { + goto yy15; + } + if (yych <= '\r') { + if (yych <= 0x08) + goto yy8; + if (yych <= '\n') + goto yy20; + goto yy19; + } else { + if (yych != '|') + goto yy8; + } + yy17: + ++p; + yych = *p; + if (yych <= 0x1F) { + if (yych <= '\n') { + if (yych <= 0x08) + goto yy8; + if (yych <= '\t') + goto yy17; + goto yy20; + } else { + if (yych <= '\f') + goto yy17; + if (yych >= 0x0E) + goto yy8; + } + } else { + if (yych <= '-') { + if (yych <= ' ') + goto yy17; + if (yych <= ',') + goto yy8; + goto yy25; + } else { + if (yych == ':') + goto yy24; + goto yy8; + } + } + yy19: + yych = *++p; + if (yych != '\n') + goto yy8; + yy20: + ++p; + { return (bufsize_t)(p - start); } + yy22: + ++p; + yych = *p; + if (yybm[0 + yych] & 128) { + goto yy22; + } + if (yych <= 0x1F) { + if (yych <= '\n') { + if (yych <= 0x08) + goto yy8; + if (yych <= '\t') + goto yy15; + goto yy20; + } else { + if (yych <= '\f') + goto yy15; + if (yych <= '\r') + goto yy19; + goto yy8; + } + } else { + if (yych <= ':') { + if (yych <= ' ') + goto yy15; + if (yych <= '9') + goto yy8; + goto yy15; + } else { + if (yych == '|') + goto yy17; + goto yy8; + } + } + yy24: + ++p; + yych = *p; + if (yych != '-') + goto yy8; + yy25: + ++p; + yych = *p; + if (yych <= ' ') { + if (yych <= '\n') { + if (yych <= 0x08) + goto yy8; + if (yych >= '\n') + goto yy20; + } else { + if (yych <= '\f') + goto yy27; + if (yych <= 
'\r') + goto yy19; + if (yych <= 0x1F) + goto yy8; + } + } else { + if (yych <= '9') { + if (yych == '-') + goto yy25; + goto yy8; + } else { + if (yych <= ':') + goto yy27; + if (yych == '|') + goto yy17; + goto yy8; + } + } + yy27: + ++p; + yych = *p; + if (yych <= '\r') { + if (yych <= '\t') { + if (yych <= 0x08) + goto yy8; + goto yy27; + } else { + if (yych <= '\n') + goto yy20; + if (yych <= '\f') + goto yy27; + goto yy19; + } + } else { + if (yych <= ' ') { + if (yych <= 0x1F) + goto yy8; + goto yy27; + } else { + if (yych == '|') + goto yy17; + goto yy8; + } + } + yy29: + ++p; + yych = *p; + if (yybm[0 + yych] & 128) { + goto yy22; + } + if (yych <= '\f') { + if (yych == '\t') + goto yy29; + if (yych <= '\n') + goto yy8; + goto yy29; + } else { + if (yych <= ' ') { + if (yych <= 0x1F) + goto yy8; + goto yy29; + } else { + if (yych != ':') + goto yy8; + } + } + yy31: + ++p; + if (yybm[0 + (yych = *p)] & 128) { + goto yy22; + } + goto yy8; + } +} diff --git a/extensions/ext_scanners.h b/extensions/ext_scanners.h new file mode 100644 index 000000000..da36a9a32 --- /dev/null +++ b/extensions/ext_scanners.h @@ -0,0 +1,16 @@ +#include "chunk.h" +#include "cmark.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), + unsigned char *ptr, int len, bufsize_t offset); +bufsize_t _scan_table_start(const unsigned char *p); + +#define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n) + +#ifdef __cplusplus +} +#endif diff --git a/extensions/ext_scanners.re b/extensions/ext_scanners.re new file mode 100644 index 000000000..793f0f09e --- /dev/null +++ b/extensions/ext_scanners.re @@ -0,0 +1,42 @@ +#include +#include "ext_scanners.h" + +bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, int len, bufsize_t offset) +{ + bufsize_t res; + + if (ptr == NULL || offset > len) { + return 0; + } else { + unsigned char lim = ptr[len]; + + ptr[len] = '\0'; + res = 
scanner(ptr + offset); + ptr[len] = lim; + } + + return res; +} + +/*!re2c + re2c:define:YYCTYPE = "unsigned char"; + re2c:define:YYCURSOR = p; + re2c:define:YYMARKER = marker; + re2c:define:YYCTXMARKER = marker; + re2c:yyfill:enable = 0; + + spacechar = [ \t\v\f]; + newline = [\r]?[\n]; + + table_marker = (spacechar*[:]?[-]+[:]?spacechar*); +*/ + +bufsize_t _scan_table_start(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + [|]? table_marker ([|] table_marker)* [|]? spacechar* newline { return (bufsize_t)(p - start); } + .? { return 0; } +*/ +} diff --git a/extensions/table.c b/extensions/table.c new file mode 100644 index 000000000..260b16da1 --- /dev/null +++ b/extensions/table.c @@ -0,0 +1,687 @@ +#include +#include +#include +#include + +#include "ext_scanners.h" +#include "table.h" + +static cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, + CMARK_NODE_TABLE_CELL; + +typedef struct { + uint16_t n_columns; + cmark_llist *cells; +} table_row; + +typedef struct { + uint16_t n_columns; + uint8_t *alignments; +} node_table; + +typedef enum { + ALIGN_NONE, + ALIGN_LEFT, + ALIGN_CENTER, + ALIGN_RIGHT +} table_column_alignment; + +typedef struct { bool is_header; } node_table_row; + +static void free_node_table(cmark_mem *mem, void *ptr) { + node_table *t = ptr; + mem->free(t->alignments); + mem->free(t); +} + +static void free_node_table_row(cmark_mem *mem, void *ptr) { mem->free(ptr); } + +static uint16_t get_n_table_columns(cmark_node *node) { + if (!node || node->type != CMARK_NODE_TABLE) + return -1; + + return ((node_table *)node->user_data)->n_columns; +} + +static int set_n_table_columns(cmark_node *node, uint16_t n_columns) { + if (!node || node->type != CMARK_NODE_TABLE) + return 0; + + ((node_table *)node->user_data)->n_columns = n_columns; + return 1; +} + +static uint8_t *get_table_alignments(cmark_node *node) { + if (!node || node->type != CMARK_NODE_TABLE) + return 0; + + return 
((node_table *)node->user_data)->alignments; +} + +static int set_table_alignments(cmark_node *node, uint8_t *alignments) { + if (!node || node->type != CMARK_NODE_TABLE) + return 0; + + ((node_table *)node->user_data)->alignments = alignments; + return 1; +} + +static int is_table_header(cmark_node *node, int is_table_header) { + if (!node || node->type != CMARK_NODE_TABLE_ROW) + return 0; + + ((node_table_row *)node->user_data)->is_header = is_table_header; + return 1; +} + +static void free_table_cell(cmark_mem *mem, void *data) { + cmark_node_free((cmark_node *)data); +} + +static void free_table_row(cmark_mem *mem, table_row *row) { + if (!row) + return; + + cmark_llist_free_full(mem, row->cells, (cmark_free_func)free_table_cell); + + mem->free(row); +} + +static void reescape_pipes(cmark_strbuf *strbuf, cmark_mem *mem, + unsigned char *string, bufsize_t len) { + bufsize_t r; + + cmark_strbuf_init(mem, strbuf, len * 2); + for (r = 0; r < len; ++r) { + if (string[r] == '\\' && r + 1 < len && + (string[r + 1] == '|' || string[r + 1] == '\\')) + cmark_strbuf_putc(strbuf, '\\'); + + cmark_strbuf_putc(strbuf, string[r]); + } +} + +static void maybe_consume_pipe(cmark_node **n, int *offset) { + if (*n && (*n)->type == CMARK_NODE_TEXT && *offset < (*n)->as.literal.len && + (*n)->as.literal.data[*offset] == '|') + ++(*offset); +} + +static const char *find_unescaped_pipe(const char *cstr, size_t len) { + bool escaping = false; + for (; len; --len, ++cstr) { + if (escaping) + escaping = false; + else if (*cstr == '\\') + escaping = true; + else if (*cstr == '|') + return cstr; + } + return NULL; +} + +static cmark_node *consume_until_pipe_or_eol(cmark_syntax_extension *self, + cmark_parser *parser, + cmark_node **n, int *offset) { + cmark_node *result = + cmark_node_new_with_mem(CMARK_NODE_TABLE_CELL, parser->mem); + cmark_node_set_syntax_extension(result, self); + bool was_escape = false; + + while (*n) { + if ((*n)->type == CMARK_NODE_TEXT) { + cmark_node *child = 
cmark_parser_add_child( + parser, result, CMARK_NODE_TEXT, cmark_parser_get_offset(parser)); + + const char *cstr = cmark_chunk_to_cstr(parser->mem, &(*n)->as.literal); + + if (was_escape) { + child->as.literal = cmark_chunk_dup(&(*n)->as.literal, *offset, 1); + cmark_node_own(child); + ++*offset; + was_escape = false; + continue; + } + + if (strcmp(cstr + *offset, "\\") == 0 && (*n)->next && + (*n)->next->type == CMARK_NODE_TEXT) { + was_escape = true; + *n = (*n)->next; + continue; + } + + const char *pipe = + find_unescaped_pipe(cstr + *offset, (*n)->as.literal.len - *offset); + + if (!pipe) { + child->as.literal = cmark_chunk_dup(&(*n)->as.literal, *offset, + (*n)->as.literal.len - *offset); + cmark_node_own(child); + } else { + int len = pipe - cstr - *offset; + child->as.literal = cmark_chunk_dup(&(*n)->as.literal, *offset, len); + cmark_node_own(child); + *offset += len + 1; + if (*offset >= (*n)->as.literal.len) { + *offset = 0; + *n = (*n)->next; + } + return result; + } + + *n = (*n)->next; + *offset = 0; + } else { + cmark_node *next = (*n)->next; + cmark_node_append_child(result, *n); + cmark_node_own(*n); + *n = next; + *offset = 0; + } + } + + if (!result->first_child) { + cmark_node_free(result); + result = NULL; + } + + return result; +} + +static table_row *row_from_string(cmark_syntax_extension *self, + cmark_parser *parser, unsigned char *string, + int len) { + table_row *row = NULL; + + cmark_node *temp_container = + cmark_node_new_with_mem(CMARK_NODE_PARAGRAPH, parser->mem); + reescape_pipes(&temp_container->content, parser->mem, string, len); + + cmark_manage_extensions_special_characters(parser, true); + cmark_parse_inlines(parser, temp_container, parser->refmap, parser->options); + cmark_manage_extensions_special_characters(parser, false); + + if (!temp_container->first_child) { + cmark_node_free(temp_container); + return NULL; + } + + row = (table_row *)parser->mem->calloc(1, sizeof(table_row)); + row->n_columns = 0; + row->cells = NULL; + 
+ cmark_node *node = temp_container->first_child; + int offset = 0; + + maybe_consume_pipe(&node, &offset); + cmark_node *child; + while ((child = consume_until_pipe_or_eol(self, parser, &node, &offset)) != + NULL) { + ++row->n_columns; + row->cells = cmark_llist_append(parser->mem, row->cells, child); + } + + cmark_node_free(temp_container); + + return row; +} + +static cmark_node *try_opening_table_header(cmark_syntax_extension *self, + cmark_parser *parser, + cmark_node *parent_container, + unsigned char *input, int len) { + bufsize_t matched = + scan_table_start(input, len, cmark_parser_get_first_nonspace(parser)); + cmark_node *table_header, *child; + table_row *header_row = NULL; + table_row *marker_row = NULL; + const char *parent_string; + uint16_t i; + + if (!matched) + goto done; + + parent_string = cmark_node_get_string_content(parent_container); + + header_row = row_from_string(self, parser, (unsigned char *)parent_string, + strlen(parent_string)); + + if (!header_row) { + goto done; + } + + marker_row = row_from_string(self, parser, + input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); + + assert(marker_row); + + if (header_row->n_columns != marker_row->n_columns) { + goto done; + } + + if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) { + goto done; + } + + cmark_node_set_syntax_extension(parent_container, self); + + cmark_node_set_user_data(parent_container, + parser->mem->calloc(1, sizeof(node_table))); + cmark_node_set_user_data_free_func(parent_container, free_node_table); + + set_n_table_columns(parent_container, header_row->n_columns); + + uint8_t *alignments = + parser->mem->calloc(header_row->n_columns, sizeof(uint8_t)); + cmark_llist *it = marker_row->cells; + for (i = 0; it; it = it->next, ++i) { + cmark_node *node = it->data; + assert(node->type == CMARK_NODE_TABLE_CELL); + + cmark_strbuf strbuf; + cmark_strbuf_init(parser->mem, &strbuf, 0); + for (child = node->first_child; child; 
child = child->next) { + assert(child->type == CMARK_NODE_TEXT); + cmark_strbuf_put(&strbuf, child->as.literal.data, child->as.literal.len); + } + cmark_strbuf_trim(&strbuf); + char const *text = cmark_strbuf_cstr(&strbuf); + + bool left = text[0] == ':', right = text[strbuf.size - 1] == ':'; + cmark_strbuf_free(&strbuf); + + if (left && right) + alignments[i] = ALIGN_CENTER; + else if (left) + alignments[i] = ALIGN_LEFT; + else if (right) + alignments[i] = ALIGN_RIGHT; + } + set_table_alignments(parent_container, alignments); + + table_header = + cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW, + cmark_parser_get_offset(parser)); + cmark_node_set_syntax_extension(table_header, self); + + cmark_node_set_user_data(table_header, + parser->mem->calloc(1, sizeof(node_table_row))); + cmark_node_set_user_data_free_func(table_header, free_node_table_row); + is_table_header(table_header, true); + + { + cmark_llist *tmp, *next; + + for (tmp = header_row->cells; tmp; tmp = next) { + cmark_node *header_cell = tmp->data; + cmark_node_append_child(table_header, header_cell); + next = header_row->cells = tmp->next; + parser->mem->free(tmp); + } + } + + cmark_parser_advance_offset( + parser, (char *)input, + strlen((char *)input) - 1 - cmark_parser_get_offset(parser), false); +done: + free_table_row(parser->mem, header_row); + free_table_row(parser->mem, marker_row); + return parent_container; +} + +static cmark_node *try_opening_table_row(cmark_syntax_extension *self, + cmark_parser *parser, + cmark_node *parent_container, + unsigned char *input, int len) { + cmark_node *table_row_block; + table_row *row; + + if (cmark_parser_is_blank(parser)) + return NULL; + + table_row_block = + cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW, + cmark_parser_get_offset(parser)); + + cmark_node_set_syntax_extension(table_row_block, self); + cmark_node_set_user_data(table_row_block, + parser->mem->calloc(1, sizeof(node_table_row))); + 
cmark_node_set_user_data_free_func(table_row_block, free_node_table_row); + + /* We don't advance the offset here */ + + row = row_from_string(self, parser, + input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); + + { + cmark_llist *tmp, *next; + int i; + int table_columns = get_n_table_columns(parent_container); + + for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = next, ++i) { + cmark_node *cell = tmp->data; + assert(cell->type == CMARK_NODE_TABLE_CELL); + cmark_node_append_child(table_row_block, cell); + row->cells = next = tmp->next; + parser->mem->free(tmp); + } + + for (; i < table_columns; ++i) { + cmark_node *cell = + cmark_parser_add_child(parser, table_row_block, CMARK_NODE_TABLE_CELL, + cmark_parser_get_offset(parser)); + cmark_node_set_syntax_extension(cell, self); + } + } + + free_table_row(parser->mem, row); + + cmark_parser_advance_offset(parser, (char *)input, + len - 1 - cmark_parser_get_offset(parser), false); + + return table_row_block; +} + +static cmark_node *try_opening_table_block(cmark_syntax_extension *self, + int indented, cmark_parser *parser, + cmark_node *parent_container, + unsigned char *input, int len) { + cmark_node_type parent_type = cmark_node_get_type(parent_container); + + if (!indented && parent_type == CMARK_NODE_PARAGRAPH) { + return try_opening_table_header(self, parser, parent_container, input, len); + } else if (!indented && parent_type == CMARK_NODE_TABLE) { + return try_opening_table_row(self, parser, parent_container, input, len); + } + + return NULL; +} + +static int matches(cmark_syntax_extension *self, cmark_parser *parser, + unsigned char *input, int len, + cmark_node *parent_container) { + int res = 0; + + if (cmark_node_get_type(parent_container) == CMARK_NODE_TABLE) { + table_row *new_row = row_from_string( + self, parser, input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); + if (new_row && new_row->n_columns) 
+ res = 1; + free_table_row(parser->mem, new_row); + } + + return res; +} + +static const char *get_type_string(cmark_syntax_extension *ext, + cmark_node *node) { + if (node->type == CMARK_NODE_TABLE) { + return "table"; + } else if (node->type == CMARK_NODE_TABLE_ROW) { + if (((node_table_row *)node->user_data)->is_header) + return "table_header"; + else + return "table_row"; + } else if (node->type == CMARK_NODE_TABLE_CELL) { + return "table_cell"; + } + + return ""; +} + +static int can_contain(cmark_syntax_extension *extension, cmark_node *node, + cmark_node_type child_type) { + if (node->type == CMARK_NODE_TABLE) { + return child_type == CMARK_NODE_TABLE_ROW; + } else if (node->type == CMARK_NODE_TABLE_ROW) { + return child_type == CMARK_NODE_TABLE_CELL; + } else if (node->type == CMARK_NODE_TABLE_CELL) { + return child_type == CMARK_NODE_TEXT || child_type == CMARK_NODE_CODE || + child_type == CMARK_NODE_EMPH || child_type == CMARK_NODE_STRONG || + child_type == CMARK_NODE_LINK || child_type == CMARK_NODE_IMAGE || + child_type == CMARK_NODE_HTML_INLINE; + } + return false; +} + +static int contains_inlines(cmark_syntax_extension *extension, + cmark_node *node) { + return node->type == CMARK_NODE_TABLE_CELL; +} + +static void commonmark_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + + if (node->type == CMARK_NODE_TABLE) { + renderer->blankline(renderer); + } else if (node->type == CMARK_NODE_TABLE_ROW) { + if (entering) { + renderer->cr(renderer); + renderer->out(renderer, "|", false, LITERAL); + } + } else if (node->type == CMARK_NODE_TABLE_CELL) { + if (entering) { + } else { + renderer->out(renderer, " |", false, LITERAL); + if (((node_table_row *)node->parent->user_data)->is_header && + !node->next) { + int i; + uint8_t *alignments = get_table_alignments(node->parent->parent); + uint16_t n_cols = + ((node_table 
*)node->parent->parent->user_data)->n_columns; + renderer->cr(renderer); + renderer->out(renderer, "|", false, LITERAL); + for (i = 0; i < n_cols; i++) { + if (alignments[i] == ALIGN_NONE) + renderer->out(renderer, " --- |", false, LITERAL); + else if (alignments[i] == ALIGN_LEFT) + renderer->out(renderer, " :-- |", false, LITERAL); + else if (alignments[i] == ALIGN_CENTER) + renderer->out(renderer, " :-: |", false, LITERAL); + else if (alignments[i] == ALIGN_RIGHT) + renderer->out(renderer, " --: |", false, LITERAL); + } + renderer->cr(renderer); + } + } + } else { + assert(false); + } +} + +static void latex_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + + if (node->type == CMARK_NODE_TABLE) { + if (entering) { + int i; + uint16_t n_cols; + renderer->cr(renderer); + renderer->out(renderer, "\\begin{table}", false, LITERAL); + renderer->cr(renderer); + renderer->out(renderer, "\\begin{tabular}{", false, LITERAL); + + n_cols = ((node_table *)node->user_data)->n_columns; + for (i = 0; i < n_cols; i++) { + renderer->out(renderer, "l", false, LITERAL); + } + renderer->out(renderer, "}", false, LITERAL); + renderer->cr(renderer); + } else { + renderer->out(renderer, "\\end{tabular}", false, LITERAL); + renderer->cr(renderer); + renderer->out(renderer, "\\end{table}", false, LITERAL); + renderer->cr(renderer); + } + } else if (node->type == CMARK_NODE_TABLE_ROW) { + if (!entering) { + renderer->cr(renderer); + } + } else if (node->type == CMARK_NODE_TABLE_CELL) { + if (!entering) { + if (node->next) { + renderer->out(renderer, " & ", false, LITERAL); + } else { + renderer->out(renderer, " \\\\", false, LITERAL); + } + } + } else { + assert(false); + } +} + +static void man_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == 
CMARK_EVENT_ENTER); + + if (node->type == CMARK_NODE_TABLE) { + if (entering) { + int i; + uint16_t n_cols; + renderer->cr(renderer); + renderer->out(renderer, ".TS", false, LITERAL); + renderer->cr(renderer); + renderer->out(renderer, "tab(@);", false, LITERAL); + renderer->cr(renderer); + + n_cols = ((node_table *)node->user_data)->n_columns; + + for (i = 0; i < n_cols; i++) { + renderer->out(renderer, "c", false, LITERAL); + } + + if (n_cols) { + renderer->out(renderer, ".", false, LITERAL); + renderer->cr(renderer); + } + } else { + renderer->out(renderer, ".TE", false, LITERAL); + renderer->cr(renderer); + } + } else if (node->type == CMARK_NODE_TABLE_ROW) { + if (!entering) { + renderer->cr(renderer); + } + } else if (node->type == CMARK_NODE_TABLE_CELL) { + if (!entering && node->next) { + renderer->out(renderer, "@", false, LITERAL); + } + } else { + assert(false); + } +} + +struct html_table_state { + unsigned need_closing_table_body : 1; + unsigned in_table_header : 1; +}; + +static void html_render(cmark_syntax_extension *extension, + cmark_html_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + cmark_strbuf *html = renderer->html; + cmark_node *n; + + // XXX: we just monopolise renderer->opaque. 
+ struct html_table_state *table_state = + (struct html_table_state *)&renderer->opaque; + + if (node->type == CMARK_NODE_TABLE) { + if (entering) { + cmark_html_render_cr(html); + cmark_strbuf_puts(html, "'); + table_state->need_closing_table_body = false; + } else { + if (table_state->need_closing_table_body) + cmark_strbuf_puts(html, ""); + table_state->need_closing_table_body = false; + cmark_strbuf_puts(html, "\n"); + } + } else if (node->type == CMARK_NODE_TABLE_ROW) { + if (entering) { + cmark_html_render_cr(html); + if (((node_table_row *)node->user_data)->is_header) { + table_state->in_table_header = 1; + cmark_strbuf_puts(html, ""); + cmark_html_render_cr(html); + } + cmark_strbuf_puts(html, "'); + } else { + cmark_html_render_cr(html); + cmark_strbuf_puts(html, ""); + if (((node_table_row *)node->user_data)->is_header) { + cmark_html_render_cr(html); + cmark_strbuf_puts(html, ""); + cmark_html_render_cr(html); + cmark_strbuf_puts(html, ""); + table_state->need_closing_table_body = 1; + table_state->in_table_header = false; + } + } + } else if (node->type == CMARK_NODE_TABLE_CELL) { + uint8_t *alignments = get_table_alignments(node->parent->parent); + if (entering) { + cmark_html_render_cr(html); + if (table_state->in_table_header) { + cmark_strbuf_puts(html, "parent->first_child; n; n = n->next, ++i) + if (n == node) + break; + + if (alignments[i] == ALIGN_LEFT) + cmark_strbuf_puts(html, " align=\"left\""); + else if (alignments[i] == ALIGN_CENTER) + cmark_strbuf_puts(html, " align=\"center\""); + else if (alignments[i] == ALIGN_RIGHT) + cmark_strbuf_puts(html, " align=\"right\""); + + cmark_html_render_sourcepos(node, html, options); + cmark_strbuf_putc(html, '>'); + } else { + if (table_state->in_table_header) { + cmark_strbuf_puts(html, ""); + } else { + cmark_strbuf_puts(html, ""); + } + } + } else { + assert(false); + } +} + +cmark_syntax_extension *create_table_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("table"); + 
+ cmark_syntax_extension_set_match_block_func(ext, matches); + cmark_syntax_extension_set_open_block_func(ext, try_opening_table_block); + cmark_syntax_extension_set_get_type_string_func(ext, get_type_string); + cmark_syntax_extension_set_can_contain_func(ext, can_contain); + cmark_syntax_extension_set_contains_inlines_func(ext, contains_inlines); + cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render); + cmark_syntax_extension_set_latex_render_func(ext, latex_render); + cmark_syntax_extension_set_man_render_func(ext, man_render); + cmark_syntax_extension_set_html_render_func(ext, html_render); + CMARK_NODE_TABLE = cmark_syntax_extension_add_node(0); + CMARK_NODE_TABLE_ROW = cmark_syntax_extension_add_node(0); + CMARK_NODE_TABLE_CELL = cmark_syntax_extension_add_node(0); + + return ext; +} diff --git a/extensions/table.h b/extensions/table.h new file mode 100644 index 000000000..ff630b25e --- /dev/null +++ b/extensions/table.h @@ -0,0 +1,8 @@ +#ifndef TABLE_H +#define TABLE_H + +#include "core-extensions.h" + +cmark_syntax_extension *create_table_extension(void); + +#endif diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 index 692588d71..230340346 100644 --- a/man/man3/cmark.3 +++ b/man/man3/cmark.3 @@ -130,6 +130,51 @@ memory. Memory in these slabs is not reused at all. Resets the arena allocator, quickly returning all used memory to the operating system. +.PP +\fItypedef\f[] \fBvoid\f[](\fI*cmark_free_func\f[]) + +.PP +Callback for freeing user data with a \f[I]cmark_mem\f[] context. + +.SS +Linked list + +.PP +.nf +\fC +.RS 0n +typedef struct _cmark_llist +{ + struct _cmark_llist *next; + void *data; +} cmark_llist; +.RE +\f[] +.fi + +.PP +A generic singly linked list. + +.PP +\fIcmark_llist *\f[] \fBcmark_llist_append\f[](\fIcmark_mem * mem\f[], \fIcmark_llist * head\f[], \fIvoid * data\f[]) + +.PP +Append an element to the linked list, return the possibly modified head +of the list. 
+ +.PP +\fIvoid\f[] \fBcmark_llist_free_full\f[](\fIcmark_mem * mem\f[], \fIcmark_llist * head\f[], \fIcmark_free_func free_func\f[]) + +.PP +Free the list starting with \f[I]head\f[], calling \f[I]free_func\f[] +with the data pointer of each of its elements + +.PP +\fIvoid\f[] \fBcmark_llist_free\f[](\fIcmark_mem * mem\f[], \fIcmark_llist * head\f[]) + +.PP +Free the list starting with \f[I]head\f[] + .SS Creating and Destroying Nodes @@ -328,6 +373,12 @@ Returns the user data of \f[I]node\f[]. Sets arbitrary user data for \f[I]node\f[]. Returns 1 on success, 0 on failure. +.PP +\fIint\f[] \fBcmark_node_set_user_data_free_func\f[](\fIcmark_node *node\f[], \fIcmark_free_func free_func\f[]) + +.PP +Set free function for user data */ + .PP \fIcmark_node_type\f[] \fBcmark_node_get_type\f[](\fIcmark_node *node\f[]) @@ -437,6 +488,18 @@ Returns the info string from a fenced code block. Sets the info string in a fenced code block, returning 1 on success and 0 on failure. +.PP +\fIint\f[] \fBcmark_node_set_fenced\f[](\fIcmark_node * node\f[], \fIint fenced\f[], \fIint length\f[], \fIint offset\f[], \fIchar character\f[]) + +.PP +Sets code blocks fencing details + +.PP +\fIint\f[] \fBcmark_node_get_fenced\f[](\fIcmark_node *node\f[], \fIint *length\f[], \fIint *offset\f[], \fIchar *character\f[]) + +.PP +Returns code blocks fencing details + .PP \fIconst char *\f[] \fBcmark_node_get_url\f[](\fIcmark_node *node\f[]) @@ -575,6 +638,12 @@ Returns 1 on success, 0 on failure. .PP Consolidates adjacent text nodes. +.PP +\fIvoid\f[] \fBcmark_node_own\f[](\fIcmark_node *root\f[]) + +.PP +Ensures a node and all its children own their own chunk memory. + .SS Parsing .PP @@ -669,7 +738,7 @@ As for \f[I]cmark_render_xml\f[], but specifying the allocator to use for the resulting string. 
.PP -\fIchar *\f[] \fBcmark_render_html\f[](\fIcmark_node *root\f[], \fIint options\f[]) +\fIchar *\f[] \fBcmark_render_html\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_llist *extensions\f[]) .PP Render a \f[I]node\f[] tree as an HTML fragment. It is up to the user to @@ -677,7 +746,7 @@ add an appropriate header and footer. It is the caller's responsibility to free the returned buffer. .PP -\fIchar *\f[] \fBcmark_render_html_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[]) +\fIchar *\f[] \fBcmark_render_html_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_llist *extensions\f[], \fIcmark_mem *mem\f[]) .PP As for \f[I]cmark_render_html\f[], but specifying the allocator to use diff --git a/src/commonmark.c b/src/commonmark.c index eeaf33394..4d5a5c01f 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -34,7 +34,7 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, c < 0x80 && escape != LITERAL && ((escape == NORMAL && (c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || - c == '>' || c == '\\' || c == '`' || c == '!' || + c == '>' || c == '\\' || c == '`' || c == '!' || c == '|' || (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') || (renderer->begin_content && (c == '-' || c == '+' || c == '=') && // begin_content doesn't get set to false til we've passed digits diff --git a/suppressions b/suppressions new file mode 100644 index 000000000..d03ae8532 --- /dev/null +++ b/suppressions @@ -0,0 +1,10 @@ +{ + . 
+ Memcheck:Leak + fun:malloc + fun:__smakebuf + fun:__srefill0 + fun:__fread + fun:fread + fun:main +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6da3a6bac..ad28135ae 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -41,16 +41,15 @@ IF (PYTHONINTERP_FOUND) "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src" ) - add_test(roundtriptest_executable + add_test(roundtriptest_library ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src" ) - add_test(entity_executable - ${PYTHON_EXECUTABLE} - "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py" + add_test(entity_library + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py" "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src" ) endif() @@ -63,6 +62,17 @@ IF (PYTHONINTERP_FOUND) ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark --smart" ) + add_test(extensions_executable + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table" + ) + + add_test(roundtrip_extensions_executable + ${PYTHON_EXECUTABLE} + "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" + "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" + "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table" + ) + add_test(regressiontest_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" @@ -70,6 +80,7 @@ IF (PYTHONINTERP_FOUND) "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark" ) + ELSE(PYTHONINTERP_FOUND) message("\n*** A python 3 interpreter is required to run the spec tests.\n") diff --git a/test/afl_test_cases/test.md b/test/afl_test_cases/test.md index 27eee009c..4337853d3 100644 --- 
a/test/afl_test_cases/test.md +++ b/test/afl_test_cases/test.md @@ -32,5 +32,8 @@ cb x -[f]: /u "t" +| a | b | +| --- | --- | +| c | `d|` \| e | +[f]: /u "t" diff --git a/test/extensions.txt b/test/extensions.txt new file mode 100644 index 000000000..c982d688d --- /dev/null +++ b/test/extensions.txt @@ -0,0 +1,360 @@ +--- +title: Extensions test +author: Yuki Izumi +version: 0.1 +date: '2016-08-31' +license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' +... + +## Tables + +Here's a well-formed table, doing everything it should. + +```````````````````````````````` example +| abc | def | +| --- | --- | +| ghi | jkl | +| mno | pqr | +. + + + + + + + + + + + + + + + +
      abc def
      ghi jkl
      mno pqr
      +```````````````````````````````` + +We're going to mix up the table now; we'll demonstrate that inline formatting +works fine, but block elements don't. You can also have empty cells, and the +textual alignment of the columns is shown to be irrelevant. + +```````````````````````````````` example +Hello! + +| _abc_ | セン | +| ----- | ---- | +| 1. Block elements inside cells don't work. | | +| But **_inline elements do_**. | x | + +Hi! +. +

      Hello!

      + + + + + + + + + + + + + + + +
      abc セン
      1. Block elements inside cells don't work.
      But inline elements do. x
      +

      Hi!

      +```````````````````````````````` + +Here we demonstrate some edge cases about what is and isn't a table. + +```````````````````````````````` example +| Not enough table | to be considered table | + +| Not enough table | to be considered table | +| Not enough table | to be considered table | + +| Just enough table | to be considered table | +| ----------------- | ---------------------- | + +| ---- | --- | + +|x| +|-| + +| xyz | +| --- | +. +

      | Not enough table | to be considered table |

      +

      | Not enough table | to be considered table | +| Not enough table | to be considered table |

      + + + + + + + +
      Just enough table to be considered table
      +

      | ---- | --- |

      + + + + + + +
      x
      + + + + + + +
      xyz
      +```````````````````````````````` + +A "simpler" table, GFM style: + +```````````````````````````````` example +abc | def +--- | --- +xyz | ghi +. + + + + + + + + + + + +
      abc def
      xyz ghi
      +```````````````````````````````` + +We are making the parser slightly more lax here. Here is a table with spaces at +the end: + +```````````````````````````````` example +Hello! + +| _abc_ | セン | +| ----- | ---- | +| this row has a space at the end | | +| But **_inline elements do_**. | x | + +Hi! +. +

      Hello!

      + + + + + + + + + + + + + + + +
      abc セン
      this row has a space at the end
      But inline elements do. x
      +

      Hi!

      +```````````````````````````````` + +Table alignment: + +```````````````````````````````` example +aaa | bbb | ccc | ddd | eee +:-- | --- | :-: | --- | --: +fff | ggg | hhh | iii | jjj +. + + + + + + + + + + + + + + + + + +
      aaa bbb ccc ddd eee
      fff ggg hhh iii jjj
      +```````````````````````````````` + +### Table cell count mismatches + +The header and marker row must match. + +```````````````````````````````` example +| a | b | c | +| --- | --- | +| this | isn't | okay | +. +

      | a | b | c | +| --- | --- | +| this | isn't | okay |

      +```````````````````````````````` + +But any of the body rows can be shorter. Rows longer +than the header are truncated. + +```````````````````````````````` example +| a | b | c | +| --- | --- | --- +| x +| a | b +| 1 | 2 | 3 | 4 | 5 | +. + + + + + + + + + + + + + + + + + + + + + + + +
      a b c
      x
      a b
      1 2 3
      +```````````````````````````````` + +### Embedded pipes + +Tables with embedded pipes could be tricky. + +```````````````````````````````` example +| a | b | +| --- | --- | +| Escaped pipes are \|okay\|. | Like \| this. | +| Within `|code| is okay` too. | +| **_`c|`_** \| complex +| don't **\_reparse\_** +. + + + + + + + + + + + + + + + + + + + + + + + +
      a b
      Escaped pipes are |okay|. Like | this.
      Within |code| is okay too.
      c| | complex
      don't _reparse_
      +```````````````````````````````` + +### Oddly-formatted markers + +This shouldn't assert. + +```````````````````````````````` example +| a | +--- | +. + + + + + + +
      a
      +```````````````````````````````` + +### Escaping + +```````````````````````````````` example +| a | +| --- | +| \\ | +| \\\\ | +| \_ | +| \| | +| \a | +. + + + + + + + + + + + + + + + + + + + + + +
      a
      \
      \\
      _
      |
      a
      +```````````````````````````````` + +### Embedded HTML + +```````````````````````````````` example +| a | +| --- | +| hello | +| ok
      sure | +. + + + + + + + + + + + + +
      a
      hello
      ok
      sure
      +```````````````````````````````` From c55225f7e665c2184e11322ae9405ef514d10efd Mon Sep 17 00:00:00 2001 From: Yuki Izumi Date: Tue, 29 Nov 2016 15:01:28 +1100 Subject: [PATCH 006/218] Strikethrough extension from c068469 reworked This is quite straightforward; we do take care in other extensions (i.e. autolink) to ensure tildes are left for the strikethrough extension to consume. --- Makefile | 8 +- extensions/CMakeLists.txt | 1 + extensions/core-extensions.c | 3 + extensions/strikethrough.c | 146 +++++++++++++++++++++++++++++++++++ extensions/strikethrough.h | 9 +++ extensions/table.c | 2 + test/CMakeLists.txt | 4 +- test/afl_test_cases/test.md | 2 + test/extensions.txt | 28 +++++++ 9 files changed, 197 insertions(+), 6 deletions(-) create mode 100644 extensions/strikethrough.c create mode 100644 extensions/strikethrough.h diff --git a/Makefile b/Makefile index 4150d0177..f47019629 100644 --- a/Makefile +++ b/Makefile @@ -82,7 +82,7 @@ afl: -o test/afl_results \ -x test/fuzzing_dictionary \ -t 100 \ - $(CMARK) -e table $(CMARK_OPTS) + $(CMARK) -e table -e strikethrough $(CMARK_OPTS) libFuzzer: @[ -n "$(LIB_FUZZER_PATH)" ] || { echo '$$LIB_FUZZER_PATH not set'; false; } @@ -164,9 +164,9 @@ $(ALLTESTS): $(SPEC) $(EXTENSIONS_SPEC) leakcheck: $(ALLTESTS) for format in html man xml latex commonmark; do \ for opts in "" "--smart"; do \ - echo "cmark -t $$format -e table $$opts" ; \ - valgrind -q --leak-check=full --dsymutil=yes --suppressions=suppressions --error-exitcode=1 $(PROG) -t $$format -e table $$opts $(ALLTESTS) >/dev/null || exit 1;\ - done; \ + echo "cmark -t $$format -e table -e strikethrough $$opts" ; \ + valgrind -q --leak-check=full --dsymutil=yes --suppressions=suppressions --error-exitcode=1 $(PROG) -t $$format -e table -e strikethrough $$opts $(ALLTESTS) >/dev/null || exit 1;\ + done; \ done; fuzztest: diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index ba99cdcf5..9e98aad64 100644 --- a/extensions/CMakeLists.txt +++ 
b/extensions/CMakeLists.txt @@ -3,6 +3,7 @@ set(STATICLIBRARY "libcmarkextensions_static") set(LIBRARY_SOURCES core-extensions.c table.c + strikethrough.c ext_scanners.c ext_scanners.re ext_scanners.h diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 009c69970..e9cefd690 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -1,7 +1,10 @@ #include "core-extensions.h" +#include "strikethrough.h" #include "table.h" int core_extensions_registration(cmark_plugin *plugin) { cmark_plugin_register_syntax_extension(plugin, create_table_extension()); + cmark_plugin_register_syntax_extension(plugin, + create_strikethrough_extension()); return 1; } diff --git a/extensions/strikethrough.c b/extensions/strikethrough.c new file mode 100644 index 000000000..086084df3 --- /dev/null +++ b/extensions/strikethrough.c @@ -0,0 +1,146 @@ +#include "strikethrough.h" +#include + +cmark_node_type CMARK_NODE_STRIKETHROUGH; + +static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser, + cmark_node *parent, unsigned char character, + cmark_inline_parser *inline_parser) { + cmark_node *res = NULL; + int left_flanking, right_flanking, punct_before, punct_after; + + if (character != '~') + return NULL; + + cmark_inline_parser_scan_delimiters(inline_parser, 100, '~', &left_flanking, + &right_flanking, &punct_before, + &punct_after); + + res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + cmark_node_set_literal(res, "~"); + + if (left_flanking || right_flanking) { + cmark_inline_parser_push_delimiter(inline_parser, character, left_flanking, + right_flanking, res); + } + + return res; +} + +static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser, + cmark_inline_parser *inline_parser, delimiter *opener, + delimiter *closer) { + cmark_node *strikethrough; + cmark_node *tmp, *next; + delimiter *delim, *tmp_delim; + delimiter *res = closer->next; + + strikethrough = opener->inl_text; + + if 
(!cmark_node_set_type(strikethrough, CMARK_NODE_STRIKETHROUGH)) + goto done; + + cmark_node_set_syntax_extension(strikethrough, self); + + cmark_node_set_string_content(strikethrough, "~"); + tmp = cmark_node_next(opener->inl_text); + + while (tmp) { + if (tmp == closer->inl_text) + break; + next = cmark_node_next(tmp); + cmark_node_append_child(strikethrough, tmp); + tmp = next; + } + + cmark_node_free(closer->inl_text); + + delim = closer; + while (delim != NULL && delim != opener) { + tmp_delim = delim->previous; + cmark_inline_parser_remove_delimiter(inline_parser, delim); + delim = tmp_delim; + } + + cmark_inline_parser_remove_delimiter(inline_parser, opener); + +done: + return res; +} + +static const char *get_type_string(cmark_syntax_extension *extension, + cmark_node *node) { + return node->type == CMARK_NODE_STRIKETHROUGH ? "strikethrough" : ""; +} + +static int can_contain(cmark_syntax_extension *extension, cmark_node *node, + cmark_node_type child_type) { + if (node->type != CMARK_NODE_STRIKETHROUGH) + return false; + + return CMARK_NODE_TYPE_INLINE_P(child_type); +} + +static void commonmark_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + renderer->out(renderer, cmark_node_get_string_content(node), false, LITERAL); +} + +static void latex_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + // requires \usepackage{ulem} + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + renderer->out(renderer, "\\sout{", false, LITERAL); + } else { + renderer->out(renderer, "}", false, LITERAL); + } +} + +static void man_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + renderer->cr(renderer); + renderer->out(renderer, ".ST \"", false, 
LITERAL); + } else { + renderer->out(renderer, "\"", false, LITERAL); + renderer->cr(renderer); + } +} + +static void html_render(cmark_syntax_extension *extension, + cmark_html_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + cmark_strbuf_puts(renderer->html, ""); + } else { + cmark_strbuf_puts(renderer->html, ""); + } +} + +cmark_syntax_extension *create_strikethrough_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("strikethrough"); + cmark_llist *special_chars = NULL; + + cmark_syntax_extension_set_get_type_string_func(ext, get_type_string); + cmark_syntax_extension_set_can_contain_func(ext, can_contain); + cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render); + cmark_syntax_extension_set_latex_render_func(ext, latex_render); + cmark_syntax_extension_set_man_render_func(ext, man_render); + cmark_syntax_extension_set_html_render_func(ext, html_render); + CMARK_NODE_STRIKETHROUGH = cmark_syntax_extension_add_node(1); + + cmark_syntax_extension_set_match_inline_func(ext, match); + cmark_syntax_extension_set_inline_from_delim_func(ext, insert); + + cmark_mem *mem = cmark_get_default_mem_allocator(); + special_chars = cmark_llist_append(mem, special_chars, (void *)'~'); + cmark_syntax_extension_set_special_inline_chars(ext, special_chars); + + return ext; +} diff --git a/extensions/strikethrough.h b/extensions/strikethrough.h new file mode 100644 index 000000000..1c43f5792 --- /dev/null +++ b/extensions/strikethrough.h @@ -0,0 +1,9 @@ +#ifndef STRIKETHROUGH_H +#define STRIKETHROUGH_H + +#include "core-extensions.h" + +extern cmark_node_type CMARK_NODE_STRIKETHROUGH; +cmark_syntax_extension *create_strikethrough_extension(void); + +#endif diff --git a/extensions/table.c b/extensions/table.c index 260b16da1..dc74cdf68 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -4,6 +4,7 @@ #include #include 
"ext_scanners.h" +#include "strikethrough.h" #include "table.h" static cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, @@ -442,6 +443,7 @@ static int can_contain(cmark_syntax_extension *extension, cmark_node *node, return child_type == CMARK_NODE_TEXT || child_type == CMARK_NODE_CODE || child_type == CMARK_NODE_EMPH || child_type == CMARK_NODE_STRONG || child_type == CMARK_NODE_LINK || child_type == CMARK_NODE_IMAGE || + child_type == CMARK_NODE_STRIKETHROUGH || child_type == CMARK_NODE_HTML_INLINE; } return false; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ad28135ae..13dbc321a 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -63,14 +63,14 @@ IF (PYTHONINTERP_FOUND) ) add_test(extensions_executable - ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table" + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table -e strikethrough" ) add_test(roundtrip_extensions_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" - "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table" + "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table -e strikethrough" ) add_test(regressiontest_executable diff --git a/test/afl_test_cases/test.md b/test/afl_test_cases/test.md index 4337853d3..cb71ac810 100644 --- a/test/afl_test_cases/test.md +++ b/test/afl_test_cases/test.md @@ -36,4 +36,6 @@ cb | --- | --- | | c | `d|` \| e | +google ~~yahoo~~ + [f]: /u "t" diff --git a/test/extensions.txt b/test/extensions.txt index c982d688d..36e03354c 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -358,3 +358,31 @@ This shouldn't assert. ok
      sure ```````````````````````````````` + + +## Strikethroughs + +A well-formed strikethrough. + +```````````````````````````````` example +A proper ~strikethrough~. +. +

      A proper strikethrough.

      +```````````````````````````````` + +Some strikethrough edge cases. + +```````````````````````````````` example +These are ~not strikethroughs. + +No, they are not~ + +This ~is ~ legit~ isn't ~ legit. + +This is just ~~~~~one~~~~~ huge strikethrough. +. +

      These are ~not strikethroughs.

      +

      No, they are not~

      +

      This is ~ legit isn't ~ legit.

      +

      This is just one huge strikethrough.

      +```````````````````````````````` From 920f728d5ece99e473e20a923d7af53f9097be7e Mon Sep 17 00:00:00 2001 From: Yuki Izumi Date: Tue, 29 Nov 2016 15:12:51 +1100 Subject: [PATCH 007/218] Autolink extension The autolinker is based on https://github.com/vmg/rinku with some additional changes and fixes. We do our best not to include punctuation, but to include matching parentheses within a link. --- Makefile | 6 +- extensions/CMakeLists.txt | 1 + extensions/autolink.c | 352 +++++++++++++++++++++++++++++++++++ extensions/autolink.h | 8 + extensions/core-extensions.c | 2 + test/CMakeLists.txt | 4 +- test/afl_test_cases/test.md | 2 + test/extensions.txt | 71 +++++++ 8 files changed, 441 insertions(+), 5 deletions(-) create mode 100644 extensions/autolink.c create mode 100644 extensions/autolink.h diff --git a/Makefile b/Makefile index f47019629..a649daf2d 100644 --- a/Makefile +++ b/Makefile @@ -82,7 +82,7 @@ afl: -o test/afl_results \ -x test/fuzzing_dictionary \ -t 100 \ - $(CMARK) -e table -e strikethrough $(CMARK_OPTS) + $(CMARK) -e table -e strikethrough -e autolink $(CMARK_OPTS) libFuzzer: @[ -n "$(LIB_FUZZER_PATH)" ] || { echo '$$LIB_FUZZER_PATH not set'; false; } @@ -164,8 +164,8 @@ $(ALLTESTS): $(SPEC) $(EXTENSIONS_SPEC) leakcheck: $(ALLTESTS) for format in html man xml latex commonmark; do \ for opts in "" "--smart"; do \ - echo "cmark -t $$format -e table -e strikethrough $$opts" ; \ - valgrind -q --leak-check=full --dsymutil=yes --suppressions=suppressions --error-exitcode=1 $(PROG) -t $$format -e table -e strikethrough $$opts $(ALLTESTS) >/dev/null || exit 1;\ + echo "cmark -t $$format -e table -e strikethrough -e autolink $$opts" ; \ + valgrind -q --leak-check=full --dsymutil=yes --suppressions=suppressions --error-exitcode=1 $(PROG) -t $$format -e table -e strikethrough -e autolink $$opts $(ALLTESTS) >/dev/null || exit 1;\ done; \ done; diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 9e98aad64..b79b04855 100644 --- 
a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -4,6 +4,7 @@ set(LIBRARY_SOURCES core-extensions.c table.c strikethrough.c + autolink.c ext_scanners.c ext_scanners.re ext_scanners.h diff --git a/extensions/autolink.c b/extensions/autolink.c new file mode 100644 index 000000000..148463d33 --- /dev/null +++ b/extensions/autolink.c @@ -0,0 +1,352 @@ +#include "autolink.h" +#include +#include + +#if defined(_WIN32) +#define strncasecmp _strnicmp +#else +#include +#endif + +static int sd_autolink_issafe(const uint8_t *link, size_t link_len) { + static const size_t valid_uris_count = 5; + static const char *valid_uris[] = {"/", "http://", "https://", "ftp://", + "mailto:"}; + + size_t i; + + for (i = 0; i < valid_uris_count; ++i) { + size_t len = strlen(valid_uris[i]); + + if (link_len > len && strncasecmp((char *)link, valid_uris[i], len) == 0 && + cmark_isalnum(link[len])) + return 1; + } + + return 0; +} + +static size_t autolink_delim(uint8_t *data, size_t link_end) { + uint8_t cclose, copen; + size_t i; + + for (i = 0; i < link_end; ++i) + if (data[i] == '<') { + link_end = i; + break; + } + + while (link_end > 0) { + cclose = data[link_end - 1]; + + switch (cclose) { + case '"': + copen = '"'; + break; + case '\'': + copen = '\''; + break; + case ')': + copen = '('; + break; + case ']': + copen = '['; + break; + case '}': + copen = '{'; + break; + default: + copen = 0; + } + + if (strchr("?!.,:*_~", data[link_end - 1]) != NULL) + link_end--; + + else if (data[link_end - 1] == ';') { + size_t new_end = link_end - 2; + + while (new_end > 0 && cmark_isalpha(data[new_end])) + new_end--; + + if (new_end < link_end - 2 && data[new_end] == '&') + link_end = new_end; + else + link_end--; + } else if (copen != 0) { + size_t closing = 0; + size_t opening = 0; + size_t i = 0; + + /* Try to close the final punctuation sign in this same line; + * if we managed to close it outside of the URL, that means that it's + * not part of the URL. 
If it closes inside the URL, that means it + * is part of the URL. + * + * Examples: + * + * foo http://www.pokemon.com/Pikachu_(Electric) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo (http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * (foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => foo http://www.pokemon.com/Pikachu_(Electric) + */ + + while (i < link_end) { + if (data[i] == copen) + opening++; + else if (data[i] == cclose) + closing++; + + i++; + } + + if (closing == opening) + break; + + link_end--; + } else + break; + } + + return link_end; +} + +static size_t check_domain(uint8_t *data, size_t size, int allow_short) { + size_t i, np = 0, uscore1 = 0, uscore2 = 0; + + for (i = 1; i < size - 1; i++) { + if (data[i] == '_') + uscore2++; + else if (data[i] == '.') { + uscore1 = uscore2; + uscore2 = 0; + np++; + } else if (!cmark_isalnum(data[i]) && data[i] != '-') + break; + } + + if (uscore1 > 0 || uscore2 > 0) + return 0; + + if (allow_short) { + /* We don't need a valid domain in the strict sense (with + * least one dot; so just make sure it's composed of valid + * domain characters and return the length of the the valid + * sequence. */ + return i; + } else { + /* a valid domain needs to have at least a dot. + * that's as far as we get */ + return np ? 
i : 0; + } +} + +static cmark_node *www_match(cmark_parser *parser, cmark_node *parent, + cmark_inline_parser *inline_parser) { + cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); + size_t max_rewind = cmark_inline_parser_get_offset(inline_parser); + uint8_t *data = chunk->data + max_rewind; + size_t size = chunk->len - max_rewind; + + size_t link_end; + + if (max_rewind > 0 && strchr("*_~([", data[-1]) == NULL && + !cmark_isspace(data[-1])) + return 0; + + if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0) + return 0; + + link_end = check_domain(data, size, 0); + + if (link_end == 0) + return NULL; + + while (link_end < size && !cmark_isspace(data[link_end])) + link_end++; + + link_end = autolink_delim(data, link_end); + + if (link_end == 0) + return NULL; + + cmark_inline_parser_set_offset(inline_parser, max_rewind + link_end); + + cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); + + cmark_strbuf buf; + cmark_strbuf_init(parser->mem, &buf, 10); + cmark_strbuf_puts(&buf, "http://"); + cmark_strbuf_put(&buf, data, link_end); + node->as.link.url = cmark_chunk_buf_detach(&buf); + + cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + text->as.literal = cmark_chunk_dup(chunk, max_rewind, link_end); + cmark_node_append_child(node, text); + + return node; +} + +static cmark_node *email_match(cmark_parser *parser, cmark_node *parent, + cmark_inline_parser *inline_parser) { + size_t link_end, rewind; + int nb = 0, np = 0, ns = 0; + + cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); + size_t max_rewind = cmark_inline_parser_get_offset(inline_parser); + uint8_t *data = chunk->data + max_rewind; + size_t size = chunk->len - max_rewind; + + for (rewind = 0; rewind < max_rewind; ++rewind) { + uint8_t c = data[-rewind - 1]; + + if (cmark_isalnum(c)) + continue; + + if (strchr(".+-_", c) != NULL) + continue; + + if (c == '/') + ns++; + + break; + } + + if (rewind == 0 || ns > 0) + return 
0; + + for (link_end = 0; link_end < size; ++link_end) { + uint8_t c = data[link_end]; + + if (cmark_isalnum(c)) + continue; + + if (c == '@') + nb++; + else if (c == '.' && link_end < size - 1) + np++; + else if (c != '-' && c != '_') + break; + } + + if (link_end < 2 || nb != 1 || np == 0 || + (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) + return 0; + + link_end = autolink_delim(data, link_end); + + if (link_end == 0) + return NULL; + + cmark_inline_parser_set_offset(inline_parser, max_rewind + link_end); + cmark_node_unput(parent, rewind); + + cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); + + cmark_strbuf buf; + cmark_strbuf_init(parser->mem, &buf, 10); + cmark_strbuf_puts(&buf, "mailto:"); + cmark_strbuf_put(&buf, data - rewind, link_end + rewind); + node->as.link.url = cmark_chunk_buf_detach(&buf); + + cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + text->as.literal = + cmark_chunk_dup(chunk, max_rewind - rewind, link_end + rewind); + cmark_node_append_child(node, text); + + return node; +} + +static cmark_node *url_match(cmark_parser *parser, cmark_node *parent, + cmark_inline_parser *inline_parser) { + size_t link_end, rewind = 0, domain_len; + + cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); + size_t max_rewind = cmark_inline_parser_get_offset(inline_parser); + uint8_t *data = chunk->data + max_rewind; + size_t size = chunk->len - max_rewind; + + if (size < 4 || data[1] != '/' || data[2] != '/') + return 0; + + while (rewind < max_rewind && cmark_isalpha(data[-rewind - 1])) + rewind++; + + if (!sd_autolink_issafe(data - rewind, size + rewind)) + return 0; + + link_end = strlen("://"); + + domain_len = check_domain(data + link_end, size - link_end, 1); + + if (domain_len == 0) + return 0; + + link_end += domain_len; + while (link_end < size && !cmark_isspace(data[link_end])) + link_end++; + + link_end = autolink_delim(data, link_end); + + if (link_end == 0) + 
return NULL; + + cmark_inline_parser_set_offset(inline_parser, max_rewind + link_end); + cmark_node_unput(parent, rewind); + + cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); + + cmark_chunk url = + cmark_chunk_dup(chunk, max_rewind - rewind, link_end + rewind); + node->as.link.url = url; + + cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + text->as.literal = url; + cmark_node_append_child(node, text); + + return node; +} + +static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser, + cmark_node *parent, unsigned char c, + cmark_inline_parser *inline_parser) { + if (cmark_inline_parser_in_bracket(inline_parser, false) || + cmark_inline_parser_in_bracket(inline_parser, true)) + return NULL; + + if (c == ':') + return url_match(parser, parent, inline_parser); + + if (c == '@') + return email_match(parser, parent, inline_parser); + + if (c == 'w') + return www_match(parser, parent, inline_parser); + + return NULL; + + // note that we could end up re-consuming something already a + // part of an inline, because we don't track when the last + // inline was finished in inlines.c. 
+} + +cmark_syntax_extension *create_autolink_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("autolink"); + cmark_llist *special_chars = NULL; + + cmark_syntax_extension_set_match_inline_func(ext, match); + + cmark_mem *mem = cmark_get_default_mem_allocator(); + special_chars = cmark_llist_append(mem, special_chars, (void *)':'); + special_chars = cmark_llist_append(mem, special_chars, (void *)'@'); + special_chars = cmark_llist_append(mem, special_chars, (void *)'w'); + cmark_syntax_extension_set_special_inline_chars(ext, special_chars); + + return ext; +} diff --git a/extensions/autolink.h b/extensions/autolink.h new file mode 100644 index 000000000..ee2ea2ffc --- /dev/null +++ b/extensions/autolink.h @@ -0,0 +1,8 @@ +#ifndef AUTOLINK_H +#define AUTOLINK_H + +#include "core-extensions.h" + +cmark_syntax_extension *create_autolink_extension(void); + +#endif diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index e9cefd690..aced82acf 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -1,4 +1,5 @@ #include "core-extensions.h" +#include "autolink.h" #include "strikethrough.h" #include "table.h" @@ -6,5 +7,6 @@ int core_extensions_registration(cmark_plugin *plugin) { cmark_plugin_register_syntax_extension(plugin, create_table_extension()); cmark_plugin_register_syntax_extension(plugin, create_strikethrough_extension()); + cmark_plugin_register_syntax_extension(plugin, create_autolink_extension()); return 1; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 13dbc321a..952866966 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -63,14 +63,14 @@ IF (PYTHONINTERP_FOUND) ) add_test(extensions_executable - ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table -e strikethrough" + ${PYTHON_EXECUTABLE} 
"${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table -e strikethrough -e autolink" ) add_test(roundtrip_extensions_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" - "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table -e strikethrough" + "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table -e strikethrough -e autolink" ) add_test(regressiontest_executable diff --git a/test/afl_test_cases/test.md b/test/afl_test_cases/test.md index cb71ac810..2d0a9cb6f 100644 --- a/test/afl_test_cases/test.md +++ b/test/afl_test_cases/test.md @@ -38,4 +38,6 @@ cb google ~~yahoo~~ +google.com http://google.com google@google.com + [f]: /u "t" diff --git a/test/extensions.txt b/test/extensions.txt index 36e03354c..c689785d9 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -386,3 +386,74 @@ This is just ~~~~~one~~~~~ huge strikethrough.

      This is ~ legit isn't ~ legit.

      This is just one huge strikethrough.

      ```````````````````````````````` + + +## Autolinks + +```````````````````````````````` example +: http://google.com https://google.com + + http://google.com/å + +scyther@pokemon.com + +www.github.com www.github.com/á + +www.google.com/a_b + +**Autolink and http://inlines** + +![http://inline.com/image](http://inline.com/image) + +a.w@b.c + +Full stop outside parens shouldn't be included http://google.com/ok. + +(Full stop inside parens shouldn't be included http://google.com/ok.) +. +

      : http://google.com https://google.com

      +

      http://google.com/å http://google.com/å

      +

      scyther@pokemon.com

      +

      www.github.com www.github.com/á

      +

      www.google.com/a_b

      +

      Autolink and http://inlines

      +

      http://inline.com/image

      +

      a.w@b.c

      +

      Full stop outside parens shouldn't be included http://google.com/ok.

      +

      (Full stop inside parens shouldn't be included http://google.com/ok.)

      +```````````````````````````````` + + +## Interop + +Autolink and strikethrough. + +```````````````````````````````` example +~~www.google.com~~ + +~~http://google.com~~ +. +

      www.google.com

      +

      http://google.com

      +```````````````````````````````` + +Autolink and tables. + +```````````````````````````````` example +| a | b | +| --- | --- | +| https://github.com www.github.com | http://pokemon.com | +. + + + + + + + + + + + +
      a b
      https://github.com www.github.com http://pokemon.com
      +```````````````````````````````` From 1eab596e970b60ceadd0d9e0367183ea71cbfeb9 Mon Sep 17 00:00:00 2001 From: Yuki Izumi Date: Tue, 29 Nov 2016 15:13:02 +1100 Subject: [PATCH 008/218] Tagfilter extension When we encounter a tag that causes an HTML 5 parser's content model flag [1] to be changed to RCDATA, CDATA or RAWTEXT [2] [3], we escape the tag by replacing its opening "<" with "&lt;". This causes the tag to appear verbatim in the page it's placed on. We do this to prevent users breaking the page content, where the parser would not interpret further tags as inserted by cmark as HTML until a matching close tag was hit. (Such a closing tag could exist if a user entered it themselves, but it'd cause all cmark-generated markup in between to be rendered raw, and is unlikely to be desirable behaviour.) [1] https://www.w3.org/TR/2009/WD-html5-20090423/syntax.html#tokenization [2] https://www.w3.org/TR/2009/WD-html5-20090212/serializing-html-fragments.html#parsing-html-fragments [3] https://github.com/google/gumbo-parser/blob/aa91b27b02c0c80c482e24348a457ed7c3c088e0/src/parser.c#L4023-L4053 --- Makefile | 6 ++-- extensions/CMakeLists.txt | 1 + extensions/core-extensions.c | 2 ++ extensions/tagfilter.c | 59 ++++++++++++++++++++++++++++++++++++ extensions/tagfilter.h | 8 +++++ test/CMakeLists.txt | 4 +-- test/afl_test_cases/test.md | 6 ++++ test/extensions.txt | 44 +++++++++++++++++++++++++++ 8 files changed, 125 insertions(+), 5 deletions(-) create mode 100644 extensions/tagfilter.c create mode 100644 extensions/tagfilter.h diff --git a/Makefile b/Makefile index a649daf2d..cc0362201 100644 --- a/Makefile +++ b/Makefile @@ -82,7 +82,7 @@ afl: -o test/afl_results \ -x test/fuzzing_dictionary \ -t 100 \ - $(CMARK) -e table -e strikethrough -e autolink $(CMARK_OPTS) + $(CMARK) -e table -e strikethrough -e autolink -e tagfilter $(CMARK_OPTS) libFuzzer: @[ -n "$(LIB_FUZZER_PATH)" ] || { echo '$$LIB_FUZZER_PATH not set'; false; } @@ -164,8 +164,8 @@ $(ALLTESTS): $(SPEC) 
$(EXTENSIONS_SPEC) leakcheck: $(ALLTESTS) for format in html man xml latex commonmark; do \ for opts in "" "--smart"; do \ - echo "cmark -t $$format -e table -e strikethrough -e autolink $$opts" ; \ - valgrind -q --leak-check=full --dsymutil=yes --suppressions=suppressions --error-exitcode=1 $(PROG) -t $$format -e table -e strikethrough -e autolink $$opts $(ALLTESTS) >/dev/null || exit 1;\ + echo "cmark -t $$format -e table -e strikethrough -e autolink -e tagfilter $$opts" ; \ + valgrind -q --leak-check=full --dsymutil=yes --suppressions=suppressions --error-exitcode=1 $(PROG) -t $$format -e table -e strikethrough -e autolink -e tagfilter $$opts $(ALLTESTS) >/dev/null || exit 1;\ done; \ done; diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index b79b04855..8d12a530c 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -5,6 +5,7 @@ set(LIBRARY_SOURCES table.c strikethrough.c autolink.c + tagfilter.c ext_scanners.c ext_scanners.re ext_scanners.h diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index aced82acf..49bd8d42e 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -2,11 +2,13 @@ #include "autolink.h" #include "strikethrough.h" #include "table.h" +#include "tagfilter.h" int core_extensions_registration(cmark_plugin *plugin) { cmark_plugin_register_syntax_extension(plugin, create_table_extension()); cmark_plugin_register_syntax_extension(plugin, create_strikethrough_extension()); cmark_plugin_register_syntax_extension(plugin, create_autolink_extension()); + cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension()); return 1; } diff --git a/extensions/tagfilter.c b/extensions/tagfilter.c new file mode 100644 index 000000000..80cbd6b9b --- /dev/null +++ b/extensions/tagfilter.c @@ -0,0 +1,59 @@ +#include "tagfilter.h" +#include + +static const char *blacklist[] = { + "title", "textarea", "style", "xmp", "iframe", + "noembed", "noframes", "script", 
"plaintext", NULL, +}; + +static int is_tag(const unsigned char *tag_data, size_t tag_size, + const char *tagname) { + size_t i; + + if (tag_size < 3 || tag_data[0] != '<') + return 0; + + i = 1; + + if (tag_data[i] == '/') { + i++; + } + + for (; i < tag_size; ++i, ++tagname) { + if (*tagname == 0) + break; + + if (tag_data[i] != *tagname) + return 0; + } + + if (i == tag_size) + return 0; + + if (cmark_isspace(tag_data[i]) || tag_data[i] == '>') + return 1; + + if (tag_data[i] == '/' && tag_size >= i + 2 && tag_data[i + 1] == '>') + return 1; + + return 0; +} + +static int filter(cmark_syntax_extension *ext, const unsigned char *tag, + size_t tag_len) { + const char **it; + + for (it = blacklist; *it; ++it) { + if (is_tag(tag, tag_len, *it)) { + return 0; + } + } + + return 1; +} + +cmark_syntax_extension *create_tagfilter_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("tagfilter"); + cmark_syntax_extension_set_html_filter_func(ext, filter); + return ext; +} diff --git a/extensions/tagfilter.h b/extensions/tagfilter.h new file mode 100644 index 000000000..4068b5071 --- /dev/null +++ b/extensions/tagfilter.h @@ -0,0 +1,8 @@ +#ifndef TAGFILTER_H +#define TAGFILTER_H + +#include "core-extensions.h" + +cmark_syntax_extension *create_tagfilter_extension(void); + +#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 952866966..76f936ab7 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -63,14 +63,14 @@ IF (PYTHONINTERP_FOUND) ) add_test(extensions_executable - ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table -e strikethrough -e autolink" + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table -e strikethrough -e autolink -e 
tagfilter" ) add_test(roundtrip_extensions_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" - "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table -e strikethrough -e autolink" + "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table -e strikethrough -e autolink -e tagfilter" ) add_test(regressiontest_executable diff --git a/test/afl_test_cases/test.md b/test/afl_test_cases/test.md index 2d0a9cb6f..4156a2aef 100644 --- a/test/afl_test_cases/test.md +++ b/test/afl_test_cases/test.md @@ -40,4 +40,10 @@ google ~~yahoo~~ google.com http://google.com google@google.com +and but + +<surewhynot> +sure +</surewhynot> + [f]: /u "t" diff --git a/test/extensions.txt b/test/extensions.txt index c689785d9..c1ef3acfb 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -424,6 +424,50 @@ Full stop outside parens shouldn't be included http://google.com/ok. ```````````````````````````````` +## HTML tag filter + + +```````````````````````````````` example +This is <xmp> not okay, but **this** <strong>is</strong>. + +<p>This is <xmp> not okay, but **this** <strong>is</strong>.</p> + +Nope, I won't have <textarea>. + +<p>No <textarea> here either.</p> + +<p>This <random /> <thing> is okay</thing> though.</p> + +Yep, <totally>okay</totally>. + +<!-- HTML comments are okay, though. --> +<!- But we're strict. -> +<! No nonsense. > +<!-- Leave multiline comments the heck alone, though, okay? +Even with {"x":"y"} or 1 > 2 or whatever. Even **markdown**. +--> +<!--- Support everything CommonMark's parser does. --> +<!----> +<!--thistoo--> +. 
+<p>This is &lt;xmp> not okay, but <strong>this</strong> <strong>is</strong>.</p> +<p>This is &lt;xmp> not okay, but **this** <strong>is</strong>.</p> +<p>Nope, I won't have &lt;textarea>.</p> +<p>No &lt;textarea> here either.</p> +<p>This <random /> <thing> is okay</thing> though.</p> +<p>Yep, <totally>okay</totally>.</p> +<!-- HTML comments are okay, though. --> +<p>&lt;!- But we're strict. -&gt; +&lt;! No nonsense. &gt;</p> +<!-- Leave multiline comments the heck alone, though, okay? +Even with {"x":"y"} or 1 > 2 or whatever. Even **markdown**. +--> +<!--- Support everything CommonMark's parser does. --> +<!----> +<!--thistoo--> +```````````````````````````````` + + ## Interop Autolink and strikethrough. From 3e5f8a459399959566a34e37eb519f627aa568ad Mon Sep 17 00:00:00 2001 From: Vicent Marti <tanoku@gmail.com> Date: Mon, 28 Nov 2016 11:01:20 +0100 Subject: [PATCH 009/218] Reduce maximum amount of backticks --- src/inlines.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inlines.c b/src/inlines.c index da6a7ef9f..bfb291738 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -31,7 +31,7 @@ static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; #define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH) #define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG) -#define MAXBACKTICKS 1000 +#define MAXBACKTICKS 80 typedef struct bracket { struct bracket *previous; From c36f8484a5f07e273431e40ecfd7b5e160a62946 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Fri, 9 Dec 2016 14:54:51 +1100 Subject: [PATCH 010/218] Get a clean build on MSVC (#5) --- api_test/main.c | 2 +- extensions/CMakeLists.txt | 2 +- extensions/autolink.c | 33 ++++++++++++++++++--------------- extensions/table.c | 22 +++++++++++----------- src/CMakeLists.txt | 2 +- src/arena.c | 2 +- src/blocks.c | 10 +++++----- src/buffer.c | 4 ++-- src/cmark.c | 6 +++++- src/commonmark.c | 6 +++--- src/html.c | 4 ++-- src/inlines.c | 14 +++++++------- src/node.c | 16 
++++++++-------- src/render.c | 6 +++--- 14 files changed, 68 insertions(+), 61 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index a95abc314..1fd851165 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -479,7 +479,7 @@ static void test_content(test_batch_runner *runner, cmark_node_type type, int expected = 0; if (allowed_content) for (unsigned int *p = allowed_content; *p; ++p) - expected |= *p == child_type; + expected |= *p == (unsigned int)child_type; INT_EQ(runner, got, expected, "add %d as child of %d", child_type, type); diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 8d12a530c..50d0f2338 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -68,7 +68,7 @@ if(MSVC) else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4") endif() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4706 /D_CRT_SECURE_NO_WARNINGS") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /WX /wd4706 /wd4204 /wd4221 /wd4100 /D_CRT_SECURE_NO_WARNINGS") elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-unused-parameter -std=c99 -pedantic") endif() diff --git a/extensions/autolink.c b/extensions/autolink.c index 148463d33..f40fc8c02 100644 --- a/extensions/autolink.c +++ b/extensions/autolink.c @@ -75,7 +75,7 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) { } else if (copen != 0) { size_t closing = 0; size_t opening = 0; - size_t i = 0; + i = 0; /* Try to close the final punctuation sign in this same line; * if we managed to close it outside of the URL, that means that it's @@ -176,18 +176,19 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent, if (link_end == 0) return NULL; - cmark_inline_parser_set_offset(inline_parser, max_rewind + link_end); + cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end)); cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); cmark_strbuf buf; 
cmark_strbuf_init(parser->mem, &buf, 10); cmark_strbuf_puts(&buf, "http://"); - cmark_strbuf_put(&buf, data, link_end); + cmark_strbuf_put(&buf, data, (bufsize_t)link_end); node->as.link.url = cmark_chunk_buf_detach(&buf); cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); - text->as.literal = cmark_chunk_dup(chunk, max_rewind, link_end); + text->as.literal = + cmark_chunk_dup(chunk, (bufsize_t)max_rewind, (bufsize_t)link_end); cmark_node_append_child(node, text); return node; @@ -195,11 +196,12 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent, static cmark_node *email_match(cmark_parser *parser, cmark_node *parent, cmark_inline_parser *inline_parser) { - size_t link_end, rewind; + size_t link_end; + int rewind; int nb = 0, np = 0, ns = 0; cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); - size_t max_rewind = cmark_inline_parser_get_offset(inline_parser); + int max_rewind = cmark_inline_parser_get_offset(inline_parser); uint8_t *data = chunk->data + max_rewind; size_t size = chunk->len - max_rewind; @@ -244,7 +246,7 @@ static cmark_node *email_match(cmark_parser *parser, cmark_node *parent, if (link_end == 0) return NULL; - cmark_inline_parser_set_offset(inline_parser, max_rewind + link_end); + cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end)); cmark_node_unput(parent, rewind); cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); @@ -252,12 +254,12 @@ static cmark_node *email_match(cmark_parser *parser, cmark_node *parent, cmark_strbuf buf; cmark_strbuf_init(parser->mem, &buf, 10); cmark_strbuf_puts(&buf, "mailto:"); - cmark_strbuf_put(&buf, data - rewind, link_end + rewind); + cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind)); node->as.link.url = cmark_chunk_buf_detach(&buf); cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); - text->as.literal = - cmark_chunk_dup(chunk, max_rewind - rewind, link_end + rewind); 
+ text->as.literal = cmark_chunk_dup(chunk, max_rewind - rewind, + (bufsize_t)(link_end + rewind)); cmark_node_append_child(node, text); return node; @@ -265,10 +267,11 @@ static cmark_node *email_match(cmark_parser *parser, cmark_node *parent, static cmark_node *url_match(cmark_parser *parser, cmark_node *parent, cmark_inline_parser *inline_parser) { - size_t link_end, rewind = 0, domain_len; + size_t link_end, domain_len; + int rewind = 0; cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); - size_t max_rewind = cmark_inline_parser_get_offset(inline_parser); + int max_rewind = cmark_inline_parser_get_offset(inline_parser); uint8_t *data = chunk->data + max_rewind; size_t size = chunk->len - max_rewind; @@ -297,13 +300,13 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent, if (link_end == 0) return NULL; - cmark_inline_parser_set_offset(inline_parser, max_rewind + link_end); + cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end)); cmark_node_unput(parent, rewind); cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); - cmark_chunk url = - cmark_chunk_dup(chunk, max_rewind - rewind, link_end + rewind); + cmark_chunk url = cmark_chunk_dup(chunk, max_rewind - rewind, + (bufsize_t)(link_end + rewind)); node->as.link.url = url; cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); diff --git a/extensions/table.c b/extensions/table.c index dc74cdf68..cd75bbd9b 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -30,18 +30,18 @@ typedef enum { typedef struct { bool is_header; } node_table_row; static void free_node_table(cmark_mem *mem, void *ptr) { - node_table *t = ptr; + node_table *t = (node_table *)ptr; mem->free(t->alignments); mem->free(t); } static void free_node_table_row(cmark_mem *mem, void *ptr) { mem->free(ptr); } -static uint16_t get_n_table_columns(cmark_node *node) { +static int get_n_table_columns(cmark_node *node) { if (!node || node->type != 
CMARK_NODE_TABLE) return -1; - return ((node_table *)node->user_data)->n_columns; + return (int)((node_table *)node->user_data)->n_columns; } static int set_n_table_columns(cmark_node *node, uint16_t n_columns) { @@ -71,7 +71,7 @@ static int is_table_header(cmark_node *node, int is_table_header) { if (!node || node->type != CMARK_NODE_TABLE_ROW) return 0; - ((node_table_row *)node->user_data)->is_header = is_table_header; + ((node_table_row *)node->user_data)->is_header = (is_table_header != 0); return 1; } @@ -159,7 +159,7 @@ static cmark_node *consume_until_pipe_or_eol(cmark_syntax_extension *self, (*n)->as.literal.len - *offset); cmark_node_own(child); } else { - int len = pipe - cstr - *offset; + int len = (int)(pipe - cstr - *offset); child->as.literal = cmark_chunk_dup(&(*n)->as.literal, *offset, len); cmark_node_own(child); *offset += len + 1; @@ -245,7 +245,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, parent_string = cmark_node_get_string_content(parent_container); header_row = row_from_string(self, parser, (unsigned char *)parent_string, - strlen(parent_string)); + (int)strlen(parent_string)); if (!header_row) { goto done; @@ -274,10 +274,10 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, set_n_table_columns(parent_container, header_row->n_columns); uint8_t *alignments = - parser->mem->calloc(header_row->n_columns, sizeof(uint8_t)); + (uint8_t *)parser->mem->calloc(header_row->n_columns, sizeof(uint8_t)); cmark_llist *it = marker_row->cells; for (i = 0; it; it = it->next, ++i) { - cmark_node *node = it->data; + cmark_node *node = (cmark_node *)it->data; assert(node->type == CMARK_NODE_TABLE_CELL); cmark_strbuf strbuf; @@ -315,7 +315,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_llist *tmp, *next; for (tmp = header_row->cells; tmp; tmp = next) { - cmark_node *header_cell = tmp->data; + cmark_node *header_cell = (cmark_node *)tmp->data; 
cmark_node_append_child(table_header, header_cell); next = header_row->cells = tmp->next; parser->mem->free(tmp); @@ -324,7 +324,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_parser_advance_offset( parser, (char *)input, - strlen((char *)input) - 1 - cmark_parser_get_offset(parser), false); + (int)strlen((char *)input) - 1 - cmark_parser_get_offset(parser), false); done: free_table_row(parser->mem, header_row); free_table_row(parser->mem, marker_row); @@ -362,7 +362,7 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, int table_columns = get_n_table_columns(parent_container); for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = next, ++i) { - cmark_node *cell = tmp->data; + cmark_node *cell = (cmark_node *)tmp->data; assert(cell->type == CMARK_NODE_TABLE_CELL); cmark_node_append_child(table_row_block, cell); row->cells = next = tmp->next; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6f7539553..4725c0e53 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -199,7 +199,7 @@ if(MSVC) else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4") endif() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4706 /D_CRT_SECURE_NO_WARNINGS") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /WX /wd4706 /wd4204 /wd4221 /wd4100 /D_CRT_SECURE_NO_WARNINGS") elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-unused-parameter -std=c99 -pedantic") endif() diff --git a/src/arena.c b/src/arena.c index 1a30a97c0..0f31cf333 100644 --- a/src/arena.c +++ b/src/arena.c @@ -10,7 +10,7 @@ static struct arena_chunk { } *A = NULL; static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) { - struct arena_chunk *c = calloc(1, sizeof(*c)); + struct arena_chunk *c = (struct arena_chunk *)calloc(1, sizeof(*c)); if (!c) abort(); c->sz = sz; diff --git a/src/blocks.c b/src/blocks.c index 1efda3d71..f809882c9 100644 --- a/src/blocks.c +++ 
b/src/blocks.c @@ -181,7 +181,7 @@ static CMARK_INLINE bool accepts_lines(cmark_node_type block_type) { static CMARK_INLINE bool contains_inlines(cmark_node *node) { if (node->extension && node->extension->contains_inlines_func) { - return node->extension->contains_inlines_func(node->extension, node); + return node->extension->contains_inlines_func(node->extension, node) != 0; } return (node->type == CMARK_NODE_PARAGRAPH || @@ -392,7 +392,7 @@ void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add) cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data; cmark_llist *tmp_char; for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) { - unsigned char c = (unsigned char) (unsigned long) tmp_char->data; + unsigned char c = (unsigned char)(size_t)tmp_char->data; if (add) cmark_inlines_add_special_character(c); else @@ -607,7 +607,7 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, process = true; } - chunk_len = (eol - buffer); + chunk_len = (bufsize_t)(eol - buffer); if (process) { if (parser->linebuf.size > 0) { cmark_strbuf_put(&parser->linebuf, buffer, chunk_len); @@ -987,7 +987,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, parser->first_nonspace + 1); (*container)->as.code.fenced = true; (*container)->as.code.fence_char = peek_at(input, parser->first_nonspace); - (*container)->as.code.fence_length = (matched > 255) ? 255 : matched; + (*container)->as.code.fence_length = (matched > 255) ? 
255 : (uint8_t)matched; (*container)->as.code.fence_offset = (int8_t)(parser->first_nonspace - parser->offset); (*container)->as.code.info = cmark_chunk_literal(""); @@ -1383,5 +1383,5 @@ void cmark_parser_advance_offset(cmark_parser *parser, int columns) { cmark_chunk input_chunk = cmark_chunk_literal(input); - S_advance_offset(parser, &input_chunk, count, columns); + S_advance_offset(parser, &input_chunk, count, columns != 0); } diff --git a/src/buffer.c b/src/buffer.c index a237b11b3..75733d402 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -96,7 +96,7 @@ void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { cmark_strbuf_set(buf, (const unsigned char *)string, - string ? strlen(string) : 0); + string ? (bufsize_t)strlen(string) : 0); } void cmark_strbuf_putc(cmark_strbuf *buf, int c) { @@ -117,7 +117,7 @@ void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, } void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { - cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string)); + cmark_strbuf_put(buf, (const unsigned char *)string, (bufsize_t)strlen(string)); } void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, diff --git a/src/cmark.c b/src/cmark.c index 5758da90c..dd013d2ce 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -32,7 +32,11 @@ static void *xrealloc(void *ptr, size_t size) { return new_ptr; } -cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free}; +static void xfree(void *ptr) { + free(ptr); +} + +cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, xfree}; cmark_mem *cmark_get_default_mem_allocator() { return &CMARK_DEFAULT_MEM_ALLOCATOR; diff --git a/src/commonmark.c b/src/commonmark.c index 4d5a5c01f..c73ea680c 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -43,13 +43,13 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, (renderer->begin_content && (c == '.' 
|| c == ')') && follows_digit && (nextc == 0 || cmark_isspace(nextc))))) || (escape == URL && - (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' || + (c == '`' || c == '<' || c == '>' || cmark_isspace((char)c) || c == '\\' || c == ')' || c == '(')) || (escape == TITLE && (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\'))); if (needs_escaping) { - if (cmark_isspace(c)) { + if (cmark_isspace((char)c)) { // use percent encoding for spaces snprintf(encoded, ENCODED_SIZE, "%%%2x", c); cmark_strbuf_puts(renderer->buffer, encoded); @@ -239,7 +239,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number, list_delim == CMARK_PAREN_DELIM ? ")" : ".", list_number < 10 ? " " : " "); - marker_width = strlen(listmarker); + marker_width = (bufsize_t)strlen(listmarker); } if (entering) { if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { diff --git a/src/html.c b/src/html.c index f6525f73c..951f33ecc 100644 --- a/src/html.c +++ b/src/html.c @@ -30,7 +30,7 @@ static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size break; if (match != data) { - cmark_strbuf_put(html, data, match - data); + cmark_strbuf_put(html, data, (bufsize_t)(match - data)); len -= (match - data); data = match; } @@ -55,7 +55,7 @@ static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size } if (len) - cmark_strbuf_put(html, data, len); + cmark_strbuf_put(html, data, (bufsize_t)len); } static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, diff --git a/src/inlines.c b/src/inlines.c index bfb291738..d2fe8383a 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -72,7 +72,7 @@ static CMARK_INLINE cmark_node *make_literal(cmark_mem *mem, cmark_node_type t, cmark_chunk s) { cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e)); cmark_strbuf_init(mem, &e->content, 0); - e->type = t; + e->type = (uint16_t)t; e->as.literal = s; return 
e; } @@ -81,7 +81,7 @@ static CMARK_INLINE cmark_node *make_literal(cmark_mem *mem, cmark_node_type t, static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) { cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e)); cmark_strbuf_init(mem, &e->content, 0); - e->type = t; + e->type = (uint16_t)t; return e; } @@ -508,7 +508,7 @@ static cmark_syntax_extension *get_extension_for_special_char(cmark_parser *pars cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data; cmark_llist *tmp_char; for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) { - unsigned char tmp_c = (unsigned char) (unsigned long) tmp_char->data; + unsigned char tmp_c = (unsigned char)(size_t)tmp_char->data; if (tmp_c == c) { return ext; @@ -1340,7 +1340,7 @@ void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser, int can_open, int can_close, cmark_node *inl_text) { - push_delimiter(parser, c, can_open, can_close, inl_text); + push_delimiter(parser, c, can_open != 0, can_close != 0, inl_text); } void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim) { @@ -1389,8 +1389,8 @@ int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, *punct_before = cmark_utf8proc_is_punctuation(before_char); *punct_after = cmark_utf8proc_is_punctuation(after_char); - space_before = cmark_utf8proc_is_space(before_char); - space_after = cmark_utf8proc_is_space(after_char); + space_before = cmark_utf8proc_is_space(before_char) != 0; + space_after = cmark_utf8proc_is_space(after_char) != 0; *left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) && !(*punct_after && !space_before && !*punct_before); @@ -1418,7 +1418,7 @@ cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser) { int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image) { for (bracket *b = parser->last_bracket; b; b = b->previous) - if (b->active && b->image == image) + if (b->active && 
b->image == (image != 0)) return 1; return 0; } diff --git a/src/node.c b/src/node.c index 266cf2509..0098b026b 100644 --- a/src/node.c +++ b/src/node.c @@ -15,7 +15,7 @@ bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type) { } if (node->extension && node->extension->can_contain_func) { - return node->extension->can_contain_func(node->extension, node, child_type); + return node->extension->can_contain_func(node->extension, node, child_type) != 0; } switch (node->type) { @@ -168,18 +168,18 @@ int cmark_node_set_type(cmark_node * node, cmark_node_type type) { return 1; initial_type = (cmark_node_type) node->type; - node->type = type; + node->type = (uint16_t)type; if (!S_can_contain(node->parent, node)) { - node->type = initial_type; + node->type = (uint16_t)initial_type; return 0; } /* We rollback the type to free the union members appropriately */ - node->type = initial_type; + node->type = (uint16_t)initial_type; free_node_as(node); - node->type = type; + node->type = (uint16_t)type; return 1; } @@ -549,9 +549,9 @@ int cmark_node_set_fenced(cmark_node * node, int fenced, } if (node->type == CMARK_NODE_CODE_BLOCK) { - node->as.code.fenced = fenced; - node->as.code.fence_length = length; - node->as.code.fence_offset = offset; + node->as.code.fenced = (int8_t)fenced; + node->as.code.fence_length = (uint8_t)length; + node->as.code.fence_offset = (uint8_t)offset; node->as.code.fence_char = character; return 1; } else { diff --git a/src/render.c b/src/render.c index b7eabcb53..3190fd20d 100644 --- a/src/render.c +++ b/src/render.c @@ -20,7 +20,7 @@ static CMARK_INLINE void S_blankline(cmark_renderer *renderer) { static void S_out(cmark_renderer *renderer, const char *source, bool wrap, cmark_escaping escape) { - int length = strlen(source); + int length = (int)strlen(source); unsigned char nextc; int32_t c; int i = 0; @@ -95,12 +95,12 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap, // we need to escape a potential 
list marker after // a digit: renderer->begin_content = - renderer->begin_content && cmark_isdigit(c) == 1; + renderer->begin_content && cmark_isdigit((char)c) == 1; } else { (renderer->outc)(renderer, escape, c, nextc); renderer->begin_line = false; renderer->begin_content = - renderer->begin_content && cmark_isdigit(c) == 1; + renderer->begin_content && cmark_isdigit((char)c) == 1; } // If adding the character went beyond width, look for an From b9df6b34bbc226f8e1425f6b2f07143fd1b7e1cb Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Tue, 13 Dec 2016 12:55:43 +1100 Subject: [PATCH 011/218] table: trim cells, fix escaping, cleanup (#4) * Include table alignment when rendering LaTeX * Include table alignment when rendering man (preserving default centre alignment here) * Trim table cell interiors * Expand test cases * Fix escaping behaviour * Do not use enum for alignment * Do not collide against stdlib `ispunct` * Cleanup pipe code * Don't reparse matched rows --- extensions/table.c | 242 +++++++++++++++++++++++--------------- src/blocks.c | 5 + src/chunk.h | 16 +++ src/cmark_extension_api.h | 8 ++ src/inlines.c | 8 +- src/parser.h | 1 + test/extensions.txt | 165 ++++++++++++++------------ 7 files changed, 272 insertions(+), 173 deletions(-) diff --git a/extensions/table.c b/extensions/table.c index cd75bbd9b..35c42d57a 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -18,20 +18,28 @@ typedef struct { typedef struct { uint16_t n_columns; uint8_t *alignments; + table_row *last_matched_row; } node_table; -typedef enum { - ALIGN_NONE, - ALIGN_LEFT, - ALIGN_CENTER, - ALIGN_RIGHT -} table_column_alignment; - typedef struct { bool is_header; } node_table_row; +static void free_table_cell(cmark_mem *mem, void *data) { + cmark_node_free((cmark_node *)data); +} + +static void free_table_row(cmark_mem *mem, table_row *row) { + if (!row) + return; + + cmark_llist_free_full(mem, row->cells, (cmark_free_func)free_table_cell); + + mem->free(row); 
+} + static void free_node_table(cmark_mem *mem, void *ptr) { node_table *t = (node_table *)ptr; mem->free(t->alignments); + free_table_row(mem, t->last_matched_row); mem->free(t); } @@ -75,50 +83,23 @@ static int is_table_header(cmark_node *node, int is_table_header) { return 1; } -static void free_table_cell(cmark_mem *mem, void *data) { - cmark_node_free((cmark_node *)data); -} - -static void free_table_row(cmark_mem *mem, table_row *row) { - if (!row) - return; - - cmark_llist_free_full(mem, row->cells, (cmark_free_func)free_table_cell); - - mem->free(row); -} - -static void reescape_pipes(cmark_strbuf *strbuf, cmark_mem *mem, - unsigned char *string, bufsize_t len) { - bufsize_t r; - - cmark_strbuf_init(mem, strbuf, len * 2); - for (r = 0; r < len; ++r) { - if (string[r] == '\\' && r + 1 < len && - (string[r + 1] == '|' || string[r + 1] == '\\')) - cmark_strbuf_putc(strbuf, '\\'); - - cmark_strbuf_putc(strbuf, string[r]); - } -} - static void maybe_consume_pipe(cmark_node **n, int *offset) { if (*n && (*n)->type == CMARK_NODE_TEXT && *offset < (*n)->as.literal.len && (*n)->as.literal.data[*offset] == '|') - ++(*offset); + ++*offset; } -static const char *find_unescaped_pipe(const char *cstr, size_t len) { +static int find_unescaped_pipe(const cmark_chunk *chunk, int offset) { bool escaping = false; - for (; len; --len, ++cstr) { + for (; offset < chunk->len; ++offset) { if (escaping) escaping = false; - else if (*cstr == '\\') + else if (chunk->data[offset] == '\\') escaping = true; - else if (*cstr == '|') - return cstr; + else if (chunk->data[offset] == '|') + return offset; } - return NULL; + return -1; } static cmark_node *consume_until_pipe_or_eol(cmark_syntax_extension *self, @@ -130,52 +111,67 @@ static cmark_node *consume_until_pipe_or_eol(cmark_syntax_extension *self, bool was_escape = false; while (*n) { - if ((*n)->type == CMARK_NODE_TEXT) { + cmark_node *node = *n; + + if (node->type == CMARK_NODE_TEXT) { cmark_node *child = cmark_parser_add_child( 
parser, result, CMARK_NODE_TEXT, cmark_parser_get_offset(parser)); - const char *cstr = cmark_chunk_to_cstr(parser->mem, &(*n)->as.literal); - if (was_escape) { - child->as.literal = cmark_chunk_dup(&(*n)->as.literal, *offset, 1); + child->as.literal = cmark_chunk_dup(&node->as.literal, *offset, 1); cmark_node_own(child); + if (child->as.literal.data[0] == '|') + cmark_node_free(child->prev); ++*offset; + if (*offset >= node->as.literal.len) { + *offset = 0; + *n = node->next; + } was_escape = false; continue; } - if (strcmp(cstr + *offset, "\\") == 0 && (*n)->next && - (*n)->next->type == CMARK_NODE_TEXT) { + const char *lit = (char *)node->as.literal.data + *offset; + const int lit_len = node->as.literal.len - *offset; + + if (lit_len == 1 && lit[0] == '\\' && + node->next && + node->next->type == CMARK_NODE_TEXT) { + child->as.literal = cmark_chunk_dup(&node->as.literal, *offset, 1); + cmark_node_own(child); was_escape = true; - *n = (*n)->next; + *n = node->next; continue; } - const char *pipe = - find_unescaped_pipe(cstr + *offset, (*n)->as.literal.len - *offset); - - if (!pipe) { - child->as.literal = cmark_chunk_dup(&(*n)->as.literal, *offset, - (*n)->as.literal.len - *offset); + int pipe = find_unescaped_pipe(&node->as.literal, *offset); + if (pipe == -1) { + child->as.literal = cmark_chunk_dup(&node->as.literal, *offset, + node->as.literal.len - *offset); cmark_node_own(child); } else { - int len = (int)(pipe - cstr - *offset); - child->as.literal = cmark_chunk_dup(&(*n)->as.literal, *offset, len); - cmark_node_own(child); - *offset += len + 1; - if (*offset >= (*n)->as.literal.len) { + pipe -= *offset; + + if (pipe) { + child->as.literal = cmark_chunk_dup(&node->as.literal, *offset, pipe); + cmark_node_own(child); + } else + cmark_node_free(child); + + *offset += pipe + 1; + if (*offset >= node->as.literal.len) { *offset = 0; - *n = (*n)->next; + *n = node->next; } - return result; + break; } - *n = (*n)->next; + *n = node->next; *offset = 0; } else { - 
cmark_node *next = (*n)->next; - cmark_node_append_child(result, *n); - cmark_node_own(*n); + cmark_node *next = node->next; + cmark_node_append_child(result, node); + cmark_node_own(node); *n = next; *offset = 0; } @@ -183,12 +179,29 @@ static cmark_node *consume_until_pipe_or_eol(cmark_syntax_extension *self, if (!result->first_child) { cmark_node_free(result); - result = NULL; + return NULL; + } + + if (result->first_child->type == CMARK_NODE_TEXT) { + cmark_chunk c = cmark_chunk_ltrim_new(parser->mem, &result->first_child->as.literal); + cmark_chunk_free(parser->mem, &result->first_child->as.literal); + result->first_child->as.literal = c; + } + + if (result->last_child->type == CMARK_NODE_TEXT) { + cmark_chunk c = cmark_chunk_rtrim_new(parser->mem, &result->last_child->as.literal); + cmark_chunk_free(parser->mem, &result->last_child->as.literal); + result->last_child->as.literal = c; } + cmark_consolidate_text_nodes(result); return result; } +static int table_ispunct(char c) { + return cmark_ispunct(c) && c != '|'; +} + static table_row *row_from_string(cmark_syntax_extension *self, cmark_parser *parser, unsigned char *string, int len) { @@ -196,10 +209,12 @@ static table_row *row_from_string(cmark_syntax_extension *self, cmark_node *temp_container = cmark_node_new_with_mem(CMARK_NODE_PARAGRAPH, parser->mem); - reescape_pipes(&temp_container->content, parser->mem, string, len); + cmark_strbuf_set(&temp_container->content, string, len); cmark_manage_extensions_special_characters(parser, true); + cmark_parser_set_backslash_ispunct_func(parser, table_ispunct); cmark_parse_inlines(parser, temp_container, parser->refmap, parser->options); + cmark_parser_set_backslash_ispunct_func(parser, NULL); cmark_manage_extensions_special_characters(parser, false); if (!temp_container->first_child) { @@ -233,7 +248,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, unsigned char *input, int len) { bufsize_t matched = scan_table_start(input, len, 
cmark_parser_get_first_nonspace(parser)); - cmark_node *table_header, *child; + cmark_node *table_header; table_row *header_row = NULL; table_row *marker_row = NULL; const char *parent_string; @@ -282,10 +297,9 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_strbuf strbuf; cmark_strbuf_init(parser->mem, &strbuf, 0); - for (child = node->first_child; child; child = child->next) { - assert(child->type == CMARK_NODE_TEXT); - cmark_strbuf_put(&strbuf, child->as.literal.data, child->as.literal.len); - } + assert(node->first_child->type == CMARK_NODE_TEXT); + assert(node->first_child == node->last_child); + cmark_strbuf_put(&strbuf, node->first_child->as.literal.data, node->first_child->as.literal.len); cmark_strbuf_trim(&strbuf); char const *text = cmark_strbuf_cstr(&strbuf); @@ -293,11 +307,11 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_strbuf_free(&strbuf); if (left && right) - alignments[i] = ALIGN_CENTER; + alignments[i] = 'c'; else if (left) - alignments[i] = ALIGN_LEFT; + alignments[i] = 'l'; else if (right) - alignments[i] = ALIGN_RIGHT; + alignments[i] = 'r'; } set_table_alignments(parent_container, alignments); @@ -336,6 +350,7 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, cmark_node *parent_container, unsigned char *input, int len) { cmark_node *table_row_block; + node_table *nt; table_row *row; if (cmark_parser_is_blank(parser)) @@ -351,10 +366,14 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, cmark_node_set_user_data_free_func(table_row_block, free_node_table_row); /* We don't advance the offset here */ - - row = row_from_string(self, parser, - input + cmark_parser_get_first_nonspace(parser), - len - cmark_parser_get_first_nonspace(parser)); + nt = (node_table *)parent_container->user_data; + if (nt->last_matched_row) { + row = nt->last_matched_row; + nt->last_matched_row = NULL; + } else + row = row_from_string(self, parser, + 
input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); { cmark_llist *tmp, *next; @@ -404,14 +423,19 @@ static int matches(cmark_syntax_extension *self, cmark_parser *parser, unsigned char *input, int len, cmark_node *parent_container) { int res = 0; + node_table *nt; if (cmark_node_get_type(parent_container) == CMARK_NODE_TABLE) { table_row *new_row = row_from_string( self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); - if (new_row && new_row->n_columns) + if (new_row && new_row->n_columns) { res = 1; - free_table_row(parser->mem, new_row); + nt = (node_table *)parent_container->user_data; + free_table_row(parser->mem, nt->last_matched_row); + nt->last_matched_row = new_row; + } else + free_table_row(parser->mem, new_row); } return res; @@ -468,6 +492,7 @@ static void commonmark_render(cmark_syntax_extension *extension, } } else if (node->type == CMARK_NODE_TABLE_CELL) { if (entering) { + renderer->out(renderer, " ", false, LITERAL); } else { renderer->out(renderer, " |", false, LITERAL); if (((node_table_row *)node->parent->user_data)->is_header && @@ -479,14 +504,12 @@ static void commonmark_render(cmark_syntax_extension *extension, renderer->cr(renderer); renderer->out(renderer, "|", false, LITERAL); for (i = 0; i < n_cols; i++) { - if (alignments[i] == ALIGN_NONE) - renderer->out(renderer, " --- |", false, LITERAL); - else if (alignments[i] == ALIGN_LEFT) - renderer->out(renderer, " :-- |", false, LITERAL); - else if (alignments[i] == ALIGN_CENTER) - renderer->out(renderer, " :-: |", false, LITERAL); - else if (alignments[i] == ALIGN_RIGHT) - renderer->out(renderer, " --: |", false, LITERAL); + switch (alignments[i]) { + case 0: renderer->out(renderer, " --- |", false, LITERAL); break; + case 'l': renderer->out(renderer, " :-- |", false, LITERAL); break; + case 'c': renderer->out(renderer, " :-: |", false, LITERAL); break; + case 'r': renderer->out(renderer, 
" --: |", false, LITERAL); break; + } } renderer->cr(renderer); } @@ -505,6 +528,8 @@ static void latex_render(cmark_syntax_extension *extension, if (entering) { int i; uint16_t n_cols; + uint8_t *alignments = get_table_alignments(node); + renderer->cr(renderer); renderer->out(renderer, "\\begin{table}", false, LITERAL); renderer->cr(renderer); @@ -512,7 +537,18 @@ static void latex_render(cmark_syntax_extension *extension, n_cols = ((node_table *)node->user_data)->n_columns; for (i = 0; i < n_cols; i++) { - renderer->out(renderer, "l", false, LITERAL); + switch(alignments[i]) { + case 0: + case 'l': + renderer->out(renderer, "l", false, LITERAL); + break; + case 'c': + renderer->out(renderer, "c", false, LITERAL); + break; + case 'r': + renderer->out(renderer, "r", false, LITERAL); + break; + } } renderer->out(renderer, "}", false, LITERAL); renderer->cr(renderer); @@ -548,6 +584,8 @@ static void man_render(cmark_syntax_extension *extension, if (entering) { int i; uint16_t n_cols; + uint8_t *alignments = get_table_alignments(node); + renderer->cr(renderer); renderer->out(renderer, ".TS", false, LITERAL); renderer->cr(renderer); @@ -557,7 +595,18 @@ static void man_render(cmark_syntax_extension *extension, n_cols = ((node_table *)node->user_data)->n_columns; for (i = 0; i < n_cols; i++) { - renderer->out(renderer, "c", false, LITERAL); + switch (alignments[i]) { + case 'l': + renderer->out(renderer, "l", false, LITERAL); + break; + case 0: + case 'c': + renderer->out(renderer, "c", false, LITERAL); + break; + case 'r': + renderer->out(renderer, "r", false, LITERAL); + break; + } } if (n_cols) { @@ -648,12 +697,11 @@ static void html_render(cmark_syntax_extension *extension, if (n == node) break; - if (alignments[i] == ALIGN_LEFT) - cmark_strbuf_puts(html, " align=\"left\""); - else if (alignments[i] == ALIGN_CENTER) - cmark_strbuf_puts(html, " align=\"center\""); - else if (alignments[i] == ALIGN_RIGHT) - cmark_strbuf_puts(html, " align=\"right\""); + switch 
(alignments[i]) { + case 'l': cmark_strbuf_puts(html, " align=\"left\""); break; + case 'c': cmark_strbuf_puts(html, " align=\"center\""); break; + case 'r': cmark_strbuf_puts(html, " align=\"right\""); break; + } cmark_html_render_sourcepos(node, html, options); cmark_strbuf_putc(html, '>'); diff --git a/src/blocks.c b/src/blocks.c index f809882c9..1f065d110 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -1385,3 +1385,8 @@ void cmark_parser_advance_offset(cmark_parser *parser, S_advance_offset(parser, &input_chunk, count, columns != 0); } + +void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser, + cmark_ispunct_func func) { + parser->backslash_ispunct = func; +} diff --git a/src/chunk.h b/src/chunk.h index f198be34a..f0a1c6c69 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -117,4 +117,20 @@ static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) { return c; } +/* trim_new variants are to be used when the source chunk may or may not be + * allocated; forces a newly allocated chunk. */ +static CMARK_INLINE cmark_chunk cmark_chunk_ltrim_new(cmark_mem *mem, cmark_chunk *c) { + cmark_chunk r = cmark_chunk_dup(c, 0, c->len); + cmark_chunk_ltrim(&r); + cmark_chunk_to_cstr(mem, &r); + return r; +} + +static CMARK_INLINE cmark_chunk cmark_chunk_rtrim_new(cmark_mem *mem, cmark_chunk *c) { + cmark_chunk r = cmark_chunk_dup(c, 0, c->len); + cmark_chunk_rtrim(&r); + cmark_chunk_to_cstr(mem, &r); + return r; +} + #endif diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index e62e106f4..08ee0d129 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -247,6 +247,8 @@ typedef int (*cmark_html_filter_func) (cmark_syntax_extension *extension, typedef cmark_node *(*cmark_postprocess_func) (cmark_syntax_extension *extension, cmark_node *root); +typedef int (*cmark_ispunct_func) (char c); + /** Free a cmark_syntax_extension. 
*/ CMARK_EXPORT @@ -351,6 +353,12 @@ CMARK_EXPORT void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension, cmark_postprocess_func func); +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_parser_set_backslash_ispunct_func(cmark_parser *extension, + cmark_ispunct_func func); + /** Return the index of the line currently being parsed, starting with 1. */ CMARK_EXPORT diff --git a/src/inlines.c b/src/inlines.c index d2fe8383a..18c51c6c8 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -673,11 +673,11 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, } // Parse backslash-escape or just a backslash, returning an inline. -static cmark_node *handle_backslash(subject *subj) { +static cmark_node *handle_backslash(cmark_parser *parser, subject *subj) { advance(subj); unsigned char nextchar = peek_char(subj); - if (cmark_ispunct( - nextchar)) { // only ascii symbols and newline can be escaped + if ((parser->backslash_ispunct ? 
parser->backslash_ispunct : cmark_ispunct)(nextchar)) { + // only ascii symbols and newline can be escaped advance(subj); return make_str(subj->mem, cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); } else if (!is_eof(subj) && skip_line_end(subj)) { @@ -1134,7 +1134,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, new_inl = handle_backticks(subj); break; case '\\': - new_inl = handle_backslash(subj); + new_inl = handle_backslash(parser, subj); break; case '&': new_inl = handle_entity(subj); diff --git a/src/parser.h b/src/parser.h index 9cdb0717f..89c4209f4 100644 --- a/src/parser.h +++ b/src/parser.h @@ -47,6 +47,7 @@ struct cmark_parser { bool last_buffer_ended_with_cr; cmark_llist *syntax_extensions; cmark_llist *inline_syntax_extensions; + cmark_ispunct_func backslash_ispunct; }; #ifdef __cplusplus diff --git a/test/extensions.txt b/test/extensions.txt index c1ef3acfb..323d9d602 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -19,18 +19,18 @@ Here's a well-formed table, doing everything it should. <table> <thead> <tr> -<th> abc </th> -<th> def </th> +<th>abc</th> +<th>def</th> </tr> </thead> <tbody> <tr> -<td> ghi </td> -<td> jkl </td> +<td>ghi</td> +<td>jkl</td> </tr> <tr> -<td> mno </td> -<td> pqr </td> +<td>mno</td> +<td>pqr</td> </tr></tbody></table> ```````````````````````````````` @@ -52,18 +52,18 @@ Hi! <table> <thead> <tr> -<th> <em>abc</em> </th> -<th> セン </th> +<th><em>abc</em></th> +<th>セン</th> </tr> </thead> <tbody> <tr> -<td> 1. Block elements inside cells don't work. </td> -<td> </td> +<td>1. Block elements inside cells don't work.</td> +<td></td> </tr> <tr> -<td> But <strong><em>inline elements do</em></strong>. </td> -<td> x </td> +<td>But <strong><em>inline elements do</em></strong>.</td> +<td>x</td> </tr></tbody></table> <p>Hi!</p> ```````````````````````````````` @@ -93,8 +93,8 @@ Here we demonstrate some edge cases about what is and isn't a table. 
<table> <thead> <tr> -<th> Just enough table </th> -<th> to be considered table </th> +<th>Just enough table</th> +<th>to be considered table</th> </tr> </thead> <tbody></tbody></table> @@ -109,7 +109,7 @@ Here we demonstrate some edge cases about what is and isn't a table. <table> <thead> <tr> -<th> xyz </th> +<th>xyz</th> </tr> </thead> <tbody></tbody></table> @@ -125,14 +125,14 @@ xyz | ghi <table> <thead> <tr> -<th>abc </th> -<th> def</th> +<th>abc</th> +<th>def</th> </tr> </thead> <tbody> <tr> -<td>xyz </td> -<td> ghi</td> +<td>xyz</td> +<td>ghi</td> </tr></tbody></table> ```````````````````````````````` @@ -153,18 +153,18 @@ Hi! <table> <thead> <tr> -<th> <em>abc</em> </th> -<th> セン </th> +<th><em>abc</em></th> +<th>セン</th> </tr> </thead> <tbody> <tr> -<td> this row has a space at the end </td> -<td> </td> +<td>this row has a space at the end</td> +<td></td> </tr> <tr> -<td> But <strong><em>inline elements do</em></strong>. </td> -<td> x </td> +<td>But <strong><em>inline elements do</em></strong>.</td> +<td>x</td> </tr></tbody></table> <p>Hi!</p> ```````````````````````````````` @@ -179,20 +179,20 @@ fff | ggg | hhh | iii | jjj <table> <thead> <tr> -<th align="left">aaa </th> -<th> bbb </th> -<th align="center"> ccc </th> -<th> ddd </th> -<th align="right"> eee</th> +<th align="left">aaa</th> +<th>bbb</th> +<th align="center">ccc</th> +<th>ddd</th> +<th align="right">eee</th> </tr> </thead> <tbody> <tr> -<td align="left">fff </td> -<td> ggg </td> -<td align="center"> hhh </td> -<td> iii </td> -<td align="right"> jjj</td> +<td align="left">fff</td> +<td>ggg</td> +<td align="center">hhh</td> +<td>iii</td> +<td align="right">jjj</td> </tr></tbody></table> ```````````````````````````````` @@ -223,26 +223,26 @@ than the header are truncated. 
<table> <thead> <tr> -<th> a </th> -<th> b </th> -<th> c </th> +<th>a</th> +<th>b</th> +<th>c</th> </tr> </thead> <tbody> <tr> -<td> x</td> +<td>x</td> <td></td> <td></td> </tr> <tr> -<td> a </td> -<td> b</td> +<td>a</td> +<td>b</td> <td></td> </tr> <tr> -<td> 1 </td> -<td> 2 </td> -<td> 3 </td> +<td>1</td> +<td>2</td> +<td>3</td> </tr></tbody></table> ```````````````````````````````` @@ -261,25 +261,25 @@ Tables with embedded pipes could be tricky. <table> <thead> <tr> -<th> a </th> -<th> b </th> +<th>a</th> +<th>b</th> </tr> </thead> <tbody> <tr> -<td> Escaped pipes are |okay|. </td> -<td> Like | this. </td> +<td>Escaped pipes are |okay|.</td> +<td>Like | this.</td> </tr> <tr> -<td> Within <code>|code| is okay</code> too. </td> +<td>Within <code>|code| is okay</code> too.</td> <td></td> </tr> <tr> -<td> <strong><em><code>c|</code></em></strong> | complex</td> +<td><strong><em><code>c|</code></em></strong> | complex</td> <td></td> </tr> <tr> -<td> don't <strong>_reparse_</strong></td> +<td>don't <strong>_reparse_</strong></td> <td></td> </tr></tbody></table> ```````````````````````````````` @@ -295,7 +295,7 @@ This shouldn't assert. <table> <thead> <tr> -<th> a </th> +<th>a</th> </tr> </thead> <tbody></tbody></table> @@ -304,36 +304,57 @@ This shouldn't assert. ### Escaping ```````````````````````````````` example -| a | -| --- | -| \\ | -| \\\\ | -| \_ | -| \| | -| \a | +| a | b | +| --- | --- | +| \\ | `\\` | +| \\\\ | `\\\\` | +| \_ | `\_` | +| \| | `\|` | +| \a | `\a` | + +\\ `\\` + +\\\\ `\\\\` + +\_ `\_` + +\| `\|` + +\a `\a` . 
<table> <thead> <tr> -<th> a </th> +<th>a</th> +<th>b</th> </tr> </thead> <tbody> <tr> -<td> \ </td> +<td>\</td> +<td><code>\\</code></td> </tr> <tr> -<td> \\ </td> +<td>\\</td> +<td><code>\\\\</code></td> </tr> <tr> -<td> _ </td> +<td>_</td> +<td><code>\_</code></td> </tr> <tr> -<td> | </td> +<td>|</td> +<td><code>\|</code></td> </tr> <tr> -<td> a </td> +<td>\a</td> +<td><code>\a</code></td> </tr></tbody></table> +<p>\ <code>\\</code></p> +<p>\\ <code>\\\\</code></p> +<p>_ <code>\_</code></p> +<p>| <code>\|</code></p> +<p>\a <code>\a</code></p> ```````````````````````````````` ### Embedded HTML @@ -347,15 +368,15 @@ This shouldn't assert. <table> <thead> <tr> -<th> a </th> +<th>a</th> </tr> </thead> <tbody> <tr> -<td> <strong>hello</strong> </td> +<td><strong>hello</strong></td> </tr> <tr> -<td> ok <br> sure </td> +<td>ok <br> sure</td> </tr></tbody></table> ```````````````````````````````` @@ -491,13 +512,13 @@ Autolink and tables. <table> <thead> <tr> -<th> a </th> -<th> b </th> +<th>a</th> +<th>b</th> </tr> </thead> <tbody> <tr> -<td> <a href="https://github.com">https://github.com</a> <a href="http://www.github.com">www.github.com</a> </td> -<td> <a href="http://pokemon.com">http://pokemon.com</a> </td> +<td><a href="https://github.com">https://github.com</a> <a href="http://www.github.com">www.github.com</a></td> +<td><a href="http://pokemon.com">http://pokemon.com</a></td> </tr></tbody></table> ```````````````````````````````` From 3636dc980fa57987cb49f20d406537196c3ade15 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Thu, 5 Jan 2017 12:32:51 +1100 Subject: [PATCH 012/218] Initialise openers bottom correctly --- src/inlines.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/inlines.c b/src/inlines.c index 18c51c6c8..d8b6bbd93 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -529,6 +529,7 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta int i; // initialize openers_bottom: + 
memset(&openers_bottom, 0, sizeof(openers_bottom)); for (i=0; i < 3; i++) { openers_bottom[i]['*'] = stack_bottom; openers_bottom[i]['_'] = stack_bottom; From a49c13a52b265a9f3c84248128d6fcec288fc7a6 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Mon, 23 Jan 2017 12:34:12 +1100 Subject: [PATCH 013/218] Fix for inline parser changes --- test/extensions.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/extensions.txt b/test/extensions.txt index 323d9d602..09c2d2cb9 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -44,7 +44,7 @@ Hello! | _abc_ | セン | | ----- | ---- | | 1. Block elements inside cells don't work. | | -| But **_inline elements do_**. | x | +| But _**inline elements do**_. | x | Hi! . @@ -62,7 +62,7 @@ Hi! <td></td> </tr> <tr> -<td>But <strong><em>inline elements do</em></strong>.</td> +<td>But <em><strong>inline elements do</strong></em>.</td> <td>x</td> </tr></tbody></table> <p>Hi!</p> @@ -145,7 +145,7 @@ Hello! | _abc_ | セン | | ----- | ---- | | this row has a space at the end | | -| But **_inline elements do_**. | x | +| But _**inline elements do**_. | x | Hi! . @@ -163,7 +163,7 @@ Hi! <td></td> </tr> <tr> -<td>But <strong><em>inline elements do</em></strong>.</td> +<td>But <em><strong>inline elements do</strong></em>.</td> <td>x</td> </tr></tbody></table> <p>Hi!</p> @@ -255,7 +255,7 @@ Tables with embedded pipes could be tricky. | --- | --- | | Escaped pipes are \|okay\|. | Like \| this. | | Within `|code| is okay` too. | -| **_`c|`_** \| complex +| _**`c|`**_ \| complex | don't **\_reparse\_** . <table> @@ -275,7 +275,7 @@ Tables with embedded pipes could be tricky. 
<td></td> </tr> <tr> -<td><strong><em><code>c|</code></em></strong> | complex</td> +<td><em><strong><code>c|</code></strong></em> | complex</td> <td></td> </tr> <tr> From b555d89beaab3812fdb9144a248d40e36f8ecc93 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Thu, 26 Jan 2017 20:16:37 +1100 Subject: [PATCH 014/218] Abort if we fail to alloc chunk itself --- src/arena.c | 2 ++ src/main.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 0f31cf333..d9977cb4f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -15,6 +15,8 @@ static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev abort(); c->sz = sz; c->ptr = calloc(1, sz); + if (!c->ptr) + abort(); c->prev = prev; return c; } diff --git a/src/main.c b/src/main.c index 6fe54d102..5e1dcf52e 100644 --- a/src/main.c +++ b/src/main.c @@ -235,7 +235,7 @@ int main(int argc, char *argv[]) { document = cmark_parser_finish(parser); - if (!print_document(document, writer, options, width, parser)) + if (!document || !print_document(document, writer, options, width, parser)) goto failure; success: @@ -247,7 +247,8 @@ int main(int argc, char *argv[]) { if (parser) cmark_parser_free(parser); - cmark_node_free(document); + if (document) + cmark_node_free(document); #else cmark_arena_reset(); #endif From 16846c6111fb9b6380defd25396f39e3a41fc60d Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Fri, 27 Jan 2017 12:27:10 +1100 Subject: [PATCH 015/218] Add a no-crash test --- test/extensions.txt | 5 +++++ test/spec_tests.py | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/test/extensions.txt b/test/extensions.txt index 09c2d2cb9..7dba59abb 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -444,6 +444,11 @@ Full stop outside parens shouldn't be included http://google.com/ok. 
<p>(Full stop inside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.)</p> ```````````````````````````````` +```````````````````````````````` example +This shouldn't crash everything: (_A_@_.A +. +<IGNORE> +```````````````````````````````` ## HTML tag filter diff --git a/test/spec_tests.py b/test/spec_tests.py index 9974fe057..7050a35df 100755 --- a/test/spec_tests.py +++ b/test/spec_tests.py @@ -43,7 +43,9 @@ def do_test(converter, test, normalize, result_counts): if retcode == 0: expected_html = test['html'] unicode_error = None - if normalize: + if expected_html.strip() == '<IGNORE>': + passed = True + elif normalize: try: passed = normalize_html(actual_html) == normalize_html(expected_html) except UnicodeDecodeError as e: From f893cfa37b6f980fbede629f19145890ababea28 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Mon, 6 Feb 2017 19:44:06 +1100 Subject: [PATCH 016/218] Add opaque_free_func to extensions --- src/cmark_extension_api.h | 12 +++++++++++- src/node.c | 3 +++ src/syntax_extension.c | 5 +++++ src/syntax_extension.h | 1 + 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index 08ee0d129..3cb917ade 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -249,6 +249,10 @@ typedef cmark_node *(*cmark_postprocess_func) (cmark_syntax_extension *extension typedef int (*cmark_ispunct_func) (char c); +typedef void (*cmark_opaque_free_func) (cmark_syntax_extension *extension, + cmark_mem *mem, + cmark_node *node); + /** Free a cmark_syntax_extension. 
*/ CMARK_EXPORT @@ -356,7 +360,13 @@ void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extensi /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT -void cmark_parser_set_backslash_ispunct_func(cmark_parser *extension, +void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension, + cmark_opaque_free_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser, cmark_ispunct_func func); /** Return the index of the line currently being parsed, starting with 1. diff --git a/src/node.c b/src/node.c index 0098b026b..bcf322fd2 100644 --- a/src/node.c +++ b/src/node.c @@ -134,6 +134,9 @@ static void S_free_nodes(cmark_node *e) { if (e->user_data && e->user_data_free_func) e->user_data_free_func(NODE_MEM(e), e->user_data); + if (e->as.opaque && e->extension && e->extension->opaque_free_func) + e->extension->opaque_free_func(e->extension, NODE_MEM(e), e); + free_node_as(e); if (e->last_child) { diff --git a/src/syntax_extension.c b/src/syntax_extension.c index c613f8d1d..8d67bdfd0 100644 --- a/src/syntax_extension.c +++ b/src/syntax_extension.c @@ -112,3 +112,8 @@ void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, extension->priv = priv; extension->free_function = free_func; } + +void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension, + cmark_opaque_free_func func) { + extension->opaque_free_func = func; +} diff --git a/src/syntax_extension.h b/src/syntax_extension.h index 8b3b407c2..5182605d0 100644 --- a/src/syntax_extension.h +++ b/src/syntax_extension.h @@ -22,6 +22,7 @@ struct cmark_syntax_extension { cmark_html_render_func html_render_func; cmark_html_filter_func html_filter_func; cmark_postprocess_func postprocess_func; + cmark_opaque_free_func opaque_free_func; }; #endif From 98225993e29ec9ce3a01c64fa28b204f167bab66 Mon Sep 17 00:00:00 2001 From: Yuki Izumi 
<kivikakk@github.com> Date: Mon, 6 Feb 2017 19:44:15 +1100 Subject: [PATCH 017/218] Use opaque instead of user_data in table --- extensions/table.c | 49 ++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/extensions/table.c b/extensions/table.c index 35c42d57a..90d05884a 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -49,14 +49,14 @@ static int get_n_table_columns(cmark_node *node) { if (!node || node->type != CMARK_NODE_TABLE) return -1; - return (int)((node_table *)node->user_data)->n_columns; + return (int)((node_table *)node->as.opaque)->n_columns; } static int set_n_table_columns(cmark_node *node, uint16_t n_columns) { if (!node || node->type != CMARK_NODE_TABLE) return 0; - ((node_table *)node->user_data)->n_columns = n_columns; + ((node_table *)node->as.opaque)->n_columns = n_columns; return 1; } @@ -64,14 +64,14 @@ static uint8_t *get_table_alignments(cmark_node *node) { if (!node || node->type != CMARK_NODE_TABLE) return 0; - return ((node_table *)node->user_data)->alignments; + return ((node_table *)node->as.opaque)->alignments; } static int set_table_alignments(cmark_node *node, uint8_t *alignments) { if (!node || node->type != CMARK_NODE_TABLE) return 0; - ((node_table *)node->user_data)->alignments = alignments; + ((node_table *)node->as.opaque)->alignments = alignments; return 1; } @@ -79,7 +79,7 @@ static int is_table_header(cmark_node *node, int is_table_header) { if (!node || node->type != CMARK_NODE_TABLE_ROW) return 0; - ((node_table_row *)node->user_data)->is_header = (is_table_header != 0); + ((node_table_row *)node->as.opaque)->is_header = (is_table_header != 0); return 1; } @@ -282,9 +282,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_node_set_syntax_extension(parent_container, self); - cmark_node_set_user_data(parent_container, - parser->mem->calloc(1, sizeof(node_table))); - cmark_node_set_user_data_free_func(parent_container, 
free_node_table); + parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table)); set_n_table_columns(parent_container, header_row->n_columns); @@ -320,9 +318,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_parser_get_offset(parser)); cmark_node_set_syntax_extension(table_header, self); - cmark_node_set_user_data(table_header, - parser->mem->calloc(1, sizeof(node_table_row))); - cmark_node_set_user_data_free_func(table_header, free_node_table_row); + table_header->as.opaque = parser->mem->calloc(1, sizeof(node_table_row)); is_table_header(table_header, true); { @@ -361,12 +357,10 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, cmark_parser_get_offset(parser)); cmark_node_set_syntax_extension(table_row_block, self); - cmark_node_set_user_data(table_row_block, - parser->mem->calloc(1, sizeof(node_table_row))); - cmark_node_set_user_data_free_func(table_row_block, free_node_table_row); + table_row_block->as.opaque = parser->mem->calloc(1, sizeof(node_table_row)); /* We don't advance the offset here */ - nt = (node_table *)parent_container->user_data; + nt = (node_table *)parent_container->as.opaque; if (nt->last_matched_row) { row = nt->last_matched_row; nt->last_matched_row = NULL; @@ -431,7 +425,7 @@ static int matches(cmark_syntax_extension *self, cmark_parser *parser, len - cmark_parser_get_first_nonspace(parser)); if (new_row && new_row->n_columns) { res = 1; - nt = (node_table *)parent_container->user_data; + nt = (node_table *)parent_container->as.opaque; free_table_row(parser->mem, nt->last_matched_row); nt->last_matched_row = new_row; } else @@ -446,7 +440,7 @@ static const char *get_type_string(cmark_syntax_extension *ext, if (node->type == CMARK_NODE_TABLE) { return "table"; } else if (node->type == CMARK_NODE_TABLE_ROW) { - if (((node_table_row *)node->user_data)->is_header) + if (((node_table_row *)node->as.opaque)->is_header) return "table_header"; else return "table_row"; @@ 
-495,12 +489,12 @@ static void commonmark_render(cmark_syntax_extension *extension, renderer->out(renderer, " ", false, LITERAL); } else { renderer->out(renderer, " |", false, LITERAL); - if (((node_table_row *)node->parent->user_data)->is_header && + if (((node_table_row *)node->parent->as.opaque)->is_header && !node->next) { int i; uint8_t *alignments = get_table_alignments(node->parent->parent); uint16_t n_cols = - ((node_table *)node->parent->parent->user_data)->n_columns; + ((node_table *)node->parent->parent->as.opaque)->n_columns; renderer->cr(renderer); renderer->out(renderer, "|", false, LITERAL); for (i = 0; i < n_cols; i++) { @@ -535,7 +529,7 @@ static void latex_render(cmark_syntax_extension *extension, renderer->cr(renderer); renderer->out(renderer, "\\begin{tabular}{", false, LITERAL); - n_cols = ((node_table *)node->user_data)->n_columns; + n_cols = ((node_table *)node->as.opaque)->n_columns; for (i = 0; i < n_cols; i++) { switch(alignments[i]) { case 0: @@ -592,7 +586,7 @@ static void man_render(cmark_syntax_extension *extension, renderer->out(renderer, "tab(@);", false, LITERAL); renderer->cr(renderer); - n_cols = ((node_table *)node->user_data)->n_columns; + n_cols = ((node_table *)node->as.opaque)->n_columns; for (i = 0; i < n_cols; i++) { switch (alignments[i]) { @@ -662,7 +656,7 @@ static void html_render(cmark_syntax_extension *extension, } else if (node->type == CMARK_NODE_TABLE_ROW) { if (entering) { cmark_html_render_cr(html); - if (((node_table_row *)node->user_data)->is_header) { + if (((node_table_row *)node->as.opaque)->is_header) { table_state->in_table_header = 1; cmark_strbuf_puts(html, "<thead>"); cmark_html_render_cr(html); @@ -673,7 +667,7 @@ static void html_render(cmark_syntax_extension *extension, } else { cmark_html_render_cr(html); cmark_strbuf_puts(html, "</tr>"); - if (((node_table_row *)node->user_data)->is_header) { + if (((node_table_row *)node->as.opaque)->is_header) { cmark_html_render_cr(html); cmark_strbuf_puts(html, 
"</thead>"); cmark_html_render_cr(html); @@ -717,6 +711,14 @@ static void html_render(cmark_syntax_extension *extension, } } +static void opaque_free(cmark_syntax_extension *ext, cmark_mem *mem, cmark_node *node) { + if (node->type == CMARK_NODE_TABLE) { + free_node_table(mem, node->as.opaque); + } else if (node->type == CMARK_NODE_TABLE_ROW) { + free_node_table_row(mem, node->as.opaque); + } +} + cmark_syntax_extension *create_table_extension(void) { cmark_syntax_extension *ext = cmark_syntax_extension_new("table"); @@ -729,6 +731,7 @@ cmark_syntax_extension *create_table_extension(void) { cmark_syntax_extension_set_latex_render_func(ext, latex_render); cmark_syntax_extension_set_man_render_func(ext, man_render); cmark_syntax_extension_set_html_render_func(ext, html_render); + cmark_syntax_extension_set_opaque_free_func(ext, opaque_free); CMARK_NODE_TABLE = cmark_syntax_extension_add_node(0); CMARK_NODE_TABLE_ROW = cmark_syntax_extension_add_node(0); CMARK_NODE_TABLE_CELL = cmark_syntax_extension_add_node(0); From 061c1f6483ff70796cd33f4d8b259b9fc4d606af Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Thu, 2 Feb 2017 16:59:02 +1100 Subject: [PATCH 018/218] Rework email autolink as postprocessor --- Makefile | 4 +- extensions/autolink.c | 212 ++++++++++++++++++++++++-------------- src/blocks.c | 14 +-- src/cmark_extension_api.h | 1 + test/spec_tests.py | 2 +- 5 files changed, 148 insertions(+), 85 deletions(-) diff --git a/Makefile b/Makefile index cc0362201..30bc65f56 100644 --- a/Makefile +++ b/Makefile @@ -156,9 +156,9 @@ test: $(SPEC) cmake_build $(ALLTESTS): $(SPEC) $(EXTENSIONS_SPEC) ( \ python3 test/spec_tests.py --spec $(SPEC) --dump-tests | \ - python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); print("\n".join([test["markdown"] for test in tests]))'; \ + python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); u8s = open(1, "w", encoding="utf-8", closefd=False); 
print("\n".join([test["markdown"] for test in tests]), file=u8s)'; \ python3 test/spec_tests.py --spec $(EXTENSIONS_SPEC) --dump-tests | \ - python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); print("\n".join([test["markdown"] for test in tests]))'; \ + python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); u8s = open(1, "w", encoding="utf-8", closefd=False); print("\n".join([test["markdown"] for test in tests]), file=u8s)'; \ ) > $@ leakcheck: $(ALLTESTS) diff --git a/extensions/autolink.c b/extensions/autolink.c index f40fc8c02..18c3b51f9 100644 --- a/extensions/autolink.c +++ b/extensions/autolink.c @@ -194,77 +194,6 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent, return node; } -static cmark_node *email_match(cmark_parser *parser, cmark_node *parent, - cmark_inline_parser *inline_parser) { - size_t link_end; - int rewind; - int nb = 0, np = 0, ns = 0; - - cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); - int max_rewind = cmark_inline_parser_get_offset(inline_parser); - uint8_t *data = chunk->data + max_rewind; - size_t size = chunk->len - max_rewind; - - for (rewind = 0; rewind < max_rewind; ++rewind) { - uint8_t c = data[-rewind - 1]; - - if (cmark_isalnum(c)) - continue; - - if (strchr(".+-_", c) != NULL) - continue; - - if (c == '/') - ns++; - - break; - } - - if (rewind == 0 || ns > 0) - return 0; - - for (link_end = 0; link_end < size; ++link_end) { - uint8_t c = data[link_end]; - - if (cmark_isalnum(c)) - continue; - - if (c == '@') - nb++; - else if (c == '.' 
&& link_end < size - 1) - np++; - else if (c != '-' && c != '_') - break; - } - - if (link_end < 2 || nb != 1 || np == 0 || - (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) - return 0; - - link_end = autolink_delim(data, link_end); - - if (link_end == 0) - return NULL; - - cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end)); - cmark_node_unput(parent, rewind); - - cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); - - cmark_strbuf buf; - cmark_strbuf_init(parser->mem, &buf, 10); - cmark_strbuf_puts(&buf, "mailto:"); - cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind)); - node->as.link.url = cmark_chunk_buf_detach(&buf); - - cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); - text->as.literal = cmark_chunk_dup(chunk, max_rewind - rewind, - (bufsize_t)(link_end + rewind)); - cmark_node_append_child(node, text); - - return node; -} - static cmark_node *url_match(cmark_parser *parser, cmark_node *parent, cmark_inline_parser *inline_parser) { size_t link_end, domain_len; @@ -326,9 +255,6 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser, if (c == ':') return url_match(parser, parent, inline_parser); - if (c == '@') - return email_match(parser, parent, inline_parser); - if (c == 'w') return www_match(parser, parent, inline_parser); @@ -339,15 +265,151 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser, // inline was finished in inlines.c. 
} +static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset) { + size_t link_end; + uint8_t *data = text->as.literal.data, + *at; + size_t size = text->as.literal.len; + int rewind, max_rewind, + nb = 0, np = 0, ns = 0; + + if (offset >= size) + return; + + data += offset; + size -= offset; + + at = (uint8_t *)memchr(data, '@', size); + if (!at) + return; + + max_rewind = (int)(at - data); + data += max_rewind; + size -= max_rewind; + + for (rewind = 0; rewind < max_rewind; ++rewind) { + uint8_t c = data[-rewind - 1]; + + if (cmark_isalnum(c)) + continue; + + if (strchr(".+-_", c) != NULL) + continue; + + if (c == '/') + ns++; + + break; + } + + if (rewind == 0 || ns > 0) { + postprocess_text(parser, text, max_rewind + 1 + offset); + return; + } + + for (link_end = 0; link_end < size; ++link_end) { + uint8_t c = data[link_end]; + + if (cmark_isalnum(c)) + continue; + + if (c == '@') + nb++; + else if (c == '.' && link_end < size - 1) + np++; + else if (c != '-' && c != '_') + break; + } + + if (link_end < 2 || nb != 1 || np == 0 || + (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) { + postprocess_text(parser, text, max_rewind + 1 + offset); + return; + } + + link_end = autolink_delim(data, link_end); + + if (link_end == 0) { + postprocess_text(parser, text, max_rewind + 1 + offset); + return; + } + + cmark_chunk_to_cstr(parser->mem, &text->as.literal); + + cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); + cmark_strbuf buf; + cmark_strbuf_init(parser->mem, &buf, 10); + cmark_strbuf_puts(&buf, "mailto:"); + cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind)); + link_node->as.link.url = cmark_chunk_buf_detach(&buf); + + cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + cmark_chunk email = cmark_chunk_dup( + &text->as.literal, + offset + max_rewind - rewind, + (bufsize_t)(link_end + rewind)); + cmark_chunk_to_cstr(parser->mem, &email); + 
link_text->as.literal = email; + cmark_node_append_child(link_node, link_text); + + cmark_node_insert_after(text, link_node); + + cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + post->as.literal = cmark_chunk_dup(&text->as.literal, + (bufsize_t)(offset + max_rewind + link_end), + (bufsize_t)(size - link_end)); + cmark_chunk_to_cstr(parser->mem, &post->as.literal); + + cmark_node_insert_after(link_node, post); + + text->as.literal.len = offset + max_rewind - rewind; + text->as.literal.data[text->as.literal.len] = 0; + + postprocess_text(parser, post, 0); +} + +static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) { + cmark_iter *iter; + cmark_event_type ev; + cmark_node *node; + bool in_link = false; + + cmark_consolidate_text_nodes(root); + iter = cmark_iter_new(root); + + while ((ev = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + node = cmark_iter_get_node(iter); + if (in_link) { + if (ev == CMARK_EVENT_EXIT && node->type == CMARK_NODE_LINK) { + in_link = false; + } + continue; + } + + if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_LINK) { + in_link = true; + continue; + } + + if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) { + postprocess_text(parser, node, 0); + } + } + + cmark_iter_free(iter); + + return root; +} + cmark_syntax_extension *create_autolink_extension(void) { cmark_syntax_extension *ext = cmark_syntax_extension_new("autolink"); cmark_llist *special_chars = NULL; cmark_syntax_extension_set_match_inline_func(ext, match); + cmark_syntax_extension_set_postprocess_func(ext, postprocess); cmark_mem *mem = cmark_get_default_mem_allocator(); special_chars = cmark_llist_append(mem, special_chars, (void *)':'); - special_chars = cmark_llist_append(mem, special_chars, (void *)'@'); special_chars = cmark_llist_append(mem, special_chars, (void *)'w'); cmark_syntax_extension_set_special_inline_chars(ext, special_chars); diff --git a/src/blocks.c b/src/blocks.c 
index 1f065d110..ffb180dec 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -1317,20 +1317,20 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) { } #endif - res = parser->root; - parser->root = NULL; - - cmark_parser_reset(parser); - for (extensions = parser->syntax_extensions; extensions; extensions = extensions->next) { cmark_syntax_extension *ext = (cmark_syntax_extension *) extensions->data; if (ext->postprocess_func) { - cmark_node *processed = ext->postprocess_func(ext, res); + cmark_node *processed = ext->postprocess_func(ext, parser, parser->root); if (processed) - res = processed; + parser->root = processed; } } + res = parser->root; + parser->root = NULL; + + cmark_parser_reset(parser); + return res; } diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index 3cb917ade..51b2268ad 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -245,6 +245,7 @@ typedef int (*cmark_html_filter_func) (cmark_syntax_extension *extension, size_t tag_len); typedef cmark_node *(*cmark_postprocess_func) (cmark_syntax_extension *extension, + cmark_parser *parser, cmark_node *root); typedef int (*cmark_ispunct_func) (char c); diff --git a/test/spec_tests.py b/test/spec_tests.py index 7050a35df..0a64130ae 100755 --- a/test/spec_tests.py +++ b/test/spec_tests.py @@ -133,7 +133,7 @@ def get_tests(specfile): pattern_re = re.compile('.') tests = [ test for test in all_tests if re.search(pattern_re, test['section']) and (not args.number or test['example'] == args.number) ] if args.dump_tests: - out(json.dumps(tests, ensure_ascii=False, indent=2)) + out(json.dumps(tests, indent=2)) exit(0) else: skipped = len(all_tests) - len(tests) From 9f85003f7f9346525072917410dccbe04ae69e64 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Mon, 6 Feb 2017 13:05:53 +1100 Subject: [PATCH 019/218] Compile shared library for extensions --- extensions/CMakeLists.txt | 48 +++++++++++++++++++++++++++++---------- 1 file changed, 36 
insertions(+), 12 deletions(-) diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 50d0f2338..bc0c87d3b 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -1,4 +1,5 @@ cmake_minimum_required(VERSION 2.8) +set(LIBRARY "libcmarkextensions") set(STATICLIBRARY "libcmarkextensions_static") set(LIBRARY_SOURCES core-extensions.c @@ -30,21 +31,44 @@ include_directories(. ${CMAKE_CURRENT_BINARY_DIR}) set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") -add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES}) +if (CMARK_SHARED) + add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) -set_target_properties(${STATICLIBRARY} PROPERTIES - COMPILE_FLAGS -DCMARK_STATIC_DEFINE - POSITION_INDEPENDENT_CODE ON) + set_target_properties(${LIBRARY} PROPERTIES + OUTPUT_NAME "cmarkextensions" + SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} + VERSION ${PROJECT_VERSION}) + + set_property(TARGET ${LIBRARY} + APPEND PROPERTY MACOSX_RPATH true) + + # Avoid name clash between PROGRAM and LIBRARY pdb files. 
+ set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmarkextensions_dll) + + #generate_export_header(${LIBRARY} + #BASE_NAME ${PROJECT_NAME}) + + list(APPEND CMARK_INSTALL ${LIBRARY}) + target_link_libraries(${LIBRARY} libcmark) +endif() + +if (CMARK_STATIC) + add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES}) -if (MSVC) - set_target_properties(${STATICLIBRARY} PROPERTIES - OUTPUT_NAME "cmarkextensions_static" - VERSION ${PROJECT_VERSION}) -else() set_target_properties(${STATICLIBRARY} PROPERTIES - OUTPUT_NAME "cmarkextensions" - VERSION ${PROJECT_VERSION}) -endif(MSVC) + COMPILE_FLAGS -DCMARK_STATIC_DEFINE + POSITION_INDEPENDENT_CODE ON) + + if (MSVC) + set_target_properties(${STATICLIBRARY} PROPERTIES + OUTPUT_NAME "cmarkextensions_static" + VERSION ${PROJECT_VERSION}) + else() + set_target_properties(${STATICLIBRARY} PROPERTIES + OUTPUT_NAME "cmarkextensions" + VERSION ${PROJECT_VERSION}) + endif(MSVC) +endif() # Feature tests include(CheckIncludeFile) From b9a3ab87be1b897443d2ca4f5a022059a0dc9a27 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Mon, 6 Feb 2017 13:06:20 +1100 Subject: [PATCH 020/218] Use extensions in spec test --- src/blocks.c | 4 ++ src/cmark_extension_api.h | 3 ++ test/CMakeLists.txt | 5 ++- test/cmark.py | 92 +++++++++++++++++++++++++++++---------- test/roundtrip_tests.py | 10 +++-- test/spec.txt | 22 ++++++++++ test/spec_tests.py | 13 ++++-- 7 files changed, 116 insertions(+), 33 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index ffb180dec..b479c452e 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -1390,3 +1390,7 @@ void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser, cmark_ispunct_func func) { parser->backslash_ispunct = func; } + +cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser) { + return parser->syntax_extensions; +} diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index 51b2268ad..d49b1ef43 100644 --- a/src/cmark_extension_api.h +++ 
b/src/cmark_extension_api.h @@ -673,6 +673,9 @@ int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, CMARK_EXPORT void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add); +CMARK_EXPORT +cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser); + #ifdef __cplusplus } #endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 76f936ab7..1b18b0c2d 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -63,14 +63,15 @@ IF (PYTHONINTERP_FOUND) ) add_test(extensions_executable - ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table -e strikethrough -e autolink -e tagfilter" + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark" "--extensions" "table strikethrough autolink tagfilter" ) add_test(roundtrip_extensions_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" - "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark -e table -e strikethrough -e autolink -e tagfilter" + "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark" + "--extensions" "table strikethrough autolink tagfilter" ) add_test(regressiontest_executable diff --git a/test/cmark.py b/test/cmark.py index 4be85a3b0..0cdb881fe 100644 --- a/test/cmark.py +++ b/test/cmark.py @@ -11,50 +11,96 @@ def pipe_through_prog(prog, text): [result, err] = p1.communicate(input=text.encode('utf-8')) return [p1.returncode, result.decode('utf-8'), err] -def to_html(lib, text): - markdown = lib.cmark_markdown_to_html - markdown.restype = c_char_p - markdown.argtypes = [c_char_p, c_size_t, c_int] +def parse(lib, extlib, text, extensions): + register_plugin = lib.cmark_register_plugin + 
register_plugin.argtypes = [c_void_p] + + core_extensions_registration = extlib.core_extensions_registration + + find_syntax_extension = lib.cmark_find_syntax_extension + find_syntax_extension.restype = c_void_p + find_syntax_extension.argtypes = [c_char_p] + + parser_attach_syntax_extension = lib.cmark_parser_attach_syntax_extension + parser_attach_syntax_extension.argtypes = [c_void_p, c_void_p] + + parser_new = lib.cmark_parser_new + parser_new.restype = c_void_p + parser_new.argtypes = [c_int] + + parser_feed = lib.cmark_parser_feed + parser_feed.argtypes = [c_void_p, c_char_p, c_int] + + parser_finish = lib.cmark_parser_finish + parser_finish.restype = c_void_p + parser_finish.argtypes = [c_void_p] + + register_plugin(core_extensions_registration) + + parser = parser_new(0) + for e in set(extensions): + ext = find_syntax_extension(bytes(e, 'utf-8')) + if not ext: + raise Exception("Extension not found: '{}'".format(e)) + parser_attach_syntax_extension(parser, ext) + textbytes = text.encode('utf-8') textlen = len(textbytes) - result = markdown(textbytes, textlen, 0).decode('utf-8') + parser_feed(parser, textbytes, textlen) + + return [parser_finish(parser), parser] + +def to_html(lib, extlib, text, extensions): + document, parser = parse(lib, extlib, text, extensions) + parser_get_syntax_extensions = lib.cmark_parser_get_syntax_extensions + parser_get_syntax_extensions.restype = c_void_p + parser_get_syntax_extensions.argtypes = [c_void_p] + syntax_extensions = parser_get_syntax_extensions(parser) + + render_html = lib.cmark_render_html + render_html.restype = c_char_p + render_html.argtypes = [c_void_p, c_int, c_void_p] + result = render_html(document, 0, syntax_extensions).decode('utf-8') return [0, result, ''] -def to_commonmark(lib, text): - textbytes = text.encode('utf-8') - textlen = len(textbytes) - parse_document = lib.cmark_parse_document - parse_document.restype = c_void_p - parse_document.argtypes = [c_char_p, c_size_t, c_int] +def to_commonmark(lib, 
extlib, text, extensions): + document, _ = parse(lib, extlib, text, extensions) + render_commonmark = lib.cmark_render_commonmark render_commonmark.restype = c_char_p render_commonmark.argtypes = [c_void_p, c_int, c_int] - node = parse_document(textbytes, textlen, 0) - result = render_commonmark(node, 0, 0).decode('utf-8') + result = render_commonmark(document, 0, 0).decode('utf-8') return [0, result, ''] class CMark: - def __init__(self, prog=None, library_dir=None): + def __init__(self, prog=None, library_dir=None, extensions=None): self.prog = prog + self.extensions = [] + if extensions: + self.extensions = extensions.split() + if prog: - self.to_html = lambda x: pipe_through_prog(prog, x) - self.to_commonmark = lambda x: pipe_through_prog(prog + ' -t commonmark', x) + extsfun = lambda exts: ''.join([' -e ' + e for e in set(exts)]) + self.to_html = lambda x, exts=[]: pipe_through_prog(prog + extsfun(exts + self.extensions), x) + self.to_commonmark = lambda x, exts=[]: pipe_through_prog(prog + ' -t commonmark' + extsfun(exts + self.extensions), x) else: sysname = platform.system() if sysname == 'Darwin': - libnames = [ "libcmark.dylib" ] + libnames = [ ["lib", ".dylib" ] ] elif sysname == 'Windows': - libnames = [ "cmark.dll", "libcmark.dll" ] + libnames = [ ["", ".dll"], ["lib", ".dll"] ] else: - libnames = [ "libcmark.so" ] + libnames = [ ["lib", ".so"] ] if not library_dir: library_dir = os.path.join("build", "src") - for libname in libnames: - candidate = os.path.join(library_dir, libname) + for prefix, suffix in libnames: + candidate = os.path.join(library_dir, prefix + "cmark" + suffix) if os.path.isfile(candidate): libpath = candidate break cmark = CDLL(libpath) - self.to_html = lambda x: to_html(cmark, x) - self.to_commonmark = lambda x: to_commonmark(cmark, x) + extlib = CDLL(os.path.join( + library_dir, "..", "extensions", prefix + "cmarkextensions" + suffix)) + self.to_html = lambda x, exts=[]: to_html(cmark, extlib, x, exts + self.extensions) + 
self.to_commonmark = lambda x, exts=[]: to_commonmark(cmark, extlib, x, exts + self.extensions) diff --git a/test/roundtrip_tests.py b/test/roundtrip_tests.py index a52aa8d83..7436db9a2 100644 --- a/test/roundtrip_tests.py +++ b/test/roundtrip_tests.py @@ -14,6 +14,8 @@ default=None, help='limit to sections matching regex pattern') parser.add_argument('--library-dir', dest='library_dir', nargs='?', default=None, help='directory containing dynamic library') + parser.add_argument('--extensions', dest='extensions', nargs='?', + default=None, help='space separated list of extensions to enable') parser.add_argument('--no-normalize', dest='normalize', action='store_const', const=False, default=True, help='do not normalize HTML') @@ -23,11 +25,11 @@ spec = sys.argv[1] -def converter(md): - cmark = CMark(prog=args.program, library_dir=args.library_dir) - [ec, result, err] = cmark.to_commonmark(md) +def converter(md, exts): + cmark = CMark(prog=args.program, library_dir=args.library_dir, extensions=args.extensions) + [ec, result, err] = cmark.to_commonmark(md, exts) if ec == 0: - [ec, html, err] = cmark.to_html(result) + [ec, html, err] = cmark.to_html(result, exts) if ec == 0: # In the commonmark writer we insert dummy HTML # comments between lists, and between lists and code diff --git a/test/spec.txt b/test/spec.txt index 64a60b19d..1589c0ad5 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -5322,6 +5322,28 @@ Here the outer list is loose, the inner list tight: ```````````````````````````````` +## Tables (extension) + +```````````````````````````````` example table +| hello | okay | +| ----- | ---- | +| hi | yes | +. 
+<table> +<thead> +<tr> +<th>hello</th> +<th>okay</th> +</tr> +</thead> +<tbody> +<tr> +<td>hi</td> +<td>yes</td> +</tr></tbody></table> +```````````````````````````````` + + # Inlines Inlines are parsed sequentially from the beginning of the character diff --git a/test/spec_tests.py b/test/spec_tests.py index 0a64130ae..66ff9f429 100755 --- a/test/spec_tests.py +++ b/test/spec_tests.py @@ -19,6 +19,8 @@ default=None, help='limit to sections matching regex pattern') parser.add_argument('--library-dir', dest='library_dir', nargs='?', default=None, help='directory containing dynamic library') + parser.add_argument('--extensions', dest='extensions', nargs='?', + default=None, help='space separated list of extensions to enable') parser.add_argument('--no-normalize', dest='normalize', action='store_const', const=False, default=True, help='do not normalize HTML') @@ -39,7 +41,7 @@ def print_test_header(headertext, example_number, start_line, end_line): out("Example %d (lines %d-%d) %s\n" % (example_number,start_line,end_line,headertext)) def do_test(converter, test, normalize, result_counts): - [retcode, actual_html, err] = converter(test['markdown']) + [retcode, actual_html, err] = converter(test['markdown'], test['extensions']) if retcode == 0: expected_html = test['html'] unicode_error = None @@ -84,6 +86,7 @@ def get_tests(specfile): markdown_lines = [] html_lines = [] state = 0 # 0 regular text, 1 markdown example, 2 html output + extensions = [] headertext = '' tests = [] @@ -93,8 +96,9 @@ def get_tests(specfile): for line in specf: line_number = line_number + 1 l = line.strip() - if l == "`" * 32 + " example": + if l.startswith("`" * 32 + " example"): state = 1 + extensions = l[32 + len(" example"):].split() elif l == "`" * 32: state = 0 example_number = example_number + 1 @@ -105,7 +109,8 @@ def get_tests(specfile): "example": example_number, "start_line": start_line, "end_line": end_line, - "section": headertext}) + "section": headertext, + "extensions": 
extensions}) start_line = 0 markdown_lines = [] html_lines = [] @@ -137,7 +142,7 @@ def get_tests(specfile): exit(0) else: skipped = len(all_tests) - len(tests) - converter = CMark(prog=args.program, library_dir=args.library_dir).to_html + converter = CMark(prog=args.program, library_dir=args.library_dir, extensions=args.extensions).to_html result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped} for test in tests: do_test(converter, test, args.normalize, result_counts) From 7b0b5015bcbba545284316d8425e3d08ab912425 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Mon, 6 Feb 2017 14:57:48 +1100 Subject: [PATCH 021/218] Only escape pipes in commonmark output when necessary --- extensions/strikethrough.c | 10 +++--- extensions/table.c | 57 ++++++++++++++++++--------------- src/cmark_extension_api.h | 10 ++++++ src/commonmark.c | 12 ++++--- src/latex.c | 7 ++-- src/man.c | 7 ++-- src/render.c | 8 +++-- src/render.h | 7 ++-- src/syntax_extension.c | 5 +++ src/syntax_extension.h | 1 + test/spec.txt | 65 ++++++++++++++++++++++++++++++++++---- 11 files changed, 134 insertions(+), 55 deletions(-) diff --git a/extensions/strikethrough.c b/extensions/strikethrough.c index 086084df3..1696c8096 100644 --- a/extensions/strikethrough.c +++ b/extensions/strikethrough.c @@ -84,7 +84,7 @@ static int can_contain(cmark_syntax_extension *extension, cmark_node *node, static void commonmark_render(cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { - renderer->out(renderer, cmark_node_get_string_content(node), false, LITERAL); + renderer->out(renderer, node, cmark_node_get_string_content(node), false, LITERAL); } static void latex_render(cmark_syntax_extension *extension, @@ -93,9 +93,9 @@ static void latex_render(cmark_syntax_extension *extension, // requires \usepackage{ulem} bool entering = (ev_type == CMARK_EVENT_ENTER); if (entering) { - renderer->out(renderer, "\\sout{", false, 
LITERAL); + renderer->out(renderer, node, "\\sout{", false, LITERAL); } else { - renderer->out(renderer, "}", false, LITERAL); + renderer->out(renderer, node, "}", false, LITERAL); } } @@ -105,9 +105,9 @@ static void man_render(cmark_syntax_extension *extension, bool entering = (ev_type == CMARK_EVENT_ENTER); if (entering) { renderer->cr(renderer); - renderer->out(renderer, ".ST \"", false, LITERAL); + renderer->out(renderer, node, ".ST \"", false, LITERAL); } else { - renderer->out(renderer, "\"", false, LITERAL); + renderer->out(renderer, node, "\"", false, LITERAL); renderer->cr(renderer); } } diff --git a/extensions/table.c b/extensions/table.c index 90d05884a..df3a01c50 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -482,13 +482,13 @@ static void commonmark_render(cmark_syntax_extension *extension, } else if (node->type == CMARK_NODE_TABLE_ROW) { if (entering) { renderer->cr(renderer); - renderer->out(renderer, "|", false, LITERAL); + renderer->out(renderer, node, "|", false, LITERAL); } } else if (node->type == CMARK_NODE_TABLE_CELL) { if (entering) { - renderer->out(renderer, " ", false, LITERAL); + renderer->out(renderer, node, " ", false, LITERAL); } else { - renderer->out(renderer, " |", false, LITERAL); + renderer->out(renderer, node, " |", false, LITERAL); if (((node_table_row *)node->parent->as.opaque)->is_header && !node->next) { int i; @@ -496,13 +496,13 @@ static void commonmark_render(cmark_syntax_extension *extension, uint16_t n_cols = ((node_table *)node->parent->parent->as.opaque)->n_columns; renderer->cr(renderer); - renderer->out(renderer, "|", false, LITERAL); + renderer->out(renderer, node, "|", false, LITERAL); for (i = 0; i < n_cols; i++) { switch (alignments[i]) { - case 0: renderer->out(renderer, " --- |", false, LITERAL); break; - case 'l': renderer->out(renderer, " :-- |", false, LITERAL); break; - case 'c': renderer->out(renderer, " :-: |", false, LITERAL); break; - case 'r': renderer->out(renderer, " --: |", false, 
LITERAL); break; + case 0: renderer->out(renderer, node, " --- |", false, LITERAL); break; + case 'l': renderer->out(renderer, node, " :-- |", false, LITERAL); break; + case 'c': renderer->out(renderer, node, " :-: |", false, LITERAL); break; + case 'r': renderer->out(renderer, node, " --: |", false, LITERAL); break; } } renderer->cr(renderer); @@ -525,31 +525,31 @@ static void latex_render(cmark_syntax_extension *extension, uint8_t *alignments = get_table_alignments(node); renderer->cr(renderer); - renderer->out(renderer, "\\begin{table}", false, LITERAL); + renderer->out(renderer, node, "\\begin{table}", false, LITERAL); renderer->cr(renderer); - renderer->out(renderer, "\\begin{tabular}{", false, LITERAL); + renderer->out(renderer, node, "\\begin{tabular}{", false, LITERAL); n_cols = ((node_table *)node->as.opaque)->n_columns; for (i = 0; i < n_cols; i++) { switch(alignments[i]) { case 0: case 'l': - renderer->out(renderer, "l", false, LITERAL); + renderer->out(renderer, node, "l", false, LITERAL); break; case 'c': - renderer->out(renderer, "c", false, LITERAL); + renderer->out(renderer, node, "c", false, LITERAL); break; case 'r': - renderer->out(renderer, "r", false, LITERAL); + renderer->out(renderer, node, "r", false, LITERAL); break; } } - renderer->out(renderer, "}", false, LITERAL); + renderer->out(renderer, node, "}", false, LITERAL); renderer->cr(renderer); } else { - renderer->out(renderer, "\\end{tabular}", false, LITERAL); + renderer->out(renderer, node, "\\end{tabular}", false, LITERAL); renderer->cr(renderer); - renderer->out(renderer, "\\end{table}", false, LITERAL); + renderer->out(renderer, node, "\\end{table}", false, LITERAL); renderer->cr(renderer); } } else if (node->type == CMARK_NODE_TABLE_ROW) { @@ -559,9 +559,9 @@ static void latex_render(cmark_syntax_extension *extension, } else if (node->type == CMARK_NODE_TABLE_CELL) { if (!entering) { if (node->next) { - renderer->out(renderer, " & ", false, LITERAL); + renderer->out(renderer, node, 
" & ", false, LITERAL); } else { - renderer->out(renderer, " \\\\", false, LITERAL); + renderer->out(renderer, node, " \\\\", false, LITERAL); } } } else { @@ -581,9 +581,9 @@ static void man_render(cmark_syntax_extension *extension, uint8_t *alignments = get_table_alignments(node); renderer->cr(renderer); - renderer->out(renderer, ".TS", false, LITERAL); + renderer->out(renderer, node, ".TS", false, LITERAL); renderer->cr(renderer); - renderer->out(renderer, "tab(@);", false, LITERAL); + renderer->out(renderer, node, "tab(@);", false, LITERAL); renderer->cr(renderer); n_cols = ((node_table *)node->as.opaque)->n_columns; @@ -591,24 +591,24 @@ static void man_render(cmark_syntax_extension *extension, for (i = 0; i < n_cols; i++) { switch (alignments[i]) { case 'l': - renderer->out(renderer, "l", false, LITERAL); + renderer->out(renderer, node, "l", false, LITERAL); break; case 0: case 'c': - renderer->out(renderer, "c", false, LITERAL); + renderer->out(renderer, node, "c", false, LITERAL); break; case 'r': - renderer->out(renderer, "r", false, LITERAL); + renderer->out(renderer, node, "r", false, LITERAL); break; } } if (n_cols) { - renderer->out(renderer, ".", false, LITERAL); + renderer->out(renderer, node, ".", false, LITERAL); renderer->cr(renderer); } } else { - renderer->out(renderer, ".TE", false, LITERAL); + renderer->out(renderer, node, ".TE", false, LITERAL); renderer->cr(renderer); } } else if (node->type == CMARK_NODE_TABLE_ROW) { @@ -617,7 +617,7 @@ static void man_render(cmark_syntax_extension *extension, } } else if (node->type == CMARK_NODE_TABLE_CELL) { if (!entering && node->next) { - renderer->out(renderer, "@", false, LITERAL); + renderer->out(renderer, node, "@", false, LITERAL); } } else { assert(false); @@ -719,6 +719,10 @@ static void opaque_free(cmark_syntax_extension *ext, cmark_mem *mem, cmark_node } } +static int escape(cmark_syntax_extension *ext, cmark_node *node, int c) { + return c == '|'; +} + cmark_syntax_extension 
*create_table_extension(void) { cmark_syntax_extension *ext = cmark_syntax_extension_new("table"); @@ -732,6 +736,7 @@ cmark_syntax_extension *create_table_extension(void) { cmark_syntax_extension_set_man_render_func(ext, man_render); cmark_syntax_extension_set_html_render_func(ext, html_render); cmark_syntax_extension_set_opaque_free_func(ext, opaque_free); + cmark_syntax_extension_set_commonmark_escape_func(ext, escape); CMARK_NODE_TABLE = cmark_syntax_extension_add_node(0); CMARK_NODE_TABLE_ROW = cmark_syntax_extension_add_node(0); CMARK_NODE_TABLE_CELL = cmark_syntax_extension_add_node(0); diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index d49b1ef43..b11a985b3 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -234,6 +234,10 @@ typedef void (*cmark_common_render_func) (cmark_syntax_extension *extension, cmark_event_type ev_type, int options); +typedef int (*cmark_commonmark_escape_func) (cmark_syntax_extension *extension, + cmark_node *node, + int c); + typedef void (*cmark_html_render_func) (cmark_syntax_extension *extension, cmark_html_renderer *renderer, cmark_node *node, @@ -345,6 +349,12 @@ CMARK_EXPORT void cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension, cmark_html_filter_func func); +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_commonmark_escape_func(cmark_syntax_extension *extension, + cmark_commonmark_escape_func func); + /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT diff --git a/src/commonmark.c b/src/commonmark.c index c73ea680c..3ce35b987 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -13,8 +13,8 @@ #include "render.h" #include "syntax_extension.h" -#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) -#define LIT(s) renderer->out(renderer, s, false, LITERAL) +#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping) +#define LIT(s) 
renderer->out(renderer, node, s, false, LITERAL) #define CR() renderer->cr(renderer) #define BLANKLINE() renderer->blankline(renderer) #define ENCODED_SIZE 20 @@ -22,7 +22,8 @@ // Functions to convert cmark_nodes to commonmark strings. -static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, +static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, + cmark_escaping escape, int32_t c, unsigned char nextc) { bool needs_escaping = false; bool follows_digit = @@ -33,8 +34,9 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, needs_escaping = c < 0x80 && escape != LITERAL && ((escape == NORMAL && - (c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || - c == '>' || c == '\\' || c == '`' || c == '!' || c == '|' || + ((node->parent && node->parent->extension && node->parent->extension->commonmark_escape_func && node->parent->extension->commonmark_escape_func(node->extension, node->parent, c)) || + c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || + c == '>' || c == '\\' || c == '`' || c == '!' || (c == '&' && cmark_isalpha(nextc)) || (c == '!' 
&& nextc == '[') || (renderer->begin_content && (c == '-' || c == '+' || c == '=') && // begin_content doesn't get set to false til we've passed digits diff --git a/src/latex.c b/src/latex.c index 068dc3f61..29572e13d 100644 --- a/src/latex.c +++ b/src/latex.c @@ -12,13 +12,14 @@ #include "render.h" #include "syntax_extension.h" -#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) -#define LIT(s) renderer->out(renderer, s, false, LITERAL) +#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping) +#define LIT(s) renderer->out(renderer, node, s, false, LITERAL) #define CR() renderer->cr(renderer) #define BLANKLINE() renderer->blankline(renderer) #define LIST_NUMBER_STRING_SIZE 20 -static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, +static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, + cmark_escaping escape, int32_t c, unsigned char nextc) { if (escape == LITERAL) { cmark_render_code_point(renderer, c); diff --git a/src/man.c b/src/man.c index 1ae1ac898..2b52ad5b2 100644 --- a/src/man.c +++ b/src/man.c @@ -11,14 +11,15 @@ #include "render.h" #include "syntax_extension.h" -#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) -#define LIT(s) renderer->out(renderer, s, false, LITERAL) +#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping) +#define LIT(s) renderer->out(renderer, node, s, false, LITERAL) #define CR() renderer->cr(renderer) #define BLANKLINE() renderer->blankline(renderer) #define LIST_NUMBER_SIZE 20 // Functions to convert cmark_nodes to groff man strings. 
-static void S_outc(cmark_renderer *renderer, cmark_escaping escape, int32_t c, +static void S_outc(cmark_renderer *renderer, cmark_node *node, + cmark_escaping escape, int32_t c, unsigned char nextc) { (void)(nextc); diff --git a/src/render.c b/src/render.c index 3190fd20d..5582d3792 100644 --- a/src/render.c +++ b/src/render.c @@ -18,7 +18,8 @@ static CMARK_INLINE void S_blankline(cmark_renderer *renderer) { } } -static void S_out(cmark_renderer *renderer, const char *source, bool wrap, +static void S_out(cmark_renderer *renderer, cmark_node *node, + const char *source, bool wrap, cmark_escaping escape) { int length = (int)strlen(source); unsigned char nextc; @@ -97,7 +98,7 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap, renderer->begin_content = renderer->begin_content && cmark_isdigit((char)c) == 1; } else { - (renderer->outc)(renderer, escape, c, nextc); + (renderer->outc)(renderer, node, escape, c, nextc); renderer->begin_line = false; renderer->begin_content = renderer->begin_content && cmark_isdigit((char)c) == 1; @@ -143,7 +144,8 @@ void cmark_render_code_point(cmark_renderer *renderer, uint32_t c) { } char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, - void (*outc)(cmark_renderer *, cmark_escaping, int32_t, + void (*outc)(cmark_renderer *, cmark_node *, + cmark_escaping, int32_t, unsigned char), int (*render_node)(cmark_renderer *renderer, cmark_node *node, diff --git a/src/render.h b/src/render.h index d9d4f4b9f..36c48206c 100644 --- a/src/render.h +++ b/src/render.h @@ -24,10 +24,10 @@ struct cmark_renderer { bool begin_content; bool no_linebreaks; bool in_tight_list_item; - void (*outc)(struct cmark_renderer *, cmark_escaping, int32_t, unsigned char); + void (*outc)(struct cmark_renderer *, cmark_node *, cmark_escaping, int32_t, unsigned char); void (*cr)(struct cmark_renderer *); void (*blankline)(struct cmark_renderer *); - void (*out)(struct cmark_renderer *, const char *, bool, 
cmark_escaping); + void (*out)(struct cmark_renderer *, cmark_node *, const char *, bool, cmark_escaping); }; typedef struct cmark_renderer cmark_renderer; @@ -46,7 +46,8 @@ void cmark_render_ascii(cmark_renderer *renderer, const char *s); void cmark_render_code_point(cmark_renderer *renderer, uint32_t c); char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, - void (*outc)(cmark_renderer *, cmark_escaping, int32_t, + void (*outc)(cmark_renderer *, cmark_node *, + cmark_escaping, int32_t, unsigned char), int (*render_node)(cmark_renderer *renderer, cmark_node *node, diff --git a/src/syntax_extension.c b/src/syntax_extension.c index 8d67bdfd0..38984d808 100644 --- a/src/syntax_extension.c +++ b/src/syntax_extension.c @@ -117,3 +117,8 @@ void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extensi cmark_opaque_free_func func) { extension->opaque_free_func = func; } + +void cmark_syntax_extension_set_commonmark_escape_func(cmark_syntax_extension *extension, + cmark_commonmark_escape_func func) { + extension->commonmark_escape_func = func; +} diff --git a/src/syntax_extension.h b/src/syntax_extension.h index 5182605d0..060461deb 100644 --- a/src/syntax_extension.h +++ b/src/syntax_extension.h @@ -23,6 +23,7 @@ struct cmark_syntax_extension { cmark_html_filter_func html_filter_func; cmark_postprocess_func postprocess_func; cmark_opaque_free_func opaque_free_func; + cmark_commonmark_escape_func commonmark_escape_func; }; #endif diff --git a/test/spec.txt b/test/spec.txt index 1589c0ad5..15560d01a 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -5324,25 +5324,76 @@ Here the outer list is loose, the inner list tight: ## Tables (extension) +A [table](@) is an arrangement of data with rows and columns, consisting of a +single header row, a [delimiter row] separating the header from the data, and zero or more data rows. + +Each row consists of cells containing arbitrary text, in which [inlines] are parsed, separated by pipes (`|`). 
A leading and trailing pipe is also recommended for clarity of reading, and if there's otherwise parsing ambiguity. Spaces between pipes and cell content are trimmed. + +The [delimiter row](@) consists of cells whose only content are hyphens (`-`), and optionally, a leading or trailing colon (`:`), or both, to indicate left, right, or center alignment respectively. + +```````````````````````````````` example table +| foo | bar | +| --- | --- | +| baz | bim | +. +<table> +<thead> +<tr> +<th>foo</th> +<th>bar</th> +</tr> +</thead> +<tbody> +<tr> +<td>baz</td> +<td>bim</td> +</tr></tbody></table> +```````````````````````````````` + +Cells in one column don't need to match length, though it's easier to read if +they are. Likewise, use of leading and trailing pipes can be inconsistent: + ```````````````````````````````` example table -| hello | okay | -| ----- | ---- | -| hi | yes | +| abc | defghi | +:-: | -----------: +hi | ok . <table> <thead> <tr> -<th>hello</th> -<th>okay</th> +<th align="center">abc</th> +<th align="right">defghi</th> </tr> </thead> <tbody> <tr> -<td>hi</td> -<td>yes</td> +<td align="center">hi</td> +<td align="right">ok</td> </tr></tbody></table> ```````````````````````````````` +Include a pipe in a cell's content by escaping it. Pipes inside other inline spans (such as emphasis, code, etc.) will not break a cell: + +```````````````````````````````` example table +| f\|oo | +| ------ | +| b`|`az | +| b **|** im | +. 
+<table> +<thead> +<tr> +<th>f|oo</th> +</tr> +</thead> +<tbody> +<tr> +<td>b<code>|</code>az</td> +</tr> +<tr> +<td>b <strong>|</strong> im</td> +</tr></tbody></table> +```````````````````````````````` # Inlines From d6ac760e493feba5b1dd62f017baf44a5c6cb844 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Tue, 7 Feb 2017 13:35:34 +1100 Subject: [PATCH 022/218] Fix Windows build --- extensions/CMakeLists.txt | 10 ++++++---- extensions/autolink.c | 2 +- extensions/core-extensions.h | 2 ++ src/CMakeLists.txt | 5 ++++- test/spec_tests.py | 3 ++- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index bc0c87d3b..32586fc62 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -17,6 +17,8 @@ include_directories( ${PROJECT_BINARY_DIR}/src ) +include (GenerateExportHeader) + # We make LIB_INSTALL_DIR configurable rather than # hard-coding lib, because on some OSes different locations # are used for different architectures (e.g. /usr/lib64 on @@ -30,6 +32,7 @@ include_directories(. ${CMAKE_CURRENT_BINARY_DIR}) set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") +add_compiler_export_flags() if (CMARK_SHARED) add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) @@ -45,18 +48,17 @@ if (CMARK_SHARED) # Avoid name clash between PROGRAM and LIBRARY pdb files. 
set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmarkextensions_dll) - #generate_export_header(${LIBRARY} - #BASE_NAME ${PROJECT_NAME}) - list(APPEND CMARK_INSTALL ${LIBRARY}) target_link_libraries(${LIBRARY} libcmark) + + generate_export_header(${LIBRARY} BASE_NAME cmarkextensions) endif() if (CMARK_STATIC) add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES}) set_target_properties(${STATICLIBRARY} PROPERTIES - COMPILE_FLAGS -DCMARK_STATIC_DEFINE + COMPILE_FLAGS -DCMARKEXTENSIONS_STATIC_DEFINE POSITION_INDEPENDENT_CODE ON) if (MSVC) diff --git a/extensions/autolink.c b/extensions/autolink.c index 18c3b51f9..940ac5c76 100644 --- a/extensions/autolink.c +++ b/extensions/autolink.c @@ -273,7 +273,7 @@ static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset) int rewind, max_rewind, nb = 0, np = 0, ns = 0; - if (offset >= size) + if (offset < 0 || (size_t)offset >= size) return; data += offset; diff --git a/extensions/core-extensions.h b/extensions/core-extensions.h index 59d8056d4..78ae8de64 100644 --- a/extensions/core-extensions.h +++ b/extensions/core-extensions.h @@ -6,7 +6,9 @@ extern "C" { #endif #include <cmark_extension_api.h> +#include "cmarkextensions_export.h" +CMARKEXTENSIONS_EXPORT int core_extensions_registration(cmark_plugin *plugin); #ifdef __cplusplus diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4725c0e53..a59872ba2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -58,6 +58,9 @@ set(PROGRAM_SOURCES ) include_directories(. 
${CMAKE_CURRENT_BINARY_DIR}) +include_directories( + ${PROJECT_BINARY_DIR}/extensions +) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmark_version.h.in ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h) @@ -74,7 +77,7 @@ target_link_libraries(${PROGRAM} libcmarkextensions_static) # Disable the PUBLIC declarations when compiling the executable: set_target_properties(${PROGRAM} PROPERTIES - COMPILE_FLAGS -DCMARK_STATIC_DEFINE) + COMPILE_FLAGS "-DCMARK_STATIC_DEFINE -DCMARKEXTENSIONS_STATIC_DEFINE") # Check integrity of node structure when compiled as debug: set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES -DDEBUG") diff --git a/test/spec_tests.py b/test/spec_tests.py index 66ff9f429..8931ef7ad 100755 --- a/test/spec_tests.py +++ b/test/spec_tests.py @@ -42,8 +42,9 @@ def print_test_header(headertext, example_number, start_line, end_line): def do_test(converter, test, normalize, result_counts): [retcode, actual_html, err] = converter(test['markdown'], test['extensions']) + actual_html = re.sub(r'\r\n', '\n', actual_html) if retcode == 0: - expected_html = test['html'] + expected_html = re.sub(r'\r\n', '\n', test['html']) unicode_error = None if expected_html.strip() == '<IGNORE>': passed = True From fd110a5c0cc203cd993e5e792916108f59ffc055 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Thu, 9 Feb 2017 19:07:15 +1100 Subject: [PATCH 023/218] Preserve number of tildes in failed strikethru --- extensions/strikethrough.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/extensions/strikethrough.c b/extensions/strikethrough.c index 1696c8096..802a6bb7a 100644 --- a/extensions/strikethrough.c +++ b/extensions/strikethrough.c @@ -7,17 +7,22 @@ static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser, cmark_node *parent, unsigned char character, cmark_inline_parser *inline_parser) { cmark_node *res = NULL; - int left_flanking, right_flanking, punct_before, punct_after; + int left_flanking, 
right_flanking, punct_before, punct_after, delims; + char buffer[101]; if (character != '~') return NULL; - cmark_inline_parser_scan_delimiters(inline_parser, 100, '~', &left_flanking, - &right_flanking, &punct_before, - &punct_after); + delims = cmark_inline_parser_scan_delimiters( + inline_parser, sizeof(buffer) - 1, '~', + &left_flanking, + &right_flanking, &punct_before, &punct_after); + + memset(buffer, '~', delims); + buffer[delims] = 0; res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); - cmark_node_set_literal(res, "~"); + cmark_node_set_literal(res, buffer); if (left_flanking || right_flanking) { cmark_inline_parser_push_delimiter(inline_parser, character, left_flanking, From e548d42f9dcdd1335ab8e84aaa71a3c4762120e3 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Fri, 10 Feb 2017 17:02:08 +1100 Subject: [PATCH 024/218] autolink simplification (caller requires :// anyway) --- extensions/autolink.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/extensions/autolink.c b/extensions/autolink.c index 940ac5c76..03d84cbad 100644 --- a/extensions/autolink.c +++ b/extensions/autolink.c @@ -9,9 +9,8 @@ #endif static int sd_autolink_issafe(const uint8_t *link, size_t link_len) { - static const size_t valid_uris_count = 5; - static const char *valid_uris[] = {"/", "http://", "https://", "ftp://", - "mailto:"}; + static const size_t valid_uris_count = 3; + static const char *valid_uris[] = {"http://", "https://", "ftp://"}; size_t i; From 6923e1e6cf2dea8b020f11ef6bad2ca53e2e7220 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Fri, 10 Feb 2017 17:02:39 +1100 Subject: [PATCH 025/218] autolink_delim only works with ")", fix balance behaviour --- extensions/autolink.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/extensions/autolink.c b/extensions/autolink.c index 03d84cbad..f1a1e5754 100644 --- a/extensions/autolink.c +++ 
b/extensions/autolink.c @@ -39,21 +39,9 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) { cclose = data[link_end - 1]; switch (cclose) { - case '"': - copen = '"'; - break; - case '\'': - copen = '\''; - break; case ')': copen = '('; break; - case ']': - copen = '['; - break; - case '}': - copen = '{'; - break; default: copen = 0; } @@ -76,24 +64,24 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) { size_t opening = 0; i = 0; - /* Try to close the final punctuation sign in this same line; - * if we managed to close it outside of the URL, that means that it's - * not part of the URL. If it closes inside the URL, that means it - * is part of the URL. + /* Allow any number of matching brackets (as recognised in copen/cclose) + * at the end of the URL. If there is a greater number of closing + * brackets than opening ones, we remove one character from the end of + * the link. * - * Examples: + * Examples (input text => output linked portion): * - * foo http://www.pokemon.com/Pikachu_(Electric) bar + * http://www.pokemon.com/Pikachu_(Electric) * => http://www.pokemon.com/Pikachu_(Electric) * - * foo (http://www.pokemon.com/Pikachu_(Electric)) bar - * => http://www.pokemon.com/Pikachu_(Electric) + * http://www.pokemon.com/Pikachu_((Electric) + * => http://www.pokemon.com/Pikachu_((Electric) * - * foo http://www.pokemon.com/Pikachu_(Electric)) bar + * http://www.pokemon.com/Pikachu_(Electric)) * => http://www.pokemon.com/Pikachu_(Electric) * - * (foo http://www.pokemon.com/Pikachu_(Electric)) bar - * => foo http://www.pokemon.com/Pikachu_(Electric) + * http://www.pokemon.com/Pikachu_((Electric)) + * => http://www.pokemon.com/Pikachu_((Electric)) */ while (i < link_end) { @@ -105,7 +93,7 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) { i++; } - if (closing == opening) + if (closing <= opening) break; link_end--; From a331579886c05c80ed00e91242754295cef02cb0 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: 
Thu, 9 Feb 2017 19:08:13 +1100 Subject: [PATCH 026/218] spec update --- test/spec.txt | 489 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 416 insertions(+), 73 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index 15560d01a..6a920d1f2 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -3163,7 +3163,187 @@ aaa <h1>aaa</h1> ```````````````````````````````` +<div class="extension"> +## Tables (extension) + +If the `table` extension is enabled, an additional leaf block type is +available + +A [table](@) is an arrangement of data with rows and columns, consisting of a +single header row, a [delimiter row] separating the header from the data, and +zero or more data rows. + +Each row consists of cells containing arbitrary text, in which [inlines] are +parsed, separated by pipes (`|`). A leading and trailing pipe is also +recommended for clarity of reading, and if there's otherwise parsing ambiguity. +Spaces between pipes and cell content are trimmed. Block-level elements cannot +be inserted in a table. + +The [delimiter row](@) consists of cells whose only content are hyphens (`-`), +and optionally, a leading or trailing colon (`:`), or both, to indicate left, +right, or center alignment respectively. + +```````````````````````````````` example table +| foo | bar | +| --- | --- | +| baz | bim | +. +<table> +<thead> +<tr> +<th>foo</th> +<th>bar</th> +</tr> +</thead> +<tbody> +<tr> +<td>baz</td> +<td>bim</td> +</tr></tbody></table> +```````````````````````````````` + +Cells in one column don't need to match length, though it's easier to read if +they are. Likewise, use of leading and trailing pipes may be inconsistent: + +```````````````````````````````` example table +| abc | defghi | +:-: | -----------: +bar | baz +. 
+<table> +<thead> +<tr> +<th align="center">abc</th> +<th align="right">defghi</th> +</tr> +</thead> +<tbody> +<tr> +<td align="center">bar</td> +<td align="right">baz</td> +</tr></tbody></table> +```````````````````````````````` + +Include a pipe in a cell's content by escaping it. Pipes inside other inline +spans (such as emphasis, code, etc.) will not break a cell: + +```````````````````````````````` example table +| f\|oo | +| ------ | +| b `|` az | +| b **|** im | +. +<table> +<thead> +<tr> +<th>f|oo</th> +</tr> +</thead> +<tbody> +<tr> +<td>b <code>|</code> az</td> +</tr> +<tr> +<td>b <strong>|</strong> im</td> +</tr></tbody></table> +```````````````````````````````` + +The table is broken at the first empty line, or beginning of another +block-level structure: + +```````````````````````````````` example table +| abc | def | +| --- | --- | +| bar | baz | +> bar +. +<table> +<thead> +<tr> +<th>abc</th> +<th>def</th> +</tr> +</thead> +<tbody> +<tr> +<td>bar</td> +<td>baz</td> +</tr></tbody></table> +<blockquote> +<p>bar</p> +</blockquote> +```````````````````````````````` + +```````````````````````````````` example table +| abc | def | +| --- | --- | +| bar | baz | +bar + +bar +. +<table> +<thead> +<tr> +<th>abc</th> +<th>def</th> +</tr> +</thead> +<tbody> +<tr> +<td>bar</td> +<td>baz</td> +</tr> +<tr> +<td>bar</td> +<td></td> +</tr></tbody></table> +<p>bar</p> +```````````````````````````````` + +The header row must match the [delimiter row] in the number of cells. If not, +a table will not be recognized: + +```````````````````````````````` example table +| abc | def | +| --- | +| bar | +. +<p>| abc | def | +| --- | +| bar |</p> +```````````````````````````````` + +The remainder of the table's rows may vary in the number of cells. If there +are a number of cells fewer than the header, empty cells are inserted. If there are +greater, the excess is ignored: + +```````````````````````````````` example table +| abc | def | +| --- | --- | +| bar | +| bar | baz | boo | +. 
+<table> +<thead> +<tr> +<th>abc</th> +<th>def</th> +</tr> +</thead> +<tbody> +<tr> +<td>bar</td> +<td></td> +</tr> +<tr> +<td>bar</td> +<td>baz</td> +</tr></tbody></table> +```````````````````````````````` + +</div> # Container blocks @@ -5322,79 +5502,6 @@ Here the outer list is loose, the inner list tight: ```````````````````````````````` -## Tables (extension) - -A [table](@) is an arrangement of data with rows and columns, consisting of a -single header row, a [delimiter row] separating the header from the data, and zero or more data rows. - -Each row consists of cells containing arbitrary text, in which [inlines] are parsed, separated by pipes (`|`). A leading and trailing pipe is also recommended for clarity of reading, and if there's otherwise parsing ambiguity. Spaces between pipes and cell content are trimmed. - -The [delimiter row](@) consists of cells whose only content are hyphens (`-`), and optionally, a leading or trailing colon (`:`), or both, to indicate left, right, or center alignment respectively. - -```````````````````````````````` example table -| foo | bar | -| --- | --- | -| baz | bim | -. -<table> -<thead> -<tr> -<th>foo</th> -<th>bar</th> -</tr> -</thead> -<tbody> -<tr> -<td>baz</td> -<td>bim</td> -</tr></tbody></table> -```````````````````````````````` - -Cells in one column don't need to match length, though it's easier to read if -they are. Likewise, use of leading and trailing pipes can be inconsistent: - -```````````````````````````````` example table -| abc | defghi | -:-: | -----------: -hi | ok -. -<table> -<thead> -<tr> -<th align="center">abc</th> -<th align="right">defghi</th> -</tr> -</thead> -<tbody> -<tr> -<td align="center">hi</td> -<td align="right">ok</td> -</tr></tbody></table> -```````````````````````````````` - -Include a pipe in a cell's content by escaping it. Pipes inside other inline spans (such as emphasis, code, etc.) 
will not break a cell: - -```````````````````````````````` example table -| f\|oo | -| ------ | -| b`|`az | -| b **|** im | -. -<table> -<thead> -<tr> -<th>f|oo</th> -</tr> -</thead> -<tbody> -<tr> -<td>b<code>|</code>az</td> -</tr> -<tr> -<td>b <strong>|</strong> im</td> -</tr></tbody></table> -```````````````````````````````` - # Inlines Inlines are parsed sequentially from the beginning of the character @@ -7192,6 +7299,43 @@ __a<http://foo.bar/?q=__> ```````````````````````````````` +<div class="extension"> + +## Strikethrough (extension) + +If the `strikethrough` extension is enabled, an additional emphasis type is +available. + +Strikethrough text is any text wrapped in tildes (`~`). + +```````````````````````````````` example strikethrough +~Hi~ Hello, world! +. +<p><del>Hi</del> Hello, world!</p> +```````````````````````````````` + +Any number of tildes may be used on either side of the text; they do not need +to match, and they cannot be nested. + +```````````````````````````````` example strikethrough +This ~text~~~~ is ~~~~curious~. +. +<p>This <del>text</del> is <del>curious</del>.</p> +```````````````````````````````` + +As with regular emphasis delimiters, a new paragraph will cause the cessation +of parsing a strikethrough: + +```````````````````````````````` example strikethrough +This ~~has a + +new paragraph~~. +. +<p>This ~~has a</p> +<p>new paragraph~~.</p> +```````````````````````````````` + +</div> ## Links @@ -8613,6 +8757,166 @@ foo@bar.example.com <p>foo@bar.example.com</p> ```````````````````````````````` +<div class="extension"> + +## Autolinks (extension) + +If the `autolink` extension is enabled, autolinks will be recognised in a +greater number of conditions. + +[Autolink]s can also be constructed without requiring the use of `<` and to `>` +to delimit them, although they will be recognized under a smaller set of +circumstances. 
All such recognized autolinks can only come after whitespace, +or any of the delimiting characters `*`, `_`, `~`, `(`, and `[`. + +An [extended www autolink](@) will be recognized when a [valid domain] is +found. A [valid domain](@) consists of the text `www.`, followed by +alphanumeric characters, underscores (`_`), hyphens (`-`) and periods (`.`). +There must be at least one period, and no underscores may be present in the +last two segments of the domain. + +The scheme `http` will be inserted automatically: + +```````````````````````````````` example autolink +www.commonmark.org +. +<p><a href="http://www.commonmark.org">www.commonmark.org</a></p> +```````````````````````````````` + +After a [valid domain], zero or more non-space non-`<` characters may follow: + +```````````````````````````````` example autolink +Visit www.commonmark.org/help for more information. +. +<p>Visit <a href="http://www.commonmark.org/help">www.commonmark.org/help</a> for more information.</p> +```````````````````````````````` + +We then apply [extended autolink path validation](@) as follows: + +Trailing punctuation (specifically, `?`, `!`, `.`, `,`, `:`, `*`, `_`, and `~`) +will not be considered part of the autolink, though they may be included in the +interior of the link: + +```````````````````````````````` example autolink +Visit www.commonmark.org. + +Visit www.commonmark.org/a.b. +. +<p>Visit <a href="http://www.commonmark.org">www.commonmark.org</a>.</p> +<p>Visit <a href="http://www.commonmark.org/a.b">www.commonmark.org/a.b</a>.</p> +```````````````````````````````` + +When an autolink ends in `)`, we scan the entire autolink for the total number +of parentheses. 
If there is a greater number of closing parentheses than +opening ones, we don't consider the last character part of the autolink, in +order to facilitate including an autolink inside a parenthesis: + +```````````````````````````````` example autolink +www.google.com/search?q=Markup+(business) + +(www.google.com/search?q=Markup+(business)) +. +<p><a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a></p> +<p>(<a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a>)</p> +```````````````````````````````` + +This check is only done when the link ends in a closing parenthesis `)`, so if +the only parentheses are in the interior of the autolink, no special rules are +applied: + +```````````````````````````````` example autolink +www.google.com/search?q=(business))+ok +. +<p><a href="http://www.google.com/search?q=(business))+ok">www.google.com/search?q=(business))+ok</a></p> +```````````````````````````````` + +If an autolink ends in a semicolon (`;`), we check to see if it appears to +resemble an [entity reference][entity references]; if the preceding text is `&` +followed by one or more alphanumeric characters. If so, it is excluded from +the autolink: + +```````````````````````````````` example autolink +www.google.com/search?q=commonmark&hl=en + +www.google.com/search?q=commonmark&hl; +. +<p><a href="http://www.google.com/search?q=commonmark&amp;hl=en">www.google.com/search?q=commonmark&amp;hl=en</a></p> +<p><a href="http://www.google.com/search?q=commonmark">www.google.com/search?q=commonmark</a>&amp;hl;</p> +```````````````````````````````` + +`<` immediately ends an autolink. + +```````````````````````````````` example autolink +www.commonmark.org/he<lp +. 
+<p><a href="http://www.commonmark.org/he">www.commonmark.org/he</a>&lt;lp</p> +```````````````````````````````` + +An [extended url autolink](@) will be recognised when one of the schemes +`http://`, `https://`, or `ftp://`, followed by a [valid domain], then zero or +more non-space non-`<` characters according to +[extended autolink path validation]: + +```````````````````````````````` example autolink +http://commonmark.org + +(Visit https://encrypted.google.com/search?q=Markup+(business)) + +Anonymous FTP is available at ftp://foo.bar.baz. +. +<p><a href="http://commonmark.org">http://commonmark.org</a></p> +<p>(Visit <a href="https://encrypted.google.com/search?q=Markup+(business)">https://encrypted.google.com/search?q=Markup+(business)</a>)</p> +<p>Anonymous FTP is available at <a href="ftp://foo.bar.baz">ftp://foo.bar.baz</a>.</p> +```````````````````````````````` + + +An [extended email autolink](@) will be recognised when an email address is +recognised within any text node. Email addresses are recognised according to +the following rules: + +* One or more characters which are alphanumeric, or `.`, `-`, `_`, or `+`. +* An `@` symbol. +* One or more characters which are alphanumeric, or `.`, `-`, or `_`. At least + one of the characters here must be a period (`.`). The last character must + not be one of `-` or `_`. If the last character is a period (`.`), it will + be excluded from the autolink. + +The scheme `mailto:` will automatically be added to the generated link: + +```````````````````````````````` example autolink +foo@bar.baz +. +<p><a href="mailto:foo@bar.baz">foo@bar.baz</a></p> +```````````````````````````````` + +`+` can occur before the `@`, but not after. + +```````````````````````````````` example autolink +hello@mail+xyz.example isn't valid, but hello+xyz@mail.example is. +. 
+<p>hello@mail+xyz.example isn't valid, but <a href="mailto:hello+xyz@mail.example">hello+xyz@mail.example</a> is.</p> +```````````````````````````````` + +`.`, `-`, and `_` can occur on both sides of the `@`, but only `.` may occur at +the end of the email address, in which case it will not be considered part of +the address: + +```````````````````````````````` example autolink +a.b-c_d@a.b + +a.b-c_d@a.b. + +a.b-c_d@a.b- + +a.b-c_d@a.b_ +. +<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a></p> +<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a>.</p> +<p>a.b-c_d@a.b-</p> +<p>a.b-c_d@a.b_</p> +```````````````````````````````` + +</div> ## Raw HTML @@ -8884,6 +9188,45 @@ foo <a href="\*"> ```````````````````````````````` +<div class="extension"> + +## Raw HTML (extension) + +If the `tagfilter` extension is enabled, the following HTML tags will be +filtered when rendering HTML output: + +* `<title>` +* `<textarea>` +* `<style>` +* `<xmp>` +* `<iframe>` +* `<noembed>` +* `<noframes>` +* `<script>` +* `<plaintext>` + +Filtering is done by replacing the leading `<` with the entity `&lt;`. These +tags are chosen in particular as they change how HTML is interpreted in a way +unique to them (i.e. nested HTML is interpreted differently), and this is +usually undesireable in the context of other rendered Markdown content. + +All other HTML tags are left untouched. + +```````````````````````````````` example tagfilter +<strong> <title> <style> <em> + +<blockquote> + <xmp> is disallowed. +</blockquote> +. +<p><strong> &lt;title> &lt;style> <em></p> +<blockquote> + &lt;xmp> is disallowed. 
+</blockquote> +```````````````````````````````` + +</div> + ## Hard line breaks A line break (not in a code span or HTML tag) that is preceded From 8cb4978d43ba3bbcac32a0f97e7cf9b53ce376e9 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Mon, 6 Mar 2017 16:36:09 +1100 Subject: [PATCH 027/218] Windows build fix (again) --- extensions/CMakeLists.txt | 2 +- src/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 32586fc62..5104660f8 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -58,7 +58,7 @@ if (CMARK_STATIC) add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES}) set_target_properties(${STATICLIBRARY} PROPERTIES - COMPILE_FLAGS -DCMARKEXTENSIONS_STATIC_DEFINE + COMPILE_FLAGS "-DCMARK_STATIC_DEFINE -DCMARKEXTENSIONS_STATIC_DEFINE" POSITION_INDEPENDENT_CODE ON) if (MSVC) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a59872ba2..5103d0e13 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -70,7 +70,7 @@ include (GenerateExportHeader) add_executable(${PROGRAM} ${PROGRAM_SOURCES}) add_compiler_export_flags() -target_link_libraries(${PROGRAM} libcmark) +target_link_libraries(${PROGRAM} libcmark_static) add_dependencies(${PROGRAM} libcmarkextensions_static) target_link_libraries(${PROGRAM} libcmarkextensions_static) From 90b6f7d934aaa0d64eea8356777fc761550c80d5 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Mon, 13 Mar 2017 12:15:44 +1100 Subject: [PATCH 028/218] Support UTF-8 domains in autolinks --- extensions/autolink.c | 13 +++++++++++-- src/utf8.h | 11 +++++++++++ test/extensions.txt | 3 +++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/extensions/autolink.c b/extensions/autolink.c index f1a1e5754..68ed7e558 100644 --- a/extensions/autolink.c +++ b/extensions/autolink.c @@ -1,6 +1,7 @@ #include "autolink.h" #include <parser.h> #include <string.h> +#include <utf8.h> #if 
defined(_WIN32) #define strncasecmp _strnicmp @@ -8,6 +9,14 @@ #include <strings.h> #endif +static int is_valid_hostchar(const uint8_t *link, size_t link_len) { + int32_t ch; + int r = cmark_utf8proc_iterate(link, (bufsize_t)link_len, &ch); + if (r < 0) + return 0; + return !cmark_utf8proc_is_space(ch) && !cmark_utf8proc_is_punctuation(ch); +} + static int sd_autolink_issafe(const uint8_t *link, size_t link_len) { static const size_t valid_uris_count = 3; static const char *valid_uris[] = {"http://", "https://", "ftp://"}; @@ -18,7 +27,7 @@ static int sd_autolink_issafe(const uint8_t *link, size_t link_len) { size_t len = strlen(valid_uris[i]); if (link_len > len && strncasecmp((char *)link, valid_uris[i], len) == 0 && - cmark_isalnum(link[len])) + is_valid_hostchar(link + len, link_len - len)) return 1; } @@ -114,7 +123,7 @@ static size_t check_domain(uint8_t *data, size_t size, int allow_short) { uscore1 = uscore2; uscore2 = 0; np++; - } else if (!cmark_isalnum(data[i]) && data[i] != '-') + } else if (!is_valid_hostchar(data + i, size - i) && data[i] != '-') break; } diff --git a/src/utf8.h b/src/utf8.h index 8e45714d4..5e6434483 100644 --- a/src/utf8.h +++ b/src/utf8.h @@ -8,13 +8,24 @@ extern "C" { #endif +CMARK_EXPORT void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len); + +CMARK_EXPORT void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf); + +CMARK_EXPORT int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); + +CMARK_EXPORT void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line, bufsize_t size); + +CMARK_EXPORT int cmark_utf8proc_is_space(int32_t uc); + +CMARK_EXPORT int cmark_utf8proc_is_punctuation(int32_t uc); #ifdef __cplusplus diff --git a/test/extensions.txt b/test/extensions.txt index 7dba59abb..276748ff3 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -431,6 +431,8 @@ a.w@b.c Full stop outside parens shouldn't be included http://google.com/ok. 
(Full stop inside parens shouldn't be included http://google.com/ok.) + +http://🍄.ga/ http://x🍄.ga/ . <p>: <a href="http://google.com">http://google.com</a> <a href="https://google.com">https://google.com</a></p> <p><a href="http://google.com/%C3%A5">http://google.com/å</a> <a href="http://google.com/%C3%A5">http://google.com/å</a></p> @@ -442,6 +444,7 @@ Full stop outside parens shouldn't be included http://google.com/ok. <p><a href="mailto:a.w@b.c">a.w@b.c</a></p> <p>Full stop outside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.</p> <p>(Full stop inside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.)</p> +<p><a href="http://%F0%9F%8D%84.ga/">http://🍄.ga/</a> <a href="http://x%F0%9F%8D%84.ga/">http://x🍄.ga/</a></p> ```````````````````````````````` ```````````````````````````````` example From dcfddd72c8e114c6c30319d84d71dd38d5a4b302 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Mon, 13 Mar 2017 13:05:23 +1100 Subject: [PATCH 029/218] Handle links in quotes correctly. --- extensions/autolink.c | 2 +- test/extensions.txt | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/extensions/autolink.c b/extensions/autolink.c index 68ed7e558..6ceca5733 100644 --- a/extensions/autolink.c +++ b/extensions/autolink.c @@ -55,7 +55,7 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) { copen = 0; } - if (strchr("?!.,:*_~", data[link_end - 1]) != NULL) + if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL) link_end--; else if (data[link_end - 1] == ';') { diff --git a/test/extensions.txt b/test/extensions.txt index 276748ff3..e6ebf9814 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -432,6 +432,10 @@ Full stop outside parens shouldn't be included http://google.com/ok. (Full stop inside parens shouldn't be included http://google.com/ok.) +"http://google.com" + +'http://google.com' + http://🍄.ga/ http://x🍄.ga/ . 
<p>: <a href="http://google.com">http://google.com</a> <a href="https://google.com">https://google.com</a></p> @@ -444,6 +448,8 @@ http://🍄.ga/ http://x🍄.ga/ <p><a href="mailto:a.w@b.c">a.w@b.c</a></p> <p>Full stop outside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.</p> <p>(Full stop inside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.)</p> +<p>&quot;<a href="http://google.com">http://google.com</a>&quot;</p> +<p>'<a href="http://google.com">http://google.com</a>'</p> <p><a href="http://%F0%9F%8D%84.ga/">http://🍄.ga/</a> <a href="http://x%F0%9F%8D%84.ga/">http://x🍄.ga/</a></p> ```````````````````````````````` From 331b192a5733ed81d538780e765d49aa77371b65 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Wed, 15 Mar 2017 15:44:00 +1100 Subject: [PATCH 030/218] Reference links in tables (#10) * Add failing test. * Fix by parsing inlines after blocks are done --- extensions/table.c | 178 +++++++++++++++++++++++--------------------- test/extensions.txt | 24 ++++++ 2 files changed, 118 insertions(+), 84 deletions(-) diff --git a/extensions/table.c b/extensions/table.c index df3a01c50..09b9681ca 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -2,6 +2,7 @@ #include <inlines.h> #include <parser.h> #include <references.h> +#include <string.h> #include "ext_scanners.h" #include "strikethrough.h" @@ -18,10 +19,13 @@ typedef struct { typedef struct { uint16_t n_columns; uint8_t *alignments; - table_row *last_matched_row; } node_table; -typedef struct { bool is_header; } node_table_row; +typedef struct { + bool is_header; + unsigned char *raw_content; + size_t raw_content_len; +} node_table_row; static void free_table_cell(cmark_mem *mem, void *data) { cmark_node_free((cmark_node *)data); @@ -39,11 +43,14 @@ static void free_table_row(cmark_mem *mem, table_row *row) { static void free_node_table(cmark_mem *mem, void *ptr) { node_table *t = (node_table *)ptr; 
mem->free(t->alignments); - free_table_row(mem, t->last_matched_row); mem->free(t); } -static void free_node_table_row(cmark_mem *mem, void *ptr) { mem->free(ptr); } +static void free_node_table_row(cmark_mem *mem, void *ptr) { + node_table_row *ntr = (node_table_row *)ptr; + mem->free(ntr->raw_content); + mem->free(ntr); +} static int get_n_table_columns(cmark_node *node) { if (!node || node->type != CMARK_NODE_TABLE) @@ -75,14 +82,6 @@ static int set_table_alignments(cmark_node *node, uint8_t *alignments) { return 1; } -static int is_table_header(cmark_node *node, int is_table_header) { - if (!node || node->type != CMARK_NODE_TABLE_ROW) - return 0; - - ((node_table_row *)node->as.opaque)->is_header = (is_table_header != 0); - return 1; -} - static void maybe_consume_pipe(cmark_node **n, int *offset) { if (*n && (*n)->type == CMARK_NODE_TEXT && *offset < (*n)->as.literal.len && (*n)->as.literal.data[*offset] == '|') @@ -251,6 +250,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_node *table_header; table_row *header_row = NULL; table_row *marker_row = NULL; + node_table_row *ntr; const char *parent_string; uint16_t i; @@ -318,19 +318,11 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_parser_get_offset(parser)); cmark_node_set_syntax_extension(table_header, self); - table_header->as.opaque = parser->mem->calloc(1, sizeof(node_table_row)); - is_table_header(table_header, true); - - { - cmark_llist *tmp, *next; - - for (tmp = header_row->cells; tmp; tmp = next) { - cmark_node *header_cell = (cmark_node *)tmp->data; - cmark_node_append_child(table_header, header_cell); - next = header_row->cells = tmp->next; - parser->mem->free(tmp); - } - } + table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row)); + ntr->is_header = true; + ntr->raw_content_len = strlen(parent_string); + ntr->raw_content = (unsigned char *)malloc(ntr->raw_content_len); + 
memcpy(ntr->raw_content, parent_string, ntr->raw_content_len); cmark_parser_advance_offset( parser, (char *)input, @@ -346,8 +338,7 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, cmark_node *parent_container, unsigned char *input, int len) { cmark_node *table_row_block; - node_table *nt; - table_row *row; + node_table_row *ntr; if (cmark_parser_is_blank(parser)) return NULL; @@ -357,40 +348,11 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, cmark_parser_get_offset(parser)); cmark_node_set_syntax_extension(table_row_block, self); - table_row_block->as.opaque = parser->mem->calloc(1, sizeof(node_table_row)); - - /* We don't advance the offset here */ - nt = (node_table *)parent_container->as.opaque; - if (nt->last_matched_row) { - row = nt->last_matched_row; - nt->last_matched_row = NULL; - } else - row = row_from_string(self, parser, - input + cmark_parser_get_first_nonspace(parser), - len - cmark_parser_get_first_nonspace(parser)); - - { - cmark_llist *tmp, *next; - int i; - int table_columns = get_n_table_columns(parent_container); - - for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = next, ++i) { - cmark_node *cell = (cmark_node *)tmp->data; - assert(cell->type == CMARK_NODE_TABLE_CELL); - cmark_node_append_child(table_row_block, cell); - row->cells = next = tmp->next; - parser->mem->free(tmp); - } + table_row_block->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row)); - for (; i < table_columns; ++i) { - cmark_node *cell = - cmark_parser_add_child(parser, table_row_block, CMARK_NODE_TABLE_CELL, - cmark_parser_get_offset(parser)); - cmark_node_set_syntax_extension(cell, self); - } - } - - free_table_row(parser->mem, row); + ntr->raw_content_len = len - cmark_parser_get_first_nonspace(parser); + ntr->raw_content = (unsigned char *)malloc(len); + memcpy(ntr->raw_content, input + cmark_parser_get_first_nonspace(parser), ntr->raw_content_len); 
cmark_parser_advance_offset(parser, (char *)input, len - 1 - cmark_parser_get_offset(parser), false); @@ -417,25 +379,20 @@ static int matches(cmark_syntax_extension *self, cmark_parser *parser, unsigned char *input, int len, cmark_node *parent_container) { int res = 0; - node_table *nt; if (cmark_node_get_type(parent_container) == CMARK_NODE_TABLE) { table_row *new_row = row_from_string( self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); - if (new_row && new_row->n_columns) { + if (new_row && new_row->n_columns) res = 1; - nt = (node_table *)parent_container->as.opaque; - free_table_row(parser->mem, nt->last_matched_row); - nt->last_matched_row = new_row; - } else - free_table_row(parser->mem, new_row); + free_table_row(parser->mem, new_row); } return res; } -static const char *get_type_string(cmark_syntax_extension *ext, +static const char *get_type_string(cmark_syntax_extension *self, cmark_node *node) { if (node->type == CMARK_NODE_TABLE) { return "table"; @@ -711,7 +668,7 @@ static void html_render(cmark_syntax_extension *extension, } } -static void opaque_free(cmark_syntax_extension *ext, cmark_mem *mem, cmark_node *node) { +static void opaque_free(cmark_syntax_extension *self, cmark_mem *mem, cmark_node *node) { if (node->type == CMARK_NODE_TABLE) { free_node_table(mem, node->as.opaque); } else if (node->type == CMARK_NODE_TABLE_ROW) { @@ -719,27 +676,80 @@ static void opaque_free(cmark_syntax_extension *ext, cmark_mem *mem, cmark_node } } -static int escape(cmark_syntax_extension *ext, cmark_node *node, int c) { +static int escape(cmark_syntax_extension *self, cmark_node *node, int c) { return c == '|'; } +static cmark_node *postprocess(cmark_syntax_extension *self, cmark_parser *parser, cmark_node *root) { + cmark_iter *iter; + cmark_event_type ev; + cmark_node *node; + node_table_row *ntr; + table_row *row; + + iter = cmark_iter_new(root); + + while ((ev = cmark_iter_next(iter)) != 
CMARK_EVENT_DONE) { + node = cmark_iter_get_node(iter); + if (ev == CMARK_EVENT_EXIT && node->type == CMARK_NODE_TABLE_ROW) { + ntr = (node_table_row *)node->as.opaque; + if (!ntr->raw_content) + continue; + row = row_from_string(self, parser, + ntr->raw_content, + (int)ntr->raw_content_len); + free(ntr->raw_content); + ntr->raw_content = NULL; + ntr->raw_content_len = 0; + + { + cmark_llist *tmp, *next; + int i; + int table_columns = get_n_table_columns(node->parent); + + for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = next, ++i) { + cmark_node *cell = (cmark_node *)tmp->data; + assert(cell->type == CMARK_NODE_TABLE_CELL); + cmark_node_append_child(node, cell); + row->cells = next = tmp->next; + parser->mem->free(tmp); + } + + for (; i < table_columns; ++i) { + cmark_node *cell = + cmark_parser_add_child(parser, node, CMARK_NODE_TABLE_CELL, + cmark_parser_get_offset(parser)); + cmark_node_set_syntax_extension(cell, self); + } + } + + free_table_row(parser->mem, row); + } + } + + cmark_iter_free(iter); + + return root; +} + cmark_syntax_extension *create_table_extension(void) { - cmark_syntax_extension *ext = cmark_syntax_extension_new("table"); - - cmark_syntax_extension_set_match_block_func(ext, matches); - cmark_syntax_extension_set_open_block_func(ext, try_opening_table_block); - cmark_syntax_extension_set_get_type_string_func(ext, get_type_string); - cmark_syntax_extension_set_can_contain_func(ext, can_contain); - cmark_syntax_extension_set_contains_inlines_func(ext, contains_inlines); - cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render); - cmark_syntax_extension_set_latex_render_func(ext, latex_render); - cmark_syntax_extension_set_man_render_func(ext, man_render); - cmark_syntax_extension_set_html_render_func(ext, html_render); - cmark_syntax_extension_set_opaque_free_func(ext, opaque_free); - cmark_syntax_extension_set_commonmark_escape_func(ext, escape); + cmark_syntax_extension *self = 
cmark_syntax_extension_new("table"); + + cmark_syntax_extension_set_match_block_func(self, matches); + cmark_syntax_extension_set_open_block_func(self, try_opening_table_block); + cmark_syntax_extension_set_get_type_string_func(self, get_type_string); + cmark_syntax_extension_set_can_contain_func(self, can_contain); + cmark_syntax_extension_set_contains_inlines_func(self, contains_inlines); + cmark_syntax_extension_set_commonmark_render_func(self, commonmark_render); + cmark_syntax_extension_set_latex_render_func(self, latex_render); + cmark_syntax_extension_set_man_render_func(self, man_render); + cmark_syntax_extension_set_html_render_func(self, html_render); + cmark_syntax_extension_set_opaque_free_func(self, opaque_free); + cmark_syntax_extension_set_commonmark_escape_func(self, escape); + cmark_syntax_extension_set_postprocess_func(self, postprocess); CMARK_NODE_TABLE = cmark_syntax_extension_add_node(0); CMARK_NODE_TABLE_ROW = cmark_syntax_extension_add_node(0); CMARK_NODE_TABLE_CELL = cmark_syntax_extension_add_node(0); - return ext; + return self; } diff --git a/test/extensions.txt b/test/extensions.txt index e6ebf9814..046e4e5b2 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -380,6 +380,30 @@ This shouldn't assert. </tr></tbody></table> ```````````````````````````````` +### Reference-style links + +```````````````````````````````` example +Here's a link to [Freedom Planet 2][]. + +| Here's a link to [Freedom Planet 2][] in a table header. | +| --- | +| Here's a link to [Freedom Planet 2][] in a table row. | + +[Freedom Planet 2]: http://www.freedomplanet2.com/ +. 
+<p>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a>.</p> +<table> +<thead> +<tr> +<th>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a> in a table header.</th> +</tr> +</thead> +<tbody> +<tr> +<td>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a> in a table row.</td> +</tr></tbody></table> +```````````````````````````````` + ## Strikethroughs From 166942bbf8e95f099ac41dcdd663ff7555379446 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Tue, 21 Mar 2017 10:56:47 +1100 Subject: [PATCH 031/218] Handle UTF-8 BOM (#14) * Add failing UTF-8 BOM test * Add simple UTF-8 BOM skip * Restrict BOM check to first line --- api_test/main.c | 7 +++++++ src/blocks.c | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/api_test/main.c b/api_test/main.c index 1fd851165..c8a8f3748 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -739,6 +739,13 @@ static void utf8(test_batch_runner *runner) { STR_EQ(runner, html, "<pre><code>\xef\xbf\xbd\n</code></pre>\n", "utf8 with \\0\\n"); free(html); + + // Test byte-order marker + static const char string_with_bom[] = "\xef\xbb\xbf# Hello\n"; + html = cmark_markdown_to_html( + string_with_bom, sizeof(string_with_bom) - 1, CMARK_OPT_DEFAULT); + STR_EQ(runner, html, "<h1>Hello</h1>\n", "utf8 with BOM"); + free(html); } static void test_char(test_batch_runner *runner, int valid, const char *utf8, diff --git a/src/blocks.c b/src/blocks.c index b479c452e..0c2222056 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -1262,6 +1262,12 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, input.len = parser->curline.size; input.alloc = 0; + // Skip UTF-8 BOM. 
+ if (parser->line_number == 0 && + input.len >= 3 && + memcmp(input.data, "\xef\xbb\xbf", 3) == 0) + parser->offset += 3; + parser->line_number++; last_matched_container = check_open_blocks(parser, &input, &all_matched); From 5d088595f1b1f3af02861ce3132e0bc4f28356fb Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Wed, 22 Mar 2017 11:35:55 +1100 Subject: [PATCH 032/218] Add CMARK_OPT_GITHUB_PRE_LANG --- src/cmark.h | 5 +++++ src/html.c | 24 ++++++++++++++++++------ src/main.c | 3 +++ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/cmark.h b/src/cmark.h index cc1b089c8..57e5ab9fe 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -696,6 +696,11 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar */ #define CMARK_OPT_SMART (1 << 10) +/** Use GitHub-style <pre lang="x"> tags for code blocks instead of <pre><code + * class="language-x">. + */ +#define CMARK_OPT_GITHUB_PRE_LANG (1 << 11) + /** * ## Version information */ diff --git a/src/html.c b/src/html.c index 951f33ecc..6665ce06e 100644 --- a/src/html.c +++ b/src/html.c @@ -183,15 +183,27 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, first_tag += 1; } - cmark_strbuf_puts(html, "<pre"); - cmark_html_render_sourcepos(node, html, options); - cmark_strbuf_puts(html, "><code class=\"language-"); - escape_html(html, node->as.code.info.data, first_tag); - cmark_strbuf_puts(html, "\">"); + if (options & CMARK_OPT_GITHUB_PRE_LANG) { + cmark_strbuf_puts(html, "<pre"); + cmark_html_render_sourcepos(node, html, options); + cmark_strbuf_puts(html, " lang=\""); + escape_html(html, node->as.code.info.data, first_tag); + cmark_strbuf_puts(html, "\">"); + } else { + cmark_strbuf_puts(html, "<pre"); + cmark_html_render_sourcepos(node, html, options); + cmark_strbuf_puts(html, "><code class=\"language-"); + escape_html(html, node->as.code.info.data, first_tag); + cmark_strbuf_puts(html, "\">"); + } } escape_html(html, 
node->as.code.literal.data, node->as.code.literal.len); - cmark_strbuf_puts(html, "</code></pre>\n"); + if (options & CMARK_OPT_GITHUB_PRE_LANG) { + cmark_strbuf_puts(html, "</pre>\n"); + } else { + cmark_strbuf_puts(html, "</code></pre>\n"); + } break; case CMARK_NODE_HTML_BLOCK: diff --git a/src/main.c b/src/main.c index 5e1dcf52e..db073668c 100644 --- a/src/main.c +++ b/src/main.c @@ -38,6 +38,7 @@ void print_usage() { printf(" --nobreaks Render soft line breaks as spaces\n"); printf(" --safe Suppress raw HTML and dangerous URLs\n"); printf(" --smart Use smart punctuation\n"); + printf(" --github-pre-lang Use GitHub-style <pre lang> for code blocks\n"); printf(" -e, --extension EXTENSION_NAME Specify an extension name to use\n"); printf(" --list-extensions List available extensions and quit\n"); printf(" --help, -h Print usage information\n"); @@ -130,6 +131,8 @@ int main(int argc, char *argv[]) { options |= CMARK_OPT_NOBREAKS; } else if (strcmp(argv[i], "--smart") == 0) { options |= CMARK_OPT_SMART; + } else if (strcmp(argv[i], "--github-pre-lang") == 0) { + options |= CMARK_OPT_GITHUB_PRE_LANG; } else if (strcmp(argv[i], "--safe") == 0) { options |= CMARK_OPT_SAFE; } else if (strcmp(argv[i], "--validate-utf8") == 0) { From 81816f38b25359d6086ca5cf6cd28c06fab743aa Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Wed, 22 Mar 2017 12:05:39 +1100 Subject: [PATCH 033/218] Use <pre><code> --- src/html.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/html.c b/src/html.c index 6665ce06e..aaf2b7402 100644 --- a/src/html.c +++ b/src/html.c @@ -188,7 +188,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, cmark_html_render_sourcepos(node, html, options); cmark_strbuf_puts(html, " lang=\""); escape_html(html, node->as.code.info.data, first_tag); - cmark_strbuf_puts(html, "\">"); + cmark_strbuf_puts(html, "\"><code>"); } else { cmark_strbuf_puts(html, "<pre"); cmark_html_render_sourcepos(node, 
html, options); @@ -199,11 +199,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, } escape_html(html, node->as.code.literal.data, node->as.code.literal.len); - if (options & CMARK_OPT_GITHUB_PRE_LANG) { - cmark_strbuf_puts(html, "</pre>\n"); - } else { - cmark_strbuf_puts(html, "</code></pre>\n"); - } + cmark_strbuf_puts(html, "</code></pre>\n"); break; case CMARK_NODE_HTML_BLOCK: From 0c167f8de2a754cd459eaae3d45ac8c2d320f9c8 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Mon, 27 Mar 2017 10:28:53 +1100 Subject: [PATCH 034/218] Fix empty table cell behaviour (#17) --- extensions/table.c | 10 ++++------ test/extensions.txt | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/extensions/table.c b/extensions/table.c index 09b9681ca..2c0c059e9 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -151,11 +151,8 @@ static cmark_node *consume_until_pipe_or_eol(cmark_syntax_extension *self, } else { pipe -= *offset; - if (pipe) { - child->as.literal = cmark_chunk_dup(&node->as.literal, *offset, pipe); - cmark_node_own(child); - } else - cmark_node_free(child); + child->as.literal = cmark_chunk_dup(&node->as.literal, *offset, pipe); + cmark_node_own(child); *offset += pipe + 1; if (*offset >= node->as.literal.len) { @@ -181,6 +178,8 @@ static cmark_node *consume_until_pipe_or_eol(cmark_syntax_extension *self, return NULL; } + cmark_consolidate_text_nodes(result); + if (result->first_child->type == CMARK_NODE_TEXT) { cmark_chunk c = cmark_chunk_ltrim_new(parser->mem, &result->first_child->as.literal); cmark_chunk_free(parser->mem, &result->first_child->as.literal); @@ -193,7 +192,6 @@ static cmark_node *consume_until_pipe_or_eol(cmark_syntax_extension *self, result->last_child->as.literal = c; } - cmark_consolidate_text_nodes(result); return result; } diff --git a/test/extensions.txt b/test/extensions.txt index 046e4e5b2..10a51fe67 100644 --- a/test/extensions.txt +++ 
b/test/extensions.txt @@ -404,6 +404,29 @@ Here's a link to [Freedom Planet 2][]. </tr></tbody></table> ```````````````````````````````` +### Sequential cells + +```````````````````````````````` example +| a | b | c | +| --- | --- | --- | +| d || e | +. +<table> +<thead> +<tr> +<th>a</th> +<th>b</th> +<th>c</th> +</tr> +</thead> +<tbody> +<tr> +<td>d</td> +<td></td> +<td>e</td> +</tr></tbody></table> +```````````````````````````````` + ## Strikethroughs From 8fafc10f92b2321033f6a973c1f7e3b57f039899 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Mon, 27 Mar 2017 16:11:52 +1100 Subject: [PATCH 035/218] Limit arena --- extensions/table.c | 30 +++++++++++++++++++++++++----- src/arena.c | 23 +++++++++++++++++++++++ src/cmark_extension_api.h | 6 ++++++ 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/extensions/table.c b/extensions/table.c index 2c0c059e9..5c19a1484 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -1,3 +1,4 @@ +#include <cmark_extension_api.h> #include <html.h> #include <inlines.h> #include <parser.h> @@ -253,15 +254,19 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, uint16_t i; if (!matched) - goto done; + return parent_container; parent_string = cmark_node_get_string_content(parent_container); + cmark_arena_push(); + header_row = row_from_string(self, parser, (unsigned char *)parent_string, (int)strlen(parent_string)); if (!header_row) { - goto done; + free_table_row(parser->mem, header_row); + cmark_arena_pop(); + return parent_container; } marker_row = row_from_string(self, parser, @@ -271,11 +276,24 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, assert(marker_row); if (header_row->n_columns != marker_row->n_columns) { - goto done; + free_table_row(parser->mem, header_row); + free_table_row(parser->mem, marker_row); + cmark_arena_pop(); + return parent_container; + } + + if (cmark_arena_pop()) { + header_row = row_from_string(self, parser, 
(unsigned char *)parent_string, + (int)strlen(parent_string)); + marker_row = row_from_string(self, parser, + input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); } if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) { - goto done; + free_table_row(parser->mem, header_row); + free_table_row(parser->mem, marker_row); + return parent_container; } cmark_node_set_syntax_extension(parent_container, self); @@ -325,7 +343,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_parser_advance_offset( parser, (char *)input, (int)strlen((char *)input) - 1 - cmark_parser_get_offset(parser), false); -done: + free_table_row(parser->mem, header_row); free_table_row(parser->mem, marker_row); return parent_container; @@ -379,12 +397,14 @@ static int matches(cmark_syntax_extension *self, cmark_parser *parser, int res = 0; if (cmark_node_get_type(parent_container) == CMARK_NODE_TABLE) { + cmark_arena_push(); table_row *new_row = row_from_string( self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); if (new_row && new_row->n_columns) res = 1; free_table_row(parser->mem, new_row); + cmark_arena_pop(); } return res; diff --git a/src/arena.c b/src/arena.c index d9977cb4f..b8fed007c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2,9 +2,11 @@ #include <string.h> #include <stdint.h> #include "cmark.h" +#include "cmark_extension_api.h" static struct arena_chunk { size_t sz, used; + uint8_t push_point; void *ptr; struct arena_chunk *prev; } *A = NULL; @@ -21,6 +23,27 @@ static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev return c; } +void cmark_arena_push(void) { + if (!A) + return; + A->push_point = 1; + A = alloc_arena_chunk(10240, A); +} + +int cmark_arena_pop(void) { + if (!A) + return 0; + while (A && !A->push_point) { + free(A->ptr); + struct arena_chunk *n = A->prev; + free(A); + A = n; + } + if (A) + A->push_point = 
0; + return 1; +} + static void init_arena(void) { A = alloc_arena_chunk(4 * 1048576, NULL); } diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index b11a985b3..39b4428eb 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -686,6 +686,12 @@ void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add); CMARK_EXPORT cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser); +CMARK_EXPORT +void cmark_arena_push(void); + +CMARK_EXPORT +int cmark_arena_pop(void); + #ifdef __cplusplus } #endif From 27141100c914b60b47b87d5b5de66f1e1a5ee195 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Tue, 28 Mar 2017 11:38:55 +1100 Subject: [PATCH 036/218] Add Dockerfile --- Makefile | 6 +++++- tools/Dockerfile | 24 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 tools/Dockerfile diff --git a/Makefile b/Makefile index 30bc65f56..5075a1091 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ CLANG_CHECK?=clang-check CLANG_FORMAT=clang-format -style llvm -sort-includes=0 -i AFL_PATH?=/usr/local/bin -.PHONY: all cmake_build leakcheck clean fuzztest test debug ubsan asan mingw archive newbench bench format update-spec afl clang-check libFuzzer +.PHONY: all cmake_build leakcheck clean fuzztest test debug ubsan asan mingw archive newbench bench format update-spec afl clang-check docker libFuzzer all: cmake_build man/man3/cmark.3 @@ -219,3 +219,7 @@ distclean: clean -rm -rf *.dSYM -rm -f README.html -rm -rf $(BENCHFILE) $(ALLTESTS) progit + +docker: + docker build -t cmark $(CURDIR)/tools + docker run --privileged -t -i -v $(CURDIR):/src/cmark -w /src/cmark cmark /bin/bash diff --git a/tools/Dockerfile b/tools/Dockerfile new file mode 100644 index 000000000..4be63dec4 --- /dev/null +++ b/tools/Dockerfile @@ -0,0 +1,24 @@ +FROM debian:jessie + +RUN apt-get update && apt-get install -y \ + build-essential autoconf libtool \ + git \ + pkg-config \ + && apt-get 
clean + +RUN apt-get install -y \ + cmake \ + gdb \ + valgrind \ + python3 + +RUN apt-get install -y \ + wget \ + clang + +RUN wget http://lcamtuf.coredump.cx/afl/releases/afl-latest.tgz && \ + tar xf afl-latest.tgz && \ + cd afl-* && \ + make install && \ + cd .. && \ + rm -rf afl-* From c2712a4fb5aebc2fe2740cb154fe2562ac12c959 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Tue, 28 Mar 2017 11:56:22 +1100 Subject: [PATCH 037/218] Add -gfm suffix to artifacts --- Makefile | 8 +- api_test/CMakeLists.txt | 2 +- extensions/CMakeLists.txt | 14 +- man/make_man_page.py | 4 +- man/man3/cmark.3 | 937 --------------------- src/CMakeLists.txt | 30 +- src/{libcmark.pc.in => libcmark-gfm.pc.in} | 6 +- src/main.c | 2 +- test/CMakeLists.txt | 10 +- test/cmark.py | 4 +- 10 files changed, 40 insertions(+), 977 deletions(-) delete mode 100644 man/man3/cmark.3 rename src/{libcmark.pc.in => libcmark-gfm.pc.in} (58%) diff --git a/Makefile b/Makefile index 5075a1091..b57b80b7a 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ BENCHSAMPLES=$(wildcard $(BENCHDIR)/samples/*.md) BENCHFILE=$(BENCHDIR)/benchinput.md ALLTESTS=alltests.md NUMRUNS?=20 -CMARK=$(BUILDDIR)/src/cmark +CMARK=$(BUILDDIR)/src/cmark-gfm CMARK_FUZZ=$(BUILDDIR)/src/cmark-fuzz PROG?=$(CMARK) VERSION?=$(SPECVERSION) @@ -27,7 +27,7 @@ AFL_PATH?=/usr/local/bin .PHONY: all cmake_build leakcheck clean fuzztest test debug ubsan asan mingw archive newbench bench format update-spec afl clang-check docker libFuzzer -all: cmake_build man/man3/cmark.3 +all: cmake_build man/man3/cmark-gfm.3 $(CMARK): cmake_build @@ -100,7 +100,7 @@ mingw: cmake .. 
-DCMAKE_TOOLCHAIN_FILE=../toolchain-mingw32.cmake -DCMAKE_INSTALL_PREFIX=$(MINGW_INSTALLDIR) ;\ $(MAKE) && $(MAKE) install -man/man3/cmark.3: src/cmark.h | $(CMARK) +man/man3/cmark-gfm.3: src/cmark.h | $(CMARK) python man/make_man_page.py $< > $@ \ archive: @@ -164,7 +164,7 @@ $(ALLTESTS): $(SPEC) $(EXTENSIONS_SPEC) leakcheck: $(ALLTESTS) for format in html man xml latex commonmark; do \ for opts in "" "--smart"; do \ - echo "cmark -t $$format -e table -e strikethrough -e autolink -e tagfilter $$opts" ; \ + echo "cmark-gfm -t $$format -e table -e strikethrough -e autolink -e tagfilter $$opts" ; \ valgrind -q --leak-check=full --dsymutil=yes --suppressions=suppressions --error-exitcode=1 $(PROG) -t $$format -e table -e strikethrough -e autolink -e tagfilter $$opts $(ALLTESTS) >/dev/null || exit 1;\ done; \ done; diff --git a/api_test/CMakeLists.txt b/api_test/CMakeLists.txt index 5c247aba0..a128255f0 100644 --- a/api_test/CMakeLists.txt +++ b/api_test/CMakeLists.txt @@ -8,7 +8,7 @@ include_directories( ${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src ) -target_link_libraries(api_test libcmark ${CMAKE_DL_LIBS}) +target_link_libraries(api_test libcmark-gfm ${CMAKE_DL_LIBS}) # Compiler flags if(MSVC) diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 5104660f8..d14ad0e8b 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 2.8) -set(LIBRARY "libcmarkextensions") -set(STATICLIBRARY "libcmarkextensions_static") +set(LIBRARY "libcmark-gfmextensions") +set(STATICLIBRARY "libcmark-gfmextensions_static") set(LIBRARY_SOURCES core-extensions.c table.c @@ -38,7 +38,7 @@ if (CMARK_SHARED) add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) set_target_properties(${LIBRARY} PROPERTIES - OUTPUT_NAME "cmarkextensions" + OUTPUT_NAME "cmark-gfmextensions" SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} VERSION ${PROJECT_VERSION}) @@ -46,10 +46,10 @@ if 
(CMARK_SHARED) APPEND PROPERTY MACOSX_RPATH true) # Avoid name clash between PROGRAM and LIBRARY pdb files. - set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmarkextensions_dll) + set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark-gfmextensions_dll) list(APPEND CMARK_INSTALL ${LIBRARY}) - target_link_libraries(${LIBRARY} libcmark) + target_link_libraries(${LIBRARY} libcmark-gfm) generate_export_header(${LIBRARY} BASE_NAME cmarkextensions) endif() @@ -63,11 +63,11 @@ if (CMARK_STATIC) if (MSVC) set_target_properties(${STATICLIBRARY} PROPERTIES - OUTPUT_NAME "cmarkextensions_static" + OUTPUT_NAME "cmark-gfmextensions_static" VERSION ${PROJECT_VERSION}) else() set_target_properties(${STATICLIBRARY} PROPERTIES - OUTPUT_NAME "cmarkextensions" + OUTPUT_NAME "cmark-gfmextensions" VERSION ${PROJECT_VERSION}) endif(MSVC) endif() diff --git a/man/make_man_page.py b/man/make_man_page.py index 4b49dbd3b..b0d5882c1 100644 --- a/man/make_man_page.py +++ b/man/make_man_page.py @@ -21,9 +21,9 @@ sysname = platform.system() if sysname == 'Darwin': - cmark = CDLL("build/src/libcmark.dylib") + cmark = CDLL("build/src/libcmark-gfm.dylib") else: - cmark = CDLL("build/src/libcmark.so") + cmark = CDLL("build/src/libcmark-gfm.so") parse_document = cmark.cmark_parse_document parse_document.restype = c_void_p diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 deleted file mode 100644 index 230340346..000000000 --- a/man/man3/cmark.3 +++ /dev/null @@ -1,937 +0,0 @@ -.TH cmark 3 "June 02, 2017" "LOCAL" "Library Functions Manual" -.SH -NAME -.PP -\f[B]cmark\f[] \- CommonMark parsing, manipulating, and rendering - -.SH -DESCRIPTION -.SS -Simple Interface - -.PP -\fIchar *\f[] \fBcmark_markdown_to_html\f[](\fIconst char *text\f[], \fIsize_t len\f[], \fIint options\f[]) - -.PP -Convert \f[I]text\f[] (assumed to be a UTF\-8 encoded string with length -\f[I]len\f[]) from CommonMark Markdown to HTML, returning a -null\-terminated, UTF\-8\-encoded string. 
It is the caller's -responsibility to free the returned buffer. - -.SS -Node Structure - -.PP -.nf -\fC -.RS 0n -typedef enum { - /* Error status */ - CMARK_NODE_NONE = 0x0000, - - /* Block */ - CMARK_NODE_DOCUMENT = CMARK_NODE_TYPE_BLOCK | 0x0001, - CMARK_NODE_BLOCK_QUOTE = CMARK_NODE_TYPE_BLOCK | 0x0002, - CMARK_NODE_LIST = CMARK_NODE_TYPE_BLOCK | 0x0003, - CMARK_NODE_ITEM = CMARK_NODE_TYPE_BLOCK | 0x0004, - CMARK_NODE_CODE_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0005, - CMARK_NODE_HTML_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0006, - CMARK_NODE_CUSTOM_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0007, - CMARK_NODE_PARAGRAPH = CMARK_NODE_TYPE_BLOCK | 0x0008, - CMARK_NODE_HEADING = CMARK_NODE_TYPE_BLOCK | 0x0009, - CMARK_NODE_THEMATIC_BREAK = CMARK_NODE_TYPE_BLOCK | 0x000a, - - /* Inline */ - CMARK_NODE_TEXT = CMARK_NODE_TYPE_INLINE | 0x0001, - CMARK_NODE_SOFTBREAK = CMARK_NODE_TYPE_INLINE | 0x0002, - CMARK_NODE_LINEBREAK = CMARK_NODE_TYPE_INLINE | 0x0003, - CMARK_NODE_CODE = CMARK_NODE_TYPE_INLINE | 0x0004, - CMARK_NODE_HTML_INLINE = CMARK_NODE_TYPE_INLINE | 0x0005, - CMARK_NODE_CUSTOM_INLINE = CMARK_NODE_TYPE_INLINE | 0x0006, - CMARK_NODE_EMPH = CMARK_NODE_TYPE_INLINE | 0x0007, - CMARK_NODE_STRONG = CMARK_NODE_TYPE_INLINE | 0x0008, - CMARK_NODE_LINK = CMARK_NODE_TYPE_INLINE | 0x0009, - CMARK_NODE_IMAGE = CMARK_NODE_TYPE_INLINE | 0x000a, -} cmark_node_type; -.RE -\f[] -.fi - - - -.PP -.nf -\fC -.RS 0n -typedef enum { - CMARK_NO_LIST, - CMARK_BULLET_LIST, - CMARK_ORDERED_LIST -} cmark_list_type; -.RE -\f[] -.fi - - - -.PP -.nf -\fC -.RS 0n -typedef enum { - CMARK_NO_DELIM, - CMARK_PERIOD_DELIM, - CMARK_PAREN_DELIM -} cmark_delim_type; -.RE -\f[] -.fi - - - -.SS -Custom memory allocator support - -.PP -.nf -\fC -.RS 0n -typedef struct cmark_mem { - void *(*calloc)(size_t, size_t); - void *(*realloc)(void *, size_t); - void (*free)(void *); -} cmark_mem; -.RE -\f[] -.fi - -.PP -Defines the memory allocation functions to be used by CMark when parsing -and allocating a document tree - -.PP 
-\fIcmark_mem *\f[] \fBcmark_get_default_mem_allocator\f[](\fI\f[]) - -.PP -The default memory allocator; uses the system's calloc, realloc and -free. - -.PP -\fIcmark_mem *\f[] \fBcmark_get_arena_mem_allocator\f[](\fI\f[]) - -.PP -An arena allocator; uses system calloc to allocate large slabs of -memory. Memory in these slabs is not reused at all. - -.PP -\fIvoid\f[] \fBcmark_arena_reset\f[](\fIvoid\f[]) - -.PP -Resets the arena allocator, quickly returning all used memory to the -operating system. - -.PP -\fItypedef\f[] \fBvoid\f[](\fI*cmark_free_func\f[]) - -.PP -Callback for freeing user data with a \f[I]cmark_mem\f[] context. - -.SS -Linked list - -.PP -.nf -\fC -.RS 0n -typedef struct _cmark_llist -{ - struct _cmark_llist *next; - void *data; -} cmark_llist; -.RE -\f[] -.fi - -.PP -A generic singly linked list. - -.PP -\fIcmark_llist *\f[] \fBcmark_llist_append\f[](\fIcmark_mem * mem\f[], \fIcmark_llist * head\f[], \fIvoid * data\f[]) - -.PP -Append an element to the linked list, return the possibly modified head -of the list. - -.PP -\fIvoid\f[] \fBcmark_llist_free_full\f[](\fIcmark_mem * mem\f[], \fIcmark_llist * head\f[], \fIcmark_free_func free_func\f[]) - -.PP -Free the list starting with \f[I]head\f[], calling \f[I]free_func\f[] -with the data pointer of each of its elements - -.PP -\fIvoid\f[] \fBcmark_llist_free\f[](\fIcmark_mem * mem\f[], \fIcmark_llist * head\f[]) - -.PP -Free the list starting with \f[I]head\f[] - -.SS -Creating and Destroying Nodes - -.PP -\fIcmark_node *\f[] \fBcmark_node_new\f[](\fIcmark_node_type type\f[]) - -.PP -Creates a new node of type \f[I]type\f[]. Note that the node may have -other required properties, which it is the caller's responsibility to -assign. - -.PP -\fIcmark_node *\f[] \fBcmark_node_new_with_mem\f[](\fIcmark_node_type type\f[], \fIcmark_mem *mem\f[]) - -.PP -Same as \f[C]cmark_node_new\f[], but explicitly listing the memory -allocator used to allocate the node. 
Note: be sure to use the same -allocator for every node in a tree, or bad things can happen. - -.PP -\fIvoid\f[] \fBcmark_node_free\f[](\fIcmark_node *node\f[]) - -.PP -Frees the memory allocated for a node and any children. - -.SS -Tree Traversal - -.PP -\fIcmark_node *\f[] \fBcmark_node_next\f[](\fIcmark_node *node\f[]) - -.PP -Returns the next node in the sequence after \f[I]node\f[], or NULL if -there is none. - -.PP -\fIcmark_node *\f[] \fBcmark_node_previous\f[](\fIcmark_node *node\f[]) - -.PP -Returns the previous node in the sequence after \f[I]node\f[], or NULL -if there is none. - -.PP -\fIcmark_node *\f[] \fBcmark_node_parent\f[](\fIcmark_node *node\f[]) - -.PP -Returns the parent of \f[I]node\f[], or NULL if there is none. - -.PP -\fIcmark_node *\f[] \fBcmark_node_first_child\f[](\fIcmark_node *node\f[]) - -.PP -Returns the first child of \f[I]node\f[], or NULL if \f[I]node\f[] has -no children. - -.PP -\fIcmark_node *\f[] \fBcmark_node_last_child\f[](\fIcmark_node *node\f[]) - -.PP -Returns the last child of \f[I]node\f[], or NULL if \f[I]node\f[] has no -children. - -.SS -Iterator -.PP -An iterator will walk through a tree of nodes, starting from a root -node, returning one node at a time, together with information about -whether the node is being entered or exited. The iterator will first -descend to a child node, if there is one. When there is no child, the -iterator will go to the next sibling. When there is no next sibling, the -iterator will return to the parent (but with a \f[I]cmark_event_type\f[] -of \f[C]CMARK_EVENT_EXIT\f[]). The iterator will return -\f[C]CMARK_EVENT_DONE\f[] when it reaches the root node again. One -natural application is an HTML renderer, where an \f[C]ENTER\f[] event -outputs an open tag and an \f[C]EXIT\f[] event outputs a close tag. An -iterator might also be used to transform an AST in some systematic way, -for example, turning all level\-3 headings into regular paragraphs. 
-.IP -.nf -\f[C] -void -usage_example(cmark_node *root) { - cmark_event_type ev_type; - cmark_iter *iter = cmark_iter_new(root); - - while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { - cmark_node *cur = cmark_iter_get_node(iter); - // Do something with `cur` and `ev_type` - } - - cmark_iter_free(iter); -} -\f[] -.fi -.PP -Iterators will never return \f[C]EXIT\f[] events for leaf nodes, which -are nodes of type: -.IP \[bu] 2 -CMARK_NODE_HTML_BLOCK -.IP \[bu] 2 -CMARK_NODE_THEMATIC_BREAK -.IP \[bu] 2 -CMARK_NODE_CODE_BLOCK -.IP \[bu] 2 -CMARK_NODE_TEXT -.IP \[bu] 2 -CMARK_NODE_SOFTBREAK -.IP \[bu] 2 -CMARK_NODE_LINEBREAK -.IP \[bu] 2 -CMARK_NODE_CODE -.IP \[bu] 2 -CMARK_NODE_HTML_INLINE -.PP -Nodes must only be modified after an \f[C]EXIT\f[] event, or an -\f[C]ENTER\f[] event for leaf nodes. - -.PP -.nf -\fC -.RS 0n -typedef enum { - CMARK_EVENT_NONE, - CMARK_EVENT_DONE, - CMARK_EVENT_ENTER, - CMARK_EVENT_EXIT -} cmark_event_type; -.RE -\f[] -.fi - - - -.PP -\fIcmark_iter *\f[] \fBcmark_iter_new\f[](\fIcmark_node *root\f[]) - -.PP -Creates a new iterator starting at \f[I]root\f[]. The current node and -event type are undefined until \f[I]cmark_iter_next\f[] is called for -the first time. The memory allocated for the iterator should be released -using \f[I]cmark_iter_free\f[] when it is no longer needed. - -.PP -\fIvoid\f[] \fBcmark_iter_free\f[](\fIcmark_iter *iter\f[]) - -.PP -Frees the memory allocated for an iterator. - -.PP -\fIcmark_event_type\f[] \fBcmark_iter_next\f[](\fIcmark_iter *iter\f[]) - -.PP -Advances to the next node and returns the event type -(\f[C]CMARK_EVENT_ENTER\f[], \f[C]CMARK_EVENT_EXIT\f[] or -\f[C]CMARK_EVENT_DONE\f[]). - -.PP -\fIcmark_node *\f[] \fBcmark_iter_get_node\f[](\fIcmark_iter *iter\f[]) - -.PP -Returns the current node. - -.PP -\fIcmark_event_type\f[] \fBcmark_iter_get_event_type\f[](\fIcmark_iter *iter\f[]) - -.PP -Returns the current event type. 
- -.PP -\fIcmark_node *\f[] \fBcmark_iter_get_root\f[](\fIcmark_iter *iter\f[]) - -.PP -Returns the root node. - -.PP -\fIvoid\f[] \fBcmark_iter_reset\f[](\fIcmark_iter *iter\f[], \fIcmark_node *current\f[], \fIcmark_event_type event_type\f[]) - -.PP -Resets the iterator so that the current node is \f[I]current\f[] and the -event type is \f[I]event_type\f[]. The new current node must be a -descendant of the root node or the root node itself. - -.SS -Accessors - -.PP -\fIvoid *\f[] \fBcmark_node_get_user_data\f[](\fIcmark_node *node\f[]) - -.PP -Returns the user data of \f[I]node\f[]. - -.PP -\fIint\f[] \fBcmark_node_set_user_data\f[](\fIcmark_node *node\f[], \fIvoid *user_data\f[]) - -.PP -Sets arbitrary user data for \f[I]node\f[]. Returns 1 on success, 0 on -failure. - -.PP -\fIint\f[] \fBcmark_node_set_user_data_free_func\f[](\fIcmark_node *node\f[], \fIcmark_free_func free_func\f[]) - -.PP -Set free function for user data */ - -.PP -\fIcmark_node_type\f[] \fBcmark_node_get_type\f[](\fIcmark_node *node\f[]) - -.PP -Returns the type of \f[I]node\f[], or \f[C]CMARK_NODE_NONE\f[] on error. - -.PP -\fIconst char *\f[] \fBcmark_node_get_type_string\f[](\fIcmark_node *node\f[]) - -.PP -Like \f[I]cmark_node_get_type\f[], but returns a string representation -of the type, or \f[C]"<unknown>"\f[]. - -.PP -\fIconst char *\f[] \fBcmark_node_get_literal\f[](\fIcmark_node *node\f[]) - -.PP -Returns the string contents of \f[I]node\f[], or an empty string if none -is set. Returns NULL if called on a node that does not have string -content. - -.PP -\fIint\f[] \fBcmark_node_set_literal\f[](\fIcmark_node *node\f[], \fIconst char *content\f[]) - -.PP -Sets the string contents of \f[I]node\f[]. Returns 1 on success, 0 on -failure. - -.PP -\fIint\f[] \fBcmark_node_get_heading_level\f[](\fIcmark_node *node\f[]) - -.PP -Returns the heading level of \f[I]node\f[], or 0 if \f[I]node\f[] is not -a heading. 
- -.PP -\fIint\f[] \fBcmark_node_set_heading_level\f[](\fIcmark_node *node\f[], \fIint level\f[]) - -.PP -Sets the heading level of \f[I]node\f[], returning 1 on success and 0 on -error. - -.PP -\fIcmark_list_type\f[] \fBcmark_node_get_list_type\f[](\fIcmark_node *node\f[]) - -.PP -Returns the list type of \f[I]node\f[], or \f[C]CMARK_NO_LIST\f[] if -\f[I]node\f[] is not a list. - -.PP -\fIint\f[] \fBcmark_node_set_list_type\f[](\fIcmark_node *node\f[], \fIcmark_list_type type\f[]) - -.PP -Sets the list type of \f[I]node\f[], returning 1 on success and 0 on -error. - -.PP -\fIcmark_delim_type\f[] \fBcmark_node_get_list_delim\f[](\fIcmark_node *node\f[]) - -.PP -Returns the list delimiter type of \f[I]node\f[], or -\f[C]CMARK_NO_DELIM\f[] if \f[I]node\f[] is not a list. - -.PP -\fIint\f[] \fBcmark_node_set_list_delim\f[](\fIcmark_node *node\f[], \fIcmark_delim_type delim\f[]) - -.PP -Sets the list delimiter type of \f[I]node\f[], returning 1 on success -and 0 on error. - -.PP -\fIint\f[] \fBcmark_node_get_list_start\f[](\fIcmark_node *node\f[]) - -.PP -Returns starting number of \f[I]node\f[], if it is an ordered list, -otherwise 0. - -.PP -\fIint\f[] \fBcmark_node_set_list_start\f[](\fIcmark_node *node\f[], \fIint start\f[]) - -.PP -Sets starting number of \f[I]node\f[], if it is an ordered list. -Returns 1 on success, 0 on failure. - -.PP -\fIint\f[] \fBcmark_node_get_list_tight\f[](\fIcmark_node *node\f[]) - -.PP -Returns 1 if \f[I]node\f[] is a tight list, 0 otherwise. - -.PP -\fIint\f[] \fBcmark_node_set_list_tight\f[](\fIcmark_node *node\f[], \fIint tight\f[]) - -.PP -Sets the "tightness" of a list. Returns 1 on success, 0 on failure. - -.PP -\fIconst char *\f[] \fBcmark_node_get_fence_info\f[](\fIcmark_node *node\f[]) - -.PP -Returns the info string from a fenced code block. 
- -.PP -\fIint\f[] \fBcmark_node_set_fence_info\f[](\fIcmark_node *node\f[], \fIconst char *info\f[]) - -.PP -Sets the info string in a fenced code block, returning 1 on success -and 0 on failure. - -.PP -\fIint\f[] \fBcmark_node_set_fenced\f[](\fIcmark_node * node\f[], \fIint fenced\f[], \fIint length\f[], \fIint offset\f[], \fIchar character\f[]) - -.PP -Sets code blocks fencing details - -.PP -\fIint\f[] \fBcmark_node_get_fenced\f[](\fIcmark_node *node\f[], \fIint *length\f[], \fIint *offset\f[], \fIchar *character\f[]) - -.PP -Returns code blocks fencing details - -.PP -\fIconst char *\f[] \fBcmark_node_get_url\f[](\fIcmark_node *node\f[]) - -.PP -Returns the URL of a link or image \f[I]node\f[], or an empty string if -no URL is set. Returns NULL if called on a node that is not a link or -image. - -.PP -\fIint\f[] \fBcmark_node_set_url\f[](\fIcmark_node *node\f[], \fIconst char *url\f[]) - -.PP -Sets the URL of a link or image \f[I]node\f[]. Returns 1 on success, 0 -on failure. - -.PP -\fIconst char *\f[] \fBcmark_node_get_title\f[](\fIcmark_node *node\f[]) - -.PP -Returns the title of a link or image \f[I]node\f[], or an empty string -if no title is set. Returns NULL if called on a node that is not a link -or image. - -.PP -\fIint\f[] \fBcmark_node_set_title\f[](\fIcmark_node *node\f[], \fIconst char *title\f[]) - -.PP -Sets the title of a link or image \f[I]node\f[]. Returns 1 on success, 0 -on failure. - -.PP -\fIconst char *\f[] \fBcmark_node_get_on_enter\f[](\fIcmark_node *node\f[]) - -.PP -Returns the literal "on enter" text for a custom \f[I]node\f[], or an -empty string if no on_enter is set. Returns NULL if called on a -non\-custom node. - -.PP -\fIint\f[] \fBcmark_node_set_on_enter\f[](\fIcmark_node *node\f[], \fIconst char *on_enter\f[]) - -.PP -Sets the literal text to render "on enter" for a custom \f[I]node\f[]. -Any children of the node will be rendered after this text. Returns 1 on -success 0 on failure. 
- -.PP -\fIconst char *\f[] \fBcmark_node_get_on_exit\f[](\fIcmark_node *node\f[]) - -.PP -Returns the literal "on exit" text for a custom \f[I]node\f[], or an -empty string if no on_exit is set. Returns NULL if called on a -non\-custom node. - -.PP -\fIint\f[] \fBcmark_node_set_on_exit\f[](\fIcmark_node *node\f[], \fIconst char *on_exit\f[]) - -.PP -Sets the literal text to render "on exit" for a custom \f[I]node\f[]. -Any children of the node will be rendered before this text. Returns 1 on -success 0 on failure. - -.PP -\fIint\f[] \fBcmark_node_get_start_line\f[](\fIcmark_node *node\f[]) - -.PP -Returns the line on which \f[I]node\f[] begins. - -.PP -\fIint\f[] \fBcmark_node_get_start_column\f[](\fIcmark_node *node\f[]) - -.PP -Returns the column at which \f[I]node\f[] begins. - -.PP -\fIint\f[] \fBcmark_node_get_end_line\f[](\fIcmark_node *node\f[]) - -.PP -Returns the line on which \f[I]node\f[] ends. - -.PP -\fIint\f[] \fBcmark_node_get_end_column\f[](\fIcmark_node *node\f[]) - -.PP -Returns the column at which \f[I]node\f[] ends. - -.SS -Tree Manipulation - -.PP -\fIvoid\f[] \fBcmark_node_unlink\f[](\fIcmark_node *node\f[]) - -.PP -Unlinks a \f[I]node\f[], removing it from the tree, but not freeing its -memory. (Use \f[I]cmark_node_free\f[] for that.) - -.PP -\fIint\f[] \fBcmark_node_insert_before\f[](\fIcmark_node *node\f[], \fIcmark_node *sibling\f[]) - -.PP -Inserts \f[I]sibling\f[] before \f[I]node\f[]. Returns 1 on success, 0 -on failure. - -.PP -\fIint\f[] \fBcmark_node_insert_after\f[](\fIcmark_node *node\f[], \fIcmark_node *sibling\f[]) - -.PP -Inserts \f[I]sibling\f[] after \f[I]node\f[]. Returns 1 on success, 0 on -failure. - -.PP -\fIint\f[] \fBcmark_node_replace\f[](\fIcmark_node *oldnode\f[], \fIcmark_node *newnode\f[]) - -.PP -Replaces \f[I]oldnode\f[] with \f[I]newnode\f[] and unlinks -\f[I]oldnode\f[] (but does not free its memory). Returns 1 on success, 0 -on failure. 
- -.PP -\fIint\f[] \fBcmark_node_prepend_child\f[](\fIcmark_node *node\f[], \fIcmark_node *child\f[]) - -.PP -Adds \f[I]child\f[] to the beginning of the children of \f[I]node\f[]. -Returns 1 on success, 0 on failure. - -.PP -\fIint\f[] \fBcmark_node_append_child\f[](\fIcmark_node *node\f[], \fIcmark_node *child\f[]) - -.PP -Adds \f[I]child\f[] to the end of the children of \f[I]node\f[]. -Returns 1 on success, 0 on failure. - -.PP -\fIvoid\f[] \fBcmark_consolidate_text_nodes\f[](\fIcmark_node *root\f[]) - -.PP -Consolidates adjacent text nodes. - -.PP -\fIvoid\f[] \fBcmark_node_own\f[](\fIcmark_node *root\f[]) - -.PP -Ensures a node and all its children own their own chunk memory. - -.SS -Parsing -.PP -Simple interface: -.IP -.nf -\f[C] -cmark_node *document = cmark_parse_document("Hello *world*", 13, - CMARK_OPT_DEFAULT); -\f[] -.fi -.PP -Streaming interface: -.IP -.nf -\f[C] -cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); -FILE *fp = fopen("myfile.md", "rb"); -while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { - cmark_parser_feed(parser, buffer, bytes); - if (bytes < sizeof(buffer)) { - break; - } -} -document = cmark_parser_finish(parser); -cmark_parser_free(parser); -\f[] -.fi - -.PP -\fIcmark_parser *\f[] \fBcmark_parser_new\f[](\fIint options\f[]) - -.PP -Creates a new parser object. - -.PP -\fIcmark_parser *\f[] \fBcmark_parser_new_with_mem\f[](\fIint options\f[], \fIcmark_mem *mem\f[]) - -.PP -Creates a new parser object with the given memory allocator - -.PP -\fIvoid\f[] \fBcmark_parser_free\f[](\fIcmark_parser *parser\f[]) - -.PP -Frees memory allocated for a parser object. - -.PP -\fIvoid\f[] \fBcmark_parser_feed\f[](\fIcmark_parser *parser\f[], \fIconst char *buffer\f[], \fIsize_t len\f[]) - -.PP -Feeds a string of length \f[I]len\f[] to \f[I]parser\f[]. - -.PP -\fIcmark_node *\f[] \fBcmark_parser_finish\f[](\fIcmark_parser *parser\f[]) - -.PP -Finish parsing and return a pointer to a tree of nodes. 
- -.PP -\fIcmark_node *\f[] \fBcmark_parse_document\f[](\fIconst char *buffer\f[], \fIsize_t len\f[], \fIint options\f[]) - -.PP -Parse a CommonMark document in \f[I]buffer\f[] of length \f[I]len\f[]. -Returns a pointer to a tree of nodes. The memory allocated for the node -tree should be released using \f[I]cmark_node_free\f[] when it is no -longer needed. - -.PP -\fIcmark_node *\f[] \fBcmark_parse_file\f[](\fIFILE *f\f[], \fIint options\f[]) - -.PP -Parse a CommonMark document in file \f[I]f\f[], returning a pointer to a -tree of nodes. The memory allocated for the node tree should be released -using \f[I]cmark_node_free\f[] when it is no longer needed. - -.SS -Rendering - -.PP -\fIchar *\f[] \fBcmark_render_xml\f[](\fIcmark_node *root\f[], \fIint options\f[]) - -.PP -Render a \f[I]node\f[] tree as XML. It is the caller's responsibility to -free the returned buffer. - -.PP -\fIchar *\f[] \fBcmark_render_xml_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[]) - -.PP -As for \f[I]cmark_render_xml\f[], but specifying the allocator to use -for the resulting string. - -.PP -\fIchar *\f[] \fBcmark_render_html\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_llist *extensions\f[]) - -.PP -Render a \f[I]node\f[] tree as an HTML fragment. It is up to the user to -add an appropriate header and footer. It is the caller's responsibility -to free the returned buffer. - -.PP -\fIchar *\f[] \fBcmark_render_html_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_llist *extensions\f[], \fIcmark_mem *mem\f[]) - -.PP -As for \f[I]cmark_render_html\f[], but specifying the allocator to use -for the resulting string. - -.PP -\fIchar *\f[] \fBcmark_render_man\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) - -.PP -Render a \f[I]node\f[] tree as a groff man page, without the header. It -is the caller's responsibility to free the returned buffer. 
- -.PP -\fIchar *\f[] \fBcmark_render_man_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) - -.PP -As for \f[I]cmark_render_man\f[], but specifying the allocator to use -for the resulting string. - -.PP -\fIchar *\f[] \fBcmark_render_commonmark\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) - -.PP -Render a \f[I]node\f[] tree as a commonmark document. It is the caller's -responsibility to free the returned buffer. - -.PP -\fIchar *\f[] \fBcmark_render_commonmark_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) - -.PP -As for \f[I]cmark_render_commonmark\f[], but specifying the allocator to -use for the resulting string. - -.PP -\fIchar *\f[] \fBcmark_render_latex\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) - -.PP -Render a \f[I]node\f[] tree as a LaTeX document. It is the caller's -responsibility to free the returned buffer. - -.PP -\fIchar *\f[] \fBcmark_render_latex_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) - -.PP -As for \f[I]cmark_render_latex\f[], but specifying the allocator to use -for the resulting string. - -.SS -Options - -.PP -.nf -\fC -.RS 0n -#define CMARK_OPT_DEFAULT 0 -.RE -\f[] -.fi - -.PP -Default options. - -.SS -Options affecting rendering - -.PP -.nf -\fC -.RS 0n -#define CMARK_OPT_SOURCEPOS (1 << 1) -.RE -\f[] -.fi - -.PP -Include a \f[C]data\-sourcepos\f[] attribute on all block elements. - -.PP -.nf -\fC -.RS 0n -#define CMARK_OPT_HARDBREAKS (1 << 2) -.RE -\f[] -.fi - -.PP -Render \f[C]softbreak\f[] elements as hard line breaks. - -.PP -.nf -\fC -.RS 0n -#define CMARK_OPT_SAFE (1 << 3) -.RE -\f[] -.fi - -.PP -Suppress raw HTML and unsafe links (\f[C]javascript:\f[], -\f[C]vbscript:\f[], \f[C]file:\f[], and \f[C]data:\f[], except for -\f[C]image/png\f[], \f[C]image/gif\f[], \f[C]image/jpeg\f[], or -\f[C]image/webp\f[] mime types). 
Raw HTML is replaced by a placeholder -HTML comment. Unsafe links are replaced by empty strings. - -.PP -.nf -\fC -.RS 0n -#define CMARK_OPT_NOBREAKS (1 << 4) -.RE -\f[] -.fi - -.PP -Render \f[C]softbreak\f[] elements as spaces. - -.SS -Options affecting parsing - -.PP -.nf -\fC -.RS 0n -#define CMARK_OPT_NORMALIZE (1 << 8) -.RE -\f[] -.fi - -.PP -Legacy option (no effect). - -.PP -.nf -\fC -.RS 0n -#define CMARK_OPT_VALIDATE_UTF8 (1 << 9) -.RE -\f[] -.fi - -.PP -Validate UTF\-8 in the input before parsing, replacing illegal sequences -with the replacement character U+FFFD. - -.PP -.nf -\fC -.RS 0n -#define CMARK_OPT_SMART (1 << 10) -.RE -\f[] -.fi - -.PP -Convert straight quotes to curly, \-\-\- to em dashes, \-\- to en -dashes. - -.SS -Version information - -.PP -\fIint\f[] \fBcmark_version\f[](\fIvoid\f[]) - -.PP -The library version as integer for runtime checks. Also available as -macro CMARK_VERSION for compile time checks. -.IP \[bu] 2 -Bits 16\-23 contain the major version. -.IP \[bu] 2 -Bits 8\-15 contain the minor version. -.IP \[bu] 2 -Bits 0\-7 contain the patchlevel. -.PP -In hexadecimal format, the number 0x010203 represents version 1.2.3. - -.PP -\fIconst char *\f[] \fBcmark_version_string\f[](\fIvoid\f[]) - -.PP -The library version string for runtime checks. Also available as macro -CMARK_VERSION_STRING for compile time checks. - -.SH -AUTHORS -.PP -John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer. 
- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5103d0e13..7e425d5b9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,8 +2,8 @@ if(${CMAKE_VERSION} VERSION_GREATER "3.3") cmake_policy(SET CMP0063 NEW) endif() -set(LIBRARY "libcmark") -set(STATICLIBRARY "libcmark_static") +set(LIBRARY "libcmark-gfm") +set(STATICLIBRARY "libcmark-gfm_static") set(HEADERS cmark.h cmark_extension_api.h @@ -52,7 +52,7 @@ set(LIBRARY_SOURCES ${HEADERS} ) -set(PROGRAM "cmark") +set(PROGRAM "cmark-gfm") set(PROGRAM_SOURCES main.c ) @@ -70,10 +70,10 @@ include (GenerateExportHeader) add_executable(${PROGRAM} ${PROGRAM_SOURCES}) add_compiler_export_flags() -target_link_libraries(${PROGRAM} libcmark_static) +target_link_libraries(${PROGRAM} libcmark-gfm_static) -add_dependencies(${PROGRAM} libcmarkextensions_static) -target_link_libraries(${PROGRAM} libcmarkextensions_static) +add_dependencies(${PROGRAM} libcmark-gfmextensions_static) +target_link_libraries(${PROGRAM} libcmark-gfmextensions_static) # Disable the PUBLIC declarations when compiling the executable: set_target_properties(${PROGRAM} PROPERTIES @@ -99,7 +99,7 @@ if (CMARK_SHARED) add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) # Include minor version and patch level in soname for now. set_target_properties(${LIBRARY} PROPERTIES - OUTPUT_NAME "cmark" + OUTPUT_NAME "cmark-gfm" SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} VERSION ${PROJECT_VERSION}) @@ -107,7 +107,7 @@ if (CMARK_SHARED) APPEND PROPERTY MACOSX_RPATH true) # Avoid name clash between PROGRAM and LIBRARY pdb files. 
- set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark_dll) + set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark-gfm_dll) generate_export_header(${LIBRARY} BASE_NAME ${PROJECT_NAME}) @@ -123,15 +123,15 @@ if (CMARK_STATIC) if (MSVC) set_target_properties(${STATICLIBRARY} PROPERTIES - OUTPUT_NAME "cmark_static" + OUTPUT_NAME "cmark-gfm_static" VERSION ${PROJECT_VERSION}) else() set_target_properties(${STATICLIBRARY} PROPERTIES - OUTPUT_NAME "cmark" + OUTPUT_NAME "cmark-gfm" VERSION ${PROJECT_VERSION}) endif(MSVC) -target_link_libraries(cmark ${CMAKE_DL_LIBS}) +target_link_libraries(cmark-gfm ${CMAKE_DL_LIBS}) list(APPEND CMARK_INSTALL ${STATICLIBRARY}) endif() @@ -139,7 +139,7 @@ set_property(TARGET ${LIBRARY} APPEND PROPERTY MACOSX_RPATH true) # Avoid name clash between PROGRAM and LIBRARY pdb files. -set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark_dll) +set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark-gfm_dll) generate_export_header(${LIBRARY} BASE_NAME ${PROJECT_NAME}) @@ -160,9 +160,9 @@ install(TARGETS ${PROGRAM} ${CMARK_INSTALL} ) if(CMARK_SHARED OR CMARK_STATIC) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcmark.pc.in - ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc @ONLY) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcmark-gfm.pc.in + ${CMAKE_CURRENT_BINARY_DIR}/libcmark-gfm.pc @ONLY) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcmark-gfm.pc DESTINATION lib${LIB_SUFFIX}/pkgconfig) install(FILES diff --git a/src/libcmark.pc.in b/src/libcmark-gfm.pc.in similarity index 58% rename from src/libcmark.pc.in rename to src/libcmark-gfm.pc.in index 024ae4832..46c9177c5 100644 --- a/src/libcmark.pc.in +++ b/src/libcmark-gfm.pc.in @@ -3,8 +3,8 @@ exec_prefix=@CMAKE_INSTALL_PREFIX@ libdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ includedir=@CMAKE_INSTALL_PREFIX@/include -Name: libcmark -Description: CommonMark parsing, rendering, and manipulation +Name: libcmark-gfm 
+Description: CommonMark parsing, rendering, and manipulation with GitHub Flavored Markdown extensions Version: @PROJECT_VERSION@ -Libs: -L${libdir} -lcmark +Libs: -L${libdir} -lcmark-gfm Cflags: -I${includedir} diff --git a/src/main.c b/src/main.c index db073668c..17c1fb62a 100644 --- a/src/main.c +++ b/src/main.c @@ -28,7 +28,7 @@ typedef enum { } writer_format; void print_usage() { - printf("Usage: cmark [FILE*]\n"); + printf("Usage: cmark-gfm [FILE*]\n"); printf("Options:\n"); printf(" --to, -t FORMAT Specify output format (html, xml, man, " "commonmark, latex)\n"); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1b18b0c2d..133d5cdc3 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -55,22 +55,22 @@ IF (PYTHONINTERP_FOUND) endif() add_test(spectest_executable - ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark" + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" ) add_test(smartpuncttest_executable - ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark --smart" + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --smart" ) add_test(extensions_executable - ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark" "--extensions" "table strikethrough autolink tagfilter" + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" 
"${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" "--extensions" "table strikethrough autolink tagfilter" ) add_test(roundtrip_extensions_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" - "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark" + "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" "--extensions" "table strikethrough autolink tagfilter" ) @@ -78,7 +78,7 @@ IF (PYTHONINTERP_FOUND) ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/regression.txt" "--program" - "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark" + "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" ) diff --git a/test/cmark.py b/test/cmark.py index 0cdb881fe..78c839db1 100644 --- a/test/cmark.py +++ b/test/cmark.py @@ -94,13 +94,13 @@ def __init__(self, prog=None, library_dir=None, extensions=None): if not library_dir: library_dir = os.path.join("build", "src") for prefix, suffix in libnames: - candidate = os.path.join(library_dir, prefix + "cmark" + suffix) + candidate = os.path.join(library_dir, prefix + "cmark-gfm" + suffix) if os.path.isfile(candidate): libpath = candidate break cmark = CDLL(libpath) extlib = CDLL(os.path.join( - library_dir, "..", "extensions", prefix + "cmarkextensions" + suffix)) + library_dir, "..", "extensions", prefix + "cmark-gfmextensions" + suffix)) self.to_html = lambda x, exts=[]: to_html(cmark, extlib, x, exts + self.extensions) self.to_commonmark = lambda x, exts=[]: to_commonmark(cmark, extlib, x, exts + self.extensions) From eaf3b8fdbad4bfcb541b076c0c38ca5650441eae Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Tue, 28 Mar 2017 12:37:31 +1100 Subject: [PATCH 038/218] README updates --- README.md | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 
1c9dd69db..9c643a68a 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,19 @@ -cmark -===== +cmark-gfm +========= -[![Build Status]](https://travis-ci.org/jgm/cmark) -[![Windows Build Status]](https://ci.appveyor.com/project/jgm/cmark) +[![Build Status]](https://travis-ci.org/github/cmark) +[![Windows Build Status]](https://ci.appveyor.com/project/github/cmark) -`cmark` is the C reference implementation of [CommonMark], a -rationalized version of Markdown syntax with a [spec][the spec]. -(For the JavaScript reference implementation, see -[commonmark.js].) +`cmark-gfm` is an extended version of the C reference implementation of +[CommonMark], a rationalized version of Markdown syntax with a spec. This +repository adds GitHub Flavored Markdown extensions to +[the upstream implementation], as defined in [the spec]. + +The rest of the README is preserved as-is from the upstream source. Note that +the library and binaries produced by this fork are suffixed with `-gfm` in +order to distinguish them from the upstream. + +--- It provides a shared library (`libcmark`) with functions for parsing CommonMark documents to an abstract syntax tree (AST), manipulating @@ -187,12 +193,13 @@ Nick Wellnhofer contributed many improvements, including most of the C library's API and its test harness. 
[benchmarks]: benchmarks.md -[the spec]: http://spec.commonmark.org +[the spec]: https://github.github.com/gfm/ +[the upstream implementation]: https://github.com/jgm/cmark [CommonMark]: http://commonmark.org [cmake]: http://www.cmake.org/download/ [re2c]: http://re2c.org [commonmark.js]: https://github.com/jgm/commonmark.js -[Build Status]: https://img.shields.io/travis/jgm/cmark/master.svg?style=flat -[Windows Build Status]: https://ci.appveyor.com/api/projects/status/32r7s2skrgm9ubva?svg=true +[Build Status]: https://img.shields.io/travis/github/cmark/master.svg?style=flat +[Windows Build Status]: https://ci.appveyor.com/api/projects/status/wv7ifhqhv5itm3d5?svg=true [american fuzzy lop]: http://lcamtuf.coredump.cx/afl/ [libFuzzer]: http://llvm.org/docs/LibFuzzer.html From 4c4df8086b405a691c5c6999313d373709bebb25 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Mon, 3 Apr 2017 12:34:43 +1000 Subject: [PATCH 039/218] Correct manpages. Fixes #20. --- man/CMakeLists.txt | 4 +- man/make_man_page.py | 2 +- man/man1/{cmark.1 => cmark-gfm.1} | 14 +- man/man3/cmark-gfm.3 | 949 ++++++++++++++++++++++++++++++ src/cmark.h | 2 +- src/main.c | 20 +- tools/Dockerfile | 2 + 7 files changed, 975 insertions(+), 18 deletions(-) rename man/man1/{cmark.1 => cmark-gfm.1} (86%) create mode 100644 man/man3/cmark-gfm.3 diff --git a/man/CMakeLists.txt b/man/CMakeLists.txt index e0acd7544..d0c5b6066 100644 --- a/man/CMakeLists.txt +++ b/man/CMakeLists.txt @@ -2,9 +2,9 @@ if (NOT MSVC) include(GNUInstallDirs) - install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man1/cmark.1 + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man1/cmark-gfm.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) - install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man3/cmark.3 + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man3/cmark-gfm.3 DESTINATION ${CMAKE_INSTALL_MANDIR}/man3) endif(NOT MSVC) diff --git a/man/make_man_page.py b/man/make_man_page.py index b0d5882c1..a37818be1 100644 --- a/man/make_man_page.py +++ 
b/man/make_man_page.py @@ -129,5 +129,5 @@ def handle_quotes(s): chunk = [] mdlines.append('\n') -sys.stdout.write('.TH ' + os.path.basename(sourcefile).replace('.h','') + ' 3 "' + date.today().strftime('%B %d, %Y') + '" "LOCAL" "Library Functions Manual"\n') +sys.stdout.write('.TH cmark-gfm 3 "' + date.today().strftime('%B %d, %Y') + '" "LOCAL" "Library Functions Manual"\n') sys.stdout.write(''.join(mdlines)) diff --git a/man/man1/cmark.1 b/man/man1/cmark-gfm.1 similarity index 86% rename from man/man1/cmark.1 rename to man/man1/cmark-gfm.1 index cfde3d7e9..5c68d79c3 100644 --- a/man/man1/cmark.1 +++ b/man/man1/cmark-gfm.1 @@ -1,14 +1,14 @@ -.TH "cmark" "1" "March 24, 2016" "LOCAL" "General Commands Manual" +.TH "cmark-gfm" "1" "March 24, 2016" "LOCAL" "General Commands Manual" .SH "NAME" \fBcmark\fR -\- convert CommonMark formatted text to HTML +\- convert CommonMark formatted text with GitHub Flavored Markdown extensions to HTML .SH "SYNOPSIS" .HP 6n -\fBcmark\fR +\fBcmark-gfm\fR [options] file* .SH "DESCRIPTION" -\fBcmark\fR +\fBcmark-gfm\fR converts Markdown formatted plain text to either HTML, groff man, CommonMark XML, LaTeX, or CommonMark, using the conventions described in the CommonMark spec. It reads input from \fIstdin\fR @@ -42,6 +42,12 @@ Include source position attribute. .B \-\-normalize Consolidate adjacent text nodes. .TP 12n +.B \-\-extension, \-e \f[I]EXTENSION_NAME\f[] +Specify an extension name to use. +.TP 12n +.B \-\-list\-extensions +List available extensions and quit. +.TP 12n .B \-\-validate-utf8 Validate UTF-8, replacing illegal sequences with U+FFFD. 
.TP 12n diff --git a/man/man3/cmark-gfm.3 b/man/man3/cmark-gfm.3 new file mode 100644 index 000000000..b223fc9d4 --- /dev/null +++ b/man/man3/cmark-gfm.3 @@ -0,0 +1,949 @@ +.TH cmark-gfm 3 "June 27, 2017" "LOCAL" "Library Functions Manual" +.SH +NAME +.PP +\f[B]cmark\-gfm\f[] \- CommonMark parsing, manipulating, and rendering + +.SH +DESCRIPTION +.SS +Simple Interface + +.PP +\fIchar *\f[] \fBcmark_markdown_to_html\f[](\fIconst char *text\f[], \fIsize_t len\f[], \fIint options\f[]) + +.PP +Convert \f[I]text\f[] (assumed to be a UTF\-8 encoded string with length +\f[I]len\f[]) from CommonMark Markdown to HTML, returning a +null\-terminated, UTF\-8\-encoded string. It is the caller's +responsibility to free the returned buffer. + +.SS +Node Structure + +.PP +.nf +\fC +.RS 0n +typedef enum { + /* Error status */ + CMARK_NODE_NONE = 0x0000, + + /* Block */ + CMARK_NODE_DOCUMENT = CMARK_NODE_TYPE_BLOCK | 0x0001, + CMARK_NODE_BLOCK_QUOTE = CMARK_NODE_TYPE_BLOCK | 0x0002, + CMARK_NODE_LIST = CMARK_NODE_TYPE_BLOCK | 0x0003, + CMARK_NODE_ITEM = CMARK_NODE_TYPE_BLOCK | 0x0004, + CMARK_NODE_CODE_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0005, + CMARK_NODE_HTML_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0006, + CMARK_NODE_CUSTOM_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0007, + CMARK_NODE_PARAGRAPH = CMARK_NODE_TYPE_BLOCK | 0x0008, + CMARK_NODE_HEADING = CMARK_NODE_TYPE_BLOCK | 0x0009, + CMARK_NODE_THEMATIC_BREAK = CMARK_NODE_TYPE_BLOCK | 0x000a, + + /* Inline */ + CMARK_NODE_TEXT = CMARK_NODE_TYPE_INLINE | 0x0001, + CMARK_NODE_SOFTBREAK = CMARK_NODE_TYPE_INLINE | 0x0002, + CMARK_NODE_LINEBREAK = CMARK_NODE_TYPE_INLINE | 0x0003, + CMARK_NODE_CODE = CMARK_NODE_TYPE_INLINE | 0x0004, + CMARK_NODE_HTML_INLINE = CMARK_NODE_TYPE_INLINE | 0x0005, + CMARK_NODE_CUSTOM_INLINE = CMARK_NODE_TYPE_INLINE | 0x0006, + CMARK_NODE_EMPH = CMARK_NODE_TYPE_INLINE | 0x0007, + CMARK_NODE_STRONG = CMARK_NODE_TYPE_INLINE | 0x0008, + CMARK_NODE_LINK = CMARK_NODE_TYPE_INLINE | 0x0009, + CMARK_NODE_IMAGE = CMARK_NODE_TYPE_INLINE 
| 0x000a, +} cmark_node_type; +.RE +\f[] +.fi + + + +.PP +.nf +\fC +.RS 0n +typedef enum { + CMARK_NO_LIST, + CMARK_BULLET_LIST, + CMARK_ORDERED_LIST +} cmark_list_type; +.RE +\f[] +.fi + + + +.PP +.nf +\fC +.RS 0n +typedef enum { + CMARK_NO_DELIM, + CMARK_PERIOD_DELIM, + CMARK_PAREN_DELIM +} cmark_delim_type; +.RE +\f[] +.fi + + + +.SS +Custom memory allocator support + +.PP +.nf +\fC +.RS 0n +typedef struct cmark_mem { + void *(*calloc)(size_t, size_t); + void *(*realloc)(void *, size_t); + void (*free)(void *); +} cmark_mem; +.RE +\f[] +.fi + +.PP +Defines the memory allocation functions to be used by CMark when parsing +and allocating a document tree + +.PP +\fIcmark_mem *\f[] \fBcmark_get_default_mem_allocator\f[](\fI\f[]) + +.PP +The default memory allocator; uses the system's calloc, realloc and +free. + +.PP +\fIcmark_mem *\f[] \fBcmark_get_arena_mem_allocator\f[](\fI\f[]) + +.PP +An arena allocator; uses system calloc to allocate large slabs of +memory. Memory in these slabs is not reused at all. + +.PP +\fIvoid\f[] \fBcmark_arena_reset\f[](\fIvoid\f[]) + +.PP +Resets the arena allocator, quickly returning all used memory to the +operating system. + +.PP +\fItypedef\f[] \fBvoid\f[](\fI*cmark_free_func\f[]) + +.PP +Callback for freeing user data with a \f[I]cmark_mem\f[] context. + +.SS +Linked list + +.PP +.nf +\fC +.RS 0n +typedef struct _cmark_llist +{ + struct _cmark_llist *next; + void *data; +} cmark_llist; +.RE +\f[] +.fi + +.PP +A generic singly linked list. + +.PP +\fIcmark_llist *\f[] \fBcmark_llist_append\f[](\fIcmark_mem * mem\f[], \fIcmark_llist * head\f[], \fIvoid * data\f[]) + +.PP +Append an element to the linked list, return the possibly modified head +of the list. 
+ +.PP +\fIvoid\f[] \fBcmark_llist_free_full\f[](\fIcmark_mem * mem\f[], \fIcmark_llist * head\f[], \fIcmark_free_func free_func\f[]) + +.PP +Free the list starting with \f[I]head\f[], calling \f[I]free_func\f[] +with the data pointer of each of its elements + +.PP +\fIvoid\f[] \fBcmark_llist_free\f[](\fIcmark_mem * mem\f[], \fIcmark_llist * head\f[]) + +.PP +Free the list starting with \f[I]head\f[] + +.SS +Creating and Destroying Nodes + +.PP +\fIcmark_node *\f[] \fBcmark_node_new\f[](\fIcmark_node_type type\f[]) + +.PP +Creates a new node of type \f[I]type\f[]. Note that the node may have +other required properties, which it is the caller's responsibility to +assign. + +.PP +\fIcmark_node *\f[] \fBcmark_node_new_with_mem\f[](\fIcmark_node_type type\f[], \fIcmark_mem *mem\f[]) + +.PP +Same as \f[C]cmark_node_new\f[], but explicitly listing the memory +allocator used to allocate the node. Note: be sure to use the same +allocator for every node in a tree, or bad things can happen. + +.PP +\fIvoid\f[] \fBcmark_node_free\f[](\fIcmark_node *node\f[]) + +.PP +Frees the memory allocated for a node and any children. + +.SS +Tree Traversal + +.PP +\fIcmark_node *\f[] \fBcmark_node_next\f[](\fIcmark_node *node\f[]) + +.PP +Returns the next node in the sequence after \f[I]node\f[], or NULL if +there is none. + +.PP +\fIcmark_node *\f[] \fBcmark_node_previous\f[](\fIcmark_node *node\f[]) + +.PP +Returns the node immediately before \f[I]node\f[] in the sequence, or NULL +if there is none. + +.PP +\fIcmark_node *\f[] \fBcmark_node_parent\f[](\fIcmark_node *node\f[]) + +.PP +Returns the parent of \f[I]node\f[], or NULL if there is none. + +.PP +\fIcmark_node *\f[] \fBcmark_node_first_child\f[](\fIcmark_node *node\f[]) + +.PP +Returns the first child of \f[I]node\f[], or NULL if \f[I]node\f[] has +no children. + +.PP +\fIcmark_node *\f[] \fBcmark_node_last_child\f[](\fIcmark_node *node\f[]) + +.PP +Returns the last child of \f[I]node\f[], or NULL if \f[I]node\f[] has no +children.
+ +.SS +Iterator +.PP +An iterator will walk through a tree of nodes, starting from a root +node, returning one node at a time, together with information about +whether the node is being entered or exited. The iterator will first +descend to a child node, if there is one. When there is no child, the +iterator will go to the next sibling. When there is no next sibling, the +iterator will return to the parent (but with a \f[I]cmark_event_type\f[] +of \f[C]CMARK_EVENT_EXIT\f[]). The iterator will return +\f[C]CMARK_EVENT_DONE\f[] when it reaches the root node again. One +natural application is an HTML renderer, where an \f[C]ENTER\f[] event +outputs an open tag and an \f[C]EXIT\f[] event outputs a close tag. An +iterator might also be used to transform an AST in some systematic way, +for example, turning all level\-3 headings into regular paragraphs. +.IP +.nf +\f[C] +void +usage_example(cmark_node *root) { + cmark_event_type ev_type; + cmark_iter *iter = cmark_iter_new(root); + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cmark_node *cur = cmark_iter_get_node(iter); + // Do something with `cur` and `ev_type` + } + + cmark_iter_free(iter); +} +\f[] +.fi +.PP +Iterators will never return \f[C]EXIT\f[] events for leaf nodes, which +are nodes of type: +.IP \[bu] 2 +CMARK_NODE_HTML_BLOCK +.IP \[bu] 2 +CMARK_NODE_THEMATIC_BREAK +.IP \[bu] 2 +CMARK_NODE_CODE_BLOCK +.IP \[bu] 2 +CMARK_NODE_TEXT +.IP \[bu] 2 +CMARK_NODE_SOFTBREAK +.IP \[bu] 2 +CMARK_NODE_LINEBREAK +.IP \[bu] 2 +CMARK_NODE_CODE +.IP \[bu] 2 +CMARK_NODE_HTML_INLINE +.PP +Nodes must only be modified after an \f[C]EXIT\f[] event, or an +\f[C]ENTER\f[] event for leaf nodes. + +.PP +.nf +\fC +.RS 0n +typedef enum { + CMARK_EVENT_NONE, + CMARK_EVENT_DONE, + CMARK_EVENT_ENTER, + CMARK_EVENT_EXIT +} cmark_event_type; +.RE +\f[] +.fi + + + +.PP +\fIcmark_iter *\f[] \fBcmark_iter_new\f[](\fIcmark_node *root\f[]) + +.PP +Creates a new iterator starting at \f[I]root\f[]. 
The current node and +event type are undefined until \f[I]cmark_iter_next\f[] is called for +the first time. The memory allocated for the iterator should be released +using \f[I]cmark_iter_free\f[] when it is no longer needed. + +.PP +\fIvoid\f[] \fBcmark_iter_free\f[](\fIcmark_iter *iter\f[]) + +.PP +Frees the memory allocated for an iterator. + +.PP +\fIcmark_event_type\f[] \fBcmark_iter_next\f[](\fIcmark_iter *iter\f[]) + +.PP +Advances to the next node and returns the event type +(\f[C]CMARK_EVENT_ENTER\f[], \f[C]CMARK_EVENT_EXIT\f[] or +\f[C]CMARK_EVENT_DONE\f[]). + +.PP +\fIcmark_node *\f[] \fBcmark_iter_get_node\f[](\fIcmark_iter *iter\f[]) + +.PP +Returns the current node. + +.PP +\fIcmark_event_type\f[] \fBcmark_iter_get_event_type\f[](\fIcmark_iter *iter\f[]) + +.PP +Returns the current event type. + +.PP +\fIcmark_node *\f[] \fBcmark_iter_get_root\f[](\fIcmark_iter *iter\f[]) + +.PP +Returns the root node. + +.PP +\fIvoid\f[] \fBcmark_iter_reset\f[](\fIcmark_iter *iter\f[], \fIcmark_node *current\f[], \fIcmark_event_type event_type\f[]) + +.PP +Resets the iterator so that the current node is \f[I]current\f[] and the +event type is \f[I]event_type\f[]. The new current node must be a +descendant of the root node or the root node itself. + +.SS +Accessors + +.PP +\fIvoid *\f[] \fBcmark_node_get_user_data\f[](\fIcmark_node *node\f[]) + +.PP +Returns the user data of \f[I]node\f[]. + +.PP +\fIint\f[] \fBcmark_node_set_user_data\f[](\fIcmark_node *node\f[], \fIvoid *user_data\f[]) + +.PP +Sets arbitrary user data for \f[I]node\f[]. Returns 1 on success, 0 on +failure. + +.PP +\fIint\f[] \fBcmark_node_set_user_data_free_func\f[](\fIcmark_node *node\f[], \fIcmark_free_func free_func\f[]) + +.PP +Sets the free function for user data. + +.PP +\fIcmark_node_type\f[] \fBcmark_node_get_type\f[](\fIcmark_node *node\f[]) + +.PP +Returns the type of \f[I]node\f[], or \f[C]CMARK_NODE_NONE\f[] on error.
+ +.PP +\fIconst char *\f[] \fBcmark_node_get_type_string\f[](\fIcmark_node *node\f[]) + +.PP +Like \f[I]cmark_node_get_type\f[], but returns a string representation +of the type, or \f[C]"<unknown>"\f[]. + +.PP +\fIconst char *\f[] \fBcmark_node_get_literal\f[](\fIcmark_node *node\f[]) + +.PP +Returns the string contents of \f[I]node\f[], or an empty string if none +is set. Returns NULL if called on a node that does not have string +content. + +.PP +\fIint\f[] \fBcmark_node_set_literal\f[](\fIcmark_node *node\f[], \fIconst char *content\f[]) + +.PP +Sets the string contents of \f[I]node\f[]. Returns 1 on success, 0 on +failure. + +.PP +\fIint\f[] \fBcmark_node_get_heading_level\f[](\fIcmark_node *node\f[]) + +.PP +Returns the heading level of \f[I]node\f[], or 0 if \f[I]node\f[] is not +a heading. + +.PP +\fIint\f[] \fBcmark_node_set_heading_level\f[](\fIcmark_node *node\f[], \fIint level\f[]) + +.PP +Sets the heading level of \f[I]node\f[], returning 1 on success and 0 on +error. + +.PP +\fIcmark_list_type\f[] \fBcmark_node_get_list_type\f[](\fIcmark_node *node\f[]) + +.PP +Returns the list type of \f[I]node\f[], or \f[C]CMARK_NO_LIST\f[] if +\f[I]node\f[] is not a list. + +.PP +\fIint\f[] \fBcmark_node_set_list_type\f[](\fIcmark_node *node\f[], \fIcmark_list_type type\f[]) + +.PP +Sets the list type of \f[I]node\f[], returning 1 on success and 0 on +error. + +.PP +\fIcmark_delim_type\f[] \fBcmark_node_get_list_delim\f[](\fIcmark_node *node\f[]) + +.PP +Returns the list delimiter type of \f[I]node\f[], or +\f[C]CMARK_NO_DELIM\f[] if \f[I]node\f[] is not a list. + +.PP +\fIint\f[] \fBcmark_node_set_list_delim\f[](\fIcmark_node *node\f[], \fIcmark_delim_type delim\f[]) + +.PP +Sets the list delimiter type of \f[I]node\f[], returning 1 on success +and 0 on error. + +.PP +\fIint\f[] \fBcmark_node_get_list_start\f[](\fIcmark_node *node\f[]) + +.PP +Returns starting number of \f[I]node\f[], if it is an ordered list, +otherwise 0. 
+ +.PP +\fIint\f[] \fBcmark_node_set_list_start\f[](\fIcmark_node *node\f[], \fIint start\f[]) + +.PP +Sets starting number of \f[I]node\f[], if it is an ordered list. +Returns 1 on success, 0 on failure. + +.PP +\fIint\f[] \fBcmark_node_get_list_tight\f[](\fIcmark_node *node\f[]) + +.PP +Returns 1 if \f[I]node\f[] is a tight list, 0 otherwise. + +.PP +\fIint\f[] \fBcmark_node_set_list_tight\f[](\fIcmark_node *node\f[], \fIint tight\f[]) + +.PP +Sets the "tightness" of a list. Returns 1 on success, 0 on failure. + +.PP +\fIconst char *\f[] \fBcmark_node_get_fence_info\f[](\fIcmark_node *node\f[]) + +.PP +Returns the info string from a fenced code block. + +.PP +\fIint\f[] \fBcmark_node_set_fence_info\f[](\fIcmark_node *node\f[], \fIconst char *info\f[]) + +.PP +Sets the info string in a fenced code block, returning 1 on success +and 0 on failure. + +.PP +\fIint\f[] \fBcmark_node_set_fenced\f[](\fIcmark_node * node\f[], \fIint fenced\f[], \fIint length\f[], \fIint offset\f[], \fIchar character\f[]) + +.PP +Sets code blocks fencing details + +.PP +\fIint\f[] \fBcmark_node_get_fenced\f[](\fIcmark_node *node\f[], \fIint *length\f[], \fIint *offset\f[], \fIchar *character\f[]) + +.PP +Returns code blocks fencing details + +.PP +\fIconst char *\f[] \fBcmark_node_get_url\f[](\fIcmark_node *node\f[]) + +.PP +Returns the URL of a link or image \f[I]node\f[], or an empty string if +no URL is set. Returns NULL if called on a node that is not a link or +image. + +.PP +\fIint\f[] \fBcmark_node_set_url\f[](\fIcmark_node *node\f[], \fIconst char *url\f[]) + +.PP +Sets the URL of a link or image \f[I]node\f[]. Returns 1 on success, 0 +on failure. + +.PP +\fIconst char *\f[] \fBcmark_node_get_title\f[](\fIcmark_node *node\f[]) + +.PP +Returns the title of a link or image \f[I]node\f[], or an empty string +if no title is set. Returns NULL if called on a node that is not a link +or image. 
+ +.PP +\fIint\f[] \fBcmark_node_set_title\f[](\fIcmark_node *node\f[], \fIconst char *title\f[]) + +.PP +Sets the title of a link or image \f[I]node\f[]. Returns 1 on success, 0 +on failure. + +.PP +\fIconst char *\f[] \fBcmark_node_get_on_enter\f[](\fIcmark_node *node\f[]) + +.PP +Returns the literal "on enter" text for a custom \f[I]node\f[], or an +empty string if no on_enter is set. Returns NULL if called on a +non\-custom node. + +.PP +\fIint\f[] \fBcmark_node_set_on_enter\f[](\fIcmark_node *node\f[], \fIconst char *on_enter\f[]) + +.PP +Sets the literal text to render "on enter" for a custom \f[I]node\f[]. +Any children of the node will be rendered after this text. Returns 1 on +success 0 on failure. + +.PP +\fIconst char *\f[] \fBcmark_node_get_on_exit\f[](\fIcmark_node *node\f[]) + +.PP +Returns the literal "on exit" text for a custom \f[I]node\f[], or an +empty string if no on_exit is set. Returns NULL if called on a +non\-custom node. + +.PP +\fIint\f[] \fBcmark_node_set_on_exit\f[](\fIcmark_node *node\f[], \fIconst char *on_exit\f[]) + +.PP +Sets the literal text to render "on exit" for a custom \f[I]node\f[]. +Any children of the node will be rendered before this text. Returns 1 on +success 0 on failure. + +.PP +\fIint\f[] \fBcmark_node_get_start_line\f[](\fIcmark_node *node\f[]) + +.PP +Returns the line on which \f[I]node\f[] begins. + +.PP +\fIint\f[] \fBcmark_node_get_start_column\f[](\fIcmark_node *node\f[]) + +.PP +Returns the column at which \f[I]node\f[] begins. + +.PP +\fIint\f[] \fBcmark_node_get_end_line\f[](\fIcmark_node *node\f[]) + +.PP +Returns the line on which \f[I]node\f[] ends. + +.PP +\fIint\f[] \fBcmark_node_get_end_column\f[](\fIcmark_node *node\f[]) + +.PP +Returns the column at which \f[I]node\f[] ends. + +.SS +Tree Manipulation + +.PP +\fIvoid\f[] \fBcmark_node_unlink\f[](\fIcmark_node *node\f[]) + +.PP +Unlinks a \f[I]node\f[], removing it from the tree, but not freeing its +memory. (Use \f[I]cmark_node_free\f[] for that.) 
+ +.PP +\fIint\f[] \fBcmark_node_insert_before\f[](\fIcmark_node *node\f[], \fIcmark_node *sibling\f[]) + +.PP +Inserts \f[I]sibling\f[] before \f[I]node\f[]. Returns 1 on success, 0 +on failure. + +.PP +\fIint\f[] \fBcmark_node_insert_after\f[](\fIcmark_node *node\f[], \fIcmark_node *sibling\f[]) + +.PP +Inserts \f[I]sibling\f[] after \f[I]node\f[]. Returns 1 on success, 0 on +failure. + +.PP +\fIint\f[] \fBcmark_node_replace\f[](\fIcmark_node *oldnode\f[], \fIcmark_node *newnode\f[]) + +.PP +Replaces \f[I]oldnode\f[] with \f[I]newnode\f[] and unlinks +\f[I]oldnode\f[] (but does not free its memory). Returns 1 on success, 0 +on failure. + +.PP +\fIint\f[] \fBcmark_node_prepend_child\f[](\fIcmark_node *node\f[], \fIcmark_node *child\f[]) + +.PP +Adds \f[I]child\f[] to the beginning of the children of \f[I]node\f[]. +Returns 1 on success, 0 on failure. + +.PP +\fIint\f[] \fBcmark_node_append_child\f[](\fIcmark_node *node\f[], \fIcmark_node *child\f[]) + +.PP +Adds \f[I]child\f[] to the end of the children of \f[I]node\f[]. +Returns 1 on success, 0 on failure. + +.PP +\fIvoid\f[] \fBcmark_consolidate_text_nodes\f[](\fIcmark_node *root\f[]) + +.PP +Consolidates adjacent text nodes. + +.PP +\fIvoid\f[] \fBcmark_node_own\f[](\fIcmark_node *root\f[]) + +.PP +Ensures a node and all its children own their own chunk memory. + +.SS +Parsing +.PP +Simple interface: +.IP +.nf +\f[C] +cmark_node *document = cmark_parse_document("Hello *world*", 13, + CMARK_OPT_DEFAULT); +\f[] +.fi +.PP +Streaming interface: +.IP +.nf +\f[C] +cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); +FILE *fp = fopen("myfile.md", "rb"); +while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { + cmark_parser_feed(parser, buffer, bytes); + if (bytes < sizeof(buffer)) { + break; + } +} +document = cmark_parser_finish(parser); +cmark_parser_free(parser); +\f[] +.fi + +.PP +\fIcmark_parser *\f[] \fBcmark_parser_new\f[](\fIint options\f[]) + +.PP +Creates a new parser object. 
+ +.PP +\fIcmark_parser *\f[] \fBcmark_parser_new_with_mem\f[](\fIint options\f[], \fIcmark_mem *mem\f[]) + +.PP +Creates a new parser object with the given memory allocator + +.PP +\fIvoid\f[] \fBcmark_parser_free\f[](\fIcmark_parser *parser\f[]) + +.PP +Frees memory allocated for a parser object. + +.PP +\fIvoid\f[] \fBcmark_parser_feed\f[](\fIcmark_parser *parser\f[], \fIconst char *buffer\f[], \fIsize_t len\f[]) + +.PP +Feeds a string of length \f[I]len\f[] to \f[I]parser\f[]. + +.PP +\fIcmark_node *\f[] \fBcmark_parser_finish\f[](\fIcmark_parser *parser\f[]) + +.PP +Finish parsing and return a pointer to a tree of nodes. + +.PP +\fIcmark_node *\f[] \fBcmark_parse_document\f[](\fIconst char *buffer\f[], \fIsize_t len\f[], \fIint options\f[]) + +.PP +Parse a CommonMark document in \f[I]buffer\f[] of length \f[I]len\f[]. +Returns a pointer to a tree of nodes. The memory allocated for the node +tree should be released using \f[I]cmark_node_free\f[] when it is no +longer needed. + +.PP +\fIcmark_node *\f[] \fBcmark_parse_file\f[](\fIFILE *f\f[], \fIint options\f[]) + +.PP +Parse a CommonMark document in file \f[I]f\f[], returning a pointer to a +tree of nodes. The memory allocated for the node tree should be released +using \f[I]cmark_node_free\f[] when it is no longer needed. + +.SS +Rendering + +.PP +\fIchar *\f[] \fBcmark_render_xml\f[](\fIcmark_node *root\f[], \fIint options\f[]) + +.PP +Render a \f[I]node\f[] tree as XML. It is the caller's responsibility to +free the returned buffer. + +.PP +\fIchar *\f[] \fBcmark_render_xml_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_xml\f[], but specifying the allocator to use +for the resulting string. + +.PP +\fIchar *\f[] \fBcmark_render_html\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_llist *extensions\f[]) + +.PP +Render a \f[I]node\f[] tree as an HTML fragment. It is up to the user to +add an appropriate header and footer. 
It is the caller's responsibility +to free the returned buffer. + +.PP +\fIchar *\f[] \fBcmark_render_html_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_llist *extensions\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_html\f[], but specifying the allocator to use +for the resulting string. + +.PP +\fIchar *\f[] \fBcmark_render_man\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) + +.PP +Render a \f[I]node\f[] tree as a groff man page, without the header. It +is the caller's responsibility to free the returned buffer. + +.PP +\fIchar *\f[] \fBcmark_render_man_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_man\f[], but specifying the allocator to use +for the resulting string. + +.PP +\fIchar *\f[] \fBcmark_render_commonmark\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) + +.PP +Render a \f[I]node\f[] tree as a commonmark document. It is the caller's +responsibility to free the returned buffer. + +.PP +\fIchar *\f[] \fBcmark_render_commonmark_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_commonmark\f[], but specifying the allocator to +use for the resulting string. + +.PP +\fIchar *\f[] \fBcmark_render_latex\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) + +.PP +Render a \f[I]node\f[] tree as a LaTeX document. It is the caller's +responsibility to free the returned buffer. + +.PP +\fIchar *\f[] \fBcmark_render_latex_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_latex\f[], but specifying the allocator to use +for the resulting string. + +.SS +Options + +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_DEFAULT 0 +.RE +\f[] +.fi + +.PP +Default options. 
+ +.SS +Options affecting rendering + +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_SOURCEPOS (1 << 1) +.RE +\f[] +.fi + +.PP +Include a \f[C]data\-sourcepos\f[] attribute on all block elements. + +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_HARDBREAKS (1 << 2) +.RE +\f[] +.fi + +.PP +Render \f[C]softbreak\f[] elements as hard line breaks. + +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_SAFE (1 << 3) +.RE +\f[] +.fi + +.PP +Suppress raw HTML and unsafe links (\f[C]javascript:\f[], +\f[C]vbscript:\f[], \f[C]file:\f[], and \f[C]data:\f[], except for +\f[C]image/png\f[], \f[C]image/gif\f[], \f[C]image/jpeg\f[], or +\f[C]image/webp\f[] mime types). Raw HTML is replaced by a placeholder +HTML comment. Unsafe links are replaced by empty strings. + +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_NOBREAKS (1 << 4) +.RE +\f[] +.fi + +.PP +Render \f[C]softbreak\f[] elements as spaces. + +.SS +Options affecting parsing + +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_NORMALIZE (1 << 8) +.RE +\f[] +.fi + +.PP +Legacy option (no effect). + +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_VALIDATE_UTF8 (1 << 9) +.RE +\f[] +.fi + +.PP +Validate UTF\-8 in the input before parsing, replacing illegal sequences +with the replacement character U+FFFD. + +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_SMART (1 << 10) +.RE +\f[] +.fi + +.PP +Convert straight quotes to curly, \-\-\- to em dashes, \-\- to en +dashes. + +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_GITHUB_PRE_LANG (1 << 11) +.RE +\f[] +.fi + +.PP +Use GitHub\-style \f[C]<pre lang="x">\f[] tags for code blocks instead of \f[C]<pre><code class="language\-x">\f[]. + +.SS +Version information + +.PP +\fIint\f[] \fBcmark_version\f[](\fIvoid\f[]) + +.PP +The library version as integer for runtime checks. Also available as +macro CMARK_VERSION for compile time checks. +.IP \[bu] 2 +Bits 16\-23 contain the major version. +.IP \[bu] 2 +Bits 8\-15 contain the minor version. +.IP \[bu] 2 +Bits 0\-7 contain the patchlevel. +.PP +In hexadecimal format, the number 0x010203 represents version 1.2.3.
+ +.PP +\fIconst char *\f[] \fBcmark_version_string\f[](\fIvoid\f[]) + +.PP +The library version string for runtime checks. Also available as macro +CMARK_VERSION_STRING for compile time checks. + +.SH +AUTHORS +.PP +John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer. + diff --git a/src/cmark.h b/src/cmark.h index 57e5ab9fe..ccc27a824 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -11,7 +11,7 @@ extern "C" { /** # NAME * - * **cmark** - CommonMark parsing, manipulating, and rendering + * **cmark-gfm** - CommonMark parsing, manipulating, and rendering */ /** # DESCRIPTION diff --git a/src/main.c b/src/main.c index 17c1fb62a..176ab65fd 100644 --- a/src/main.c +++ b/src/main.c @@ -30,19 +30,19 @@ typedef enum { void print_usage() { printf("Usage: cmark-gfm [FILE*]\n"); printf("Options:\n"); - printf(" --to, -t FORMAT Specify output format (html, xml, man, " + printf(" --to, -t FORMAT Specify output format (html, xml, man, " "commonmark, latex)\n"); - printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n"); - printf(" --sourcepos Include source position attribute\n"); - printf(" --hardbreaks Treat newlines as hard line breaks\n"); - printf(" --nobreaks Render soft line breaks as spaces\n"); - printf(" --safe Suppress raw HTML and dangerous URLs\n"); - printf(" --smart Use smart punctuation\n"); + printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n"); + printf(" --sourcepos Include source position attribute\n"); + printf(" --hardbreaks Treat newlines as hard line breaks\n"); + printf(" --nobreaks Render soft line breaks as spaces\n"); + printf(" --safe Suppress raw HTML and dangerous URLs\n"); + printf(" --smart Use smart punctuation\n"); printf(" --github-pre-lang Use GitHub-style <pre lang> for code blocks\n"); - printf(" -e, --extension EXTENSION_NAME Specify an extension name to use\n"); + printf(" --extension, -e EXTENSION_NAME Specify an extension name to use\n"); printf(" --list-extensions List available extensions and quit\n"); 
- printf(" --help, -h Print usage information\n"); - printf(" --version Print version\n"); + printf(" --help, -h Print usage information\n"); + printf(" --version Print version\n"); } static bool print_document(cmark_node *document, writer_format writer, diff --git a/tools/Dockerfile b/tools/Dockerfile index 4be63dec4..7effe6ef8 100644 --- a/tools/Dockerfile +++ b/tools/Dockerfile @@ -22,3 +22,5 @@ RUN wget http://lcamtuf.coredump.cx/afl/releases/afl-latest.tgz && \ make install && \ cd .. && \ rm -rf afl-* + +RUN apt-get install -y man From 21bbc4cdb2f2ff1555fa90b6b24f0275901d0053 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Tue, 4 Apr 2017 14:24:24 +1000 Subject: [PATCH 040/218] Simple tables (#21) * Remove unused fields * Redo tests for simpler tables * Simplify the design * Add AFL_OPTIONS to `make afl` --- Makefile | 7 +- extensions/ext_scanners.c | 565 ++++++++++++++++++++++++++++++++++++- extensions/ext_scanners.h | 6 + extensions/ext_scanners.re | 32 +++ extensions/table.c | 302 ++++++-------------- src/cmark_extension_api.h | 2 - src/commonmark.c | 3 +- src/render.c | 15 + test/extensions.txt | 27 +- test/spec.txt | 8 +- tools/Dockerfile | 10 + 11 files changed, 745 insertions(+), 232 deletions(-) diff --git a/Makefile b/Makefile index b57b80b7a..d1dbdcd5d 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ VERSION?=$(SPECVERSION) RELEASE?=CommonMark-$(VERSION) INSTALL_PREFIX?=/usr/local CLANG_CHECK?=clang-check -CLANG_FORMAT=clang-format -style llvm -sort-includes=0 -i +CLANG_FORMAT=clang-format-3.5 -style llvm -sort-includes=0 -i AFL_PATH?=/usr/local/bin .PHONY: all cmake_build leakcheck clean fuzztest test debug ubsan asan mingw archive newbench bench format update-spec afl clang-check docker libFuzzer @@ -81,6 +81,7 @@ afl: -i test/afl_test_cases \ -o test/afl_results \ -x test/fuzzing_dictionary \ + $(AFL_OPTIONS) \ -t 100 \ $(CMARK) -e table -e strikethrough -e autolink -e tagfilter $(CMARK_OPTS) @@ -139,7 +140,7 @@ 
$(EXTDIR)/ext_scanners.c: $(EXTDIR)/ext_scanners.re esac re2c --case-insensitive -b -i --no-generation-date -8 \ --encoding-policy substitute -o $@ $< - clang-format -style llvm -i $@ + clang-format-3.5 -style llvm -i $@ # We include entities.inc in the repository, so normally this # doesn't need to be regenerated: @@ -210,7 +211,7 @@ format: $(CLANG_FORMAT) src/*.c src/*.h api_test/*.c api_test/*.h format-extensions: - clang-format -style llvm -i extensions/*.c extensions/*.h + clang-format-3.5 -style llvm -i extensions/*.c extensions/*.h operf: $(CMARK) operf $< < $(BENCHFILE) > /dev/null diff --git a/extensions/ext_scanners.c b/extensions/ext_scanners.c index 708f868e8..7e0f5f23b 100644 --- a/extensions/ext_scanners.c +++ b/extensions/ext_scanners.c @@ -1,6 +1,6 @@ -/* Generated by re2c 0.14.3 */ -#include "ext_scanners.h" +/* Generated by re2c 0.15.3 */ #include <stdlib.h> +#include "ext_scanners.h" bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, int len, bufsize_t offset) { @@ -39,7 +39,6 @@ bufsize_t _scan_table_start(const unsigned char *p) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - yych = *(marker = p); if (yych <= '{') { if (yych <= 0x1F) { @@ -380,3 +379,563 @@ bufsize_t _scan_table_start(const unsigned char *p) { goto yy8; } } + +bufsize_t _scan_table_cell(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 128, 128, 0, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 
128, 128, 128, 128, 128, 128, 128, 128, 64, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 128, + 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *(marker = p); + if (yych <= 0x7F) { + if (yych <= '\r') { + if (yych == '\n') + goto yy34; + if (yych <= '\f') + goto yy36; + goto yy45; + } else { + if (yych <= '\\') { + if (yych <= '[') + goto yy36; + goto yy35; + } else { + if (yych == '|') + goto yy45; + goto yy36; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) { + if (yych >= 0xC2) + goto yy37; + } else { + if (yych <= 0xE0) + goto yy39; + if (yych <= 0xEC) + goto yy40; + goto yy44; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy40; + goto yy41; + } else { + if (yych <= 0xF3) + goto yy42; + if (yych <= 0xF4) + goto yy43; + } + } + } + yy34 : { return (bufsize_t)(p - start); } + yy35: + yych = *(marker = ++p); + if (yych == '|') + goto yy49; + goto yy50; + yy36: + yych = *(marker = ++p); + goto yy50; + yy37: + yych = *++p; + if (yych <= 0x7F) + goto yy38; + if (yych <= 0xBF) + goto yy36; + yy38: + p = marker; + goto yy34; + yy39: + yych = *++p; + if (yych <= 0x9F) + goto yy38; + if (yych <= 0xBF) + goto yy37; + goto yy38; + yy40: + yych = *++p; + if (yych <= 0x7F) + goto yy38; + if (yych <= 0xBF) + goto yy37; + goto yy38; + yy41: + yych = *++p; + if (yych <= 0x8F) + goto yy38; + if (yych <= 0xBF) + goto yy40; + goto yy38; + yy42: + yych = *++p; + if (yych <= 0x7F) + goto yy38; + if (yych <= 0xBF) + goto yy40; + goto yy38; + yy43: + yych = *++p; + if 
(yych <= 0x7F) + goto yy38; + if (yych <= 0x8F) + goto yy40; + goto yy38; + yy44: + yych = *++p; + if (yych <= 0x7F) + goto yy38; + if (yych <= 0x9F) + goto yy37; + goto yy38; + yy45: + ++p; + { return 0; } + yy47: + marker = ++p; + yych = *p; + if (yybm[0 + yych] & 64) { + goto yy47; + } + if (yych <= 0xDF) { + if (yych <= '\f') { + if (yych == '\n') + goto yy34; + } else { + if (yych <= '\r') + goto yy34; + if (yych <= 0x7F) + goto yy49; + if (yych <= 0xC1) + goto yy34; + goto yy51; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy52; + if (yych == 0xED) + goto yy57; + goto yy53; + } else { + if (yych <= 0xF0) + goto yy54; + if (yych <= 0xF3) + goto yy55; + if (yych <= 0xF4) + goto yy56; + goto yy34; + } + } + yy49: + marker = ++p; + yych = *p; + yy50: + if (yybm[0 + yych] & 128) { + goto yy49; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\r') + goto yy34; + if (yych <= '\\') + goto yy47; + goto yy34; + } else { + if (yych <= 0xDF) + goto yy51; + if (yych <= 0xE0) + goto yy52; + goto yy53; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy57; + if (yych <= 0xEF) + goto yy53; + goto yy54; + } else { + if (yych <= 0xF3) + goto yy55; + if (yych <= 0xF4) + goto yy56; + goto yy34; + } + } + yy51: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy38; + if (yych <= 0xBF) + goto yy49; + goto yy38; + yy52: + ++p; + yych = *p; + if (yych <= 0x9F) + goto yy38; + if (yych <= 0xBF) + goto yy51; + goto yy38; + yy53: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy38; + if (yych <= 0xBF) + goto yy51; + goto yy38; + yy54: + ++p; + yych = *p; + if (yych <= 0x8F) + goto yy38; + if (yych <= 0xBF) + goto yy53; + goto yy38; + yy55: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy38; + if (yych <= 0xBF) + goto yy53; + goto yy38; + yy56: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy38; + if (yych <= 0x8F) + goto yy53; + goto yy38; + yy57: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy38; + if (yych <= 0x9F) + goto yy51; + goto 
yy38; + } +} + +bufsize_t _scan_table_cell_end(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 128, 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *(marker = p); + if (yych <= 0xDF) { + if (yych <= '{') { + if (yych != '\n') + goto yy63; + } else { + if (yych <= '|') + goto yy61; + if (yych <= 0x7F) + goto yy63; + if (yych >= 0xC2) + goto yy64; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy66; + if (yych == 0xED) + goto yy71; + goto yy67; + } else { + if (yych <= 0xF0) + goto yy68; + if (yych <= 0xF3) + goto yy69; + if (yych <= 0xF4) + goto yy70; + } + } + yy60 : { return 0; } + yy61: + yyaccept = 1; + yych = *(marker = ++p); + goto yy73; + yy62 : { return (bufsize_t)(p - start); } + yy63: + yych = *++p; + goto yy60; + yy64: + yych = *++p; + if (yych <= 0x7F) + goto yy65; + if (yych <= 0xBF) + goto yy63; + yy65: + p = marker; + if (yyaccept == 0) { + goto yy60; + } else { + goto yy62; + } + yy66: + yych = *++p; + if (yych <= 0x9F) + goto yy65; + if (yych <= 0xBF) + goto yy64; + goto yy65; + yy67: + yych = *++p; + if (yych <= 0x7F) + goto yy65; + if 
(yych <= 0xBF) + goto yy64; + goto yy65; + yy68: + yych = *++p; + if (yych <= 0x8F) + goto yy65; + if (yych <= 0xBF) + goto yy67; + goto yy65; + yy69: + yych = *++p; + if (yych <= 0x7F) + goto yy65; + if (yych <= 0xBF) + goto yy67; + goto yy65; + yy70: + yych = *++p; + if (yych <= 0x7F) + goto yy65; + if (yych <= 0x8F) + goto yy67; + goto yy65; + yy71: + yych = *++p; + if (yych <= 0x7F) + goto yy65; + if (yych <= 0x9F) + goto yy64; + goto yy65; + yy72: + yyaccept = 1; + marker = ++p; + yych = *p; + yy73: + if (yybm[0 + yych] & 128) { + goto yy72; + } + if (yych <= 0x08) + goto yy62; + if (yych <= '\n') + goto yy75; + if (yych >= 0x0E) + goto yy62; + yych = *++p; + if (yych != '\n') + goto yy65; + yy75: + ++p; + yych = *p; + goto yy62; + } +} + +bufsize_t _scan_table_row_end(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 128, 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *(marker = p); + if (yych <= 0xC1) { + if (yych <= '\f') { + if (yych <= 0x08) + goto yy83; + if (yych == '\n') + goto yy81; + goto yy79; + } else { + if (yych <= 0x1F) { + if (yych <= '\r') + goto yy80; + goto yy83; + } else { + if 
(yych <= ' ') + goto yy79; + if (yych <= 0x7F) + goto yy83; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy84; + if (yych <= 0xE0) + goto yy86; + if (yych <= 0xEC) + goto yy87; + goto yy91; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy87; + goto yy88; + } else { + if (yych <= 0xF3) + goto yy89; + if (yych <= 0xF4) + goto yy90; + } + } + } + yy78 : { return 0; } + yy79: + yych = *(marker = ++p); + if (yych <= 0x08) + goto yy78; + if (yych <= '\r') + goto yy94; + if (yych == ' ') + goto yy94; + goto yy78; + yy80: + yych = *++p; + if (yych != '\n') + goto yy78; + yy81: + ++p; + { return (bufsize_t)(p - start); } + yy83: + yych = *++p; + goto yy78; + yy84: + yych = *++p; + if (yych <= 0x7F) + goto yy85; + if (yych <= 0xBF) + goto yy83; + yy85: + p = marker; + goto yy78; + yy86: + yych = *++p; + if (yych <= 0x9F) + goto yy85; + if (yych <= 0xBF) + goto yy84; + goto yy85; + yy87: + yych = *++p; + if (yych <= 0x7F) + goto yy85; + if (yych <= 0xBF) + goto yy84; + goto yy85; + yy88: + yych = *++p; + if (yych <= 0x8F) + goto yy85; + if (yych <= 0xBF) + goto yy87; + goto yy85; + yy89: + yych = *++p; + if (yych <= 0x7F) + goto yy85; + if (yych <= 0xBF) + goto yy87; + goto yy85; + yy90: + yych = *++p; + if (yych <= 0x7F) + goto yy85; + if (yych <= 0x8F) + goto yy87; + goto yy85; + yy91: + yych = *++p; + if (yych <= 0x7F) + goto yy85; + if (yych <= 0x9F) + goto yy84; + goto yy85; + yy92: + yych = *++p; + if (yych == '\n') + goto yy81; + goto yy85; + yy93: + ++p; + yych = *p; + yy94: + if (yybm[0 + yych] & 128) { + goto yy93; + } + if (yych <= 0x08) + goto yy85; + if (yych <= '\n') + goto yy81; + if (yych <= '\r') + goto yy92; + goto yy85; + } +} diff --git a/extensions/ext_scanners.h b/extensions/ext_scanners.h index da36a9a32..53584d891 100644 --- a/extensions/ext_scanners.h +++ b/extensions/ext_scanners.h @@ -8,8 +8,14 @@ extern "C" { bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, int len, 
bufsize_t offset); bufsize_t _scan_table_start(const unsigned char *p); +bufsize_t _scan_table_cell(const unsigned char *p); +bufsize_t _scan_table_cell_end(const unsigned char *p); +bufsize_t _scan_table_row_end(const unsigned char *p); #define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n) +#define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n) +#define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n) +#define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n) #ifdef __cplusplus } diff --git a/extensions/ext_scanners.re b/extensions/ext_scanners.re index 793f0f09e..b7a649f63 100644 --- a/extensions/ext_scanners.re +++ b/extensions/ext_scanners.re @@ -27,8 +27,10 @@ bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned cha spacechar = [ \t\v\f]; newline = [\r]?[\n]; + escaped_char = [\\][|!"#$%&'()*+,./:;<=>?@[\\\]^_`{}~-]; table_marker = (spacechar*[:]?[-]+[:]?spacechar*); + table_cell = (escaped_char|[^|\r\n])*; */ bufsize_t _scan_table_start(const unsigned char *p) @@ -40,3 +42,33 @@ bufsize_t _scan_table_start(const unsigned char *p) .? { return 0; } */ } + +bufsize_t _scan_table_cell(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + table_cell { return (bufsize_t)(p - start); } + .? { return 0; } +*/ +} + +bufsize_t _scan_table_cell_end(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + [|] spacechar* newline? { return (bufsize_t)(p - start); } + .? { return 0; } +*/ +} + +bufsize_t _scan_table_row_end(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + spacechar* newline { return (bufsize_t)(p - start); } + .? 
{ return 0; } +*/ +} diff --git a/extensions/table.c b/extensions/table.c index 5c19a1484..8f06a11fb 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -24,12 +24,11 @@ typedef struct { typedef struct { bool is_header; - unsigned char *raw_content; - size_t raw_content_len; } node_table_row; static void free_table_cell(cmark_mem *mem, void *data) { - cmark_node_free((cmark_node *)data); + cmark_strbuf_free((cmark_strbuf *)data); + mem->free(data); } static void free_table_row(cmark_mem *mem, table_row *row) { @@ -48,9 +47,7 @@ static void free_node_table(cmark_mem *mem, void *ptr) { } static void free_node_table_row(cmark_mem *mem, void *ptr) { - node_table_row *ntr = (node_table_row *)ptr; - mem->free(ntr->raw_content); - mem->free(ntr); + mem->free(ptr); } static int get_n_table_columns(cmark_node *node) { @@ -83,159 +80,64 @@ static int set_table_alignments(cmark_node *node, uint8_t *alignments) { return 1; } -static void maybe_consume_pipe(cmark_node **n, int *offset) { - if (*n && (*n)->type == CMARK_NODE_TEXT && *offset < (*n)->as.literal.len && - (*n)->as.literal.data[*offset] == '|') - ++*offset; -} - -static int find_unescaped_pipe(const cmark_chunk *chunk, int offset) { - bool escaping = false; - for (; offset < chunk->len; ++offset) { - if (escaping) - escaping = false; - else if (chunk->data[offset] == '\\') - escaping = true; - else if (chunk->data[offset] == '|') - return offset; - } - return -1; -} - -static cmark_node *consume_until_pipe_or_eol(cmark_syntax_extension *self, - cmark_parser *parser, - cmark_node **n, int *offset) { - cmark_node *result = - cmark_node_new_with_mem(CMARK_NODE_TABLE_CELL, parser->mem); - cmark_node_set_syntax_extension(result, self); - bool was_escape = false; - - while (*n) { - cmark_node *node = *n; - - if (node->type == CMARK_NODE_TEXT) { - cmark_node *child = cmark_parser_add_child( - parser, result, CMARK_NODE_TEXT, cmark_parser_get_offset(parser)); - - if (was_escape) { - child->as.literal = 
cmark_chunk_dup(&node->as.literal, *offset, 1); - cmark_node_own(child); - if (child->as.literal.data[0] == '|') - cmark_node_free(child->prev); - ++*offset; - if (*offset >= node->as.literal.len) { - *offset = 0; - *n = node->next; - } - was_escape = false; - continue; - } - - const char *lit = (char *)node->as.literal.data + *offset; - const int lit_len = node->as.literal.len - *offset; - - if (lit_len == 1 && lit[0] == '\\' && - node->next && - node->next->type == CMARK_NODE_TEXT) { - child->as.literal = cmark_chunk_dup(&node->as.literal, *offset, 1); - cmark_node_own(child); - was_escape = true; - *n = node->next; - continue; - } - - int pipe = find_unescaped_pipe(&node->as.literal, *offset); - if (pipe == -1) { - child->as.literal = cmark_chunk_dup(&node->as.literal, *offset, - node->as.literal.len - *offset); - cmark_node_own(child); - } else { - pipe -= *offset; - - child->as.literal = cmark_chunk_dup(&node->as.literal, *offset, pipe); - cmark_node_own(child); - - *offset += pipe + 1; - if (*offset >= node->as.literal.len) { - *offset = 0; - *n = node->next; - } - break; - } - - *n = node->next; - *offset = 0; - } else { - cmark_node *next = node->next; - cmark_node_append_child(result, node); - cmark_node_own(node); - *n = next; - *offset = 0; - } - } +static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsize_t len) +{ + cmark_strbuf *res = (cmark_strbuf *)mem->calloc(1, sizeof(cmark_strbuf)); + bufsize_t r, w; - if (!result->first_child) { - cmark_node_free(result); - return NULL; - } + cmark_strbuf_init(mem, res, len + 1); + cmark_strbuf_put(res, string, len); + cmark_strbuf_putc(res, '\0'); - cmark_consolidate_text_nodes(result); + for (r = 0, w = 0; r < len; ++r) { + if (res->ptr[r] == '\\' && res->ptr[r + 1] == '|') + r++; - if (result->first_child->type == CMARK_NODE_TEXT) { - cmark_chunk c = cmark_chunk_ltrim_new(parser->mem, &result->first_child->as.literal); - cmark_chunk_free(parser->mem, &result->first_child->as.literal); 
- result->first_child->as.literal = c; + res->ptr[w++] = res->ptr[r]; } - if (result->last_child->type == CMARK_NODE_TEXT) { - cmark_chunk c = cmark_chunk_rtrim_new(parser->mem, &result->last_child->as.literal); - cmark_chunk_free(parser->mem, &result->last_child->as.literal); - result->last_child->as.literal = c; - } + cmark_strbuf_truncate(res, w); - return result; -} - -static int table_ispunct(char c) { - return cmark_ispunct(c) && c != '|'; + return res; } static table_row *row_from_string(cmark_syntax_extension *self, cmark_parser *parser, unsigned char *string, int len) { table_row *row = NULL; - - cmark_node *temp_container = - cmark_node_new_with_mem(CMARK_NODE_PARAGRAPH, parser->mem); - cmark_strbuf_set(&temp_container->content, string, len); - - cmark_manage_extensions_special_characters(parser, true); - cmark_parser_set_backslash_ispunct_func(parser, table_ispunct); - cmark_parse_inlines(parser, temp_container, parser->refmap, parser->options); - cmark_parser_set_backslash_ispunct_func(parser, NULL); - cmark_manage_extensions_special_characters(parser, false); - - if (!temp_container->first_child) { - cmark_node_free(temp_container); - return NULL; - } + bufsize_t cell_matched, pipe_matched, offset = 0; row = (table_row *)parser->mem->calloc(1, sizeof(table_row)); row->n_columns = 0; row->cells = NULL; - cmark_node *node = temp_container->first_child; - int offset = 0; + if (len > 0 && string[0] == '|') + ++offset; - maybe_consume_pipe(&node, &offset); - cmark_node *child; - while ((child = consume_until_pipe_or_eol(self, parser, &node, &offset)) != - NULL) { - ++row->n_columns; - row->cells = cmark_llist_append(parser->mem, row->cells, child); - } + do { + cell_matched = scan_table_cell(string, len, offset); + pipe_matched = scan_table_cell_end(string, len, offset + cell_matched); + + if (cell_matched || pipe_matched) { + cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset, + cell_matched); + cmark_strbuf_trim(cell_buf); + 
row->n_columns += 1; + row->cells = cmark_llist_append(parser->mem, row->cells, cell_buf); + } - cmark_node_free(temp_container); + offset += cell_matched + pipe_matched; + + if (!pipe_matched) { + pipe_matched = scan_table_row_end(string, len, offset); + offset += pipe_matched; + } + } while ((cell_matched || pipe_matched) && offset < len); + + if (offset != len || !row->n_columns) { + free_table_row(parser->mem, row); + row = NULL; + } return row; } @@ -300,25 +202,15 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table)); + set_n_table_columns(parent_container, header_row->n_columns); uint8_t *alignments = (uint8_t *)parser->mem->calloc(header_row->n_columns, sizeof(uint8_t)); cmark_llist *it = marker_row->cells; for (i = 0; it; it = it->next, ++i) { - cmark_node *node = (cmark_node *)it->data; - assert(node->type == CMARK_NODE_TABLE_CELL); - - cmark_strbuf strbuf; - cmark_strbuf_init(parser->mem, &strbuf, 0); - assert(node->first_child->type == CMARK_NODE_TEXT); - assert(node->first_child == node->last_child); - cmark_strbuf_put(&strbuf, node->first_child->as.literal.data, node->first_child->as.literal.len); - cmark_strbuf_trim(&strbuf); - char const *text = cmark_strbuf_cstr(&strbuf); - - bool left = text[0] == ':', right = text[strbuf.size - 1] == ':'; - cmark_strbuf_free(&strbuf); + cmark_strbuf *node = (cmark_strbuf *)it->data; + bool left = node->ptr[0] == ':', right = node->ptr[node->size - 1] == ':'; if (left && right) alignments[i] = 'c'; @@ -336,9 +228,18 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row)); ntr->is_header = true; - ntr->raw_content_len = strlen(parent_string); - ntr->raw_content = (unsigned char *)malloc(ntr->raw_content_len); - memcpy(ntr->raw_content, parent_string, ntr->raw_content_len); + + { + cmark_llist *tmp; + + 
for (tmp = header_row->cells; tmp; tmp = tmp->next) { + cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data; + cmark_node *header_cell = cmark_parser_add_child(parser, table_header, + CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); + cmark_node_set_string_content(header_cell, (char *) cell_buf->ptr); + cmark_node_set_syntax_extension(header_cell, self); + } + } cmark_parser_advance_offset( parser, (char *)input, @@ -354,7 +255,7 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, cmark_node *parent_container, unsigned char *input, int len) { cmark_node *table_row_block; - node_table_row *ntr; + table_row *row; if (cmark_parser_is_blank(parser)) return NULL; @@ -364,11 +265,31 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, cmark_parser_get_offset(parser)); cmark_node_set_syntax_extension(table_row_block, self); - table_row_block->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row)); + table_row_block->as.opaque = parser->mem->calloc(1, sizeof(node_table_row)); + + row = row_from_string(self, parser, input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); - ntr->raw_content_len = len - cmark_parser_get_first_nonspace(parser); - ntr->raw_content = (unsigned char *)malloc(len); - memcpy(ntr->raw_content, input + cmark_parser_get_first_nonspace(parser), ntr->raw_content_len); + { + cmark_llist *tmp; + int i, table_columns = get_n_table_columns(parent_container); + + for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = tmp->next, ++i) { + cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data; + cmark_node *cell = cmark_parser_add_child(parser, table_row_block, + CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); + cmark_node_set_string_content(cell, (char *) cell_buf->ptr); + cmark_node_set_syntax_extension(cell, self); + } + + for (; i < table_columns; ++i) { + cmark_node *cell = cmark_parser_add_child( + parser, 
table_row_block, CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); + cmark_node_set_syntax_extension(cell, self); + } + } + + free_table_row(parser->mem, row); cmark_parser_advance_offset(parser, (char *)input, len - 1 - cmark_parser_get_offset(parser), false); @@ -695,59 +616,11 @@ static void opaque_free(cmark_syntax_extension *self, cmark_mem *mem, cmark_node } static int escape(cmark_syntax_extension *self, cmark_node *node, int c) { - return c == '|'; -} - -static cmark_node *postprocess(cmark_syntax_extension *self, cmark_parser *parser, cmark_node *root) { - cmark_iter *iter; - cmark_event_type ev; - cmark_node *node; - node_table_row *ntr; - table_row *row; - - iter = cmark_iter_new(root); - - while ((ev = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { - node = cmark_iter_get_node(iter); - if (ev == CMARK_EVENT_EXIT && node->type == CMARK_NODE_TABLE_ROW) { - ntr = (node_table_row *)node->as.opaque; - if (!ntr->raw_content) - continue; - row = row_from_string(self, parser, - ntr->raw_content, - (int)ntr->raw_content_len); - free(ntr->raw_content); - ntr->raw_content = NULL; - ntr->raw_content_len = 0; - - { - cmark_llist *tmp, *next; - int i; - int table_columns = get_n_table_columns(node->parent); - - for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = next, ++i) { - cmark_node *cell = (cmark_node *)tmp->data; - assert(cell->type == CMARK_NODE_TABLE_CELL); - cmark_node_append_child(node, cell); - row->cells = next = tmp->next; - parser->mem->free(tmp); - } - - for (; i < table_columns; ++i) { - cmark_node *cell = - cmark_parser_add_child(parser, node, CMARK_NODE_TABLE_CELL, - cmark_parser_get_offset(parser)); - cmark_node_set_syntax_extension(cell, self); - } - } - - free_table_row(parser->mem, row); - } - } - - cmark_iter_free(iter); - - return root; + return + node->type != CMARK_NODE_TABLE && + node->type != CMARK_NODE_TABLE_ROW && + node->type != CMARK_NODE_TABLE_CELL && + c == '|'; } cmark_syntax_extension *create_table_extension(void) 
{ @@ -764,7 +637,6 @@ cmark_syntax_extension *create_table_extension(void) { cmark_syntax_extension_set_html_render_func(self, html_render); cmark_syntax_extension_set_opaque_free_func(self, opaque_free); cmark_syntax_extension_set_commonmark_escape_func(self, escape); - cmark_syntax_extension_set_postprocess_func(self, postprocess); CMARK_NODE_TABLE = cmark_syntax_extension_add_node(0); CMARK_NODE_TABLE_ROW = cmark_syntax_extension_add_node(0); CMARK_NODE_TABLE_CELL = cmark_syntax_extension_add_node(0); diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index 39b4428eb..cd38ced8c 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -115,11 +115,9 @@ typedef struct delimiter { struct delimiter *next; cmark_node *inl_text; bufsize_t length; - int position; unsigned char delim_char; int can_open; int can_close; - int active; } delimiter; /** diff --git a/src/commonmark.c b/src/commonmark.c index 3ce35b987..132369ebb 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -34,8 +34,7 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, needs_escaping = c < 0x80 && escape != LITERAL && ((escape == NORMAL && - ((node->parent && node->parent->extension && node->parent->extension->commonmark_escape_func && node->parent->extension->commonmark_escape_func(node->extension, node->parent, c)) || - c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || + (c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || c == '>' || c == '\\' || c == '`' || c == '!' || (c == '&' && cmark_isalpha(nextc)) || (c == '!' 
&& nextc == '[') || (renderer->begin_content && (c == '-' || c == '+' || c == '=') && diff --git a/src/render.c b/src/render.c index 5582d3792..e731748a5 100644 --- a/src/render.c +++ b/src/render.c @@ -5,6 +5,7 @@ #include "utf8.h" #include "render.h" #include "node.h" +#include "syntax_extension.h" static CMARK_INLINE void S_cr(cmark_renderer *renderer) { if (renderer->need_cr < 1) { @@ -30,6 +31,16 @@ static void S_out(cmark_renderer *renderer, cmark_node *node, cmark_chunk remainder = cmark_chunk_literal(""); int k = renderer->buffer->size - 1; + cmark_syntax_extension *ext = NULL; + cmark_node *n = node; + while (n && !ext) { + ext = n->extension; + if (!ext) + n = n->parent; + } + if (ext && !ext->commonmark_escape_func) + ext = NULL; + wrap = wrap && !renderer->no_linebreaks; if (renderer->in_tight_list_item && renderer->need_cr > 1) { @@ -63,6 +74,10 @@ static void S_out(cmark_renderer *renderer, cmark_node *node, if (len == -1) { // error condition return; // return without rendering rest of string } + + if (ext && ext->commonmark_escape_func(ext, node, c)) + cmark_strbuf_putc(renderer->buffer, '\\'); + nextc = source[i + len]; if (c == 32 && wrap) { if (!renderer->begin_line) { diff --git a/test/extensions.txt b/test/extensions.txt index 10a51fe67..4340b618f 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -254,8 +254,8 @@ Tables with embedded pipes could be tricky. | a | b | | --- | --- | | Escaped pipes are \|okay\|. | Like \| this. | -| Within `|code| is okay` too. | -| _**`c|`**_ \| complex +| Within `\|code\| is okay` too. | +| _**`c\|`**_ \| complex | don't **\_reparse\_** . <table> @@ -344,7 +344,7 @@ This shouldn't assert. </tr> <tr> <td>|</td> -<td><code>\|</code></td> +<td><code>|</code></td> </tr> <tr> <td>\a</td> @@ -427,6 +427,27 @@ Here's a link to [Freedom Planet 2][]. 
</tr></tbody></table> ```````````````````````````````` +### Interaction with emphasis + +```````````````````````````````` example +| a | b | +| --- | --- | +|***(a)***| +. +<table> +<thead> +<tr> +<th>a</th> +<th>b</th> +</tr> +</thead> +<tbody> +<tr> +<td><em><strong>(a)</strong></em></td> +<td></td> +</tr></tbody></table> +```````````````````````````````` + ## Strikethroughs diff --git a/test/spec.txt b/test/spec.txt index 6a920d1f2..dec28affa 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -3231,8 +3231,8 @@ spans (such as emphasis, code, etc.) will not break a cell: ```````````````````````````````` example table | f\|oo | | ------ | -| b `|` az | -| b **|** im | +| b `\|` az | +| b **\|** im | . <table> <thead> @@ -3281,7 +3281,7 @@ block-level structure: | bar | baz | bar -bar +bim . <table> <thead> @@ -3299,7 +3299,7 @@ bar <td>bar</td> <td></td> </tr></tbody></table> -<p>bar</p> +<p>bim</p> ```````````````````````````````` The header row must match the [delimiter row] in the number of cells. If not, diff --git a/tools/Dockerfile b/tools/Dockerfile index 7effe6ef8..3fb7f8b34 100644 --- a/tools/Dockerfile +++ b/tools/Dockerfile @@ -24,3 +24,13 @@ RUN wget http://lcamtuf.coredump.cx/afl/releases/afl-latest.tgz && \ rm -rf afl-* RUN apt-get install -y man + +RUN wget https://github.com/skvadrik/re2c/releases/download/0.15.3/re2c-0.15.3.tar.gz && \ + tar xf re2c-0.15.3.tar.gz && \ + cd re2c-* && \ + ./configure && \ + make install && \ + cd .. 
&& \ + rm -rf re2c-* + +RUN apt-get install -y clang-format-3.5 From 36b5588536d60601a978d9d3f4806f45c1371f95 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Tue, 4 Apr 2017 14:25:17 +1000 Subject: [PATCH 041/218] Latest spec --- test/spec.txt | 99 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 84 insertions(+), 15 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index dec28affa..269b5853f 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -1,13 +1,25 @@ --- -title: CommonMark Spec -author: John MacFarlane +title: GitHub Flavored Markdown Spec version: 0.27 -date: '2016-11-18' +date: '2017-2-20' license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' ... # Introduction +## What is GitHub Flavored Markdown? + +GitHub Flavored Markdown, often shortened as GFM, is the dialect of Markdown +that is currently supported for user content on GitHub.com and GitHub +Enterprise. + +This formal specification, based on the CommonMark Spec, defines the syntax and +semantics of this dialect. + +GFM is a strict superset of CommonMark. All the features which are supported in +GitHub user content and that are not specified on the original CommonMark Spec +are hence known as **extensions**, and highlighted as such. + ## What is Markdown? Markdown is a plain text format for writing structured documents, @@ -3167,8 +3179,8 @@ aaa ## Tables (extension) -If the `table` extension is enabled, an additional leaf block type is -available +GFM enables the `table` extension, where an additional leaf block type is +available. A [table](@) is an arrangement of data with rows and columns, consisting of a single header row, a [delimiter row] separating the header from the data, and @@ -3225,8 +3237,8 @@ bar | baz </tr></tbody></table> ```````````````````````````````` -Include a pipe in a cell's content by escaping it. Pipes inside other inline -spans (such as emphasis, code, etc.) 
will not break a cell: +Include a pipe in a cell's content by escaping it, including inside other +inline spans: ```````````````````````````````` example table | f\|oo | @@ -3281,7 +3293,7 @@ block-level structure: | bar | baz | bar -bim +bar . <table> <thead> @@ -3299,7 +3311,7 @@ bim <td>bar</td> <td></td> </tr></tbody></table> -<p>bim</p> +<p>bar</p> ```````````````````````````````` The header row must match the [delimiter row] in the number of cells. If not, @@ -3316,8 +3328,8 @@ a table will not be recognized: ```````````````````````````````` The remainder of the table's rows may vary in the number of cells. If there -are a number of cells than the header, empty cells are inserted. If there are -greater, the excess is ignored: +are a number of cells fewer than the number of cells in the header row, empty +cells are inserted. If there are greater, the excess is ignored: ```````````````````````````````` example table | abc | def | @@ -4909,6 +4921,63 @@ that in such cases, we require one space indentation from the list marker four-space rule in cases where the list marker plus its initial indentation takes four spaces (a common case), but diverge in other cases. +<div class="extension"> + +## Task list items (extension) + +GFM enables the `tasklist` extension, where an additional processing step is +performed on [list items]. + +A [task list item](@) is a [list item][list items] where the first block in it +is a paragraph which begins with a [task list item marker] and at least one +whitespace character before any other content. + +A [task list item marker](@) consists of an optional number of spaces, a left +bracket (`[`), either a whitespace character or the letter `x` in either +lowercase or uppercase, and then a right bracket (`]`). + +When rendered, the [task list item marker] is replaced with a semantic checkbox element; +in an HTML output, this would be an `<input type="checkbox">` element. 
+ +If the character between the brackets is a whitespace character, the checkbox +is unchecked. Otherwise, the checkbox is checked. + +This spec does not define how the checkbox elements are interacted with: in practice, +implementors are free to render the checkboxes as disabled or immutable elements, +or they may handle dynamic interactions (i.e. checking, unchecking) in +the final rendered document. + +```````````````````````````````` example disabled +- [ ] foo +- [x] bar +. +<ul> +<li><input disabled="" type="checkbox"> foo</li> +<li><input checked="" disabled="" type="checkbox"> bar</li> +</ul> +```````````````````````````````` + +Task lists can be arbitrarily nested: + +```````````````````````````````` example disabled +- [x] foo + - [ ] bar + - [x] baz +- [ ] bim +. +<ul> +<li><input checked="" disabled="" type="checkbox"> foo +<ul> +<li><input disabled="" type="checkbox"> bar</li> +<li><input checked="" disabled="" type="checkbox"> baz</li> +</ul> +</li> +<li><input disabled="" type="checkbox"> bim</li> +</ul> +```````````````````````````````` + +</div> + ## Lists A [list](@) is a sequence of one or more @@ -7303,7 +7372,7 @@ __a<http://foo.bar/?q=__> ## Strikethrough (extension) -If the `strikethrough` extension is enabled, an additional emphasis type is +GFM enables the `strikethrough` extension, where an additional emphasis type is available. Strikethrough text is any text wrapped in tildes (`~`). @@ -8761,7 +8830,7 @@ foo@bar.example.com ## Autolinks (extension) -If the `autolink` extension is enabled, autolinks will be recognised in a +GFM enables the `autolink` extension, where autolinks will be recognised in a greater number of conditions. 
[Autolink]s can also be constructed without requiring the use of `<` and to `>` @@ -9190,9 +9259,9 @@ foo <a href="\*"> <div class="extension"> -## Raw HTML (extension) +## Disallowed Raw HTML (extension) -If the `tagfilter` extension is enabled, the following HTML tags will be +GFM enables the `tagfilter` extension, where the following HTML tags will be filtered when rendering HTML output: * `<title>` From 5dfedc70ffd3f493c8c3db2b9ad7c6659744e37c Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Tue, 4 Apr 2017 14:28:46 +1000 Subject: [PATCH 042/218] Skip disabled extensions --- test/spec_tests.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/test/spec_tests.py b/test/spec_tests.py index 8931ef7ad..194c51043 100755 --- a/test/spec_tests.py +++ b/test/spec_tests.py @@ -104,14 +104,15 @@ def get_tests(specfile): state = 0 example_number = example_number + 1 end_line = line_number - tests.append({ - "markdown":''.join(markdown_lines).replace('→',"\t"), - "html":''.join(html_lines).replace('→',"\t"), - "example": example_number, - "start_line": start_line, - "end_line": end_line, - "section": headertext, - "extensions": extensions}) + if 'disabled' not in extensions: + tests.append({ + "markdown":''.join(markdown_lines).replace('→',"\t"), + "html":''.join(html_lines).replace('→',"\t"), + "example": example_number, + "start_line": start_line, + "end_line": end_line, + "section": headertext, + "extensions": extensions}) start_line = 0 markdown_lines = [] html_lines = [] From f2fa4a1e29fe4db0582cf1dd1f5f6f2d048df48c Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Tue, 4 Apr 2017 23:58:19 +1000 Subject: [PATCH 043/218] roundtrip_tests reports results --- test/roundtrip_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/roundtrip_tests.py b/test/roundtrip_tests.py index 7436db9a2..4fe817fd6 100644 --- a/test/roundtrip_tests.py +++ b/test/roundtrip_tests.py @@ -46,4 +46,5 @@ def 
converter(md, exts): for test in tests: do_test(converter, test, args.normalize, result_counts) +sys.stdout.buffer.write("{pass} passed, {fail} failed, {error} errored, {skip} skipped\n".format(**result_counts).encode('utf-8')) exit(result_counts['fail'] + result_counts['error']) From 9dc52d807aa88d9f0cbe3fd82ada5f951398908b Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Mon, 1 May 2017 12:58:40 +1000 Subject: [PATCH 044/218] Add GFM version number --- CMakeLists.txt | 3 ++- changelog.txt | 7 +++++++ extensions/CMakeLists.txt | 2 +- src/CMakeLists.txt | 2 +- src/cmark_version.h.in | 4 ++-- 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 49eba20aa..ced3be9b2 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,8 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 27) set(PROJECT_VERSION_PATCH 1) -set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} ) +set(PROJECT_VERSION_GFM 0) +set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) option(CMARK_STATIC "Build static libcmark library" ON) diff --git a/changelog.txt b/changelog.txt index 883ef6c20..5c5265230 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,10 @@ +[0.27.1.gfm.0] + + * Add extensions: tagfilter, strikethrough, table, autolink. + * Add arena memory implementation. + * Add CMARK_OPT_GITHUB_PRE_LANG for fenced code blocks. + * Skip UTF-8 BOM on input. + [0.27.1] * Set policy for CMP0063 to avoid a warning (#162). 
diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index d14ad0e8b..e46be2c5b 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -39,7 +39,7 @@ if (CMARK_SHARED) set_target_properties(${LIBRARY} PROPERTIES OUTPUT_NAME "cmark-gfmextensions" - SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} + SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} VERSION ${PROJECT_VERSION}) set_property(TARGET ${LIBRARY} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7e425d5b9..5ab6cdb36 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -100,7 +100,7 @@ if (CMARK_SHARED) # Include minor version and patch level in soname for now. set_target_properties(${LIBRARY} PROPERTIES OUTPUT_NAME "cmark-gfm" - SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} + SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} VERSION ${PROJECT_VERSION}) set_property(TARGET ${LIBRARY} diff --git a/src/cmark_version.h.in b/src/cmark_version.h.in index 41de3ac67..73e599b25 100644 --- a/src/cmark_version.h.in +++ b/src/cmark_version.h.in @@ -1,7 +1,7 @@ #ifndef CMARK_VERSION_H #define CMARK_VERSION_H -#define CMARK_VERSION ((@PROJECT_VERSION_MAJOR@ << 16) | (@PROJECT_VERSION_MINOR@ << 8) | @PROJECT_VERSION_PATCH@) -#define CMARK_VERSION_STRING "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@" +#define CMARK_VERSION ((@PROJECT_VERSION_MAJOR@ << 24) | (@PROJECT_VERSION_MINOR@ << 16) | (@PROJECT_VERSION_PATCH@ << 8) | @PROJECT_VERSION_GFM@) +#define CMARK_VERSION_STRING "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@.gfm.@PROJECT_VERSION_GFM@" #endif From c0f16ae8358123b021b01375d9a4529ce8560697 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Mon, 8 May 2017 17:31:58 +1000 Subject: [PATCH 045/218] 
Plaintext renderer (#25) * Add base test * Add plaintext renderer * Remove header marks * Remove headers until it complains * Scan HTML tags liberally on request * MSVC10 fix --- api_test/main.c | 45 + extensions/table.c | 1 + src/CMakeLists.txt | 1 + src/cmark.h | 16 + src/cmark_extension_api.h | 6 + src/inlines.c | 13 +- src/main.c | 10 +- src/plaintext.c | 212 ++ src/scanners.c | 6845 +++++++++++++++++++------------------ src/scanners.h | 2 + src/scanners.re | 11 + src/syntax_extension.c | 5 + src/syntax_extension.h | 1 + 13 files changed, 3871 insertions(+), 3297 deletions(-) create mode 100644 src/plaintext.c diff --git a/api_test/main.c b/api_test/main.c index c8a8f3748..c64ffb33c 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -691,6 +691,50 @@ static void render_commonmark(test_batch_runner *runner) { cmark_node_free(doc); } +static void render_plaintext(test_batch_runner *runner) { + char *plaintext; + + static const char markdown[] = "> \\- foo *bar* \\*bar\\*\n" + "\n" + "- Lorem ipsum dolor sit amet,\n" + " consectetur adipiscing elit,\n" + "- sed do eiusmod tempor incididunt\n" + " ut labore et dolore magna aliqua.\n"; + cmark_node *doc = + cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + + plaintext = cmark_render_plaintext(doc, CMARK_OPT_DEFAULT, 26); + STR_EQ(runner, plaintext, "- foo bar *bar*\n" + "\n" + " - Lorem ipsum dolor sit\n" + " amet, consectetur\n" + " adipiscing elit,\n" + " - sed do eiusmod tempor\n" + " incididunt ut labore\n" + " et dolore magna\n" + " aliqua.\n", + "render document with wrapping"); + free(plaintext); + plaintext = cmark_render_plaintext(doc, CMARK_OPT_DEFAULT, 0); + STR_EQ(runner, plaintext, "- foo bar *bar*\n" + "\n" + " - Lorem ipsum dolor sit amet,\n" + " consectetur adipiscing elit,\n" + " - sed do eiusmod tempor incididunt\n" + " ut labore et dolore magna aliqua.\n", + "render document without wrapping"); + free(plaintext); + + cmark_node *text = cmark_node_new(CMARK_NODE_TEXT); + 
cmark_node_set_literal(text, "Hi"); + plaintext = cmark_render_plaintext(text, CMARK_OPT_DEFAULT, 0); + STR_EQ(runner, plaintext, "Hi\n", "render single inline node"); + free(plaintext); + + cmark_node_free(text); + cmark_node_free(doc); +} + static void utf8(test_batch_runner *runner) { // Ranges test_char(runner, 1, "\x01", "valid utf8 01"); @@ -908,6 +952,7 @@ int main() { render_man(runner); render_latex(runner); render_commonmark(runner); + render_plaintext(runner); utf8(runner); line_endings(runner); numeric_entities(runner); diff --git a/extensions/table.c b/extensions/table.c index 8f06a11fb..e71fd81c2 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -632,6 +632,7 @@ cmark_syntax_extension *create_table_extension(void) { cmark_syntax_extension_set_can_contain_func(self, can_contain); cmark_syntax_extension_set_contains_inlines_func(self, contains_inlines); cmark_syntax_extension_set_commonmark_render_func(self, commonmark_render); + cmark_syntax_extension_set_plaintext_render_func(self, commonmark_render); cmark_syntax_extension_set_latex_render_func(self, latex_render); cmark_syntax_extension_set_man_render_func(self, man_render); cmark_syntax_extension_set_html_render_func(self, html_render); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5ab6cdb36..42fc16ebb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -39,6 +39,7 @@ set(LIBRARY_SOURCES xml.c html.c commonmark.c + plaintext.c latex.c houdini_href_e.c houdini_html_e.c diff --git a/src/cmark.h b/src/cmark.h index ccc27a824..d98e00e88 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -635,6 +635,18 @@ char *cmark_render_commonmark(cmark_node *root, int options, int width); CMARK_EXPORT char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); +/** Render a 'node' tree as a plain text document. + * It is the caller's responsibility to free the returned buffer. 
+ */ +CMARK_EXPORT +char *cmark_render_plaintext(cmark_node *root, int options, int width); + +/** As for 'cmark_render_plaintext', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_plaintext_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); + /** Render a 'node' tree as a LaTeX document. * It is the caller's responsibility to free the returned buffer. */ @@ -701,6 +713,10 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar */ #define CMARK_OPT_GITHUB_PRE_LANG (1 << 11) +/** Be liberal in interpreting inline HTML tags. + */ +#define CMARK_OPT_LIBERAL_HTML_TAG (1 << 12) + /** * ## Version information */ diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index cd38ced8c..8f6941863 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -323,6 +323,12 @@ CMARK_EXPORT void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_plaintext_render_func(cmark_syntax_extension *extension, + cmark_common_render_func func); + /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT diff --git a/src/inlines.c b/src/inlines.c index d8b6bbd93..7c3eb05f1 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -754,7 +754,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) { // Parse an autolink or HTML tag. // Assumes the subject has a '<' character at the current position. 
-static cmark_node *handle_pointy_brace(subject *subj) { +static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) { bufsize_t matchlen = 0; cmark_chunk contents; @@ -786,6 +786,15 @@ static cmark_node *handle_pointy_brace(subject *subj) { return make_raw_html(subj->mem, contents); } + if (liberal_html_tag) { + matchlen = scan_liberal_html_tag(&subj->input, subj->pos); + if (matchlen > 0) { + contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); + subj->pos += matchlen; + return make_raw_html(subj->mem, contents); + } + } + // if nothing matches, just return the opening <: return make_str(subj->mem, cmark_chunk_literal("<")); } @@ -1141,7 +1150,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, new_inl = handle_entity(subj); break; case '<': - new_inl = handle_pointy_brace(subj); + new_inl = handle_pointy_brace(subj, (options & CMARK_OPT_LIBERAL_HTML_TAG) != 0); break; case '*': case '_': diff --git a/src/main.c b/src/main.c index 176ab65fd..27424cf8c 100644 --- a/src/main.c +++ b/src/main.c @@ -24,6 +24,7 @@ typedef enum { FORMAT_XML, FORMAT_MAN, FORMAT_COMMONMARK, + FORMAT_PLAINTEXT, FORMAT_LATEX } writer_format; @@ -31,7 +32,7 @@ void print_usage() { printf("Usage: cmark-gfm [FILE*]\n"); printf("Options:\n"); printf(" --to, -t FORMAT Specify output format (html, xml, man, " - "commonmark, latex)\n"); + "commonmark, plaintext, latex)\n"); printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n"); printf(" --sourcepos Include source position attribute\n"); printf(" --hardbreaks Treat newlines as hard line breaks\n"); @@ -64,6 +65,9 @@ static bool print_document(cmark_node *document, writer_format writer, case FORMAT_COMMONMARK: result = cmark_render_commonmark_with_mem(document, options, width, mem); break; + case FORMAT_PLAINTEXT: + result = cmark_render_plaintext_with_mem(document, options, width, mem); + break; case FORMAT_LATEX: result = cmark_render_latex_with_mem(document, 
options, width, mem); break; @@ -137,6 +141,8 @@ int main(int argc, char *argv[]) { options |= CMARK_OPT_SAFE; } else if (strcmp(argv[i], "--validate-utf8") == 0) { options |= CMARK_OPT_VALIDATE_UTF8; + } else if (strcmp(argv[i], "--liberal-html-tag") == 0) { + options |= CMARK_OPT_LIBERAL_HTML_TAG; } else if ((strcmp(argv[i], "--help") == 0) || (strcmp(argv[i], "-h") == 0)) { print_usage(); @@ -165,6 +171,8 @@ int main(int argc, char *argv[]) { writer = FORMAT_XML; } else if (strcmp(argv[i], "commonmark") == 0) { writer = FORMAT_COMMONMARK; + } else if (strcmp(argv[i], "plaintext") == 0) { + writer = FORMAT_PLAINTEXT; } else if (strcmp(argv[i], "latex") == 0) { writer = FORMAT_LATEX; } else { diff --git a/src/plaintext.c b/src/plaintext.c new file mode 100644 index 000000000..a274827c9 --- /dev/null +++ b/src/plaintext.c @@ -0,0 +1,212 @@ +#include "node.h" +#include "syntax_extension.h" + +#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping) +#define LIT(s) renderer->out(renderer, node, s, false, LITERAL) +#define CR() renderer->cr(renderer) +#define BLANKLINE() renderer->blankline(renderer) +#define LISTMARKER_SIZE 20 + +// Functions to convert cmark_nodes to plain text strings. + +static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, + cmark_escaping escape, + int32_t c, unsigned char nextc) { + cmark_render_code_point(renderer, c); +} + +// if node is a block node, returns node. +// otherwise returns first block-level node that is an ancestor of node. +// if there is no block-level ancestor, returns NULL. 
+static cmark_node *get_containing_block(cmark_node *node) { + while (node) { + if (CMARK_NODE_BLOCK_P(node)) { + return node; + } else { + node = node->parent; + } + } + return NULL; +} + +static int S_render_node(cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + cmark_node *tmp; + int list_number; + cmark_delim_type list_delim; + int i; + bool entering = (ev_type == CMARK_EVENT_ENTER); + char listmarker[LISTMARKER_SIZE]; + bool first_in_list_item; + bufsize_t marker_width; + bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && + !(CMARK_OPT_HARDBREAKS & options); + + // Don't adjust tight list status til we've started the list. + // Otherwise we loose the blank line between a paragraph and + // a following list. + if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) { + tmp = get_containing_block(node); + renderer->in_tight_list_item = + tmp && // tmp might be NULL if there is no containing block + ((tmp->type == CMARK_NODE_ITEM && + cmark_node_get_list_tight(tmp->parent)) || + (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM && + cmark_node_get_list_tight(tmp->parent->parent))); + } + + if (node->extension && node->extension->plaintext_render_func) { + node->extension->plaintext_render_func(node->extension, renderer, node, ev_type, options); + return 1; + } + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + break; + + case CMARK_NODE_BLOCK_QUOTE: + break; + + case CMARK_NODE_LIST: + if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK || + node->next->type == CMARK_NODE_LIST)) { + CR(); + } + break; + + case CMARK_NODE_ITEM: + if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { + marker_width = 4; + } else { + list_number = cmark_node_get_list_start(node->parent); + list_delim = cmark_node_get_list_delim(node->parent); + tmp = node; + while (tmp->prev) { + tmp = tmp->prev; + list_number += 1; + } + // we ensure a width of at least 
4 so + // we get nice transition from single digits + // to double + snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number, + list_delim == CMARK_PAREN_DELIM ? ")" : ".", + list_number < 10 ? " " : " "); + marker_width = (bufsize_t)strlen(listmarker); + } + if (entering) { + if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { + LIT(" - "); + renderer->begin_content = true; + } else { + LIT(listmarker); + renderer->begin_content = true; + } + for (i = marker_width; i--;) { + cmark_strbuf_putc(renderer->prefix, ' '); + } + } else { + cmark_strbuf_truncate(renderer->prefix, + renderer->prefix->size - marker_width); + CR(); + } + break; + + case CMARK_NODE_HEADING: + if (entering) { + renderer->begin_content = true; + renderer->no_linebreaks = true; + } else { + renderer->no_linebreaks = false; + BLANKLINE(); + } + break; + + case CMARK_NODE_CODE_BLOCK: + first_in_list_item = node->prev == NULL && node->parent && + node->parent->type == CMARK_NODE_ITEM; + + if (!first_in_list_item) { + BLANKLINE(); + } + OUT(cmark_node_get_literal(node), false, LITERAL); + BLANKLINE(); + break; + + case CMARK_NODE_HTML_BLOCK: + break; + + case CMARK_NODE_CUSTOM_BLOCK: + break; + + case CMARK_NODE_THEMATIC_BREAK: + BLANKLINE(); + break; + + case CMARK_NODE_PARAGRAPH: + if (!entering) { + BLANKLINE(); + } + break; + + case CMARK_NODE_TEXT: + OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); + break; + + case CMARK_NODE_LINEBREAK: + CR(); + break; + + case CMARK_NODE_SOFTBREAK: + if (CMARK_OPT_HARDBREAKS & options) { + CR(); + } else if (!renderer->no_linebreaks && renderer->width == 0 && + !(CMARK_OPT_HARDBREAKS & options) && + !(CMARK_OPT_NOBREAKS & options)) { + CR(); + } else { + OUT(" ", allow_wrap, LITERAL); + } + break; + + case CMARK_NODE_CODE: + OUT(cmark_node_get_literal(node), allow_wrap, LITERAL); + break; + + case CMARK_NODE_HTML_INLINE: + break; + + case CMARK_NODE_CUSTOM_INLINE: + break; + + case CMARK_NODE_STRONG: + break; + + case 
CMARK_NODE_EMPH: + break; + + case CMARK_NODE_LINK: + break; + + case CMARK_NODE_IMAGE: + break; + + default: + assert(false); + break; + } + + return 1; +} + +char *cmark_render_plaintext(cmark_node *root, int options, int width) { + return cmark_render_plaintext_with_mem(root, options, width, cmark_node_mem(root)); +} + +char *cmark_render_plaintext_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) { + if (options & CMARK_OPT_HARDBREAKS) { + // disable breaking on width, since it has + // a different meaning with OPT_HARDBREAKS + width = 0; + } + return cmark_render(mem, root, options, width, outc, S_render_node); +} diff --git a/src/scanners.c b/src/scanners.c index c96490dda..b4c75aa3b 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -1,7 +1,7 @@ /* Generated by re2c 0.15.3 */ -#include <stdlib.h> -#include "chunk.h" #include "scanners.h" +#include "chunk.h" +#include <stdlib.h> bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) { @@ -9744,6 +9744,263 @@ bufsize_t _scan_html_tag(const unsigned char *p) { } } +// Try to (liberally) match an HTML tag after first <, returning num of chars +// matched. 
+bufsize_t _scan_liberal_html_tag(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych <= 0xE0) { + if (yych <= 0x7F) { + if (yych == '\n') + goto yy495; + } else { + if (yych <= 0xC1) + goto yy495; + if (yych <= 0xDF) + goto yy496; + goto yy497; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy499; + goto yy498; + } else { + if (yych <= 0xF0) + goto yy500; + if (yych <= 0xF3) + goto yy501; + if (yych <= 0xF4) + goto yy502; + goto yy495; + } + } + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) { + if (yych != '\n') + goto yy507; + } else { + if (yych <= 0xC1) + goto yy494; + if (yych <= 0xF4) + goto yy507; + } + yy494 : { return 0; } + yy495: + yych = *++p; + goto yy494; + yy496: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy494; + if (yych <= 0xBF) + goto yy506; + goto yy494; + yy497: + yyaccept = 0; + yych = *(marker = ++p); + if 
(yych <= 0x9F) + goto yy494; + if (yych <= 0xBF) + goto yy505; + goto yy494; + yy498: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy494; + if (yych <= 0xBF) + goto yy505; + goto yy494; + yy499: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy494; + if (yych <= 0x9F) + goto yy505; + goto yy494; + yy500: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x8F) + goto yy494; + if (yych <= 0xBF) + goto yy503; + goto yy494; + yy501: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy494; + if (yych <= 0xBF) + goto yy503; + goto yy494; + yy502: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy494; + if (yych >= 0x90) + goto yy494; + yy503: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy504; + if (yych <= 0xBF) + goto yy505; + yy504: + p = marker; + if (yyaccept == 0) { + goto yy494; + } else { + goto yy510; + } + yy505: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy504; + if (yych >= 0xC0) + goto yy504; + yy506: + ++p; + yych = *p; + yy507: + if (yybm[0 + yych] & 64) { + goto yy506; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy504; + if (yych >= '?') + goto yy504; + } else { + if (yych <= 0xDF) + goto yy505; + if (yych <= 0xE0) + goto yy511; + goto yy503; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy515; + if (yych <= 0xEF) + goto yy503; + goto yy512; + } else { + if (yych <= 0xF3) + goto yy513; + if (yych <= 0xF4) + goto yy514; + goto yy504; + } + } + yy508: + yyaccept = 1; + marker = ++p; + yych = *p; + if (yybm[0 + yych] & 64) { + goto yy506; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy510; + if (yych <= '>') + goto yy508; + } else { + if (yych <= 0xDF) + goto yy505; + if (yych <= 0xE0) + goto yy511; + goto yy503; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy515; + if (yych <= 0xEF) + goto yy503; + goto yy512; + } else { + if (yych <= 0xF3) + goto yy513; + if (yych 
<= 0xF4) + goto yy514; + } + } + yy510 : { return (bufsize_t)(p - start); } + yy511: + ++p; + yych = *p; + if (yych <= 0x9F) + goto yy504; + if (yych <= 0xBF) + goto yy505; + goto yy504; + yy512: + ++p; + yych = *p; + if (yych <= 0x8F) + goto yy504; + if (yych <= 0xBF) + goto yy503; + goto yy504; + yy513: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy504; + if (yych <= 0xBF) + goto yy503; + goto yy504; + yy514: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy504; + if (yych <= 0x8F) + goto yy503; + goto yy504; + yy515: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy504; + if (yych <= 0x9F) + goto yy505; + goto yy504; + } +} + // Try to match an HTML block tag start line, returning // an integer code for the type of block (1-6, matching the spec). // #7 is handled by a separate function, below. @@ -9754,239 +10011,239 @@ bufsize_t _scan_html_block_start(const unsigned char *p) { unsigned char yych; yych = *p; if (yych == '<') - goto yy495; + goto yy520; ++p; - yy494 : { return 0; } - yy495: + yy519 : { return 0; } + yy520: yych = *(marker = ++p); switch (yych) { case '!': - goto yy513; + goto yy538; case '/': - goto yy496; + goto yy521; case '?': - goto yy514; + goto yy539; case 'A': case 'a': - goto yy499; + goto yy524; case 'B': case 'b': - goto yy500; + goto yy525; case 'C': case 'c': - goto yy501; + goto yy526; case 'D': case 'd': - goto yy502; + goto yy527; case 'F': case 'f': - goto yy503; + goto yy528; case 'H': case 'h': - goto yy504; + goto yy529; case 'I': case 'i': - goto yy505; + goto yy530; case 'L': case 'l': - goto yy506; + goto yy531; case 'M': case 'm': - goto yy507; + goto yy532; case 'N': case 'n': - goto yy508; + goto yy533; case 'O': case 'o': - goto yy509; + goto yy534; case 'P': case 'p': - goto yy498; + goto yy523; case 'S': case 's': - goto yy510; + goto yy535; case 'T': case 't': - goto yy511; + goto yy536; case 'U': case 'u': - goto yy512; + goto yy537; default: - goto yy494; + goto yy519; } - yy496: + yy521: yych = *++p; switch 
(yych) { case 'A': case 'a': - goto yy499; + goto yy524; case 'B': case 'b': - goto yy500; + goto yy525; case 'C': case 'c': - goto yy501; + goto yy526; case 'D': case 'd': - goto yy502; + goto yy527; case 'F': case 'f': - goto yy503; + goto yy528; case 'H': case 'h': - goto yy504; + goto yy529; case 'I': case 'i': - goto yy505; + goto yy530; case 'L': case 'l': - goto yy506; + goto yy531; case 'M': case 'm': - goto yy507; + goto yy532; case 'N': case 'n': - goto yy508; + goto yy533; case 'O': case 'o': - goto yy509; + goto yy534; case 'P': case 'p': - goto yy698; + goto yy723; case 'S': case 's': - goto yy699; + goto yy724; case 'T': case 't': - goto yy511; + goto yy536; case 'U': case 'u': - goto yy512; + goto yy537; default: - goto yy497; + goto yy522; } - yy497: + yy522: p = marker; - goto yy494; - yy498: + goto yy519; + yy523: yych = *++p; if (yych <= '>') { if (yych <= ' ') { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; + goto yy555; if (yych <= 0x1F) - goto yy497; - goto yy530; + goto yy522; + goto yy555; } else { if (yych == '/') - goto yy532; + goto yy557; if (yych <= '=') - goto yy497; - goto yy530; + goto yy522; + goto yy555; } } else { if (yych <= 'R') { if (yych == 'A') - goto yy695; + goto yy720; if (yych <= 'Q') - goto yy497; - goto yy694; + goto yy522; + goto yy719; } else { if (yych <= 'a') { if (yych <= '`') - goto yy497; - goto yy695; + goto yy522; + goto yy720; } else { if (yych == 'r') - goto yy694; - goto yy497; + goto yy719; + goto yy522; } } } - yy499: + yy524: yych = *++p; if (yych <= 'S') { if (yych <= 'D') { if (yych <= 'C') - goto yy497; - goto yy683; + goto yy522; + goto yy708; } else { if (yych <= 'Q') - goto yy497; + goto yy522; if (yych <= 'R') - goto yy682; - goto yy681; + goto yy707; + goto yy706; } } else { if (yych <= 'q') { if (yych == 'd') - goto yy683; - goto yy497; + goto yy708; + goto yy522; } else { if (yych <= 'r') - goto yy682; + goto yy707; if (yych <= 's') - goto yy681; - goto yy497; + 
goto yy706; + goto yy522; } } - yy500: + yy525: yych = *++p; if (yych <= 'O') { if (yych <= 'K') { if (yych == 'A') - goto yy667; - goto yy497; + goto yy692; + goto yy522; } else { if (yych <= 'L') - goto yy666; + goto yy691; if (yych <= 'N') - goto yy497; - goto yy665; + goto yy522; + goto yy690; } } else { if (yych <= 'k') { if (yych == 'a') - goto yy667; - goto yy497; + goto yy692; + goto yy522; } else { if (yych <= 'l') - goto yy666; + goto yy691; if (yych == 'o') - goto yy665; - goto yy497; + goto yy690; + goto yy522; } } - yy501: + yy526: yych = *++p; if (yych <= 'O') { if (yych <= 'D') { if (yych == 'A') - goto yy652; - goto yy497; + goto yy677; + goto yy522; } else { if (yych <= 'E') - goto yy651; + goto yy676; if (yych <= 'N') - goto yy497; - goto yy650; + goto yy522; + goto yy675; } } else { if (yych <= 'd') { if (yych == 'a') - goto yy652; - goto yy497; + goto yy677; + goto yy522; } else { if (yych <= 'e') - goto yy651; + goto yy676; if (yych == 'o') - goto yy650; - goto yy497; + goto yy675; + goto yy522; } } - yy502: + yy527: yych = *++p; switch (yych) { case 'D': @@ -9995,1664 +10252,1664 @@ bufsize_t _scan_html_block_start(const unsigned char *p) { case 'd': case 'l': case 't': - goto yy529; + goto yy554; case 'E': case 'e': - goto yy642; + goto yy667; case 'I': case 'i': - goto yy641; + goto yy666; default: - goto yy497; + goto yy522; } - yy503: + yy528: yych = *++p; if (yych <= 'R') { if (yych <= 'N') { if (yych == 'I') - goto yy617; - goto yy497; + goto yy642; + goto yy522; } else { if (yych <= 'O') - goto yy616; + goto yy641; if (yych <= 'Q') - goto yy497; - goto yy615; + goto yy522; + goto yy640; } } else { if (yych <= 'n') { if (yych == 'i') - goto yy617; - goto yy497; + goto yy642; + goto yy522; } else { if (yych <= 'o') - goto yy616; + goto yy641; if (yych == 'r') - goto yy615; - goto yy497; + goto yy640; + goto yy522; } } - yy504: + yy529: yych = *++p; if (yych <= 'S') { if (yych <= 'D') { if (yych <= '0') - goto yy497; + goto yy522; if (yych 
<= '6') - goto yy529; - goto yy497; + goto yy554; + goto yy522; } else { if (yych <= 'E') - goto yy610; + goto yy635; if (yych == 'R') - goto yy529; - goto yy497; + goto yy554; + goto yy522; } } else { if (yych <= 'q') { if (yych <= 'T') - goto yy609; + goto yy634; if (yych == 'e') - goto yy610; - goto yy497; + goto yy635; + goto yy522; } else { if (yych <= 'r') - goto yy529; + goto yy554; if (yych == 't') - goto yy609; - goto yy497; + goto yy634; + goto yy522; } } - yy505: + yy530: yych = *++p; if (yych == 'F') - goto yy605; + goto yy630; if (yych == 'f') - goto yy605; - goto yy497; - yy506: + goto yy630; + goto yy522; + yy531: yych = *++p; if (yych <= 'I') { if (yych == 'E') - goto yy600; + goto yy625; if (yych <= 'H') - goto yy497; - goto yy599; + goto yy522; + goto yy624; } else { if (yych <= 'e') { if (yych <= 'd') - goto yy497; - goto yy600; + goto yy522; + goto yy625; } else { if (yych == 'i') - goto yy599; - goto yy497; + goto yy624; + goto yy522; } } - yy507: + yy532: yych = *++p; if (yych <= 'E') { if (yych == 'A') - goto yy591; + goto yy616; if (yych <= 'D') - goto yy497; - goto yy590; + goto yy522; + goto yy615; } else { if (yych <= 'a') { if (yych <= '`') - goto yy497; - goto yy591; + goto yy522; + goto yy616; } else { if (yych == 'e') - goto yy590; - goto yy497; + goto yy615; + goto yy522; } } - yy508: + yy533: yych = *++p; if (yych <= 'O') { if (yych == 'A') - goto yy584; + goto yy609; if (yych <= 'N') - goto yy497; - goto yy583; + goto yy522; + goto yy608; } else { if (yych <= 'a') { if (yych <= '`') - goto yy497; - goto yy584; + goto yy522; + goto yy609; } else { if (yych == 'o') - goto yy583; - goto yy497; + goto yy608; + goto yy522; } } - yy509: + yy534: yych = *++p; if (yych <= 'P') { if (yych == 'L') - goto yy529; + goto yy554; if (yych <= 'O') - goto yy497; - goto yy575; + goto yy522; + goto yy600; } else { if (yych <= 'l') { if (yych <= 'k') - goto yy497; - goto yy529; + goto yy522; + goto yy554; } else { if (yych == 'p') - goto yy575; - goto 
yy497; + goto yy600; + goto yy522; } } - yy510: + yy535: yych = *++p; switch (yych) { case 'C': case 'c': - goto yy552; + goto yy577; case 'E': case 'e': - goto yy555; + goto yy580; case 'O': case 'o': - goto yy554; + goto yy579; case 'T': case 't': - goto yy551; + goto yy576; case 'U': case 'u': - goto yy553; + goto yy578; default: - goto yy497; + goto yy522; } - yy511: + yy536: yych = *++p; switch (yych) { case 'A': case 'a': - goto yy538; + goto yy563; case 'B': case 'b': - goto yy537; + goto yy562; case 'D': case 'd': - goto yy529; + goto yy554; case 'F': case 'f': - goto yy536; + goto yy561; case 'H': case 'h': - goto yy535; + goto yy560; case 'I': case 'i': - goto yy534; + goto yy559; case 'R': case 'r': - goto yy533; + goto yy558; default: - goto yy497; + goto yy522; } - yy512: + yy537: yych = *++p; if (yych == 'L') - goto yy529; + goto yy554; if (yych == 'l') - goto yy529; - goto yy497; - yy513: + goto yy554; + goto yy522; + yy538: yych = *++p; if (yych <= '@') { if (yych == '-') - goto yy516; - goto yy497; + goto yy541; + goto yy522; } else { if (yych <= 'Z') - goto yy517; + goto yy542; if (yych <= '[') - goto yy519; - goto yy497; + goto yy544; + goto yy522; } - yy514: + yy539: ++p; { return 3; } - yy516: + yy541: yych = *++p; if (yych == '-') - goto yy527; - goto yy497; - yy517: + goto yy552; + goto yy522; + yy542: ++p; { return 4; } - yy519: + yy544: yych = *++p; if (yych == 'C') - goto yy520; + goto yy545; if (yych != 'c') - goto yy497; - yy520: + goto yy522; + yy545: yych = *++p; if (yych == 'D') - goto yy521; + goto yy546; if (yych != 'd') - goto yy497; - yy521: + goto yy522; + yy546: yych = *++p; if (yych == 'A') - goto yy522; + goto yy547; if (yych != 'a') - goto yy497; - yy522: + goto yy522; + yy547: yych = *++p; if (yych == 'T') - goto yy523; + goto yy548; if (yych != 't') - goto yy497; - yy523: + goto yy522; + yy548: yych = *++p; if (yych == 'A') - goto yy524; + goto yy549; if (yych != 'a') - goto yy497; - yy524: + goto yy522; + yy549: yych = 
*++p; if (yych != '[') - goto yy497; + goto yy522; ++p; { return 5; } - yy527: + yy552: ++p; { return 2; } - yy529: + yy554: yych = *++p; if (yych <= ' ') { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; + goto yy555; if (yych <= 0x1F) - goto yy497; + goto yy522; } else { if (yych <= '/') { if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } else { if (yych != '>') - goto yy497; + goto yy522; } } - yy530: + yy555: ++p; { return 6; } - yy532: + yy557: yych = *++p; if (yych == '>') - goto yy530; - goto yy497; - yy533: + goto yy555; + goto yy522; + yy558: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= '@') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'A') - goto yy549; + goto yy574; if (yych == 'a') - goto yy549; - goto yy497; + goto yy574; + goto yy522; } } - yy534: + yy559: yych = *++p; if (yych == 'T') - goto yy547; + goto yy572; if (yych == 't') - goto yy547; - goto yy497; - yy535: + goto yy572; + goto yy522; + yy560: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'D') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'E') - goto yy545; + goto yy570; if (yych == 'e') - goto yy545; - goto yy497; + goto yy570; + goto yy522; } } - yy536: + yy561: yych = *++p; if (yych == 'O') - goto yy543; + goto yy568; if (yych == 'o') - goto yy543; - goto yy497; - yy537: + goto yy568; + goto 
yy522; + yy562: yych = *++p; if (yych == 'O') - goto yy541; + goto yy566; if (yych == 'o') - goto yy541; - goto yy497; - yy538: + goto yy566; + goto yy522; + yy563: yych = *++p; if (yych == 'B') - goto yy539; + goto yy564; if (yych != 'b') - goto yy497; - yy539: + goto yy522; + yy564: yych = *++p; if (yych == 'L') - goto yy540; + goto yy565; if (yych != 'l') - goto yy497; - yy540: + goto yy522; + yy565: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy541: + goto yy554; + goto yy522; + yy566: yych = *++p; if (yych == 'D') - goto yy542; + goto yy567; if (yych != 'd') - goto yy497; - yy542: + goto yy522; + yy567: yych = *++p; if (yych == 'Y') - goto yy529; + goto yy554; if (yych == 'y') - goto yy529; - goto yy497; - yy543: + goto yy554; + goto yy522; + yy568: yych = *++p; if (yych == 'O') - goto yy544; + goto yy569; if (yych != 'o') - goto yy497; - yy544: + goto yy522; + yy569: yych = *++p; if (yych == 'T') - goto yy529; + goto yy554; if (yych == 't') - goto yy529; - goto yy497; - yy545: + goto yy554; + goto yy522; + yy570: yych = *++p; if (yych == 'A') - goto yy546; + goto yy571; if (yych != 'a') - goto yy497; - yy546: + goto yy522; + yy571: yych = *++p; if (yych == 'D') - goto yy529; + goto yy554; if (yych == 'd') - goto yy529; - goto yy497; - yy547: + goto yy554; + goto yy522; + yy572: yych = *++p; if (yych == 'L') - goto yy548; + goto yy573; if (yych != 'l') - goto yy497; - yy548: + goto yy522; + yy573: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy549: + goto yy554; + goto yy522; + yy574: yych = *++p; if (yych == 'C') - goto yy550; + goto yy575; if (yych != 'c') - goto yy497; - yy550: + goto yy522; + yy575: yych = *++p; if (yych == 'K') - goto yy529; + goto yy554; if (yych == 'k') - goto yy529; - goto yy497; - yy551: + goto yy554; + goto yy522; + yy576: yych = *++p; if (yych == 'Y') - goto yy573; + goto yy598; if (yych == 'y') - goto yy573; - 
goto yy497; - yy552: + goto yy598; + goto yy522; + yy577: yych = *++p; if (yych == 'R') - goto yy567; + goto yy592; if (yych == 'r') - goto yy567; - goto yy497; - yy553: + goto yy592; + goto yy522; + yy578: yych = *++p; if (yych == 'M') - goto yy563; + goto yy588; if (yych == 'm') - goto yy563; - goto yy497; - yy554: + goto yy588; + goto yy522; + yy579: yych = *++p; if (yych == 'U') - goto yy560; + goto yy585; if (yych == 'u') - goto yy560; - goto yy497; - yy555: + goto yy585; + goto yy522; + yy580: yych = *++p; if (yych == 'C') - goto yy556; + goto yy581; if (yych != 'c') - goto yy497; - yy556: + goto yy522; + yy581: yych = *++p; if (yych == 'T') - goto yy557; + goto yy582; if (yych != 't') - goto yy497; - yy557: + goto yy522; + yy582: yych = *++p; if (yych == 'I') - goto yy558; + goto yy583; if (yych != 'i') - goto yy497; - yy558: + goto yy522; + yy583: yych = *++p; if (yych == 'O') - goto yy559; + goto yy584; if (yych != 'o') - goto yy497; - yy559: + goto yy522; + yy584: yych = *++p; if (yych == 'N') - goto yy529; + goto yy554; if (yych == 'n') - goto yy529; - goto yy497; - yy560: + goto yy554; + goto yy522; + yy585: yych = *++p; if (yych == 'R') - goto yy561; + goto yy586; if (yych != 'r') - goto yy497; - yy561: + goto yy522; + yy586: yych = *++p; if (yych == 'C') - goto yy562; + goto yy587; if (yych != 'c') - goto yy497; - yy562: + goto yy522; + yy587: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy563: + goto yy554; + goto yy522; + yy588: yych = *++p; if (yych == 'M') - goto yy564; + goto yy589; if (yych != 'm') - goto yy497; - yy564: + goto yy522; + yy589: yych = *++p; if (yych == 'A') - goto yy565; + goto yy590; if (yych != 'a') - goto yy497; - yy565: + goto yy522; + yy590: yych = *++p; if (yych == 'R') - goto yy566; + goto yy591; if (yych != 'r') - goto yy497; - yy566: + goto yy522; + yy591: yych = *++p; if (yych == 'Y') - goto yy529; + goto yy554; if (yych == 'y') - goto yy529; - goto yy497; - 
yy567: + goto yy554; + goto yy522; + yy592: yych = *++p; if (yych == 'I') - goto yy568; + goto yy593; if (yych != 'i') - goto yy497; - yy568: + goto yy522; + yy593: yych = *++p; if (yych == 'P') - goto yy569; + goto yy594; if (yych != 'p') - goto yy497; - yy569: + goto yy522; + yy594: yych = *++p; if (yych == 'T') - goto yy570; + goto yy595; if (yych != 't') - goto yy497; - yy570: + goto yy522; + yy595: yych = *++p; if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych >= 0x0E) - goto yy497; + goto yy522; } else { if (yych <= ' ') - goto yy571; + goto yy596; if (yych != '>') - goto yy497; + goto yy522; } - yy571: + yy596: ++p; { return 1; } - yy573: + yy598: yych = *++p; if (yych == 'L') - goto yy574; + goto yy599; if (yych != 'l') - goto yy497; - yy574: + goto yy522; + yy599: yych = *++p; if (yych == 'E') - goto yy570; + goto yy595; if (yych == 'e') - goto yy570; - goto yy497; - yy575: + goto yy595; + goto yy522; + yy600: yych = *++p; if (yych == 'T') - goto yy576; + goto yy601; if (yych != 't') - goto yy497; - yy576: + goto yy522; + yy601: yych = *++p; if (yych <= 'I') { if (yych == 'G') - goto yy578; + goto yy603; if (yych <= 'H') - goto yy497; + goto yy522; } else { if (yych <= 'g') { if (yych <= 'f') - goto yy497; - goto yy578; + goto yy522; + goto yy603; } else { if (yych != 'i') - goto yy497; + goto yy522; } } yych = *++p; if (yych == 'O') - goto yy582; + goto yy607; if (yych == 'o') - goto yy582; - goto yy497; - yy578: + goto yy607; + goto yy522; + yy603: yych = *++p; if (yych == 'R') - goto yy579; + goto yy604; if (yych != 'r') - goto yy497; - yy579: + goto yy522; + yy604: yych = *++p; if (yych == 'O') - goto yy580; + goto yy605; if (yych != 'o') - goto yy497; - yy580: + goto yy522; + yy605: yych = *++p; if (yych == 'U') - goto yy581; + goto yy606; if (yych != 'u') - goto yy497; - yy581: + goto yy522; + yy606: yych = *++p; if (yych == 'P') - goto yy529; + goto yy554; if (yych == 'p') - goto yy529; - goto yy497; - yy582: + goto yy554; + 
goto yy522; + yy607: yych = *++p; if (yych == 'N') - goto yy529; + goto yy554; if (yych == 'n') - goto yy529; - goto yy497; - yy583: + goto yy554; + goto yy522; + yy608: yych = *++p; if (yych == 'F') - goto yy585; + goto yy610; if (yych == 'f') - goto yy585; - goto yy497; - yy584: + goto yy610; + goto yy522; + yy609: yych = *++p; if (yych == 'V') - goto yy529; + goto yy554; if (yych == 'v') - goto yy529; - goto yy497; - yy585: + goto yy554; + goto yy522; + yy610: yych = *++p; if (yych == 'R') - goto yy586; + goto yy611; if (yych != 'r') - goto yy497; - yy586: + goto yy522; + yy611: yych = *++p; if (yych == 'A') - goto yy587; + goto yy612; if (yych != 'a') - goto yy497; - yy587: + goto yy522; + yy612: yych = *++p; if (yych == 'M') - goto yy588; + goto yy613; if (yych != 'm') - goto yy497; - yy588: + goto yy522; + yy613: yych = *++p; if (yych == 'E') - goto yy589; + goto yy614; if (yych != 'e') - goto yy497; - yy589: + goto yy522; + yy614: yych = *++p; if (yych == 'S') - goto yy529; + goto yy554; if (yych == 's') - goto yy529; - goto yy497; - yy590: + goto yy554; + goto yy522; + yy615: yych = *++p; if (yych <= 'T') { if (yych == 'N') - goto yy593; + goto yy618; if (yych <= 'S') - goto yy497; - goto yy594; + goto yy522; + goto yy619; } else { if (yych <= 'n') { if (yych <= 'm') - goto yy497; - goto yy593; + goto yy522; + goto yy618; } else { if (yych == 't') - goto yy594; - goto yy497; + goto yy619; + goto yy522; } } - yy591: + yy616: yych = *++p; if (yych == 'I') - goto yy592; + goto yy617; if (yych != 'i') - goto yy497; - yy592: + goto yy522; + yy617: yych = *++p; if (yych == 'N') - goto yy529; + goto yy554; if (yych == 'n') - goto yy529; - goto yy497; - yy593: + goto yy554; + goto yy522; + yy618: yych = *++p; if (yych == 'U') - goto yy595; + goto yy620; if (yych == 'u') - goto yy595; - goto yy497; - yy594: + goto yy620; + goto yy522; + yy619: yych = *++p; if (yych == 'A') - goto yy529; + goto yy554; if (yych == 'a') - goto yy529; - goto yy497; - yy595: + goto 
yy554; + goto yy522; + yy620: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'H') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'I') - goto yy596; + goto yy621; if (yych != 'i') - goto yy497; + goto yy522; } } - yy596: + yy621: yych = *++p; if (yych == 'T') - goto yy597; + goto yy622; if (yych != 't') - goto yy497; - yy597: + goto yy522; + yy622: yych = *++p; if (yych == 'E') - goto yy598; + goto yy623; if (yych != 'e') - goto yy497; - yy598: + goto yy522; + yy623: yych = *++p; if (yych == 'M') - goto yy529; + goto yy554; if (yych == 'm') - goto yy529; - goto yy497; - yy599: + goto yy554; + goto yy522; + yy624: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'M') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'N') - goto yy604; + goto yy629; if (yych == 'n') - goto yy604; - goto yy497; + goto yy629; + goto yy522; } } - yy600: + yy625: yych = *++p; if (yych == 'G') - goto yy601; + goto yy626; if (yych != 'g') - goto yy497; - yy601: + goto yy522; + yy626: yych = *++p; if (yych == 'E') - goto yy602; + goto yy627; if (yych != 'e') - goto yy497; - yy602: + goto yy522; + yy627: yych = *++p; if (yych == 'N') - goto yy603; + goto yy628; if (yych != 'n') - goto yy497; - yy603: + goto yy522; + yy628: yych = *++p; if (yych == 'D') - goto yy529; + goto yy554; if (yych == 'd') - goto yy529; - goto yy497; - yy604: + goto yy554; + goto yy522; + 
yy629: yych = *++p; if (yych == 'K') - goto yy529; + goto yy554; if (yych == 'k') - goto yy529; - goto yy497; - yy605: + goto yy554; + goto yy522; + yy630: yych = *++p; if (yych == 'R') - goto yy606; + goto yy631; if (yych != 'r') - goto yy497; - yy606: + goto yy522; + yy631: yych = *++p; if (yych == 'A') - goto yy607; + goto yy632; if (yych != 'a') - goto yy497; - yy607: + goto yy522; + yy632: yych = *++p; if (yych == 'M') - goto yy608; + goto yy633; if (yych != 'm') - goto yy497; - yy608: + goto yy522; + yy633: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy609: + goto yy554; + goto yy522; + yy634: yych = *++p; if (yych == 'M') - goto yy614; + goto yy639; if (yych == 'm') - goto yy614; - goto yy497; - yy610: + goto yy639; + goto yy522; + yy635: yych = *++p; if (yych == 'A') - goto yy611; + goto yy636; if (yych != 'a') - goto yy497; - yy611: + goto yy522; + yy636: yych = *++p; if (yych == 'D') - goto yy612; + goto yy637; if (yych != 'd') - goto yy497; - yy612: + goto yy522; + yy637: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'D') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'E') - goto yy613; + goto yy638; if (yych != 'e') - goto yy497; + goto yy522; } } - yy613: + yy638: yych = *++p; if (yych == 'R') - goto yy529; + goto yy554; if (yych == 'r') - goto yy529; - goto yy497; - yy614: + goto yy554; + goto yy522; + yy639: yych = *++p; if (yych == 'L') - goto yy529; + goto yy554; if (yych == 'l') - goto yy529; - goto yy497; - yy615: + goto yy554; + goto yy522; + yy640: yych = *++p; if (yych == 'A') - goto yy636; + goto yy661; if (yych == 'a') - goto yy636; - goto yy497; - yy616: + goto 
yy661; + goto yy522; + yy641: yych = *++p; if (yych <= 'R') { if (yych == 'O') - goto yy632; + goto yy657; if (yych <= 'Q') - goto yy497; - goto yy633; + goto yy522; + goto yy658; } else { if (yych <= 'o') { if (yych <= 'n') - goto yy497; - goto yy632; + goto yy522; + goto yy657; } else { if (yych == 'r') - goto yy633; - goto yy497; + goto yy658; + goto yy522; } } - yy617: + yy642: yych = *++p; if (yych <= 'G') { if (yych == 'E') - goto yy618; + goto yy643; if (yych <= 'F') - goto yy497; - goto yy619; + goto yy522; + goto yy644; } else { if (yych <= 'e') { if (yych <= 'd') - goto yy497; + goto yy522; } else { if (yych == 'g') - goto yy619; - goto yy497; + goto yy644; + goto yy522; } } - yy618: + yy643: yych = *++p; if (yych == 'L') - goto yy628; + goto yy653; if (yych == 'l') - goto yy628; - goto yy497; - yy619: + goto yy653; + goto yy522; + yy644: yych = *++p; if (yych <= 'U') { if (yych == 'C') - goto yy621; + goto yy646; if (yych <= 'T') - goto yy497; + goto yy522; } else { if (yych <= 'c') { if (yych <= 'b') - goto yy497; - goto yy621; + goto yy522; + goto yy646; } else { if (yych != 'u') - goto yy497; + goto yy522; } } yych = *++p; if (yych == 'R') - goto yy627; + goto yy652; if (yych == 'r') - goto yy627; - goto yy497; - yy621: + goto yy652; + goto yy522; + yy646: yych = *++p; if (yych == 'A') - goto yy622; + goto yy647; if (yych != 'a') - goto yy497; - yy622: + goto yy522; + yy647: yych = *++p; if (yych == 'P') - goto yy623; + goto yy648; if (yych != 'p') - goto yy497; - yy623: + goto yy522; + yy648: yych = *++p; if (yych == 'T') - goto yy624; + goto yy649; if (yych != 't') - goto yy497; - yy624: + goto yy522; + yy649: yych = *++p; if (yych == 'I') - goto yy625; + goto yy650; if (yych != 'i') - goto yy497; - yy625: + goto yy522; + yy650: yych = *++p; if (yych == 'O') - goto yy626; + goto yy651; if (yych != 'o') - goto yy497; - yy626: + goto yy522; + yy651: yych = *++p; if (yych == 'N') - goto yy529; + goto yy554; if (yych == 'n') - goto yy529; - goto yy497; 
- yy627: + goto yy554; + goto yy522; + yy652: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy628: + goto yy554; + goto yy522; + yy653: yych = *++p; if (yych == 'D') - goto yy629; + goto yy654; if (yych != 'd') - goto yy497; - yy629: + goto yy522; + yy654: yych = *++p; if (yych == 'S') - goto yy630; + goto yy655; if (yych != 's') - goto yy497; - yy630: + goto yy522; + yy655: yych = *++p; if (yych == 'E') - goto yy631; + goto yy656; if (yych != 'e') - goto yy497; - yy631: + goto yy522; + yy656: yych = *++p; if (yych == 'T') - goto yy529; + goto yy554; if (yych == 't') - goto yy529; - goto yy497; - yy632: + goto yy554; + goto yy522; + yy657: yych = *++p; if (yych == 'T') - goto yy634; + goto yy659; if (yych == 't') - goto yy634; - goto yy497; - yy633: + goto yy659; + goto yy522; + yy658: yych = *++p; if (yych == 'M') - goto yy529; + goto yy554; if (yych == 'm') - goto yy529; - goto yy497; - yy634: + goto yy554; + goto yy522; + yy659: yych = *++p; if (yych == 'E') - goto yy635; + goto yy660; if (yych != 'e') - goto yy497; - yy635: + goto yy522; + yy660: yych = *++p; if (yych == 'R') - goto yy529; + goto yy554; if (yych == 'r') - goto yy529; - goto yy497; - yy636: + goto yy554; + goto yy522; + yy661: yych = *++p; if (yych == 'M') - goto yy637; + goto yy662; if (yych != 'm') - goto yy497; - yy637: + goto yy522; + yy662: yych = *++p; if (yych == 'E') - goto yy638; + goto yy663; if (yych != 'e') - goto yy497; - yy638: + goto yy522; + yy663: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'R') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'S') - goto yy639; + goto yy664; if (yych != 's') - goto 
yy497; + goto yy522; } } - yy639: + yy664: yych = *++p; if (yych == 'E') - goto yy640; + goto yy665; if (yych != 'e') - goto yy497; - yy640: + goto yy522; + yy665: yych = *++p; if (yych == 'T') - goto yy529; + goto yy554; if (yych == 't') - goto yy529; - goto yy497; - yy641: + goto yy554; + goto yy522; + yy666: yych = *++p; if (yych <= 'V') { if (yych <= 'Q') { if (yych == 'A') - goto yy647; - goto yy497; + goto yy672; + goto yy522; } else { if (yych <= 'R') - goto yy529; + goto yy554; if (yych <= 'U') - goto yy497; - goto yy529; + goto yy522; + goto yy554; } } else { if (yych <= 'q') { if (yych == 'a') - goto yy647; - goto yy497; + goto yy672; + goto yy522; } else { if (yych <= 'r') - goto yy529; + goto yy554; if (yych == 'v') - goto yy529; - goto yy497; + goto yy554; + goto yy522; } } - yy642: + yy667: yych = *++p; if (yych == 'T') - goto yy643; + goto yy668; if (yych != 't') - goto yy497; - yy643: + goto yy522; + yy668: yych = *++p; if (yych == 'A') - goto yy644; + goto yy669; if (yych != 'a') - goto yy497; - yy644: + goto yy522; + yy669: yych = *++p; if (yych == 'I') - goto yy645; + goto yy670; if (yych != 'i') - goto yy497; - yy645: + goto yy522; + yy670: yych = *++p; if (yych == 'L') - goto yy646; + goto yy671; if (yych != 'l') - goto yy497; - yy646: + goto yy522; + yy671: yych = *++p; if (yych == 'S') - goto yy529; + goto yy554; if (yych == 's') - goto yy529; - goto yy497; - yy647: + goto yy554; + goto yy522; + yy672: yych = *++p; if (yych == 'L') - goto yy648; + goto yy673; if (yych != 'l') - goto yy497; - yy648: + goto yy522; + yy673: yych = *++p; if (yych == 'O') - goto yy649; + goto yy674; if (yych != 'o') - goto yy497; - yy649: + goto yy522; + yy674: yych = *++p; if (yych == 'G') - goto yy529; + goto yy554; if (yych == 'g') - goto yy529; - goto yy497; - yy650: + goto yy554; + goto yy522; + yy675: yych = *++p; if (yych == 'L') - goto yy660; + goto yy685; if (yych == 'l') - goto yy660; - goto yy497; - yy651: + goto yy685; + goto yy522; + yy676: yych = 
*++p; if (yych == 'N') - goto yy657; + goto yy682; if (yych == 'n') - goto yy657; - goto yy497; - yy652: + goto yy682; + goto yy522; + yy677: yych = *++p; if (yych == 'P') - goto yy653; + goto yy678; if (yych != 'p') - goto yy497; - yy653: + goto yy522; + yy678: yych = *++p; if (yych == 'T') - goto yy654; + goto yy679; if (yych != 't') - goto yy497; - yy654: + goto yy522; + yy679: yych = *++p; if (yych == 'I') - goto yy655; + goto yy680; if (yych != 'i') - goto yy497; - yy655: + goto yy522; + yy680: yych = *++p; if (yych == 'O') - goto yy656; + goto yy681; if (yych != 'o') - goto yy497; - yy656: + goto yy522; + yy681: yych = *++p; if (yych == 'N') - goto yy529; + goto yy554; if (yych == 'n') - goto yy529; - goto yy497; - yy657: + goto yy554; + goto yy522; + yy682: yych = *++p; if (yych == 'T') - goto yy658; + goto yy683; if (yych != 't') - goto yy497; - yy658: + goto yy522; + yy683: yych = *++p; if (yych == 'E') - goto yy659; + goto yy684; if (yych != 'e') - goto yy497; - yy659: + goto yy522; + yy684: yych = *++p; if (yych == 'R') - goto yy529; + goto yy554; if (yych == 'r') - goto yy529; - goto yy497; - yy660: + goto yy554; + goto yy522; + yy685: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'F') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'G') - goto yy661; + goto yy686; if (yych != 'g') - goto yy497; + goto yy522; } } - yy661: + yy686: yych = *++p; if (yych == 'R') - goto yy662; + goto yy687; if (yych != 'r') - goto yy497; - yy662: + goto yy522; + yy687: yych = *++p; if (yych == 'O') - goto yy663; + goto yy688; if (yych != 'o') - goto yy497; - yy663: + goto yy522; + yy688: yych = *++p; if (yych == 'U') - goto yy664; + goto yy689; if (yych 
!= 'u') - goto yy497; - yy664: + goto yy522; + yy689: yych = *++p; if (yych == 'P') - goto yy529; + goto yy554; if (yych == 'p') - goto yy529; - goto yy497; - yy665: + goto yy554; + goto yy522; + yy690: yych = *++p; if (yych == 'D') - goto yy680; + goto yy705; if (yych == 'd') - goto yy680; - goto yy497; - yy666: + goto yy705; + goto yy522; + yy691: yych = *++p; if (yych == 'O') - goto yy673; + goto yy698; if (yych == 'o') - goto yy673; - goto yy497; - yy667: + goto yy698; + goto yy522; + yy692: yych = *++p; if (yych == 'S') - goto yy668; + goto yy693; if (yych != 's') - goto yy497; - yy668: + goto yy522; + yy693: yych = *++p; if (yych == 'E') - goto yy669; + goto yy694; if (yych != 'e') - goto yy497; - yy669: + goto yy522; + yy694: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'E') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'F') - goto yy670; + goto yy695; if (yych != 'f') - goto yy497; + goto yy522; } } - yy670: + yy695: yych = *++p; if (yych == 'O') - goto yy671; + goto yy696; if (yych != 'o') - goto yy497; - yy671: + goto yy522; + yy696: yych = *++p; if (yych == 'N') - goto yy672; + goto yy697; if (yych != 'n') - goto yy497; - yy672: + goto yy522; + yy697: yych = *++p; if (yych == 'T') - goto yy529; + goto yy554; if (yych == 't') - goto yy529; - goto yy497; - yy673: + goto yy554; + goto yy522; + yy698: yych = *++p; if (yych == 'C') - goto yy674; + goto yy699; if (yych != 'c') - goto yy497; - yy674: + goto yy522; + yy699: yych = *++p; if (yych == 'K') - goto yy675; + goto yy700; if (yych != 'k') - goto yy497; - yy675: + goto yy522; + yy700: yych = *++p; if (yych == 'Q') - goto yy676; + goto yy701; if (yych != 'q') - goto yy497; - yy676: + 
goto yy522; + yy701: yych = *++p; if (yych == 'U') - goto yy677; + goto yy702; if (yych != 'u') - goto yy497; - yy677: + goto yy522; + yy702: yych = *++p; if (yych == 'O') - goto yy678; + goto yy703; if (yych != 'o') - goto yy497; - yy678: + goto yy522; + yy703: yych = *++p; if (yych == 'T') - goto yy679; + goto yy704; if (yych != 't') - goto yy497; - yy679: + goto yy522; + yy704: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy680: + goto yy554; + goto yy522; + yy705: yych = *++p; if (yych == 'Y') - goto yy529; + goto yy554; if (yych == 'y') - goto yy529; - goto yy497; - yy681: + goto yy554; + goto yy522; + yy706: yych = *++p; if (yych == 'I') - goto yy692; + goto yy717; if (yych == 'i') - goto yy692; - goto yy497; - yy682: + goto yy717; + goto yy522; + yy707: yych = *++p; if (yych == 'T') - goto yy688; + goto yy713; if (yych == 't') - goto yy688; - goto yy497; - yy683: + goto yy713; + goto yy522; + yy708: yych = *++p; if (yych == 'D') - goto yy684; + goto yy709; if (yych != 'd') - goto yy497; - yy684: + goto yy522; + yy709: yych = *++p; if (yych == 'R') - goto yy685; + goto yy710; if (yych != 'r') - goto yy497; - yy685: + goto yy522; + yy710: yych = *++p; if (yych == 'E') - goto yy686; + goto yy711; if (yych != 'e') - goto yy497; - yy686: + goto yy522; + yy711: yych = *++p; if (yych == 'S') - goto yy687; + goto yy712; if (yych != 's') - goto yy497; - yy687: + goto yy522; + yy712: yych = *++p; if (yych == 'S') - goto yy529; + goto yy554; if (yych == 's') - goto yy529; - goto yy497; - yy688: + goto yy554; + goto yy522; + yy713: yych = *++p; if (yych == 'I') - goto yy689; + goto yy714; if (yych != 'i') - goto yy497; - yy689: + goto yy522; + yy714: yych = *++p; if (yych == 'C') - goto yy690; + goto yy715; if (yych != 'c') - goto yy497; - yy690: + goto yy522; + yy715: yych = *++p; if (yych == 'L') - goto yy691; + goto yy716; if (yych != 'l') - goto yy497; - yy691: + goto yy522; + yy716: yych = *++p; if (yych 
== 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy692: + goto yy554; + goto yy522; + yy717: yych = *++p; if (yych == 'D') - goto yy693; + goto yy718; if (yych != 'd') - goto yy497; - yy693: + goto yy522; + yy718: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy694: + goto yy554; + goto yy522; + yy719: yych = *++p; if (yych == 'E') - goto yy570; + goto yy595; if (yych == 'e') - goto yy570; - goto yy497; - yy695: + goto yy595; + goto yy522; + yy720: yych = *++p; if (yych == 'R') - goto yy696; + goto yy721; if (yych != 'r') - goto yy497; - yy696: + goto yy522; + yy721: yych = *++p; if (yych == 'A') - goto yy697; + goto yy722; if (yych != 'a') - goto yy497; - yy697: + goto yy522; + yy722: yych = *++p; if (yych == 'M') - goto yy529; + goto yy554; if (yych == 'm') - goto yy529; - goto yy497; - yy698: + goto yy554; + goto yy522; + yy723: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= '@') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'A') - goto yy695; + goto yy720; if (yych == 'a') - goto yy695; - goto yy497; + goto yy720; + goto yy522; } } - yy699: + yy724: ++p; if ((yych = *p) <= 'U') { if (yych <= 'N') { if (yych == 'E') - goto yy555; - goto yy497; + goto yy580; + goto yy522; } else { if (yych <= 'O') - goto yy554; + goto yy579; if (yych <= 'T') - goto yy497; - goto yy553; + goto yy522; + goto yy578; } } else { if (yych <= 'n') { if (yych == 'e') - goto yy555; - goto yy497; + goto yy580; + goto yy522; } else { if (yych <= 'o') - goto yy554; + goto yy579; if (yych == 'u') - goto yy553; - goto yy497; + goto yy578; + goto yy522; } } } @@ -11689,735 +11946,735 
@@ bufsize_t _scan_html_block_start_7(const unsigned char *p) { }; yych = *p; if (yych == '<') - goto yy704; + goto yy729; ++p; - yy703 : { return 0; } - yy704: + yy728 : { return 0; } + yy729: yyaccept = 0; yych = *(marker = ++p); if (yych <= '@') { if (yych == '/') - goto yy708; - goto yy703; + goto yy733; + goto yy728; } else { if (yych <= 'Z') - goto yy705; + goto yy730; if (yych <= '`') - goto yy703; + goto yy728; if (yych >= '{') - goto yy703; + goto yy728; } - yy705: + yy730: ++p; yych = *p; if (yybm[0 + yych] & 1) { - goto yy705; + goto yy730; } if (yych <= ' ') { if (yych <= 0x08) - goto yy707; + goto yy732; if (yych <= '\r') - goto yy719; + goto yy744; if (yych >= ' ') - goto yy719; + goto yy744; } else { if (yych <= '/') { if (yych >= '/') - goto yy721; + goto yy746; } else { if (yych == '>') - goto yy713; + goto yy738; } } - yy707: + yy732: p = marker; if (yyaccept == 0) { - goto yy703; + goto yy728; } else { - goto yy717; + goto yy742; } - yy708: + yy733: yych = *++p; if (yych <= '@') - goto yy707; + goto yy732; if (yych <= 'Z') - goto yy709; + goto yy734; if (yych <= '`') - goto yy707; + goto yy732; if (yych >= '{') - goto yy707; - yy709: + goto yy732; + yy734: ++p; yych = *p; if (yybm[0 + yych] & 2) { - goto yy711; + goto yy736; } if (yych <= '=') { if (yych <= '-') { if (yych <= ',') - goto yy707; - goto yy709; + goto yy732; + goto yy734; } else { if (yych <= '/') - goto yy707; + goto yy732; if (yych <= '9') - goto yy709; - goto yy707; + goto yy734; + goto yy732; } } else { if (yych <= 'Z') { if (yych <= '>') - goto yy713; + goto yy738; if (yych <= '@') - goto yy707; - goto yy709; + goto yy732; + goto yy734; } else { if (yych <= '`') - goto yy707; + goto yy732; if (yych <= 'z') - goto yy709; - goto yy707; + goto yy734; + goto yy732; } } - yy711: + yy736: ++p; yych = *p; if (yybm[0 + yych] & 2) { - goto yy711; + goto yy736; } if (yych != '>') - goto yy707; - yy713: + goto yy732; + yy738: ++p; yych = *p; if (yybm[0 + yych] & 4) { - goto yy713; + goto 
yy738; } if (yych <= 0x08) - goto yy707; + goto yy732; if (yych <= '\n') - goto yy715; + goto yy740; if (yych <= '\v') - goto yy707; + goto yy732; if (yych <= '\r') - goto yy718; - goto yy707; - yy715: + goto yy743; + goto yy732; + yy740: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 4) { - goto yy713; + goto yy738; } if (yych <= 0x08) - goto yy717; + goto yy742; if (yych <= '\n') - goto yy715; + goto yy740; if (yych <= '\v') - goto yy717; + goto yy742; if (yych <= '\r') - goto yy718; - yy717 : { return 7; } - yy718: + goto yy743; + yy742 : { return 7; } + yy743: yych = *++p; - goto yy717; - yy719: + goto yy742; + yy744: ++p; yych = *p; if (yych <= ':') { if (yych <= ' ') { if (yych <= 0x08) - goto yy707; + goto yy732; if (yych <= '\r') - goto yy719; + goto yy744; if (yych <= 0x1F) - goto yy707; - goto yy719; + goto yy732; + goto yy744; } else { if (yych == '/') - goto yy721; + goto yy746; if (yych <= '9') - goto yy707; - goto yy722; + goto yy732; + goto yy747; } } else { if (yych <= 'Z') { if (yych == '>') - goto yy713; + goto yy738; if (yych <= '@') - goto yy707; - goto yy722; + goto yy732; + goto yy747; } else { if (yych <= '_') { if (yych <= '^') - goto yy707; - goto yy722; + goto yy732; + goto yy747; } else { if (yych <= '`') - goto yy707; + goto yy732; if (yych <= 'z') - goto yy722; - goto yy707; + goto yy747; + goto yy732; } } } - yy721: + yy746: yych = *++p; if (yych == '>') - goto yy713; - goto yy707; - yy722: + goto yy738; + goto yy732; + yy747: ++p; yych = *p; if (yybm[0 + yych] & 16) { - goto yy722; + goto yy747; } if (yych <= ',') { if (yych <= '\r') { if (yych <= 0x08) - goto yy707; + goto yy732; } else { if (yych != ' ') - goto yy707; + goto yy732; } } else { if (yych <= '<') { if (yych <= '/') - goto yy721; - goto yy707; + goto yy746; + goto yy732; } else { if (yych <= '=') - goto yy726; + goto yy751; if (yych <= '>') - goto yy713; - goto yy707; + goto yy738; + goto yy732; } } - yy724: + yy749: ++p; yych = *p; if (yych <= '<') { if 
(yych <= ' ') { if (yych <= 0x08) - goto yy707; + goto yy732; if (yych <= '\r') - goto yy724; + goto yy749; if (yych <= 0x1F) - goto yy707; - goto yy724; + goto yy732; + goto yy749; } else { if (yych <= '/') { if (yych <= '.') - goto yy707; - goto yy721; + goto yy732; + goto yy746; } else { if (yych == ':') - goto yy722; - goto yy707; + goto yy747; + goto yy732; } } } else { if (yych <= 'Z') { if (yych <= '=') - goto yy726; + goto yy751; if (yych <= '>') - goto yy713; + goto yy738; if (yych <= '@') - goto yy707; - goto yy722; + goto yy732; + goto yy747; } else { if (yych <= '_') { if (yych <= '^') - goto yy707; - goto yy722; + goto yy732; + goto yy747; } else { if (yych <= '`') - goto yy707; + goto yy732; if (yych <= 'z') - goto yy722; - goto yy707; + goto yy747; + goto yy732; } } } - yy726: + yy751: ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy728; + goto yy753; } if (yych <= 0xE0) { if (yych <= '"') { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych >= '!') - goto yy739; + goto yy764; } else { if (yych <= '\'') - goto yy737; + goto yy762; if (yych <= 0xC1) - goto yy707; + goto yy732; if (yych <= 0xDF) - goto yy730; - goto yy731; + goto yy755; + goto yy756; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy736; - goto yy732; + goto yy761; + goto yy757; } else { if (yych <= 0xF0) - goto yy733; + goto yy758; if (yych <= 0xF3) - goto yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy728; + goto yy753; } if (yych <= 0xDF) { if (yych <= '\'') { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= ' ') - goto yy762; + goto yy787; if (yych <= '"') - goto yy739; - goto yy737; + goto yy764; + goto yy762; } else { if (yych == '>') - goto yy713; + goto yy738; if (yych <= 0xC1) - goto yy707; - goto yy730; + goto yy732; + goto yy755; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy731; + goto yy756; if (yych == 0xED) - goto yy736; - goto 
yy732; + goto yy761; + goto yy757; } else { if (yych <= 0xF0) - goto yy733; + goto yy758; if (yych <= 0xF3) - goto yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } - yy728: + yy753: ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy728; + goto yy753; } if (yych <= 0xE0) { if (yych <= '=') { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= ' ') - goto yy756; - goto yy707; + goto yy781; + goto yy732; } else { if (yych <= '>') - goto yy713; + goto yy738; if (yych <= 0xC1) - goto yy707; + goto yy732; if (yych >= 0xE0) - goto yy731; + goto yy756; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy736; - goto yy732; + goto yy761; + goto yy757; } else { if (yych <= 0xF0) - goto yy733; + goto yy758; if (yych <= 0xF3) - goto yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } - yy730: + yy755: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy728; - goto yy707; - yy731: + goto yy753; + goto yy732; + yy756: ++p; yych = *p; if (yych <= 0x9F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy730; - goto yy707; - yy732: + goto yy755; + goto yy732; + yy757: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy730; - goto yy707; - yy733: + goto yy755; + goto yy732; + yy758: ++p; yych = *p; if (yych <= 0x8F) - goto yy707; - if (yych <= 0xBF) goto yy732; - goto yy707; - yy734: + if (yych <= 0xBF) + goto yy757; + goto yy732; + yy759: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; - if (yych <= 0xBF) goto yy732; - goto yy707; - yy735: + if (yych <= 0xBF) + goto yy757; + goto yy732; + yy760: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; - if (yych <= 0x8F) goto yy732; - goto yy707; - yy736: + if (yych <= 0x8F) + goto yy757; + goto yy732; + yy761: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0x9F) - goto yy730; - goto yy707; - yy737: + goto yy755; + goto yy732; + 
yy762: ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy737; + goto yy762; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= '\'') - goto yy748; - goto yy707; + goto yy773; + goto yy732; } else { if (yych <= 0xDF) - goto yy749; + goto yy774; if (yych <= 0xE0) - goto yy750; - goto yy751; + goto yy775; + goto yy776; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy755; + goto yy780; if (yych <= 0xEF) - goto yy751; - goto yy752; + goto yy776; + goto yy777; } else { if (yych <= 0xF3) - goto yy753; + goto yy778; if (yych <= 0xF4) - goto yy754; - goto yy707; + goto yy779; + goto yy732; } } - yy739: + yy764: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy739; + goto yy764; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= '"') - goto yy748; - goto yy707; + goto yy773; + goto yy732; } else { if (yych <= 0xDF) - goto yy741; + goto yy766; if (yych <= 0xE0) - goto yy742; - goto yy743; + goto yy767; + goto yy768; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy747; + goto yy772; if (yych <= 0xEF) - goto yy743; - goto yy744; + goto yy768; + goto yy769; } else { if (yych <= 0xF3) - goto yy745; + goto yy770; if (yych <= 0xF4) - goto yy746; - goto yy707; + goto yy771; + goto yy732; } } - yy741: + yy766: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy739; - goto yy707; - yy742: + goto yy764; + goto yy732; + yy767: ++p; yych = *p; if (yych <= 0x9F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy741; - goto yy707; - yy743: + goto yy766; + goto yy732; + yy768: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy741; - goto yy707; - yy744: + goto yy766; + goto yy732; + yy769: ++p; yych = *p; if (yych <= 0x8F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy743; - goto yy707; - yy745: + goto yy768; + goto yy732; + yy770: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; 
+ goto yy732; if (yych <= 0xBF) - goto yy743; - goto yy707; - yy746: + goto yy768; + goto yy732; + yy771: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0x8F) - goto yy743; - goto yy707; - yy747: + goto yy768; + goto yy732; + yy772: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0x9F) - goto yy741; - goto yy707; - yy748: + goto yy766; + goto yy732; + yy773: ++p; yych = *p; if (yych <= ' ') { if (yych <= 0x08) - goto yy707; + goto yy732; if (yych <= '\r') - goto yy719; + goto yy744; if (yych <= 0x1F) - goto yy707; - goto yy719; + goto yy732; + goto yy744; } else { if (yych <= '/') { if (yych <= '.') - goto yy707; - goto yy721; + goto yy732; + goto yy746; } else { if (yych == '>') - goto yy713; - goto yy707; + goto yy738; + goto yy732; } } - yy749: + yy774: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy737; - goto yy707; - yy750: + goto yy762; + goto yy732; + yy775: ++p; yych = *p; if (yych <= 0x9F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy749; - goto yy707; - yy751: + goto yy774; + goto yy732; + yy776: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy749; - goto yy707; - yy752: + goto yy774; + goto yy732; + yy777: ++p; yych = *p; if (yych <= 0x8F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy751; - goto yy707; - yy753: + goto yy776; + goto yy732; + yy778: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy751; - goto yy707; - yy754: + goto yy776; + goto yy732; + yy779: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0x8F) - goto yy751; - goto yy707; - yy755: + goto yy776; + goto yy732; + yy780: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0x9F) - goto yy749; - goto yy707; - yy756: + goto yy774; + goto yy732; + yy781: ++p; yych = *p; if (yych <= '@') { if (yych <= '"') { if (yych <= '\r') { if (yych <= 0x00) - goto 
yy707; + goto yy732; if (yych <= 0x08) - goto yy728; - goto yy756; + goto yy753; + goto yy781; } else { if (yych == ' ') - goto yy756; + goto yy781; if (yych <= '!') - goto yy728; - goto yy707; + goto yy753; + goto yy732; } } else { if (yych <= ':') { if (yych == '\'') - goto yy707; + goto yy732; if (yych <= '9') - goto yy728; + goto yy753; } else { if (yych <= ';') - goto yy728; + goto yy753; if (yych <= '=') - goto yy707; + goto yy732; if (yych <= '>') - goto yy713; - goto yy728; + goto yy738; + goto yy753; } } } else { if (yych <= 0xDF) { if (yych <= '`') { if (yych <= 'Z') - goto yy758; + goto yy783; if (yych <= '^') - goto yy728; + goto yy753; if (yych >= '`') - goto yy707; + goto yy732; } else { if (yych <= 'z') - goto yy758; + goto yy783; if (yych <= 0x7F) - goto yy728; + goto yy753; if (yych <= 0xC1) - goto yy707; - goto yy730; + goto yy732; + goto yy755; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy731; + goto yy756; if (yych == 0xED) - goto yy736; - goto yy732; + goto yy761; + goto yy757; } else { if (yych <= 0xF0) - goto yy733; + goto yy758; if (yych <= 0xF3) - goto yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } } - yy758: + yy783: ++p; yych = *p; if (yych <= '>') { if (yych <= '&') { if (yych <= 0x1F) { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= 0x08) - goto yy728; + goto yy753; if (yych >= 0x0E) - goto yy728; + goto yy753; } else { if (yych <= ' ') - goto yy760; + goto yy785; if (yych == '"') - goto yy707; - goto yy728; + goto yy732; + goto yy753; } } else { if (yych <= '/') { if (yych <= '\'') - goto yy707; + goto yy732; if (yych <= ',') - goto yy728; + goto yy753; if (yych <= '.') - goto yy758; - goto yy728; + goto yy783; + goto yy753; } else { if (yych <= ';') { if (yych <= ':') - goto yy758; - goto yy728; + goto yy783; + goto yy753; } else { if (yych <= '<') - goto yy707; + goto yy732; if (yych <= '=') - goto yy726; - goto yy713; + goto yy751; + goto yy738; } } } @@ 
-12425,188 +12682,188 @@ bufsize_t _scan_html_block_start_7(const unsigned char *p) { if (yych <= 0xC1) { if (yych <= '_') { if (yych <= '@') - goto yy728; + goto yy753; if (yych <= 'Z') - goto yy758; + goto yy783; if (yych <= '^') - goto yy728; - goto yy758; + goto yy753; + goto yy783; } else { if (yych <= '`') - goto yy707; + goto yy732; if (yych <= 'z') - goto yy758; + goto yy783; if (yych <= 0x7F) - goto yy728; - goto yy707; + goto yy753; + goto yy732; } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy730; + goto yy755; if (yych <= 0xE0) - goto yy731; + goto yy756; if (yych <= 0xEC) - goto yy732; - goto yy736; + goto yy757; + goto yy761; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy732; - goto yy733; + goto yy757; + goto yy758; } else { if (yych <= 0xF3) - goto yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } } } - yy760: + yy785: ++p; yych = *p; if (yych <= '@') { if (yych <= '&') { if (yych <= 0x1F) { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= 0x08) - goto yy728; + goto yy753; if (yych <= '\r') - goto yy760; - goto yy728; + goto yy785; + goto yy753; } else { if (yych <= ' ') - goto yy760; + goto yy785; if (yych == '"') - goto yy707; - goto yy728; + goto yy732; + goto yy753; } } else { if (yych <= ';') { if (yych <= '\'') - goto yy707; + goto yy732; if (yych == ':') - goto yy758; - goto yy728; + goto yy783; + goto yy753; } else { if (yych <= '<') - goto yy707; + goto yy732; if (yych <= '=') - goto yy726; + goto yy751; if (yych <= '>') - goto yy713; - goto yy728; + goto yy738; + goto yy753; } } } else { if (yych <= 0xDF) { if (yych <= '`') { if (yych <= 'Z') - goto yy758; + goto yy783; if (yych <= '^') - goto yy728; + goto yy753; if (yych <= '_') - goto yy758; - goto yy707; + goto yy783; + goto yy732; } else { if (yych <= 'z') - goto yy758; + goto yy783; if (yych <= 0x7F) - goto yy728; + goto yy753; if (yych <= 0xC1) - goto yy707; - goto yy730; + goto yy732; + goto yy755; } } 
else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy731; + goto yy756; if (yych == 0xED) - goto yy736; - goto yy732; + goto yy761; + goto yy757; } else { if (yych <= 0xF0) - goto yy733; + goto yy758; if (yych <= 0xF3) - goto yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } } - yy762: + yy787: ++p; yych = *p; if (yych <= '@') { if (yych <= '"') { if (yych <= '\r') { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= 0x08) - goto yy728; - goto yy762; + goto yy753; + goto yy787; } else { if (yych == ' ') - goto yy762; + goto yy787; if (yych <= '!') - goto yy728; - goto yy739; + goto yy753; + goto yy764; } } else { if (yych <= ':') { if (yych == '\'') - goto yy737; + goto yy762; if (yych <= '9') - goto yy728; - goto yy758; + goto yy753; + goto yy783; } else { if (yych <= ';') - goto yy728; + goto yy753; if (yych <= '=') - goto yy707; + goto yy732; if (yych <= '>') - goto yy713; - goto yy728; + goto yy738; + goto yy753; } } } else { if (yych <= 0xDF) { if (yych <= '`') { if (yych <= 'Z') - goto yy758; + goto yy783; if (yych <= '^') - goto yy728; + goto yy753; if (yych <= '_') - goto yy758; - goto yy707; + goto yy783; + goto yy732; } else { if (yych <= 'z') - goto yy758; + goto yy783; if (yych <= 0x7F) - goto yy728; + goto yy753; if (yych <= 0xC1) - goto yy707; - goto yy730; + goto yy732; + goto yy755; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy731; + goto yy756; if (yych == 0xED) - goto yy736; - goto yy732; + goto yy761; + goto yy757; } else { if (yych <= 0xF0) - goto yy733; + goto yy758; if (yych <= 0xF3) - goto yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } } @@ -12642,891 +12899,891 @@ bufsize_t _scan_html_block_end_1(const unsigned char *p) { if (yych <= 0xDF) { if (yych <= ';') { if (yych <= 0x00) - goto yy766; + goto yy791; if (yych != '\n') - goto yy768; + goto yy793; } else { if (yych <= '<') - goto yy769; + goto yy794; if (yych <= 0x7F) 
- goto yy768; + goto yy793; if (yych >= 0xC2) - goto yy770; + goto yy795; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy771; + goto yy796; if (yych == 0xED) - goto yy773; - goto yy772; + goto yy798; + goto yy797; } else { if (yych <= 0xF0) - goto yy774; + goto yy799; if (yych <= 0xF3) - goto yy775; + goto yy800; if (yych <= 0xF4) - goto yy776; + goto yy801; } } - yy766: + yy791: ++p; - yy767 : { return 0; } - yy768: + yy792 : { return 0; } + yy793: yyaccept = 0; yych = *(marker = ++p); if (yych <= '\n') { if (yych <= 0x00) - goto yy767; + goto yy792; if (yych <= '\t') - goto yy781; - goto yy767; + goto yy806; + goto yy792; } else { if (yych <= 0x7F) - goto yy781; + goto yy806; if (yych <= 0xC1) - goto yy767; + goto yy792; if (yych <= 0xF4) - goto yy781; - goto yy767; + goto yy806; + goto yy792; } - yy769: + yy794: yyaccept = 0; yych = *(marker = ++p); if (yych <= '.') { if (yych <= 0x00) - goto yy767; + goto yy792; if (yych == '\n') - goto yy767; - goto yy781; + goto yy792; + goto yy806; } else { if (yych <= 0x7F) { if (yych <= '/') - goto yy789; - goto yy781; + goto yy814; + goto yy806; } else { if (yych <= 0xC1) - goto yy767; + goto yy792; if (yych <= 0xF4) - goto yy781; - goto yy767; + goto yy806; + goto yy792; } } - yy770: + yy795: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy767; + goto yy792; if (yych <= 0xBF) - goto yy780; - goto yy767; - yy771: + goto yy805; + goto yy792; + yy796: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x9F) - goto yy767; + goto yy792; if (yych <= 0xBF) - goto yy779; - goto yy767; - yy772: + goto yy804; + goto yy792; + yy797: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy767; + goto yy792; if (yych <= 0xBF) - goto yy779; - goto yy767; - yy773: + goto yy804; + goto yy792; + yy798: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy767; + goto yy792; if (yych <= 0x9F) - goto yy779; - goto yy767; - yy774: + goto yy804; + goto yy792; + yy799: yyaccept = 0; yych = 
*(marker = ++p); if (yych <= 0x8F) - goto yy767; + goto yy792; if (yych <= 0xBF) - goto yy777; - goto yy767; - yy775: + goto yy802; + goto yy792; + yy800: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy767; + goto yy792; if (yych <= 0xBF) - goto yy777; - goto yy767; - yy776: + goto yy802; + goto yy792; + yy801: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy767; + goto yy792; if (yych >= 0x90) - goto yy767; - yy777: + goto yy792; + yy802: ++p; yych = *p; if (yych <= 0x7F) - goto yy778; + goto yy803; if (yych <= 0xBF) - goto yy779; - yy778: + goto yy804; + yy803: p = marker; if (yyaccept == 0) { - goto yy767; + goto yy792; } else { - goto yy795; + goto yy820; } - yy779: + yy804: ++p; yych = *p; if (yych <= 0x7F) - goto yy778; + goto yy803; if (yych >= 0xC0) - goto yy778; - yy780: + goto yy803; + yy805: ++p; yych = *p; - yy781: + yy806: if (yybm[0 + yych] & 64) { - goto yy780; + goto yy805; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy778; + goto yy803; if (yych >= '=') - goto yy778; + goto yy803; } else { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; - goto yy777; + goto yy809; + goto yy802; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy788; + goto yy813; if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } - yy782: + yy807: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xDF) { if (yych <= '.') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= '/') - goto yy789; + goto yy814; if (yych <= 0x7F) - goto yy780; + goto yy805; if (yych <= 0xC1) - goto yy778; - goto yy779; + goto yy803; + goto yy804; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych == 
0xED) - goto yy788; - goto yy777; + goto yy813; + goto yy802; } else { if (yych <= 0xF0) - goto yy785; + goto yy810; if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } - yy784: + yy809: ++p; yych = *p; if (yych <= 0x9F) - goto yy778; + goto yy803; if (yych <= 0xBF) - goto yy779; - goto yy778; - yy785: + goto yy804; + goto yy803; + yy810: ++p; yych = *p; if (yych <= 0x8F) - goto yy778; + goto yy803; if (yych <= 0xBF) - goto yy777; - goto yy778; - yy786: + goto yy802; + goto yy803; + yy811: ++p; yych = *p; if (yych <= 0x7F) - goto yy778; + goto yy803; if (yych <= 0xBF) - goto yy777; - goto yy778; - yy787: + goto yy802; + goto yy803; + yy812: ++p; yych = *p; if (yych <= 0x7F) - goto yy778; + goto yy803; if (yych <= 0x8F) - goto yy777; - goto yy778; - yy788: + goto yy802; + goto yy803; + yy813: ++p; yych = *p; if (yych <= 0x7F) - goto yy778; + goto yy803; if (yych <= 0x9F) - goto yy779; - goto yy778; - yy789: + goto yy804; + goto yy803; + yy814: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 's') { if (yych <= 'P') { if (yych <= '\t') { if (yych <= 0x00) - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= '\n') - goto yy778; + goto yy803; if (yych <= 'O') - goto yy780; - goto yy791; + goto yy805; + goto yy816; } } else { if (yych <= 'o') { if (yych != 'S') - goto yy780; + goto yy805; } else { if (yych <= 'p') - goto yy791; + goto yy816; if (yych <= 'r') - goto yy780; + goto yy805; } } } else { if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } else { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; - goto yy777; + goto yy809; + goto yy802; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy788; + goto yy813; if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; 
if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 't') { if (yych <= 'C') { if (yych <= '\t') { if (yych <= 0x00) - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= '\n') - goto yy778; + goto yy803; if (yych <= 'B') - goto yy780; - goto yy796; + goto yy805; + goto yy821; } } else { if (yych <= 'b') { if (yych == 'T') - goto yy797; - goto yy780; + goto yy822; + goto yy805; } else { if (yych <= 'c') - goto yy796; + goto yy821; if (yych <= 's') - goto yy780; - goto yy797; + goto yy805; + goto yy822; } } } else { if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } else { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; - goto yy777; + goto yy809; + goto yy802; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy788; + goto yy813; if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy791: + yy816: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'Q') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'q') { if (yych >= 'S') - goto yy780; + goto yy805; } else { if (yych <= 'r') - goto yy792; + goto yy817; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if 
(yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy792: + yy817: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'D') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'd') { if (yych >= 'F') - goto yy780; + goto yy805; } else { if (yych <= 'e') - goto yy793; + goto yy818; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy793: + yy818: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xDF) { if (yych <= '=') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= '>') - goto yy794; + goto yy819; if (yych <= 0x7F) - goto yy780; + goto yy805; if (yych <= 0xC1) - goto yy778; - goto yy779; + goto yy803; + goto yy804; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych == 0xED) - goto yy788; - goto yy777; + goto yy813; + goto yy802; } else { if (yych <= 0xF0) - goto yy785; + goto yy810; if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } - yy794: + yy819: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy780; + goto yy805; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy795; + goto yy820; if (yych <= '<') - goto yy782; + goto yy807; } else { if (yych <= 0xDF) - goto 
yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; - goto yy777; + goto yy809; + goto yy802; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy788; + goto yy813; if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; + goto yy812; } } - yy795 : { return (bufsize_t)(p - start); } - yy796: + yy820 : { return (bufsize_t)(p - start); } + yy821: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'Q') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'q') { if (yych <= 'R') - goto yy800; - goto yy780; + goto yy825; + goto yy805; } else { if (yych <= 'r') - goto yy800; + goto yy825; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy797: + yy822: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'X') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'x') { if (yych >= 'Z') - goto yy780; + goto yy805; } else { if (yych <= 'y') - goto yy798; + goto yy823; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + 
goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy798: + yy823: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'K') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'k') { if (yych >= 'M') - goto yy780; + goto yy805; } else { if (yych <= 'l') - goto yy799; + goto yy824; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy799: + yy824: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'D') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'd') { if (yych <= 'E') - goto yy793; - goto yy780; + goto yy818; + goto yy805; } else { if (yych <= 'e') - goto yy793; + goto yy818; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - 
goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy800: + yy825: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'H') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'h') { if (yych >= 'J') - goto yy780; + goto yy805; } else { if (yych <= 'i') - goto yy801; + goto yy826; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy801: + yy826: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'O') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'o') { if (yych >= 'Q') - goto yy780; + goto yy805; } else { if (yych <= 'p') - goto yy802; + goto yy827; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy802: + yy827: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'S') { if (yych <= 0x00) - goto 
yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 's') { if (yych <= 'T') - goto yy793; - goto yy780; + goto yy818; + goto yy805; } else { if (yych <= 't') - goto yy793; + goto yy818; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } @@ -13562,334 +13819,334 @@ bufsize_t _scan_html_block_end_2(const unsigned char *p) { if (yych <= 0xDF) { if (yych <= ',') { if (yych <= 0x00) - goto yy805; + goto yy830; if (yych != '\n') - goto yy807; + goto yy832; } else { if (yych <= '-') - goto yy808; + goto yy833; if (yych <= 0x7F) - goto yy807; + goto yy832; if (yych >= 0xC2) - goto yy809; + goto yy834; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy810; + goto yy835; if (yych == 0xED) - goto yy812; - goto yy811; + goto yy837; + goto yy836; } else { if (yych <= 0xF0) - goto yy813; + goto yy838; if (yych <= 0xF3) - goto yy814; + goto yy839; if (yych <= 0xF4) - goto yy815; + goto yy840; } } - yy805: + yy830: ++p; - yy806 : { return 0; } - yy807: + yy831 : { return 0; } + yy832: yyaccept = 0; yych = *(marker = ++p); if (yych <= '\n') { if (yych <= 0x00) - goto yy806; + goto yy831; if (yych <= '\t') - goto yy820; - goto yy806; + goto yy845; + goto yy831; } else { if (yych <= 0x7F) - goto yy820; + goto yy845; if (yych <= 0xC1) - goto yy806; + goto yy831; if (yych <= 0xF4) - goto yy820; - goto yy806; + goto yy845; + goto yy831; } - yy808: + yy833: yyaccept = 0; yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy827; + goto yy852; } if (yych <= 
'\n') { if (yych <= 0x00) - goto yy806; + goto yy831; if (yych <= '\t') - goto yy820; - goto yy806; + goto yy845; + goto yy831; } else { if (yych <= 0x7F) - goto yy820; + goto yy845; if (yych <= 0xC1) - goto yy806; + goto yy831; if (yych <= 0xF4) - goto yy820; - goto yy806; + goto yy845; + goto yy831; } - yy809: + yy834: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy806; + goto yy831; if (yych <= 0xBF) - goto yy819; - goto yy806; - yy810: + goto yy844; + goto yy831; + yy835: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x9F) - goto yy806; + goto yy831; if (yych <= 0xBF) - goto yy818; - goto yy806; - yy811: + goto yy843; + goto yy831; + yy836: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy806; + goto yy831; if (yych <= 0xBF) - goto yy818; - goto yy806; - yy812: + goto yy843; + goto yy831; + yy837: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy806; + goto yy831; if (yych <= 0x9F) - goto yy818; - goto yy806; - yy813: + goto yy843; + goto yy831; + yy838: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x8F) - goto yy806; + goto yy831; if (yych <= 0xBF) - goto yy816; - goto yy806; - yy814: + goto yy841; + goto yy831; + yy839: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy806; + goto yy831; if (yych <= 0xBF) - goto yy816; - goto yy806; - yy815: + goto yy841; + goto yy831; + yy840: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy806; + goto yy831; if (yych >= 0x90) - goto yy806; - yy816: + goto yy831; + yy841: ++p; yych = *p; if (yych <= 0x7F) - goto yy817; + goto yy842; if (yych <= 0xBF) - goto yy818; - yy817: + goto yy843; + yy842: p = marker; if (yyaccept == 0) { - goto yy806; + goto yy831; } else { - goto yy830; + goto yy855; } - yy818: + yy843: ++p; yych = *p; if (yych <= 0x7F) - goto yy817; + goto yy842; if (yych >= 0xC0) - goto yy817; - yy819: + goto yy842; + yy844: ++p; yych = *p; - yy820: + yy845: if (yybm[0 + yych] & 64) { - goto yy819; + goto yy844; } if (yych <= 
0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy817; + goto yy842; if (yych >= '.') - goto yy817; + goto yy842; } else { if (yych <= 0xDF) - goto yy818; + goto yy843; if (yych <= 0xE0) - goto yy822; - goto yy816; + goto yy847; + goto yy841; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy826; + goto yy851; if (yych <= 0xEF) - goto yy816; - goto yy823; + goto yy841; + goto yy848; } else { if (yych <= 0xF3) - goto yy824; + goto yy849; if (yych <= 0xF4) - goto yy825; - goto yy817; + goto yy850; + goto yy842; } } - yy821: + yy846: ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy819; + goto yy844; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy817; + goto yy842; if (yych <= '-') - goto yy827; - goto yy817; + goto yy852; + goto yy842; } else { if (yych <= 0xDF) - goto yy818; + goto yy843; if (yych >= 0xE1) - goto yy816; + goto yy841; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy826; + goto yy851; if (yych <= 0xEF) - goto yy816; - goto yy823; + goto yy841; + goto yy848; } else { if (yych <= 0xF3) - goto yy824; + goto yy849; if (yych <= 0xF4) - goto yy825; - goto yy817; + goto yy850; + goto yy842; } } - yy822: + yy847: ++p; yych = *p; if (yych <= 0x9F) - goto yy817; + goto yy842; if (yych <= 0xBF) - goto yy818; - goto yy817; - yy823: + goto yy843; + goto yy842; + yy848: ++p; yych = *p; if (yych <= 0x8F) - goto yy817; + goto yy842; if (yych <= 0xBF) - goto yy816; - goto yy817; - yy824: + goto yy841; + goto yy842; + yy849: ++p; yych = *p; if (yych <= 0x7F) - goto yy817; + goto yy842; if (yych <= 0xBF) - goto yy816; - goto yy817; - yy825: + goto yy841; + goto yy842; + yy850: ++p; yych = *p; if (yych <= 0x7F) - goto yy817; + goto yy842; if (yych <= 0x8F) - goto yy816; - goto yy817; - yy826: + goto yy841; + goto yy842; + yy851: ++p; yych = *p; if (yych <= 0x7F) - goto yy817; + goto yy842; if (yych <= 0x9F) - goto yy818; - goto yy817; - yy827: + goto yy843; + goto yy842; + yy852: ++p; yych = *p; if (yybm[0 + yych] & 128) { 
- goto yy827; + goto yy852; } if (yych <= 0xDF) { if (yych <= '=') { if (yych <= 0x00) - goto yy817; + goto yy842; if (yych == '\n') - goto yy817; - goto yy819; + goto yy842; + goto yy844; } else { if (yych <= '>') - goto yy829; + goto yy854; if (yych <= 0x7F) - goto yy819; + goto yy844; if (yych <= 0xC1) - goto yy817; - goto yy818; + goto yy842; + goto yy843; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy822; + goto yy847; if (yych == 0xED) - goto yy826; - goto yy816; + goto yy851; + goto yy841; } else { if (yych <= 0xF0) - goto yy823; + goto yy848; if (yych <= 0xF3) - goto yy824; + goto yy849; if (yych <= 0xF4) - goto yy825; - goto yy817; + goto yy850; + goto yy842; } } - yy829: + yy854: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy819; + goto yy844; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy830; + goto yy855; if (yych <= '-') - goto yy821; + goto yy846; } else { if (yych <= 0xDF) - goto yy818; + goto yy843; if (yych <= 0xE0) - goto yy822; - goto yy816; + goto yy847; + goto yy841; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy826; + goto yy851; if (yych <= 0xEF) - goto yy816; - goto yy823; + goto yy841; + goto yy848; } else { if (yych <= 0xF3) - goto yy824; + goto yy849; if (yych <= 0xF4) - goto yy825; + goto yy850; } } - yy830 : { return (bufsize_t)(p - start); } + yy855 : { return (bufsize_t)(p - start); } } } @@ -13922,301 +14179,301 @@ bufsize_t _scan_html_block_end_3(const unsigned char *p) { if (yych <= 0xDF) { if (yych <= '>') { if (yych <= 0x00) - goto yy833; + goto yy858; if (yych != '\n') - goto yy835; + goto yy860; } else { if (yych <= '?') - goto yy836; + goto yy861; if (yych <= 0x7F) - goto yy835; + goto yy860; if (yych >= 0xC2) - goto yy837; + goto yy862; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy838; + goto yy863; if (yych == 0xED) - goto yy840; - goto yy839; + goto yy865; + goto yy864; } else { if (yych <= 0xF0) - goto yy841; + goto yy866; if (yych <= 
0xF3) - goto yy842; + goto yy867; if (yych <= 0xF4) - goto yy843; + goto yy868; } } - yy833: + yy858: ++p; - yy834 : { return 0; } - yy835: + yy859 : { return 0; } + yy860: yyaccept = 0; yych = *(marker = ++p); if (yych <= '\n') { if (yych <= 0x00) - goto yy834; + goto yy859; if (yych <= '\t') - goto yy848; - goto yy834; + goto yy873; + goto yy859; } else { if (yych <= 0x7F) - goto yy848; + goto yy873; if (yych <= 0xC1) - goto yy834; + goto yy859; if (yych <= 0xF4) - goto yy848; - goto yy834; + goto yy873; + goto yy859; } - yy836: + yy861: yyaccept = 0; yych = *(marker = ++p); if (yych <= '=') { if (yych <= 0x00) - goto yy834; + goto yy859; if (yych == '\n') - goto yy834; - goto yy848; + goto yy859; + goto yy873; } else { if (yych <= 0x7F) { if (yych <= '>') - goto yy856; - goto yy848; + goto yy881; + goto yy873; } else { if (yych <= 0xC1) - goto yy834; + goto yy859; if (yych <= 0xF4) - goto yy848; - goto yy834; + goto yy873; + goto yy859; } } - yy837: + yy862: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy834; + goto yy859; if (yych <= 0xBF) - goto yy847; - goto yy834; - yy838: + goto yy872; + goto yy859; + yy863: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x9F) - goto yy834; + goto yy859; if (yych <= 0xBF) - goto yy846; - goto yy834; - yy839: + goto yy871; + goto yy859; + yy864: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy834; + goto yy859; if (yych <= 0xBF) - goto yy846; - goto yy834; - yy840: + goto yy871; + goto yy859; + yy865: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy834; + goto yy859; if (yych <= 0x9F) - goto yy846; - goto yy834; - yy841: + goto yy871; + goto yy859; + yy866: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x8F) - goto yy834; + goto yy859; if (yych <= 0xBF) - goto yy844; - goto yy834; - yy842: + goto yy869; + goto yy859; + yy867: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy834; + goto yy859; if (yych <= 0xBF) - goto yy844; - goto yy834; - yy843: + 
goto yy869; + goto yy859; + yy868: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy834; + goto yy859; if (yych >= 0x90) - goto yy834; - yy844: + goto yy859; + yy869: ++p; yych = *p; if (yych <= 0x7F) - goto yy845; + goto yy870; if (yych <= 0xBF) - goto yy846; - yy845: + goto yy871; + yy870: p = marker; if (yyaccept == 0) { - goto yy834; + goto yy859; } else { - goto yy857; + goto yy882; } - yy846: + yy871: ++p; yych = *p; if (yych <= 0x7F) - goto yy845; + goto yy870; if (yych >= 0xC0) - goto yy845; - yy847: + goto yy870; + yy872: ++p; yych = *p; - yy848: + yy873: if (yybm[0 + yych] & 64) { - goto yy847; + goto yy872; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy845; + goto yy870; if (yych >= '@') - goto yy845; + goto yy870; } else { if (yych <= 0xDF) - goto yy846; + goto yy871; if (yych <= 0xE0) - goto yy851; - goto yy844; + goto yy876; + goto yy869; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy855; + goto yy880; if (yych <= 0xEF) - goto yy844; - goto yy852; + goto yy869; + goto yy877; } else { if (yych <= 0xF3) - goto yy853; + goto yy878; if (yych <= 0xF4) - goto yy854; - goto yy845; + goto yy879; + goto yy870; } } - yy849: + yy874: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy849; + goto yy874; } if (yych <= 0xDF) { if (yych <= '=') { if (yych <= 0x00) - goto yy845; + goto yy870; if (yych == '\n') - goto yy845; - goto yy847; + goto yy870; + goto yy872; } else { if (yych <= '>') - goto yy856; + goto yy881; if (yych <= 0x7F) - goto yy847; + goto yy872; if (yych <= 0xC1) - goto yy845; - goto yy846; + goto yy870; + goto yy871; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy851; + goto yy876; if (yych == 0xED) - goto yy855; - goto yy844; + goto yy880; + goto yy869; } else { if (yych <= 0xF0) - goto yy852; + goto yy877; if (yych <= 0xF3) - goto yy853; + goto yy878; if (yych <= 0xF4) - goto yy854; - goto yy845; + goto yy879; + goto yy870; } } - yy851: + yy876: ++p; yych = *p; if (yych <= 0x9F) - 
goto yy845; + goto yy870; if (yych <= 0xBF) - goto yy846; - goto yy845; - yy852: + goto yy871; + goto yy870; + yy877: ++p; yych = *p; if (yych <= 0x8F) - goto yy845; + goto yy870; if (yych <= 0xBF) - goto yy844; - goto yy845; - yy853: + goto yy869; + goto yy870; + yy878: ++p; yych = *p; if (yych <= 0x7F) - goto yy845; + goto yy870; if (yych <= 0xBF) - goto yy844; - goto yy845; - yy854: + goto yy869; + goto yy870; + yy879: ++p; yych = *p; if (yych <= 0x7F) - goto yy845; + goto yy870; if (yych <= 0x8F) - goto yy844; - goto yy845; - yy855: + goto yy869; + goto yy870; + yy880: ++p; yych = *p; if (yych <= 0x7F) - goto yy845; + goto yy870; if (yych <= 0x9F) - goto yy846; - goto yy845; - yy856: + goto yy871; + goto yy870; + yy881: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy847; + goto yy872; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy857; + goto yy882; if (yych <= '?') - goto yy849; + goto yy874; } else { if (yych <= 0xDF) - goto yy846; + goto yy871; if (yych <= 0xE0) - goto yy851; - goto yy844; + goto yy876; + goto yy869; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy855; + goto yy880; if (yych <= 0xEF) - goto yy844; - goto yy852; + goto yy869; + goto yy877; } else { if (yych <= 0xF3) - goto yy853; + goto yy878; if (yych <= 0xF4) - goto yy854; + goto yy879; } } - yy857 : { return (bufsize_t)(p - start); } + yy882 : { return (bufsize_t)(p - start); } } } @@ -14249,257 +14506,257 @@ bufsize_t _scan_html_block_end_4(const unsigned char *p) { if (yych <= 0xDF) { if (yych <= '=') { if (yych <= 0x00) - goto yy860; + goto yy885; if (yych != '\n') - goto yy862; + goto yy887; } else { if (yych <= '>') - goto yy863; + goto yy888; if (yych <= 0x7F) - goto yy862; + goto yy887; if (yych >= 0xC2) - goto yy865; + goto yy890; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy866; + goto yy891; if (yych == 0xED) - goto yy868; - goto yy867; + goto yy893; + goto yy892; } else { if (yych <= 0xF0) - goto yy869; + 
goto yy894; if (yych <= 0xF3) - goto yy870; + goto yy895; if (yych <= 0xF4) - goto yy871; + goto yy896; } } - yy860: + yy885: ++p; - yy861 : { return 0; } - yy862: + yy886 : { return 0; } + yy887: yyaccept = 0; yych = *(marker = ++p); if (yych <= '\n') { if (yych <= 0x00) - goto yy861; + goto yy886; if (yych <= '\t') - goto yy876; - goto yy861; + goto yy901; + goto yy886; } else { if (yych <= 0x7F) - goto yy876; + goto yy901; if (yych <= 0xC1) - goto yy861; + goto yy886; if (yych <= 0xF4) - goto yy876; - goto yy861; + goto yy901; + goto yy886; } - yy863: + yy888: yyaccept = 1; yych = *(marker = ++p); if (yych <= '\n') { if (yych <= 0x00) - goto yy864; + goto yy889; if (yych <= '\t') - goto yy876; + goto yy901; } else { if (yych <= 0x7F) - goto yy876; + goto yy901; if (yych <= 0xC1) - goto yy864; + goto yy889; if (yych <= 0xF4) - goto yy876; + goto yy901; } - yy864 : { return (bufsize_t)(p - start); } - yy865: + yy889 : { return (bufsize_t)(p - start); } + yy890: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy861; + goto yy886; if (yych <= 0xBF) - goto yy875; - goto yy861; - yy866: + goto yy900; + goto yy886; + yy891: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x9F) - goto yy861; + goto yy886; if (yych <= 0xBF) - goto yy874; - goto yy861; - yy867: + goto yy899; + goto yy886; + yy892: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy861; + goto yy886; if (yych <= 0xBF) - goto yy874; - goto yy861; - yy868: + goto yy899; + goto yy886; + yy893: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy861; + goto yy886; if (yych <= 0x9F) - goto yy874; - goto yy861; - yy869: + goto yy899; + goto yy886; + yy894: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x8F) - goto yy861; + goto yy886; if (yych <= 0xBF) - goto yy872; - goto yy861; - yy870: + goto yy897; + goto yy886; + yy895: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy861; + goto yy886; if (yych <= 0xBF) - goto yy872; - goto yy861; - yy871: + 
goto yy897; + goto yy886; + yy896: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy861; + goto yy886; if (yych >= 0x90) - goto yy861; - yy872: + goto yy886; + yy897: ++p; yych = *p; if (yych <= 0x7F) - goto yy873; + goto yy898; if (yych <= 0xBF) - goto yy874; - yy873: + goto yy899; + yy898: p = marker; if (yyaccept == 0) { - goto yy861; + goto yy886; } else { - goto yy864; + goto yy889; } - yy874: + yy899: ++p; yych = *p; if (yych <= 0x7F) - goto yy873; + goto yy898; if (yych >= 0xC0) - goto yy873; - yy875: + goto yy898; + yy900: ++p; yych = *p; - yy876: + yy901: if (yybm[0 + yych] & 64) { - goto yy875; + goto yy900; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy873; + goto yy898; if (yych >= '?') - goto yy873; + goto yy898; } else { if (yych <= 0xDF) - goto yy874; + goto yy899; if (yych <= 0xE0) - goto yy879; - goto yy872; + goto yy904; + goto yy897; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy883; + goto yy908; if (yych <= 0xEF) - goto yy872; - goto yy880; + goto yy897; + goto yy905; } else { if (yych <= 0xF3) - goto yy881; + goto yy906; if (yych <= 0xF4) - goto yy882; - goto yy873; + goto yy907; + goto yy898; } } - yy877: + yy902: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy875; + goto yy900; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy864; + goto yy889; if (yych <= '>') - goto yy877; - goto yy864; + goto yy902; + goto yy889; } else { if (yych <= 0xDF) - goto yy874; + goto yy899; if (yych >= 0xE1) - goto yy872; + goto yy897; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy883; + goto yy908; if (yych <= 0xEF) - goto yy872; - goto yy880; + goto yy897; + goto yy905; } else { if (yych <= 0xF3) - goto yy881; + goto yy906; if (yych <= 0xF4) - goto yy882; - goto yy864; + goto yy907; + goto yy889; } } - yy879: + yy904: ++p; yych = *p; if (yych <= 0x9F) - goto yy873; + goto yy898; if (yych <= 0xBF) - goto yy874; - goto yy873; - yy880: + goto yy899; + 
goto yy898; + yy905: ++p; yych = *p; if (yych <= 0x8F) - goto yy873; + goto yy898; if (yych <= 0xBF) - goto yy872; - goto yy873; - yy881: + goto yy897; + goto yy898; + yy906: ++p; yych = *p; if (yych <= 0x7F) - goto yy873; + goto yy898; if (yych <= 0xBF) - goto yy872; - goto yy873; - yy882: + goto yy897; + goto yy898; + yy907: ++p; yych = *p; if (yych <= 0x7F) - goto yy873; + goto yy898; if (yych <= 0x8F) - goto yy872; - goto yy873; - yy883: + goto yy897; + goto yy898; + yy908: ++p; yych = *p; if (yych <= 0x7F) - goto yy873; + goto yy898; if (yych <= 0x9F) - goto yy874; - goto yy873; + goto yy899; + goto yy898; } } @@ -14532,334 +14789,334 @@ bufsize_t _scan_html_block_end_5(const unsigned char *p) { if (yych <= 0xDF) { if (yych <= '\\') { if (yych <= 0x00) - goto yy886; + goto yy911; if (yych != '\n') - goto yy888; + goto yy913; } else { if (yych <= ']') - goto yy889; + goto yy914; if (yych <= 0x7F) - goto yy888; + goto yy913; if (yych >= 0xC2) - goto yy890; + goto yy915; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy891; + goto yy916; if (yych == 0xED) - goto yy893; - goto yy892; + goto yy918; + goto yy917; } else { if (yych <= 0xF0) - goto yy894; + goto yy919; if (yych <= 0xF3) - goto yy895; + goto yy920; if (yych <= 0xF4) - goto yy896; + goto yy921; } } - yy886: + yy911: ++p; - yy887 : { return 0; } - yy888: + yy912 : { return 0; } + yy913: yyaccept = 0; yych = *(marker = ++p); if (yych <= '\n') { if (yych <= 0x00) - goto yy887; + goto yy912; if (yych <= '\t') - goto yy901; - goto yy887; + goto yy926; + goto yy912; } else { if (yych <= 0x7F) - goto yy901; + goto yy926; if (yych <= 0xC1) - goto yy887; + goto yy912; if (yych <= 0xF4) - goto yy901; - goto yy887; + goto yy926; + goto yy912; } - yy889: + yy914: yyaccept = 0; yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy908; + goto yy933; } if (yych <= '\n') { if (yych <= 0x00) - goto yy887; + goto yy912; if (yych <= '\t') - goto yy901; - goto yy887; + goto yy926; + goto yy912; } else { if 
(yych <= 0x7F) - goto yy901; + goto yy926; if (yych <= 0xC1) - goto yy887; + goto yy912; if (yych <= 0xF4) - goto yy901; - goto yy887; + goto yy926; + goto yy912; } - yy890: + yy915: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy887; + goto yy912; if (yych <= 0xBF) - goto yy900; - goto yy887; - yy891: + goto yy925; + goto yy912; + yy916: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x9F) - goto yy887; + goto yy912; if (yych <= 0xBF) - goto yy899; - goto yy887; - yy892: + goto yy924; + goto yy912; + yy917: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy887; + goto yy912; if (yych <= 0xBF) - goto yy899; - goto yy887; - yy893: + goto yy924; + goto yy912; + yy918: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy887; + goto yy912; if (yych <= 0x9F) - goto yy899; - goto yy887; - yy894: + goto yy924; + goto yy912; + yy919: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x8F) - goto yy887; + goto yy912; if (yych <= 0xBF) - goto yy897; - goto yy887; - yy895: + goto yy922; + goto yy912; + yy920: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy887; + goto yy912; if (yych <= 0xBF) - goto yy897; - goto yy887; - yy896: + goto yy922; + goto yy912; + yy921: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy887; + goto yy912; if (yych >= 0x90) - goto yy887; - yy897: + goto yy912; + yy922: ++p; yych = *p; if (yych <= 0x7F) - goto yy898; + goto yy923; if (yych <= 0xBF) - goto yy899; - yy898: + goto yy924; + yy923: p = marker; if (yyaccept == 0) { - goto yy887; + goto yy912; } else { - goto yy911; + goto yy936; } - yy899: + yy924: ++p; yych = *p; if (yych <= 0x7F) - goto yy898; + goto yy923; if (yych >= 0xC0) - goto yy898; - yy900: + goto yy923; + yy925: ++p; yych = *p; - yy901: + yy926: if (yybm[0 + yych] & 64) { - goto yy900; + goto yy925; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy898; + goto yy923; if (yych >= '^') - goto yy898; + goto yy923; } else { if (yych <= 
0xDF) - goto yy899; + goto yy924; if (yych <= 0xE0) - goto yy903; - goto yy897; + goto yy928; + goto yy922; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy907; + goto yy932; if (yych <= 0xEF) - goto yy897; - goto yy904; + goto yy922; + goto yy929; } else { if (yych <= 0xF3) - goto yy905; + goto yy930; if (yych <= 0xF4) - goto yy906; - goto yy898; + goto yy931; + goto yy923; } } - yy902: + yy927: ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy900; + goto yy925; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy898; + goto yy923; if (yych <= ']') - goto yy908; - goto yy898; + goto yy933; + goto yy923; } else { if (yych <= 0xDF) - goto yy899; + goto yy924; if (yych >= 0xE1) - goto yy897; + goto yy922; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy907; + goto yy932; if (yych <= 0xEF) - goto yy897; - goto yy904; + goto yy922; + goto yy929; } else { if (yych <= 0xF3) - goto yy905; + goto yy930; if (yych <= 0xF4) - goto yy906; - goto yy898; + goto yy931; + goto yy923; } } - yy903: + yy928: ++p; yych = *p; if (yych <= 0x9F) - goto yy898; + goto yy923; if (yych <= 0xBF) - goto yy899; - goto yy898; - yy904: + goto yy924; + goto yy923; + yy929: ++p; yych = *p; if (yych <= 0x8F) - goto yy898; + goto yy923; if (yych <= 0xBF) - goto yy897; - goto yy898; - yy905: + goto yy922; + goto yy923; + yy930: ++p; yych = *p; if (yych <= 0x7F) - goto yy898; + goto yy923; if (yych <= 0xBF) - goto yy897; - goto yy898; - yy906: + goto yy922; + goto yy923; + yy931: ++p; yych = *p; if (yych <= 0x7F) - goto yy898; + goto yy923; if (yych <= 0x8F) - goto yy897; - goto yy898; - yy907: + goto yy922; + goto yy923; + yy932: ++p; yych = *p; if (yych <= 0x7F) - goto yy898; + goto yy923; if (yych <= 0x9F) - goto yy899; - goto yy898; - yy908: + goto yy924; + goto yy923; + yy933: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy908; + goto yy933; } if (yych <= 0xDF) { if (yych <= '=') { if (yych <= 0x00) - goto yy898; + goto yy923; if (yych == '\n') - goto 
yy898; - goto yy900; + goto yy923; + goto yy925; } else { if (yych <= '>') - goto yy910; + goto yy935; if (yych <= 0x7F) - goto yy900; + goto yy925; if (yych <= 0xC1) - goto yy898; - goto yy899; + goto yy923; + goto yy924; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy903; + goto yy928; if (yych == 0xED) - goto yy907; - goto yy897; + goto yy932; + goto yy922; } else { if (yych <= 0xF0) - goto yy904; + goto yy929; if (yych <= 0xF3) - goto yy905; + goto yy930; if (yych <= 0xF4) - goto yy906; - goto yy898; + goto yy931; + goto yy923; } } - yy910: + yy935: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy900; + goto yy925; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy911; + goto yy936; if (yych <= ']') - goto yy902; + goto yy927; } else { if (yych <= 0xDF) - goto yy899; + goto yy924; if (yych <= 0xE0) - goto yy903; - goto yy897; + goto yy928; + goto yy922; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy907; + goto yy932; if (yych <= 0xEF) - goto yy897; - goto yy904; + goto yy922; + goto yy929; } else { if (yych <= 0xF3) - goto yy905; + goto yy930; if (yych <= 0xF4) - goto yy906; + goto yy931; } } - yy911 : { return (bufsize_t)(p - start); } + yy936 : { return (bufsize_t)(p - start); } } } @@ -14897,558 +15154,558 @@ bufsize_t _scan_link_title(const unsigned char *p) { yych = *p; if (yych <= '&') { if (yych == '"') - goto yy916; + goto yy941; } else { if (yych <= '\'') - goto yy917; + goto yy942; if (yych <= '(') - goto yy918; + goto yy943; } ++p; - yy915 : { return 0; } - yy916: + yy940 : { return 0; } + yy941: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x00) - goto yy915; + goto yy940; if (yych <= 0x7F) - goto yy951; + goto yy976; if (yych <= 0xC1) - goto yy915; + goto yy940; if (yych <= 0xF4) - goto yy951; - goto yy915; - yy917: + goto yy976; + goto yy940; + yy942: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x00) - goto yy915; + goto yy940; if (yych <= 0x7F) - goto yy937; + goto 
yy962; if (yych <= 0xC1) - goto yy915; + goto yy940; if (yych <= 0xF4) - goto yy937; - goto yy915; - yy918: + goto yy962; + goto yy940; + yy943: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x00) - goto yy915; + goto yy940; if (yych <= 0x7F) - goto yy923; + goto yy948; if (yych <= 0xC1) - goto yy915; + goto yy940; if (yych <= 0xF4) - goto yy923; - goto yy915; - yy919: + goto yy948; + goto yy940; + yy944: ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy922; + goto yy947; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy921; + goto yy946; if (yych <= ')') - goto yy933; - goto yy919; + goto yy958; + goto yy944; } else { if (yych <= 0xC1) - goto yy921; + goto yy946; if (yych <= 0xDF) - goto yy924; - goto yy925; + goto yy949; + goto yy950; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy930; - goto yy926; + goto yy955; + goto yy951; } else { if (yych <= 0xF0) - goto yy927; + goto yy952; if (yych <= 0xF3) - goto yy928; + goto yy953; if (yych <= 0xF4) - goto yy929; + goto yy954; } } - yy921: + yy946: p = marker; if (yyaccept <= 1) { if (yyaccept == 0) { - goto yy915; + goto yy940; } else { - goto yy932; + goto yy957; } } else { if (yyaccept == 2) { - goto yy946; + goto yy971; } else { - goto yy960; + goto yy985; } } - yy922: + yy947: ++p; yych = *p; - yy923: + yy948: if (yybm[0 + yych] & 32) { - goto yy922; + goto yy947; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy921; + goto yy946; if (yych <= ')') - goto yy931; - goto yy919; + goto yy956; + goto yy944; } else { if (yych <= 0xC1) - goto yy921; + goto yy946; if (yych >= 0xE0) - goto yy925; + goto yy950; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy930; - goto yy926; + goto yy955; + goto yy951; } else { if (yych <= 0xF0) - goto yy927; + goto yy952; if (yych <= 0xF3) - goto yy928; + goto yy953; if (yych <= 0xF4) - goto yy929; - goto yy921; + goto yy954; + goto yy946; } } - yy924: + yy949: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto 
yy946; if (yych <= 0xBF) - goto yy922; - goto yy921; - yy925: + goto yy947; + goto yy946; + yy950: ++p; yych = *p; if (yych <= 0x9F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy924; - goto yy921; - yy926: + goto yy949; + goto yy946; + yy951: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy924; - goto yy921; - yy927: + goto yy949; + goto yy946; + yy952: ++p; yych = *p; if (yych <= 0x8F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy926; - goto yy921; - yy928: + goto yy951; + goto yy946; + yy953: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy926; - goto yy921; - yy929: + goto yy951; + goto yy946; + yy954: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0x8F) - goto yy926; - goto yy921; - yy930: + goto yy951; + goto yy946; + yy955: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0x9F) - goto yy924; - goto yy921; - yy931: + goto yy949; + goto yy946; + yy956: ++p; - yy932 : { return (bufsize_t)(p - start); } - yy933: + yy957 : { return (bufsize_t)(p - start); } + yy958: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy922; + goto yy947; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy932; + goto yy957; if (yych <= ')') - goto yy931; - goto yy919; + goto yy956; + goto yy944; } else { if (yych <= 0xC1) - goto yy932; + goto yy957; if (yych <= 0xDF) - goto yy924; - goto yy925; + goto yy949; + goto yy950; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy930; - goto yy926; + goto yy955; + goto yy951; } else { if (yych <= 0xF0) - goto yy927; + goto yy952; if (yych <= 0xF3) - goto yy928; + goto yy953; if (yych <= 0xF4) - goto yy929; - goto yy932; + goto yy954; + goto yy957; } } - yy934: + yy959: ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy936; + goto yy961; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy921; + goto yy946; if (yych <= '\'') 
- goto yy947; - goto yy934; + goto yy972; + goto yy959; } else { if (yych <= 0xC1) - goto yy921; + goto yy946; if (yych <= 0xDF) - goto yy938; - goto yy939; + goto yy963; + goto yy964; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy944; - goto yy940; + goto yy969; + goto yy965; } else { if (yych <= 0xF0) - goto yy941; + goto yy966; if (yych <= 0xF3) - goto yy942; + goto yy967; if (yych <= 0xF4) - goto yy943; - goto yy921; + goto yy968; + goto yy946; } } - yy936: + yy961: ++p; yych = *p; - yy937: + yy962: if (yybm[0 + yych] & 64) { - goto yy936; + goto yy961; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy921; + goto yy946; if (yych <= '\'') - goto yy945; - goto yy934; + goto yy970; + goto yy959; } else { if (yych <= 0xC1) - goto yy921; + goto yy946; if (yych >= 0xE0) - goto yy939; + goto yy964; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy944; - goto yy940; + goto yy969; + goto yy965; } else { if (yych <= 0xF0) - goto yy941; + goto yy966; if (yych <= 0xF3) - goto yy942; + goto yy967; if (yych <= 0xF4) - goto yy943; - goto yy921; + goto yy968; + goto yy946; } } - yy938: + yy963: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy936; - goto yy921; - yy939: + goto yy961; + goto yy946; + yy964: ++p; yych = *p; if (yych <= 0x9F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy938; - goto yy921; - yy940: + goto yy963; + goto yy946; + yy965: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy938; - goto yy921; - yy941: + goto yy963; + goto yy946; + yy966: ++p; yych = *p; if (yych <= 0x8F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy940; - goto yy921; - yy942: + goto yy965; + goto yy946; + yy967: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy940; - goto yy921; - yy943: + goto yy965; + goto yy946; + yy968: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0x8F) - goto 
yy940; - goto yy921; - yy944: + goto yy965; + goto yy946; + yy969: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0x9F) - goto yy938; - goto yy921; - yy945: + goto yy963; + goto yy946; + yy970: ++p; - yy946 : { return (bufsize_t)(p - start); } - yy947: + yy971 : { return (bufsize_t)(p - start); } + yy972: yyaccept = 2; marker = ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy936; + goto yy961; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy946; + goto yy971; if (yych <= '\'') - goto yy945; - goto yy934; + goto yy970; + goto yy959; } else { if (yych <= 0xC1) - goto yy946; + goto yy971; if (yych <= 0xDF) - goto yy938; - goto yy939; + goto yy963; + goto yy964; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy944; - goto yy940; + goto yy969; + goto yy965; } else { if (yych <= 0xF0) - goto yy941; + goto yy966; if (yych <= 0xF3) - goto yy942; + goto yy967; if (yych <= 0xF4) - goto yy943; - goto yy946; + goto yy968; + goto yy971; } } - yy948: + yy973: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy950; + goto yy975; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy921; + goto yy946; if (yych <= '"') - goto yy961; - goto yy948; + goto yy986; + goto yy973; } else { if (yych <= 0xC1) - goto yy921; + goto yy946; if (yych <= 0xDF) - goto yy952; - goto yy953; + goto yy977; + goto yy978; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy958; - goto yy954; + goto yy983; + goto yy979; } else { if (yych <= 0xF0) - goto yy955; + goto yy980; if (yych <= 0xF3) - goto yy956; + goto yy981; if (yych <= 0xF4) - goto yy957; - goto yy921; + goto yy982; + goto yy946; } } - yy950: + yy975: ++p; yych = *p; - yy951: + yy976: if (yybm[0 + yych] & 128) { - goto yy950; + goto yy975; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy921; + goto yy946; if (yych <= '"') - goto yy959; - goto yy948; + goto yy984; + goto yy973; } else { if (yych <= 0xC1) - goto yy921; + goto yy946; if (yych 
>= 0xE0) - goto yy953; + goto yy978; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy958; - goto yy954; + goto yy983; + goto yy979; } else { if (yych <= 0xF0) - goto yy955; + goto yy980; if (yych <= 0xF3) - goto yy956; + goto yy981; if (yych <= 0xF4) - goto yy957; - goto yy921; + goto yy982; + goto yy946; } } - yy952: + yy977: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy950; - goto yy921; - yy953: + goto yy975; + goto yy946; + yy978: ++p; yych = *p; if (yych <= 0x9F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy952; - goto yy921; - yy954: + goto yy977; + goto yy946; + yy979: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy952; - goto yy921; - yy955: + goto yy977; + goto yy946; + yy980: ++p; yych = *p; if (yych <= 0x8F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy954; - goto yy921; - yy956: + goto yy979; + goto yy946; + yy981: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy954; - goto yy921; - yy957: + goto yy979; + goto yy946; + yy982: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0x8F) - goto yy954; - goto yy921; - yy958: + goto yy979; + goto yy946; + yy983: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0x9F) - goto yy952; - goto yy921; - yy959: + goto yy977; + goto yy946; + yy984: ++p; - yy960 : { return (bufsize_t)(p - start); } - yy961: + yy985 : { return (bufsize_t)(p - start); } + yy986: yyaccept = 3; marker = ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy950; + goto yy975; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy960; + goto yy985; if (yych <= '"') - goto yy959; - goto yy948; + goto yy984; + goto yy973; } else { if (yych <= 0xC1) - goto yy960; + goto yy985; if (yych <= 0xDF) - goto yy952; - goto yy953; + goto yy977; + goto yy978; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy958; - goto yy954; + goto 
yy983; + goto yy979; } else { if (yych <= 0xF0) - goto yy955; + goto yy980; if (yych <= 0xF3) - goto yy956; + goto yy981; if (yych <= 0xF4) - goto yy957; - goto yy960; + goto yy982; + goto yy985; } } } @@ -15477,27 +15734,27 @@ bufsize_t _scan_spacechars(const unsigned char *p) { }; yych = *p; if (yych <= 0x08) - goto yy964; + goto yy989; if (yych <= '\r') - goto yy966; + goto yy991; if (yych == ' ') - goto yy966; - yy964: + goto yy991; + yy989: ++p; { return 0; } - yy966: + yy991: ++p; yych = *p; - goto yy969; - yy967 : { return (bufsize_t)(p - start); } - yy968: + goto yy994; + yy992 : { return (bufsize_t)(p - start); } + yy993: ++p; yych = *p; - yy969: + yy994: if (yybm[0 + yych] & 128) { - goto yy968; + goto yy993; } - goto yy967; + goto yy992; } } @@ -15524,115 +15781,115 @@ bufsize_t _scan_atx_heading_start(const unsigned char *p) { }; yych = *p; if (yych == '#') - goto yy974; + goto yy999; ++p; - yy973 : { return 0; } - yy974: + yy998 : { return 0; } + yy999: yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy977; + goto yy1002; } if (yych <= '\f') { if (yych <= 0x08) - goto yy973; + goto yy998; if (yych >= '\v') - goto yy973; + goto yy998; } else { if (yych <= '\r') - goto yy975; + goto yy1000; if (yych == '#') - goto yy979; - goto yy973; + goto yy1004; + goto yy998; } - yy975: + yy1000: ++p; - yy976 : { return (bufsize_t)(p - start); } - yy977: + yy1001 : { return (bufsize_t)(p - start); } + yy1002: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy977; + goto yy1002; } - goto yy976; - yy979: + goto yy1001; + yy1004: yych = *++p; if (yybm[0 + yych] & 128) { - goto yy977; + goto yy1002; } if (yych <= '\f') { if (yych <= 0x08) - goto yy980; + goto yy1005; if (yych <= '\n') - goto yy975; + goto yy1000; } else { if (yych <= '\r') - goto yy975; + goto yy1000; if (yych == '#') - goto yy981; + goto yy1006; } - yy980: + yy1005: p = marker; - goto yy973; - yy981: + goto yy998; + yy1006: yych = *++p; if (yybm[0 + yych] & 128) { - goto yy977; + goto 
yy1002; } if (yych <= '\f') { if (yych <= 0x08) - goto yy980; + goto yy1005; if (yych <= '\n') - goto yy975; - goto yy980; + goto yy1000; + goto yy1005; } else { if (yych <= '\r') - goto yy975; + goto yy1000; if (yych != '#') - goto yy980; + goto yy1005; } yych = *++p; if (yybm[0 + yych] & 128) { - goto yy977; + goto yy1002; } if (yych <= '\f') { if (yych <= 0x08) - goto yy980; + goto yy1005; if (yych <= '\n') - goto yy975; - goto yy980; + goto yy1000; + goto yy1005; } else { if (yych <= '\r') - goto yy975; + goto yy1000; if (yych != '#') - goto yy980; + goto yy1005; } yych = *++p; if (yybm[0 + yych] & 128) { - goto yy977; + goto yy1002; } if (yych <= '\f') { if (yych <= 0x08) - goto yy980; + goto yy1005; if (yych <= '\n') - goto yy975; - goto yy980; + goto yy1000; + goto yy1005; } else { if (yych <= '\r') - goto yy975; + goto yy1000; if (yych != '#') - goto yy980; + goto yy1005; } ++p; if (yybm[0 + (yych = *p)] & 128) { - goto yy977; + goto yy1002; } if (yych <= 0x08) - goto yy980; + goto yy1005; if (yych <= '\n') - goto yy975; + goto yy1000; if (yych == '\r') - goto yy975; - goto yy980; + goto yy1000; + goto yy1005; } } @@ -15659,126 +15916,126 @@ bufsize_t _scan_setext_heading_line(const unsigned char *p) { }; yych = *p; if (yych == '-') - goto yy989; + goto yy1014; if (yych == '=') - goto yy990; + goto yy1015; ++p; - yy988 : { return 0; } - yy989: + yy1013 : { return 0; } + yy1014: yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy1002; + goto yy1027; } if (yych <= '\f') { if (yych <= 0x08) - goto yy988; + goto yy1013; if (yych <= '\n') - goto yy999; - goto yy988; + goto yy1024; + goto yy1013; } else { if (yych <= '\r') - goto yy999; + goto yy1024; if (yych == ' ') - goto yy999; - goto yy988; + goto yy1024; + goto yy1013; } - yy990: + yy1015: yych = *(marker = ++p); if (yybm[0 + yych] & 64) { - goto yy996; + goto yy1021; } if (yych <= '\f') { if (yych <= 0x08) - goto yy988; + goto yy1013; if (yych <= '\n') - goto yy992; - goto yy988; + goto yy1017; + 
goto yy1013; } else { if (yych <= '\r') - goto yy992; + goto yy1017; if (yych == ' ') - goto yy992; - goto yy988; + goto yy1017; + goto yy1013; } - yy991: + yy1016: ++p; yych = *p; - yy992: + yy1017: if (yybm[0 + yych] & 32) { - goto yy991; + goto yy1016; } if (yych <= 0x08) - goto yy993; + goto yy1018; if (yych <= '\n') - goto yy994; + goto yy1019; if (yych == '\r') - goto yy994; - yy993: + goto yy1019; + yy1018: p = marker; - goto yy988; - yy994: + goto yy1013; + yy1019: ++p; { return 1; } - yy996: + yy1021: ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy991; + goto yy1016; } if (yych <= '\f') { if (yych <= 0x08) - goto yy993; + goto yy1018; if (yych <= '\n') - goto yy994; - goto yy993; + goto yy1019; + goto yy1018; } else { if (yych <= '\r') - goto yy994; + goto yy1019; if (yych == '=') - goto yy996; - goto yy993; + goto yy1021; + goto yy1018; } - yy998: + yy1023: ++p; yych = *p; - yy999: + yy1024: if (yych <= '\f') { if (yych <= 0x08) - goto yy993; + goto yy1018; if (yych <= '\t') - goto yy998; + goto yy1023; if (yych >= '\v') - goto yy993; + goto yy1018; } else { if (yych <= '\r') - goto yy1000; + goto yy1025; if (yych == ' ') - goto yy998; - goto yy993; + goto yy1023; + goto yy1018; } - yy1000: + yy1025: ++p; { return 2; } - yy1002: + yy1027: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy1002; + goto yy1027; } if (yych <= '\f') { if (yych <= 0x08) - goto yy993; + goto yy1018; if (yych <= '\t') - goto yy998; + goto yy1023; if (yych <= '\n') - goto yy1000; - goto yy993; + goto yy1025; + goto yy1018; } else { if (yych <= '\r') - goto yy1000; + goto yy1025; if (yych == ' ') - goto yy998; - goto yy993; + goto yy1023; + goto yy1018; } } } @@ -15810,248 +16067,248 @@ bufsize_t _scan_thematic_break(const unsigned char *p) { yych = *p; if (yych <= ',') { if (yych == '*') - goto yy1008; + goto yy1033; } else { if (yych <= '-') - goto yy1009; + goto yy1034; if (yych == '_') - goto yy1010; + goto yy1035; } ++p; - yy1007 : { return 0; } - yy1008: + yy1032 : { 
return 0; } + yy1033: yych = *(marker = ++p); if (yych <= 0x1F) { if (yych == '\t') - goto yy1032; - goto yy1007; + goto yy1057; + goto yy1032; } else { if (yych <= ' ') - goto yy1032; + goto yy1057; if (yych == '*') - goto yy1034; - goto yy1007; + goto yy1059; + goto yy1032; } - yy1009: + yy1034: yych = *(marker = ++p); if (yych <= 0x1F) { if (yych == '\t') - goto yy1022; - goto yy1007; + goto yy1047; + goto yy1032; } else { if (yych <= ' ') - goto yy1022; + goto yy1047; if (yych == '-') - goto yy1024; - goto yy1007; + goto yy1049; + goto yy1032; } - yy1010: + yy1035: yych = *(marker = ++p); if (yybm[0 + yych] & 16) { - goto yy1011; + goto yy1036; } if (yych == '_') - goto yy1014; - goto yy1007; - yy1011: + goto yy1039; + goto yy1032; + yy1036: ++p; yych = *p; if (yybm[0 + yych] & 16) { - goto yy1011; + goto yy1036; } if (yych == '_') - goto yy1014; - yy1013: + goto yy1039; + yy1038: p = marker; - goto yy1007; - yy1014: + goto yy1032; + yy1039: ++p; yych = *p; if (yych <= 0x1F) { if (yych == '\t') - goto yy1014; - goto yy1013; + goto yy1039; + goto yy1038; } else { if (yych <= ' ') - goto yy1014; + goto yy1039; if (yych != '_') - goto yy1013; + goto yy1038; } - yy1016: + yy1041: ++p; yych = *p; if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) - goto yy1013; - goto yy1016; + goto yy1038; + goto yy1041; } else { if (yych <= '\n') - goto yy1018; + goto yy1043; if (yych <= '\f') - goto yy1013; + goto yy1038; } } else { if (yych <= ' ') { if (yych <= 0x1F) - goto yy1013; - goto yy1016; + goto yy1038; + goto yy1041; } else { if (yych == '_') - goto yy1020; - goto yy1013; + goto yy1045; + goto yy1038; } } - yy1018: + yy1043: ++p; { return (bufsize_t)(p - start); } - yy1020: + yy1045: ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy1020; + goto yy1045; } if (yych <= 0x08) - goto yy1013; + goto yy1038; if (yych <= '\n') - goto yy1018; + goto yy1043; if (yych == '\r') - goto yy1018; - goto yy1013; - yy1022: + goto yy1043; + goto yy1038; + yy1047: ++p; yych = 
*p; if (yych <= 0x1F) { if (yych == '\t') - goto yy1022; - goto yy1013; + goto yy1047; + goto yy1038; } else { if (yych <= ' ') - goto yy1022; + goto yy1047; if (yych != '-') - goto yy1013; + goto yy1038; } - yy1024: + yy1049: ++p; yych = *p; if (yych <= 0x1F) { if (yych == '\t') - goto yy1024; - goto yy1013; + goto yy1049; + goto yy1038; } else { if (yych <= ' ') - goto yy1024; + goto yy1049; if (yych != '-') - goto yy1013; + goto yy1038; } - yy1026: + yy1051: ++p; yych = *p; if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) - goto yy1013; - goto yy1026; + goto yy1038; + goto yy1051; } else { if (yych <= '\n') - goto yy1028; + goto yy1053; if (yych <= '\f') - goto yy1013; + goto yy1038; } } else { if (yych <= ' ') { if (yych <= 0x1F) - goto yy1013; - goto yy1026; + goto yy1038; + goto yy1051; } else { if (yych == '-') - goto yy1030; - goto yy1013; + goto yy1055; + goto yy1038; } } - yy1028: + yy1053: ++p; { return (bufsize_t)(p - start); } - yy1030: + yy1055: ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy1030; + goto yy1055; } if (yych <= 0x08) - goto yy1013; + goto yy1038; if (yych <= '\n') - goto yy1028; + goto yy1053; if (yych == '\r') - goto yy1028; - goto yy1013; - yy1032: + goto yy1053; + goto yy1038; + yy1057: ++p; yych = *p; if (yych <= 0x1F) { if (yych == '\t') - goto yy1032; - goto yy1013; + goto yy1057; + goto yy1038; } else { if (yych <= ' ') - goto yy1032; + goto yy1057; if (yych != '*') - goto yy1013; + goto yy1038; } - yy1034: + yy1059: ++p; yych = *p; if (yych <= 0x1F) { if (yych == '\t') - goto yy1034; - goto yy1013; + goto yy1059; + goto yy1038; } else { if (yych <= ' ') - goto yy1034; + goto yy1059; if (yych != '*') - goto yy1013; + goto yy1038; } - yy1036: + yy1061: ++p; yych = *p; if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) - goto yy1013; - goto yy1036; + goto yy1038; + goto yy1061; } else { if (yych <= '\n') - goto yy1038; + goto yy1063; if (yych <= '\f') - goto yy1013; + goto yy1038; } } else { if (yych <= ' ') { 
if (yych <= 0x1F) - goto yy1013; - goto yy1036; + goto yy1038; + goto yy1061; } else { if (yych == '*') - goto yy1040; - goto yy1013; + goto yy1065; + goto yy1038; } } - yy1038: + yy1063: ++p; { return (bufsize_t)(p - start); } - yy1040: + yy1065: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy1040; + goto yy1065; } if (yych <= 0x08) - goto yy1013; - if (yych <= '\n') goto yy1038; + if (yych <= '\n') + goto yy1063; if (yych == '\r') - goto yy1038; - goto yy1013; + goto yy1063; + goto yy1038; } } @@ -16085,292 +16342,292 @@ bufsize_t _scan_open_code_fence(const unsigned char *p) { }; yych = *p; if (yych == '`') - goto yy1046; + goto yy1071; if (yych == '~') - goto yy1047; + goto yy1072; ++p; - yy1045 : { return 0; } - yy1046: + yy1070 : { return 0; } + yy1071: yych = *(marker = ++p); if (yych == '`') - goto yy1063; - goto yy1045; - yy1047: + goto yy1088; + goto yy1070; + yy1072: yych = *(marker = ++p); if (yych != '~') - goto yy1045; + goto yy1070; yych = *++p; if (yybm[0 + yych] & 16) { - goto yy1050; + goto yy1075; } - yy1049: + yy1074: p = marker; - goto yy1045; - yy1050: + goto yy1070; + yy1075: ++p; yych = *p; marker = p; if (yybm[0 + yych] & 32) { - goto yy1052; + goto yy1077; } if (yych <= 0xE0) { if (yych <= '~') { if (yych <= 0x00) - goto yy1049; + goto yy1074; if (yych <= '\r') - goto yy1061; - goto yy1050; + goto yy1086; + goto yy1075; } else { if (yych <= 0xC1) - goto yy1049; + goto yy1074; if (yych <= 0xDF) - goto yy1054; - goto yy1055; + goto yy1079; + goto yy1080; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy1060; - goto yy1056; + goto yy1085; + goto yy1081; } else { if (yych <= 0xF0) - goto yy1057; + goto yy1082; if (yych <= 0xF3) - goto yy1058; + goto yy1083; if (yych <= 0xF4) - goto yy1059; - goto yy1049; + goto yy1084; + goto yy1074; } } - yy1052: + yy1077: ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy1052; + goto yy1077; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= 0x00) - goto yy1049; + goto yy1074; if (yych <= 
'\r') - goto yy1061; - goto yy1049; + goto yy1086; + goto yy1074; } else { if (yych <= 0xDF) - goto yy1054; + goto yy1079; if (yych <= 0xE0) - goto yy1055; - goto yy1056; + goto yy1080; + goto yy1081; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy1060; + goto yy1085; if (yych <= 0xEF) - goto yy1056; - goto yy1057; + goto yy1081; + goto yy1082; } else { if (yych <= 0xF3) - goto yy1058; + goto yy1083; if (yych <= 0xF4) - goto yy1059; - goto yy1049; + goto yy1084; + goto yy1074; } } - yy1054: + yy1079: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1052; - goto yy1049; - yy1055: + goto yy1077; + goto yy1074; + yy1080: ++p; yych = *p; if (yych <= 0x9F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1054; - goto yy1049; - yy1056: + goto yy1079; + goto yy1074; + yy1081: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1054; - goto yy1049; - yy1057: + goto yy1079; + goto yy1074; + yy1082: ++p; yych = *p; if (yych <= 0x8F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1056; - goto yy1049; - yy1058: + goto yy1081; + goto yy1074; + yy1083: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1056; - goto yy1049; - yy1059: + goto yy1081; + goto yy1074; + yy1084: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0x8F) - goto yy1056; - goto yy1049; - yy1060: + goto yy1081; + goto yy1074; + yy1085: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0x9F) - goto yy1054; - goto yy1049; - yy1061: + goto yy1079; + goto yy1074; + yy1086: ++p; p = marker; { return (bufsize_t)(p - start); } - yy1063: + yy1088: yych = *++p; if (yybm[0 + yych] & 64) { - goto yy1064; + goto yy1089; } - goto yy1049; - yy1064: + goto yy1074; + yy1089: ++p; yych = *p; marker = p; if (yybm[0 + yych] & 128) { - goto yy1066; + goto yy1091; } if (yych <= 0xE0) { if (yych <= '`') { if (yych <= 0x00) - goto 
yy1049; + goto yy1074; if (yych <= '\r') - goto yy1075; - goto yy1064; + goto yy1100; + goto yy1089; } else { if (yych <= 0xC1) - goto yy1049; + goto yy1074; if (yych <= 0xDF) - goto yy1068; - goto yy1069; + goto yy1093; + goto yy1094; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy1074; - goto yy1070; + goto yy1099; + goto yy1095; } else { if (yych <= 0xF0) - goto yy1071; + goto yy1096; if (yych <= 0xF3) - goto yy1072; + goto yy1097; if (yych <= 0xF4) - goto yy1073; - goto yy1049; + goto yy1098; + goto yy1074; } } - yy1066: + yy1091: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy1066; + goto yy1091; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= 0x00) - goto yy1049; + goto yy1074; if (yych <= '\r') - goto yy1075; - goto yy1049; + goto yy1100; + goto yy1074; } else { if (yych <= 0xDF) - goto yy1068; + goto yy1093; if (yych <= 0xE0) - goto yy1069; - goto yy1070; + goto yy1094; + goto yy1095; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy1074; + goto yy1099; if (yych <= 0xEF) - goto yy1070; - goto yy1071; + goto yy1095; + goto yy1096; } else { if (yych <= 0xF3) - goto yy1072; + goto yy1097; if (yych <= 0xF4) - goto yy1073; - goto yy1049; + goto yy1098; + goto yy1074; } } - yy1068: + yy1093: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1066; - goto yy1049; - yy1069: + goto yy1091; + goto yy1074; + yy1094: ++p; yych = *p; if (yych <= 0x9F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1068; - goto yy1049; - yy1070: + goto yy1093; + goto yy1074; + yy1095: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1068; - goto yy1049; - yy1071: + goto yy1093; + goto yy1074; + yy1096: ++p; yych = *p; if (yych <= 0x8F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1070; - goto yy1049; - yy1072: + goto yy1095; + goto yy1074; + yy1097: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1070; - goto 
yy1049; - yy1073: + goto yy1095; + goto yy1074; + yy1098: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0x8F) - goto yy1070; - goto yy1049; - yy1074: + goto yy1095; + goto yy1074; + yy1099: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0x9F) - goto yy1068; - goto yy1049; - yy1075: + goto yy1093; + goto yy1074; + yy1100: ++p; p = marker; { return (bufsize_t)(p - start); } @@ -16400,108 +16657,108 @@ bufsize_t _scan_close_code_fence(const unsigned char *p) { }; yych = *p; if (yych == '`') - goto yy1081; + goto yy1106; if (yych == '~') - goto yy1082; + goto yy1107; ++p; - yy1080 : { return 0; } - yy1081: + yy1105 : { return 0; } + yy1106: yych = *(marker = ++p); if (yych == '`') - goto yy1091; - goto yy1080; - yy1082: + goto yy1116; + goto yy1105; + yy1107: yych = *(marker = ++p); if (yych != '~') - goto yy1080; + goto yy1105; yych = *++p; if (yybm[0 + yych] & 32) { - goto yy1085; + goto yy1110; } - yy1084: + yy1109: p = marker; - goto yy1080; - yy1085: + goto yy1105; + yy1110: ++p; yych = *p; marker = p; if (yybm[0 + yych] & 64) { - goto yy1087; + goto yy1112; } if (yych <= '\f') { if (yych <= 0x08) - goto yy1084; + goto yy1109; if (yych <= '\n') - goto yy1089; - goto yy1084; + goto yy1114; + goto yy1109; } else { if (yych <= '\r') - goto yy1089; + goto yy1114; if (yych == '~') - goto yy1085; - goto yy1084; + goto yy1110; + goto yy1109; } - yy1087: + yy1112: ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy1087; + goto yy1112; } if (yych <= 0x08) - goto yy1084; + goto yy1109; if (yych <= '\n') - goto yy1089; + goto yy1114; if (yych != '\r') - goto yy1084; - yy1089: + goto yy1109; + yy1114: ++p; p = marker; { return (bufsize_t)(p - start); } - yy1091: + yy1116: yych = *++p; if (yybm[0 + yych] & 128) { - goto yy1092; + goto yy1117; } - goto yy1084; - yy1092: + goto yy1109; + yy1117: ++p; yych = *p; marker = p; if (yybm[0 + yych] & 128) { - goto yy1092; + goto yy1117; } if (yych <= '\f') { if (yych <= 0x08) 
- goto yy1084; + goto yy1109; if (yych <= '\t') - goto yy1094; + goto yy1119; if (yych <= '\n') - goto yy1096; - goto yy1084; + goto yy1121; + goto yy1109; } else { if (yych <= '\r') - goto yy1096; + goto yy1121; if (yych != ' ') - goto yy1084; + goto yy1109; } - yy1094: + yy1119: ++p; yych = *p; if (yych <= '\f') { if (yych <= 0x08) - goto yy1084; + goto yy1109; if (yych <= '\t') - goto yy1094; + goto yy1119; if (yych >= '\v') - goto yy1084; + goto yy1109; } else { if (yych <= '\r') - goto yy1096; + goto yy1121; if (yych == ' ') - goto yy1094; - goto yy1084; + goto yy1119; + goto yy1109; } - yy1096: + yy1121: ++p; p = marker; { return (bufsize_t)(p - start); } @@ -16518,919 +16775,919 @@ bufsize_t _scan_entity(const unsigned char *p) { unsigned char yych; yych = *p; if (yych == '&') - goto yy1102; + goto yy1127; ++p; - yy1101 : { return 0; } - yy1102: + yy1126 : { return 0; } + yy1127: yych = *(marker = ++p); if (yych <= '@') { if (yych != '#') - goto yy1101; + goto yy1126; } else { if (yych <= 'Z') - goto yy1105; + goto yy1130; if (yych <= '`') - goto yy1101; + goto yy1126; if (yych <= 'z') - goto yy1105; - goto yy1101; + goto yy1130; + goto yy1126; } yych = *++p; if (yych <= 'W') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1140; + goto yy1165; } else { if (yych <= 'X') - goto yy1139; + goto yy1164; if (yych == 'x') - goto yy1139; + goto yy1164; } - yy1104: + yy1129: p = marker; - goto yy1101; - yy1105: + goto yy1126; + yy1130: yych = *++p; if (yych <= '@') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych >= ':') - goto yy1104; + goto yy1129; } else { if (yych <= 'Z') - goto yy1106; + goto yy1131; if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } - yy1106: + yy1131: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1109; + goto yy1134; if (yych <= ':') - goto yy1104; + goto yy1129; } else { if (yych <= 'Z') { if (yych <= 
'@') - goto yy1104; - goto yy1109; + goto yy1129; + goto yy1134; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych <= 'z') - goto yy1109; - goto yy1104; + goto yy1134; + goto yy1129; } } - yy1107: + yy1132: ++p; { return (bufsize_t)(p - start); } - yy1109: + yy1134: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1110; + goto yy1135; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1110: + yy1135: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1111; + goto yy1136; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1111: + yy1136: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1112; + goto yy1137; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1112: + yy1137: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1113; + goto yy1138; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1113: + yy1138: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - 
goto yy1114; + goto yy1139; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1114: + yy1139: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1115; + goto yy1140; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1115: + yy1140: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1116; + goto yy1141; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1116: + yy1141: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1117; + goto yy1142; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1117: + yy1142: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1118; + goto yy1143; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1118: + yy1143: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto 
yy1129; if (yych <= '9') - goto yy1119; + goto yy1144; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1119: + yy1144: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1120; + goto yy1145; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1120: + yy1145: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1121; + goto yy1146; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1121: + yy1146: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1122; + goto yy1147; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1122: + yy1147: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1123; + goto yy1148; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1123: + yy1148: yych = *++p; if (yych <= ';') { if (yych <= 
'/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1124; + goto yy1149; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1124: + yy1149: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1125; + goto yy1150; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1125: + yy1150: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1126; + goto yy1151; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1126: + yy1151: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1127; + goto yy1152; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1127: + yy1152: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1128; + goto yy1153; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1128: + yy1153: yych = *++p; if 
(yych <= ';') { if (yych <= '/') - goto yy1104; - if (yych <= '9') goto yy1129; + if (yych <= '9') + goto yy1154; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1129: + yy1154: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1130; + goto yy1155; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1130: + yy1155: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1131; + goto yy1156; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1131: + yy1156: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1132; + goto yy1157; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1132: + yy1157: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1133; + goto yy1158; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - 
yy1133: + yy1158: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1134; + goto yy1159; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1134: + yy1159: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1135; + goto yy1160; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1135: + yy1160: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1136; + goto yy1161; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1136: + yy1161: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1137; + goto yy1162; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1137: + yy1162: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1138; + goto yy1163; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto 
yy1104; + goto yy1129; } } - yy1138: + yy1163: yych = *++p; if (yych == ';') - goto yy1107; - goto yy1104; - yy1139: + goto yy1132; + goto yy1129; + yy1164: yych = *++p; if (yych <= '@') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1147; - goto yy1104; + goto yy1172; + goto yy1129; } else { if (yych <= 'F') - goto yy1147; + goto yy1172; if (yych <= '`') - goto yy1104; + goto yy1129; if (yych <= 'f') - goto yy1147; - goto yy1104; + goto yy1172; + goto yy1129; } - yy1140: + yy1165: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1141; + goto yy1166; if (yych == ';') - goto yy1107; - goto yy1104; - yy1141: + goto yy1132; + goto yy1129; + yy1166: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1142; + goto yy1167; if (yych == ';') - goto yy1107; - goto yy1104; - yy1142: + goto yy1132; + goto yy1129; + yy1167: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1143; + goto yy1168; if (yych == ';') - goto yy1107; - goto yy1104; - yy1143: + goto yy1132; + goto yy1129; + yy1168: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1144; + goto yy1169; if (yych == ';') - goto yy1107; - goto yy1104; - yy1144: + goto yy1132; + goto yy1129; + yy1169: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1145; + goto yy1170; if (yych == ';') - goto yy1107; - goto yy1104; - yy1145: + goto yy1132; + goto yy1129; + yy1170: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1146; + goto yy1171; if (yych == ';') - goto yy1107; - goto yy1104; - yy1146: + goto yy1132; + goto yy1129; + yy1171: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1138; + goto yy1163; if (yych == ';') - goto yy1107; - goto yy1104; - yy1147: + goto yy1132; + goto yy1129; + yy1172: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto 
yy1104; + goto yy1129; if (yych <= '9') - goto yy1148; + goto yy1173; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= 'g') - goto yy1104; + goto yy1129; } } - yy1148: + yy1173: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1149; + goto yy1174; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= 'g') - goto yy1104; + goto yy1129; } } - yy1149: + yy1174: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1150; + goto yy1175; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= 'g') - goto yy1104; + goto yy1129; } } - yy1150: + yy1175: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1151; + goto yy1176; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= 'g') - goto yy1104; + goto yy1129; } } - yy1151: + yy1176: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1152; + goto yy1177; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= 'g') - goto yy1104; + goto yy1129; } } - yy1152: + yy1177: yych = *++p; if (yych <= ';') 
{ if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1153; + goto yy1178; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= 'g') - goto yy1104; + goto yy1129; } } - yy1153: + yy1178: ++p; if ((yych = *p) <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1138; + goto yy1163; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; - goto yy1138; + goto yy1129; + goto yy1163; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych <= 'f') - goto yy1138; - goto yy1104; + goto yy1163; + goto yy1129; } } } @@ -17449,335 +17706,335 @@ bufsize_t _scan_dangerous_url(const unsigned char *p) { if (yych <= 'V') { if (yych <= 'F') { if (yych == 'D') - goto yy1158; + goto yy1183; if (yych >= 'F') - goto yy1159; + goto yy1184; } else { if (yych == 'J') - goto yy1160; + goto yy1185; if (yych >= 'V') - goto yy1161; + goto yy1186; } } else { if (yych <= 'f') { if (yych == 'd') - goto yy1158; + goto yy1183; if (yych >= 'f') - goto yy1159; + goto yy1184; } else { if (yych <= 'j') { if (yych >= 'j') - goto yy1160; + goto yy1185; } else { if (yych == 'v') - goto yy1161; + goto yy1186; } } } ++p; - yy1157 : { return 0; } - yy1158: + yy1182 : { return 0; } + yy1183: yyaccept = 0; yych = *(marker = ++p); if (yych == 'A') - goto yy1184; + goto yy1209; if (yych == 'a') - goto yy1184; - goto yy1157; - yy1159: + goto yy1209; + goto yy1182; + yy1184: yyaccept = 0; yych = *(marker = ++p); if (yych == 'I') - goto yy1181; + goto yy1206; if (yych == 'i') - goto yy1181; - goto yy1157; - yy1160: + goto yy1206; + goto yy1182; + yy1185: yyaccept = 0; yych = *(marker = ++p); if (yych == 'A') - goto yy1172; + goto yy1197; if (yych == 'a') - goto yy1172; - goto yy1157; - yy1161: + goto yy1197; + 
goto yy1182; + yy1186: yyaccept = 0; yych = *(marker = ++p); if (yych == 'B') - goto yy1162; + goto yy1187; if (yych != 'b') - goto yy1157; - yy1162: + goto yy1182; + yy1187: yych = *++p; if (yych == 'S') - goto yy1164; + goto yy1189; if (yych == 's') - goto yy1164; - yy1163: + goto yy1189; + yy1188: p = marker; if (yyaccept == 0) { - goto yy1157; + goto yy1182; } else { - goto yy1171; + goto yy1196; } - yy1164: + yy1189: yych = *++p; if (yych == 'C') - goto yy1165; + goto yy1190; if (yych != 'c') - goto yy1163; - yy1165: + goto yy1188; + yy1190: yych = *++p; if (yych == 'R') - goto yy1166; + goto yy1191; if (yych != 'r') - goto yy1163; - yy1166: + goto yy1188; + yy1191: yych = *++p; if (yych == 'I') - goto yy1167; + goto yy1192; if (yych != 'i') - goto yy1163; - yy1167: + goto yy1188; + yy1192: yych = *++p; if (yych == 'P') - goto yy1168; + goto yy1193; if (yych != 'p') - goto yy1163; - yy1168: + goto yy1188; + yy1193: yych = *++p; if (yych == 'T') - goto yy1169; + goto yy1194; if (yych != 't') - goto yy1163; - yy1169: + goto yy1188; + yy1194: yych = *++p; if (yych != ':') - goto yy1163; - yy1170: + goto yy1188; + yy1195: ++p; - yy1171 : { return (bufsize_t)(p - start); } - yy1172: + yy1196 : { return (bufsize_t)(p - start); } + yy1197: yych = *++p; if (yych == 'V') - goto yy1173; + goto yy1198; if (yych != 'v') - goto yy1163; - yy1173: + goto yy1188; + yy1198: yych = *++p; if (yych == 'A') - goto yy1174; + goto yy1199; if (yych != 'a') - goto yy1163; - yy1174: + goto yy1188; + yy1199: yych = *++p; if (yych == 'S') - goto yy1175; + goto yy1200; if (yych != 's') - goto yy1163; - yy1175: + goto yy1188; + yy1200: yych = *++p; if (yych == 'C') - goto yy1176; + goto yy1201; if (yych != 'c') - goto yy1163; - yy1176: + goto yy1188; + yy1201: yych = *++p; if (yych == 'R') - goto yy1177; + goto yy1202; if (yych != 'r') - goto yy1163; - yy1177: + goto yy1188; + yy1202: yych = *++p; if (yych == 'I') - goto yy1178; + goto yy1203; if (yych != 'i') - goto yy1163; - yy1178: + 
goto yy1188; + yy1203: yych = *++p; if (yych == 'P') - goto yy1179; + goto yy1204; if (yych != 'p') - goto yy1163; - yy1179: + goto yy1188; + yy1204: yych = *++p; if (yych == 'T') - goto yy1180; + goto yy1205; if (yych != 't') - goto yy1163; - yy1180: + goto yy1188; + yy1205: yych = *++p; if (yych == ':') - goto yy1170; - goto yy1163; - yy1181: + goto yy1195; + goto yy1188; + yy1206: yych = *++p; if (yych == 'L') - goto yy1182; + goto yy1207; if (yych != 'l') - goto yy1163; - yy1182: + goto yy1188; + yy1207: yych = *++p; if (yych == 'E') - goto yy1183; + goto yy1208; if (yych != 'e') - goto yy1163; - yy1183: + goto yy1188; + yy1208: yych = *++p; if (yych == ':') - goto yy1170; - goto yy1163; - yy1184: + goto yy1195; + goto yy1188; + yy1209: yych = *++p; if (yych == 'T') - goto yy1185; + goto yy1210; if (yych != 't') - goto yy1163; - yy1185: + goto yy1188; + yy1210: yych = *++p; if (yych == 'A') - goto yy1186; + goto yy1211; if (yych != 'a') - goto yy1163; - yy1186: + goto yy1188; + yy1211: yych = *++p; if (yych != ':') - goto yy1163; + goto yy1188; yyaccept = 1; yych = *(marker = ++p); if (yych == 'I') - goto yy1188; + goto yy1213; if (yych != 'i') - goto yy1171; - yy1188: + goto yy1196; + yy1213: yych = *++p; if (yych == 'M') - goto yy1189; + goto yy1214; if (yych != 'm') - goto yy1163; - yy1189: + goto yy1188; + yy1214: yych = *++p; if (yych == 'A') - goto yy1190; + goto yy1215; if (yych != 'a') - goto yy1163; - yy1190: + goto yy1188; + yy1215: yych = *++p; if (yych == 'G') - goto yy1191; + goto yy1216; if (yych != 'g') - goto yy1163; - yy1191: + goto yy1188; + yy1216: yych = *++p; if (yych == 'E') - goto yy1192; + goto yy1217; if (yych != 'e') - goto yy1163; - yy1192: + goto yy1188; + yy1217: yych = *++p; if (yych != '/') - goto yy1163; + goto yy1188; yych = *++p; if (yych <= 'W') { if (yych <= 'J') { if (yych == 'G') - goto yy1195; + goto yy1220; if (yych <= 'I') - goto yy1163; - goto yy1196; + goto yy1188; + goto yy1221; } else { if (yych == 'P') - goto 
yy1194; + goto yy1219; if (yych <= 'V') - goto yy1163; - goto yy1197; + goto yy1188; + goto yy1222; } } else { if (yych <= 'j') { if (yych == 'g') - goto yy1195; + goto yy1220; if (yych <= 'i') - goto yy1163; - goto yy1196; + goto yy1188; + goto yy1221; } else { if (yych <= 'p') { if (yych <= 'o') - goto yy1163; + goto yy1188; } else { if (yych == 'w') - goto yy1197; - goto yy1163; + goto yy1222; + goto yy1188; } } } - yy1194: + yy1219: yych = *++p; if (yych == 'N') - goto yy1205; + goto yy1230; if (yych == 'n') - goto yy1205; - goto yy1163; - yy1195: + goto yy1230; + goto yy1188; + yy1220: yych = *++p; if (yych == 'I') - goto yy1204; + goto yy1229; if (yych == 'i') - goto yy1204; - goto yy1163; - yy1196: + goto yy1229; + goto yy1188; + yy1221: yych = *++p; if (yych == 'P') - goto yy1202; + goto yy1227; if (yych == 'p') - goto yy1202; - goto yy1163; - yy1197: + goto yy1227; + goto yy1188; + yy1222: yych = *++p; if (yych == 'E') - goto yy1198; + goto yy1223; if (yych != 'e') - goto yy1163; - yy1198: + goto yy1188; + yy1223: yych = *++p; if (yych == 'B') - goto yy1199; + goto yy1224; if (yych != 'b') - goto yy1163; - yy1199: + goto yy1188; + yy1224: yych = *++p; if (yych == 'P') - goto yy1200; + goto yy1225; if (yych != 'p') - goto yy1163; - yy1200: + goto yy1188; + yy1225: ++p; { return 0; } - yy1202: + yy1227: yych = *++p; if (yych == 'E') - goto yy1203; + goto yy1228; if (yych != 'e') - goto yy1163; - yy1203: + goto yy1188; + yy1228: yych = *++p; if (yych == 'G') - goto yy1200; + goto yy1225; if (yych == 'g') - goto yy1200; - goto yy1163; - yy1204: + goto yy1225; + goto yy1188; + yy1229: yych = *++p; if (yych == 'F') - goto yy1200; + goto yy1225; if (yych == 'f') - goto yy1200; - goto yy1163; - yy1205: + goto yy1225; + goto yy1188; + yy1230: ++p; if ((yych = *p) == 'G') - goto yy1200; + goto yy1225; if (yych == 'g') - goto yy1200; - goto yy1163; + goto yy1225; + goto yy1188; } } diff --git a/src/scanners.h b/src/scanners.h index b48ca25e7..d54d9d272 100644 --- 
a/src/scanners.h +++ b/src/scanners.h @@ -14,6 +14,7 @@ bufsize_t _scan_scheme(const unsigned char *p); bufsize_t _scan_autolink_uri(const unsigned char *p); bufsize_t _scan_autolink_email(const unsigned char *p); bufsize_t _scan_html_tag(const unsigned char *p); +bufsize_t _scan_liberal_html_tag(const unsigned char *p); bufsize_t _scan_html_block_start(const unsigned char *p); bufsize_t _scan_html_block_start_7(const unsigned char *p); bufsize_t _scan_html_block_end_1(const unsigned char *p); @@ -35,6 +36,7 @@ bufsize_t _scan_dangerous_url(const unsigned char *p); #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n) #define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n) #define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n) +#define scan_liberal_html_tag(c, n) _scan_at(&_scan_liberal_html_tag, c, n) #define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n) #define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n) #define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n) diff --git a/src/scanners.re b/src/scanners.re index a0650f2ba..a3326336c 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -122,6 +122,17 @@ bufsize_t _scan_html_tag(const unsigned char *p) */ } +// Try to (liberally) match an HTML tag after first <, returning num of chars matched. +bufsize_t _scan_liberal_html_tag(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + .+ [>] { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + // Try to match an HTML block tag start line, returning // an integer code for the type of block (1-6, matching the spec). // #7 is handled by a separate function, below. 
diff --git a/src/syntax_extension.c b/src/syntax_extension.c index 38984d808..7c549a924 100644 --- a/src/syntax_extension.c +++ b/src/syntax_extension.c @@ -81,6 +81,11 @@ void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *e extension->commonmark_render_func = func; } +void cmark_syntax_extension_set_plaintext_render_func(cmark_syntax_extension *extension, + cmark_common_render_func func) { + extension->plaintext_render_func = func; +} + void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension, cmark_common_render_func func) { extension->latex_render_func = func; diff --git a/src/syntax_extension.h b/src/syntax_extension.h index 060461deb..6fcf109d2 100644 --- a/src/syntax_extension.h +++ b/src/syntax_extension.h @@ -17,6 +17,7 @@ struct cmark_syntax_extension { cmark_can_contain_func can_contain_func; cmark_contains_inlines_func contains_inlines_func; cmark_common_render_func commonmark_render_func; + cmark_common_render_func plaintext_render_func; cmark_common_render_func latex_render_func; cmark_common_render_func man_render_func; cmark_html_render_func html_render_func; From 47e724d520070cbf47de92652bdb4c99e00472aa Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Mon, 8 May 2017 17:35:39 +1000 Subject: [PATCH 046/218] Remove normalize as an option per #190 (#194) --- Makefile | 2 +- man/man3/cmark-gfm.3 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index d1dbdcd5d..51a2c1967 100644 --- a/Makefile +++ b/Makefile @@ -167,7 +167,7 @@ leakcheck: $(ALLTESTS) for opts in "" "--smart"; do \ echo "cmark-gfm -t $$format -e table -e strikethrough -e autolink -e tagfilter $$opts" ; \ valgrind -q --leak-check=full --dsymutil=yes --suppressions=suppressions --error-exitcode=1 $(PROG) -t $$format -e table -e strikethrough -e autolink -e tagfilter $$opts $(ALLTESTS) >/dev/null || exit 1;\ - done; \ + done; \ done; fuzztest: diff --git a/man/man3/cmark-gfm.3 
b/man/man3/cmark-gfm.3 index b223fc9d4..471658d30 100644 --- a/man/man3/cmark-gfm.3 +++ b/man/man3/cmark-gfm.3 @@ -1,4 +1,4 @@ -.TH cmark-gfm 3 "June 27, 2017" "LOCAL" "Library Functions Manual" +.TH cmark-gfm 3 "April 03, 2017" "LOCAL" "Library Functions Manual" .SH NAME .PP From afd530f469bf1364968d25b13ccb5ddd83c17c9c Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Mon, 15 May 2017 12:50:53 +1000 Subject: [PATCH 047/218] Remove dead/misleading code [ is not a valid delimiter for an autolink to begin at. --- extensions/autolink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/autolink.c b/extensions/autolink.c index 6ceca5733..3d2a1853c 100644 --- a/extensions/autolink.c +++ b/extensions/autolink.c @@ -152,7 +152,7 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent, size_t link_end; - if (max_rewind > 0 && strchr("*_~([", data[-1]) == NULL && + if (max_rewind > 0 && strchr("*_~(", data[-1]) == NULL && !cmark_isspace(data[-1])) return 0; From 9f81059f1511414b3ff3ee4397741152027c89ce Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Mon, 22 May 2017 12:37:46 +1000 Subject: [PATCH 048/218] Add table alignment getters (#29) Example use: cmark_iter *iter = cmark_iter_new(document); cmark_event_type ev_type; cmark_node *cur; while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (ev_type == CMARK_EVENT_ENTER) { uint16_t n = cmarkextensions_get_table_columns(cur); if (n > 0) { uint8_t *a = cmarkextensions_get_table_alignments(cur); printf("table with %d cols: ", n); for (uint16_t i = 0; i < n; ++i) printf("%c", a[i] == 0 ? 
'-' : (char)a[i]); } } } --- extensions/core-extensions.h | 6 ++++++ extensions/table.c | 14 ++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/extensions/core-extensions.h b/extensions/core-extensions.h index 78ae8de64..45f199443 100644 --- a/extensions/core-extensions.h +++ b/extensions/core-extensions.h @@ -11,6 +11,12 @@ extern "C" { CMARKEXTENSIONS_EXPORT int core_extensions_registration(cmark_plugin *plugin); +CMARKEXTENSIONS_EXPORT +uint16_t cmarkextensions_get_table_columns(cmark_node *node); + +CMARKEXTENSIONS_EXPORT +uint8_t *cmarkextensions_get_table_alignments(cmark_node *node); + #ifdef __cplusplus } #endif diff --git a/extensions/table.c b/extensions/table.c index e71fd81c2..7dfa50116 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -644,3 +644,17 @@ cmark_syntax_extension *create_table_extension(void) { return self; } + +uint16_t cmarkextensions_get_table_columns(cmark_node *node) { + if (node->type != CMARK_NODE_TABLE) + return 0; + + return ((node_table *)node->as.opaque)->n_columns; +} + +uint8_t *cmarkextensions_get_table_alignments(cmark_node *node) { + if (node->type != CMARK_NODE_TABLE) + return 0; + + return ((node_table *)node->as.opaque)->alignments; +} From b0707b68f62457f86b7790619c7f76804fd7f883 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Mon, 29 May 2017 12:40:59 +1000 Subject: [PATCH 049/218] make install also installs extensions (#32) Also clean up CMakeLists considerably. 
--- CMakeLists.txt | 4 +-- api_test/CMakeLists.txt | 2 +- extensions/CMakeLists.txt | 41 ++++++++++++++++++++---------- src/CMakeLists.txt | 52 ++++++++++++++++----------------------- 4 files changed, 52 insertions(+), 47 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ced3be9b2..27c0ad97e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,8 +23,8 @@ set(PROJECT_VERSION_GFM 0) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) -option(CMARK_STATIC "Build static libcmark library" ON) -option(CMARK_SHARED "Build shared libcmark library" ON) +option(CMARK_STATIC "Build static libcmark-gfm library" ON) +option(CMARK_SHARED "Build shared libcmark-gfm library" ON) option(CMARK_LIB_FUZZER "Build libFuzzer fuzzing harness" OFF) add_subdirectory(src) diff --git a/api_test/CMakeLists.txt b/api_test/CMakeLists.txt index a128255f0..039ce9294 100644 --- a/api_test/CMakeLists.txt +++ b/api_test/CMakeLists.txt @@ -8,7 +8,7 @@ include_directories( ${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src ) -target_link_libraries(api_test libcmark-gfm ${CMAKE_DL_LIBS}) +target_link_libraries(api_test libcmark-gfm) # Compiler flags if(MSVC) diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index e46be2c5b..218b39bfe 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -19,15 +19,6 @@ include_directories( include (GenerateExportHeader) -# We make LIB_INSTALL_DIR configurable rather than -# hard-coding lib, because on some OSes different locations -# are used for different architectures (e.g. /usr/lib64 on -# 64-bit Fedora). -if(NOT LIB_INSTALL_DIR) - set(LIB_INSTALL_DIR "lib" CACHE STRING - "Set the installation directory for libraries." FORCE) -endif(NOT LIB_INSTALL_DIR) - include_directories(. 
${CMAKE_CURRENT_BINARY_DIR}) set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") @@ -38,9 +29,9 @@ if (CMARK_SHARED) add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) set_target_properties(${LIBRARY} PROPERTIES - OUTPUT_NAME "cmark-gfmextensions" - SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} - VERSION ${PROJECT_VERSION}) + OUTPUT_NAME "cmark-gfmextensions" + SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} + VERSION ${PROJECT_VERSION}) set_property(TARGET ${LIBRARY} APPEND PROPERTY MACOSX_RPATH true) @@ -48,10 +39,12 @@ if (CMARK_SHARED) # Avoid name clash between PROGRAM and LIBRARY pdb files. set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark-gfmextensions_dll) + generate_export_header(${LIBRARY} + BASE_NAME cmarkextensions) + list(APPEND CMARK_INSTALL ${LIBRARY}) target_link_libraries(${LIBRARY} libcmark-gfm) - generate_export_header(${LIBRARY} BASE_NAME cmarkextensions) endif() if (CMARK_STATIC) @@ -70,6 +63,28 @@ if (CMARK_STATIC) OUTPUT_NAME "cmark-gfmextensions" VERSION ${PROJECT_VERSION}) endif(MSVC) + + list(APPEND CMARK_INSTALL ${STATICLIBRARY}) +endif() + +set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON) + +include (InstallRequiredSystemLibraries) +install(TARGETS ${CMARK_INSTALL} + EXPORT cmark-gfmextensions + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib${LIB_SUFFIX} + ARCHIVE DESTINATION lib${LIB_SUFFIX} + ) + +if (CMARK_SHARED OR CMARK_STATIC) + install(FILES + core-extensions.h + ${CMAKE_CURRENT_BINARY_DIR}/cmarkextensions_export.h + DESTINATION include + ) + + install(EXPORT cmark-gfmextensions DESTINATION lib${LIB_SUFFIX}/cmake-gfmextensions) endif() # Feature tests diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 42fc16ebb..21ded9b4a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -100,9 +100,9 @@ if (CMARK_SHARED) add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) # Include 
minor version and patch level in soname for now. set_target_properties(${LIBRARY} PROPERTIES - OUTPUT_NAME "cmark-gfm" - SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} - VERSION ${PROJECT_VERSION}) + OUTPUT_NAME "cmark-gfm" + SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} + VERSION ${PROJECT_VERSION}) set_property(TARGET ${LIBRARY} APPEND PROPERTY MACOSX_RPATH true) @@ -119,31 +119,21 @@ endif() if (CMARK_STATIC) add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES}) set_target_properties(${STATICLIBRARY} PROPERTIES - COMPILE_FLAGS -DCMARK_STATIC_DEFINE - POSITION_INDEPENDENT_CODE ON) + COMPILE_FLAGS -DCMARK_STATIC_DEFINE + POSITION_INDEPENDENT_CODE ON) if (MSVC) - set_target_properties(${STATICLIBRARY} PROPERTIES - OUTPUT_NAME "cmark-gfm_static" - VERSION ${PROJECT_VERSION}) + set_target_properties(${STATICLIBRARY} PROPERTIES + OUTPUT_NAME "cmark-gfm_static" + VERSION ${PROJECT_VERSION}) else() - set_target_properties(${STATICLIBRARY} PROPERTIES - OUTPUT_NAME "cmark-gfm" - VERSION ${PROJECT_VERSION}) -endif(MSVC) - -target_link_libraries(cmark-gfm ${CMAKE_DL_LIBS}) + set_target_properties(${STATICLIBRARY} PROPERTIES + OUTPUT_NAME "cmark-gfm" + VERSION ${PROJECT_VERSION}) + endif(MSVC) list(APPEND CMARK_INSTALL ${STATICLIBRARY}) endif() -set_property(TARGET ${LIBRARY} - APPEND PROPERTY MACOSX_RPATH true) - -# Avoid name clash between PROGRAM and LIBRARY pdb files. 
-set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark-gfm_dll) - -generate_export_header(${LIBRARY} - BASE_NAME ${PROJECT_NAME}) if (MSVC) set_property(TARGET ${PROGRAM} @@ -154,7 +144,7 @@ set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON) include (InstallRequiredSystemLibraries) install(TARGETS ${PROGRAM} ${CMARK_INSTALL} - EXPORT cmark + EXPORT cmark-gfm RUNTIME DESTINATION bin LIBRARY DESTINATION lib${LIB_SUFFIX} ARCHIVE DESTINATION lib${LIB_SUFFIX} @@ -167,14 +157,14 @@ if(CMARK_SHARED OR CMARK_STATIC) DESTINATION lib${LIB_SUFFIX}/pkgconfig) install(FILES - cmark.h - cmark_extension_api.h - ${CMAKE_CURRENT_BINARY_DIR}/cmark_export.h - ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h - DESTINATION include - ) - - install(EXPORT cmark DESTINATION lib${LIB_SUFFIX}/cmake) + cmark.h + cmark_extension_api.h + ${CMAKE_CURRENT_BINARY_DIR}/cmark_export.h + ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h + DESTINATION include + ) + + install(EXPORT cmark-gfm DESTINATION lib${LIB_SUFFIX}/cmake-gfm) endif() # Feature tests From fff48d573882d24d6aa661f1e9bf91dd58eda3b7 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Mon, 29 May 2017 12:44:14 +1000 Subject: [PATCH 050/218] 0.27.1.gfm.1 --- CMakeLists.txt | 2 +- changelog.txt | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 27c0ad97e..4d60bf4b8 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 27) set(PROJECT_VERSION_PATCH 1) -set(PROJECT_VERSION_GFM 0) +set(PROJECT_VERSION_GFM 1) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 5c5265230..99257163e 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,10 @@ +[0.27.1.gfm.1] + + * Add plaintext renderer. 
+ * Remove normalize option; we now always normalize the AST. + * Add getters for table alignment. + * `make install` also installs the extensions static/shared library. + [0.27.1.gfm.0] * Add extensions: tagfilter, strikethrough, table, autolink. From d9526ab1cce9826c8626e7302723525ff672fe0b Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Mon, 29 May 2017 16:28:36 +1000 Subject: [PATCH 051/218] Add CMARK_GFM_VERSION define. --- src/cmark_version.h.in | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cmark_version.h.in b/src/cmark_version.h.in index 73e599b25..f1ae0990a 100644 --- a/src/cmark_version.h.in +++ b/src/cmark_version.h.in @@ -3,5 +3,6 @@ #define CMARK_VERSION ((@PROJECT_VERSION_MAJOR@ << 24) | (@PROJECT_VERSION_MINOR@ << 16) | (@PROJECT_VERSION_PATCH@ << 8) | @PROJECT_VERSION_GFM@) #define CMARK_VERSION_STRING "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@.gfm.@PROJECT_VERSION_GFM@" +#define CMARK_GFM_VERSION @PROJECT_VERSION_GFM@ #endif From 1cd892e2ee856cccce6bfaf14d27dd40df07f30d Mon Sep 17 00:00:00 2001 From: Stefan Winkler <mail@stefanwinkler.de> Date: Wed, 31 May 2017 03:26:38 +0200 Subject: [PATCH 052/218] Fix link order of cmark-gfm (#35) --- src/CMakeLists.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 21ded9b4a..8d0219b29 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -71,10 +71,7 @@ include (GenerateExportHeader) add_executable(${PROGRAM} ${PROGRAM_SOURCES}) add_compiler_export_flags() -target_link_libraries(${PROGRAM} libcmark-gfm_static) - -add_dependencies(${PROGRAM} libcmark-gfmextensions_static) -target_link_libraries(${PROGRAM} libcmark-gfmextensions_static) +target_link_libraries(${PROGRAM} libcmark-gfmextensions_static libcmark-gfm_static) # Disable the PUBLIC declarations when compiling the executable: set_target_properties(${PROGRAM} PROPERTIES From ec5dcf4d9ef8e9a25f2ba5c2a905036e1b7343a9 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Jonathan=20M=C3=BCller?= <git@foonathan.net> Date: Fri, 2 Jun 2017 04:40:41 +0200 Subject: [PATCH 053/218] Add `cmark_syntax_extension_get_private()` (#36) Fixes #34. --- src/cmark_extension_api.h | 5 +++++ src/syntax_extension.c | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index 8f6941863..8a273b54e 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -366,6 +366,11 @@ void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, void *priv, cmark_free_func free_func); +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension); + /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT diff --git a/src/syntax_extension.c b/src/syntax_extension.c index 7c549a924..ee86b66d2 100644 --- a/src/syntax_extension.c +++ b/src/syntax_extension.c @@ -118,6 +118,10 @@ void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, extension->free_function = free_func; } +void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension) { + return extension->priv; +} + void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension, cmark_opaque_free_func func) { extension->opaque_free_func = func; From fa66ade67b5adbbbff32479e3054302fcd929ccb Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Fri, 16 Jun 2017 17:06:37 +1000 Subject: [PATCH 054/218] Unmark as static --- extensions/table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/table.c b/extensions/table.c index 7dfa50116..27b469765 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -9,7 +9,7 @@ #include "strikethrough.h" #include "table.h" -static cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, +cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, CMARK_NODE_TABLE_CELL; typedef struct 
{ From a38cfc0a5b829260cf7e5666e2acc8534f942d51 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Wed, 21 Jun 2017 14:51:30 +1000 Subject: [PATCH 055/218] Don't scan past an EOL (#37) --- src/scanners.re | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanners.re b/src/scanners.re index a3326336c..cd4635617 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -128,7 +128,7 @@ bufsize_t _scan_liberal_html_tag(const unsigned char *p) const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - .+ [>] { return (bufsize_t)(p - start); } + [^\n\x00]+ [>] { return (bufsize_t)(p - start); } * { return 0; } */ } From cb914446a2f64146d9c1e675b7b82c35188d3eda Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Wed, 21 Jun 2017 15:46:11 +1000 Subject: [PATCH 056/218] Regenerate scanner --- src/scanners.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/scanners.c b/src/scanners.c index b4c75aa3b..f0d80e6f6 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -9754,7 +9754,7 @@ bufsize_t _scan_liberal_html_tag(const unsigned char *p) { unsigned char yych; unsigned int yyaccept = 0; static const unsigned char yybm[] = { - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, + 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, @@ -9772,12 +9772,16 @@ bufsize_t _scan_liberal_html_tag(const unsigned char *p) { }; yych = *p; if (yych <= 0xE0) { - if (yych <= 0x7F) { - if (yych == '\n') + if (yych <= '\n') { + if (yych <= 0x00) + goto yy493; + if (yych <= '\t') goto yy495; } else { - if (yych <= 0xC1) + if (yych <= 0x7F) goto yy495; + if (yych <= 0xC1) + goto yy493; if (yych <= 0xDF) goto yy496; goto yy497; 
@@ -9794,24 +9798,29 @@ bufsize_t _scan_liberal_html_tag(const unsigned char *p) { goto yy501; if (yych <= 0xF4) goto yy502; - goto yy495; } } + yy493: + ++p; + yy494 : { return 0; } + yy495: yyaccept = 0; yych = *(marker = ++p); - if (yych <= 0x7F) { - if (yych != '\n') + if (yych <= '\n') { + if (yych <= 0x00) + goto yy494; + if (yych <= '\t') goto yy507; + goto yy494; } else { + if (yych <= 0x7F) + goto yy507; if (yych <= 0xC1) goto yy494; if (yych <= 0xF4) goto yy507; + goto yy494; } - yy494 : { return 0; } - yy495: - yych = *++p; - goto yy494; yy496: yyaccept = 0; yych = *(marker = ++p); From 4b3267ddb4da08c7cad855a8dbe5b1dd1d8518ec Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Wed, 21 Jun 2017 16:39:00 +1000 Subject: [PATCH 057/218] Also exclude \n --- src/scanners.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scanners.c b/src/scanners.c index f0d80e6f6..319a0c031 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -1,7 +1,7 @@ /* Generated by re2c 0.15.3 */ -#include "scanners.h" -#include "chunk.h" #include <stdlib.h> +#include "chunk.h" +#include "scanners.h" bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) { From a758a35b13c2c665491e17a364f707726137db89 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Thu, 29 Jun 2017 16:20:33 +1000 Subject: [PATCH 058/218] Update cmark-fuzz for cmark-gfm --- test/cmark-fuzz.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cmark-fuzz.c b/test/cmark-fuzz.c index f09db5212..6425c1492 100644 --- a/test/cmark-fuzz.c +++ b/test/cmark-fuzz.c @@ -17,7 +17,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { cmark_node *doc = cmark_parse_document(markdown, markdown_size, options); free(cmark_render_commonmark(doc, options, 80)); - free(cmark_render_html(doc, options)); + free(cmark_render_html(doc, options, NULL)); free(cmark_render_latex(doc, options, 80)); 
free(cmark_render_man(doc, options, 80)); free(cmark_render_xml(doc, options)); From 71b0d0040e85b7776fa517696083874e91911111 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Fri, 30 Jun 2017 11:42:52 +1000 Subject: [PATCH 059/218] Latest cmake in Docker --- tools/Dockerfile | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tools/Dockerfile b/tools/Dockerfile index 3fb7f8b34..70ec833c3 100644 --- a/tools/Dockerfile +++ b/tools/Dockerfile @@ -1,20 +1,19 @@ FROM debian:jessie RUN apt-get update && apt-get install -y \ - build-essential autoconf libtool \ + build-essential \ + autoconf \ + libtool \ git \ pkg-config \ - && apt-get clean - -RUN apt-get install -y \ - cmake \ gdb \ valgrind \ - python3 - -RUN apt-get install -y \ + python3 \ wget \ - clang + clang \ + man \ + clang-format-3.5 \ + && apt-get clean RUN wget http://lcamtuf.coredump.cx/afl/releases/afl-latest.tgz && \ tar xf afl-latest.tgz && \ @@ -23,8 +22,6 @@ RUN wget http://lcamtuf.coredump.cx/afl/releases/afl-latest.tgz && \ cd .. && \ rm -rf afl-* -RUN apt-get install -y man - RUN wget https://github.com/skvadrik/re2c/releases/download/0.15.3/re2c-0.15.3.tar.gz && \ tar xf re2c-0.15.3.tar.gz && \ cd re2c-* && \ @@ -33,4 +30,10 @@ RUN wget https://github.com/skvadrik/re2c/releases/download/0.15.3/re2c-0.15.3.t cd .. && \ rm -rf re2c-* -RUN apt-get install -y clang-format-3.5 +RUN wget https://cmake.org/files/v3.8/cmake-3.8.2.tar.gz && \ + tar xf cmake-3.8.2.tar.gz && \ + cd cmake-* && \ + ./bootstrap && \ + make install && \ + cd .. 
&& \ + rm -rf cmake-* From 0fbab47ee4930f1725b0f914dffd1d514831f4b8 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Fri, 30 Jun 2017 12:11:40 +1000 Subject: [PATCH 060/218] Fix a misaligned write --- src/arena.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index b8fed007c..f852a5405 100644 --- a/src/arena.c +++ b/src/arena.c @@ -71,7 +71,8 @@ static void *arena_calloc(size_t nmem, size_t size) { } void *ptr = (uint8_t *) A->ptr + A->used; A->used += sz; - *((size_t *) ptr) = nmem * size; + size_t new_sz = nmem * size; + memcpy(ptr, &new_sz, sizeof(new_sz)); return (uint8_t *) ptr + sizeof(size_t); } From 2af1c5c0edf06c61a560f8c5e6c696fe1f2deb48 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Fri, 30 Jun 2017 12:41:00 +1000 Subject: [PATCH 061/218] 0.27.1.gfm.2 --- CMakeLists.txt | 2 +- changelog.txt | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d60bf4b8..7d8736cff 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 27) set(PROJECT_VERSION_PATCH 1) -set(PROJECT_VERSION_GFM 1) +set(PROJECT_VERSION_GFM 2) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 99257163e..1257cd50b 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,8 @@ +[0.27.1.gfm.2] + + * Fix a number of bugs (reading past end of buffer, undefined behavior. + * Add `cmark_syntax_extension_get_private()`. (Jonathan Müller) + [0.27.1.gfm.1] * Add plaintext renderer. 
From ffa8c3e555bc64d1a97778f4fae08c706fbae985 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <yuki@kivikakk.ee> Date: Fri, 30 Jun 2017 18:48:51 +1000 Subject: [PATCH 062/218] Install ninja-build --- tools/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/Dockerfile b/tools/Dockerfile index 70ec833c3..65b7fe99e 100644 --- a/tools/Dockerfile +++ b/tools/Dockerfile @@ -37,3 +37,5 @@ RUN wget https://cmake.org/files/v3.8/cmake-3.8.2.tar.gz && \ make install && \ cd .. && \ rm -rf cmake-* + +RUN apt-get update && apt-get install -y ninja-build From 3a3ac47191350c925044145095d90beaf448e4b3 Mon Sep 17 00:00:00 2001 From: Phil Turnbull <philipturnbull@github.com> Date: Wed, 5 Jul 2017 21:36:00 -0400 Subject: [PATCH 063/218] Avoid memcpy'ing NULL pointers (#38) A UBSAN warning can be triggered because the link title is an empty string: src/inlines.c:113:20: runtime error: null pointer passed as argument 2, which is declared to never be null which can be triggered by: ``` [f]:_ [f] ``` The length of the memcpy is zero so the NULL pointer is not dereferenced but it is still undefined behaviour. --- src/inlines.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/inlines.c b/src/inlines.c index 7c3eb05f1..2f907b185 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -106,7 +106,8 @@ static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) { c.len = len; c.data = (unsigned char *)mem->calloc(len + 1, 1); c.alloc = 1; - memcpy(c.data, src->data, len); + if (len) + memcpy(c.data, src->data, len); c.data[len] = '\0'; return c; From fca380ca85c046233c39523717073153e2458c1e Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Fri, 7 Jul 2017 12:22:57 +1000 Subject: [PATCH 064/218] Case-insensitive tagfilter. Fixes #42. 
(#43) --- extensions/tagfilter.c | 3 ++- test/spec.txt | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/extensions/tagfilter.c b/extensions/tagfilter.c index 80cbd6b9b..262352e0c 100644 --- a/extensions/tagfilter.c +++ b/extensions/tagfilter.c @@ -1,5 +1,6 @@ #include "tagfilter.h" #include <parser.h> +#include <ctype.h> static const char *blacklist[] = { "title", "textarea", "style", "xmp", "iframe", @@ -23,7 +24,7 @@ static int is_tag(const unsigned char *tag_data, size_t tag_size, if (*tagname == 0) break; - if (tag_data[i] != *tagname) + if (tolower(tag_data[i]) != *tagname) return 0; } diff --git a/test/spec.txt b/test/spec.txt index 269b5853f..cf7cadf00 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -9285,12 +9285,12 @@ All other HTML tags are left untouched. <strong> <title> <style> <em> <blockquote> - <xmp> is disallowed. + <xmp> is disallowed. <XMP> is also disallowed. </blockquote> . <p><strong> &lt;title> &lt;style> <em></p> <blockquote> - &lt;xmp> is disallowed. + &lt;xmp> is disallowed. &lt;XMP> is also disallowed. </blockquote> ```````````````````````````````` From 7d70d2c3eeb17745de5bc2220f6d8d151f4bfa63 Mon Sep 17 00:00:00 2001 From: Phil Turnbull <philipturnbull@github.com> Date: Mon, 10 Jul 2017 20:10:44 -0700 Subject: [PATCH 065/218] Allocate memory from arena with correct alignment (#40) The arena allocator does not align memory when allocating blocks which can trigger undefined behaviour. From UBSAN: src/inlines.c:83:30: runtime error: member access within misaligned address 0x00010b30ede3 for type 'cmark_node' (aka 'struct cmark_node'), which requires 8 byte alignment This could cause a crash if the unaligned memory is passed to functions using instructions which require alignment, such as SSE. Round-up the size of the allocation to a multiple of `sizeof(size_t)` to ensure every allocation is correctly aligned. 
--- src/arena.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index f852a5405..801fb3c1b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -62,6 +62,12 @@ static void *arena_calloc(size_t nmem, size_t size) { init_arena(); size_t sz = nmem * size + sizeof(size_t); + + // Round allocation sizes to largest integer size to + // ensure returned memory is correctly aligned + const size_t align = sizeof(size_t) - 1; + sz = (sz + align) & ~align; + if (sz > A->sz) { A->prev = alloc_arena_chunk(sz, A->prev); return (uint8_t *) A->prev->ptr + sizeof(size_t); @@ -71,8 +77,7 @@ static void *arena_calloc(size_t nmem, size_t size) { } void *ptr = (uint8_t *) A->ptr + A->used; A->used += sz; - size_t new_sz = nmem * size; - memcpy(ptr, &new_sz, sizeof(new_sz)); + *((size_t *) ptr) = sz - sizeof(size_t); return (uint8_t *) ptr + sizeof(size_t); } From 494f10abd6870dd17498544fc57d66a626e0b9dc Mon Sep 17 00:00:00 2001 From: Phil Turnbull <philipturnbull@github.com> Date: Mon, 10 Jul 2017 20:13:21 -0700 Subject: [PATCH 066/218] Use unsigned integer when shifting (#39) A UBSAN warning can be triggered when handling a long sequence of backticks: src/commonmark.c:98:20: runtime error: left shift of 1 by 31 places cannot be represented in type 'int' which can be triggered by: ``` | a | b | | --- | --** `c```````````````````````````````- | | c | `|d` \| e | ``` --- src/commonmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/commonmark.c b/src/commonmark.c index 132369ebb..8063acb65 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -95,7 +95,7 @@ static int shortest_unused_backtick_sequence(const char *code) { current++; } else { if (current > 0 && current < 32) { - used |= (1 << current); + used |= (1U << current); } current = 0; } From 970e029ce944ba6136d67c0ae1af2b8a94a6c21c Mon Sep 17 00:00:00 2001 From: Yuki Izumi <ashe@kivikakk.ee> Date: Tue, 11 Jul 2017 13:27:28 +1000 Subject: [PATCH 067/218] 
0.27.1.gfm.3 --- CMakeLists.txt | 2 +- changelog.txt | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7d8736cff..65b58baea 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 27) set(PROJECT_VERSION_PATCH 1) -set(PROJECT_VERSION_GFM 2) +set(PROJECT_VERSION_GFM 3) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 1257cd50b..b417d8567 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,8 @@ +[0.27.1.gfm.3] + + * Various undefined behavior issues fixed (#38, #39, #40). + * Tag filter is case-insensitive (#43). + [0.27.1.gfm.2] * Fix a number of bugs (reading past end of buffer, undefined behavior. From 298d9d28390146b070fa47980a0356367c67bdf0 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <kivikakk@github.com> Date: Wed, 12 Jul 2017 18:43:34 +1000 Subject: [PATCH 068/218] > 32 nested balanced parens in a link is bananas (#48) --- src/inlines.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/inlines.c b/src/inlines.c index 2f907b185..e30c2affe 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -871,6 +871,8 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) { else if (input->data[i] == '(') { ++nb_p; ++i; + if (nb_p > 32) + return -1; } else if (input->data[i] == ')') { if (nb_p == 0) break; From 175542b2f37fb854117580268060d6cd19ef78bf Mon Sep 17 00:00:00 2001 From: Yuki Izumi <ashe@kivikakk.ee> Date: Wed, 12 Jul 2017 18:44:23 +1000 Subject: [PATCH 069/218] 0.27.1.gfm.4 --- CMakeLists.txt | 2 +- changelog.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 65b58baea..e16caa92c 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 
@@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 27) set(PROJECT_VERSION_PATCH 1) -set(PROJECT_VERSION_GFM 3) +set(PROJECT_VERSION_GFM 4) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index b417d8567..512602ae4 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,7 @@ +[0.27.1.gfm.4] + + * Fix regression with nested parentheses in link targets (#48). + [0.27.1.gfm.3] * Various undefined behavior issues fixed (#38, #39, #40). From c93dc8bdfb64e4d29f5b2a0d637e3b9216dc66f4 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <ashe@kivikakk.ee> Date: Fri, 14 Jul 2017 14:25:09 +1000 Subject: [PATCH 070/218] Latest spec --- test/spec.txt | 64 +++++++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index cf7cadf00..ccaa8523c 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -20,15 +20,17 @@ GFM is a strict superset of CommonMark. All the features which are supported in GitHub user content and that are not specified on the original CommonMark Spec are hence known as **extensions**, and highlighted as such. +While GFM supports a wide range of inputs, it's worth noting that GitHub.com +and GitHub Enterprise perform additional post-processing and sanitization after +GFM is converted to HTML to ensure security and consistency of the website. + ## What is Markdown? Markdown is a plain text format for writing structured documents, -based on conventions for indicating formatting in email -and usenet posts. It was developed by John Gruber (with -help from Aaron Swartz) and released in 2004 in the form of a -[syntax description](http://daringfireball.net/projects/markdown/syntax) -and a Perl script (`Markdown.pl`) for converting Markdown to -HTML. 
In the next decade, dozens of implementations were +based on conventions used for indicating formatting in email and +usenet posts. It was developed in 2004 by John Gruber, who wrote +the first Markdown-to-HTML converter in Perl, and it soon became +ubiquitous. In the next decade, dozens of implementations were developed in many languages. Some extended the original Markdown syntax with conventions for footnotes, tables, and other document elements. Some allowed Markdown documents to be @@ -326,7 +328,7 @@ form feed (`U+000C`), or carriage return (`U+000D`). characters]. A [Unicode whitespace character](@) is -any code point in the Unicode `Zs` general category, or a tab (`U+0009`), +any code point in the Unicode `Zs` class, or a tab (`U+0009`), carriage return (`U+000D`), newline (`U+000A`), or form feed (`U+000C`). @@ -345,7 +347,7 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, A [punctuation character](@) is an [ASCII punctuation character] or anything in -the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. +the Unicode classes `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. ## Tabs @@ -416,8 +418,8 @@ as indentation with four spaces would: Normally the `>` that begins a block quote may be followed optionally by a space, which is not considered part of the content. In the following case `>` is followed by a tab, -which is treated as if it were expanded into three spaces. -Since one of these spaces is considered part of the +which is treated as if it were expanded into spaces. +Since one of theses spaces is considered part of the delimiter, `foo` is considered to be indented six spaces inside the block quote context, so we get an indented code block starting with two spaces. @@ -495,7 +497,7 @@ We can think of a document as a sequence of quotations, lists, headings, rules, and code blocks. 
Some blocks (like block quotes and list items) contain other blocks; others (like headings and paragraphs) contain [inline](@) content---text, -links, emphasized text, images, code spans, and so on. +links, emphasized text, images, code, and so on. ## Precedence @@ -6047,15 +6049,6 @@ we just have literal backticks: <p>`foo</p> ```````````````````````````````` -The following case also illustrates the need for opening and -closing backtick strings to be equal in length: - -```````````````````````````````` example -`foo``bar`` -. -<p>`foo<code>bar</code></p> -```````````````````````````````` - ## Emphasis and strong emphasis @@ -6110,14 +6103,14 @@ characters that is not preceded or followed by a `_` character. A [left-flanking delimiter run](@) is a [delimiter run] that is (a) not followed by [Unicode whitespace], -and (b) not followed by a [punctuation character], or +and (b) either not followed by a [punctuation character], or preceded by [Unicode whitespace] or a [punctuation character]. For purposes of this definition, the beginning and the end of the line count as Unicode whitespace. A [right-flanking delimiter run](@) is a [delimiter run] that is (a) not preceded by [Unicode whitespace], -and (b) not preceded by a [punctuation character], or +and (b) either not preceded by a [punctuation character], or followed by [Unicode whitespace] or a [punctuation character]. For purposes of this definition, the beginning and the end of the line count as Unicode whitespace. @@ -6196,7 +6189,7 @@ The following rules define emphasis and strong emphasis: 7. A double `**` [can close strong emphasis](@) iff it is part of a [right-flanking delimiter run]. -8. A double `__` [can close strong emphasis] iff +8. 
A double `__` [can close strong emphasis] it is part of a [right-flanking delimiter run] and either (a) not part of a [left-flanking delimiter run] or (b) part of a [left-flanking delimiter run] @@ -6237,7 +6230,7 @@ the following principles resolve ambiguity: `<em><em>...</em></em>`. 14. An interpretation `<em><strong>...</strong></em>` is always - preferred to `<strong><em>...</em></strong>`. + preferred to `<strong><em>..</em></strong>`. 15. When two potential emphasis or strong emphasis spans overlap, so that the second begins before the first ends and ends after @@ -8616,11 +8609,11 @@ The link labels are case-insensitive: ```````````````````````````````` -If you just want a literal `!` followed by bracketed text, you can -backslash-escape the opening `[`: +If you just want bracketed text, you can backslash-escape the +opening `!` and `[`: ```````````````````````````````` example -!\[foo] +\!\[foo] [foo]: /url "title" . @@ -8835,14 +8828,15 @@ greater number of conditions. [Autolink]s can also be constructed without requiring the use of `<` and to `>` to delimit them, although they will be recognized under a smaller set of -circumstances. All such recognized autolinks can only come after whitespace, -or any of the delimiting characters `*`, `_`, `~`, `(`, and `[`. - -An [extended www autolink](@) will be recognized when a [valid domain] is -found. A [valid domain](@) consists of the text `www.`, followed by -alphanumeric characters, underscores (`_`), hyphens (`-`) and periods (`.`). -There must be at least one period, and no underscores may be present in the -last two segments of the domain. +circumstances. All such recognized autolinks can only come at the beginning of +a line, after whitespace, or any of the delimiting characters `*`, `_`, `~`, +and `(`. + +An [extended www autolink](@) will be recognized when the text `www.` is found +followed by a [valid domain]. 
A [valid domain](@) consists of alphanumeric +characters, underscores (`_`), hyphens (`-`) and periods (`.`). There must be +at least one period, and no underscores may be present in the last two segments +of the domain. The scheme `http` will be inserted automatically: From d219c0a6f58d311f38a4de8f2e8bbb6482a05bb8 Mon Sep 17 00:00:00 2001 From: John Gardner <gardnerjohng@gmail.com> Date: Mon, 17 Jul 2017 11:04:43 +1000 Subject: [PATCH 071/218] Fix typo (#52) --- test/spec.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index ccaa8523c..5cceebb6c 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -419,7 +419,7 @@ Normally the `>` that begins a block quote may be followed optionally by a space, which is not considered part of the content. In the following case `>` is followed by a tab, which is treated as if it were expanded into spaces. -Since one of theses spaces is considered part of the +Since one of these spaces is considered part of the delimiter, `foo` is considered to be indented six spaces inside the block quote context, so we get an indented code block starting with two spaces. @@ -9840,4 +9840,3 @@ closers: After we're done, we remove all delimiters above `stack_bottom` from the delimiter stack. - From e45ee8dc8c1e54e8b9038a279a892f606e29c8ba Mon Sep 17 00:00:00 2001 From: Vicent Marti <tanoku@gmail.com> Date: Fri, 14 Jul 2017 12:35:42 +0200 Subject: [PATCH 072/218] test: Add test case for pathological collisions As reported by @mity. The reference map implementation in cmark can be trivially DDoSed by crafting a document where all link references have names that collide. Generating a list of colliding reference names is trivial, since the hash function we use is not cryptographically safe, and our hash table always has a fixed amount of buckets. 
You can see an example on how to generate such documents in the `hash_collisions` helper in `pathological_tests.py`: a document with 50000 link references, all colliding on the same bucket, is generated. Parsing such Markdown document takes more than 5 minutes in the current version of CMark. In order to allow the test suite to finish, and to allow us to test other pathological inputs in the future, the pathological test runner has been modified to use Python's `multiprocess` module and spawn a worker to run each test. With a worker running on a separate process, we can indeed see that that the generated document does not finish parsing in a reasonable amount of time. The timeout for all pathological tests has been set to 5 seconds, which may need tuning for slow CI environments in the future. --- test/cmark.py | 2 +- test/pathological_tests.py | 71 +++++++++++++++++++++++++++++--------- 2 files changed, 56 insertions(+), 17 deletions(-) diff --git a/test/cmark.py b/test/cmark.py index 78c839db1..2c382ed13 100644 --- a/test/cmark.py +++ b/test/cmark.py @@ -92,7 +92,7 @@ def __init__(self, prog=None, library_dir=None, extensions=None): else: libnames = [ ["lib", ".so"] ] if not library_dir: - library_dir = os.path.join("build", "src") + library_dir = os.path.join("..", "build", "src") for prefix, suffix in libnames: candidate = os.path.join(library_dir, prefix + "cmark-gfm" + suffix) if os.path.isfile(candidate): diff --git a/test/pathological_tests.py b/test/pathological_tests.py index 77e4165c6..6bbef7f98 100644 --- a/test/pathological_tests.py +++ b/test/pathological_tests.py @@ -5,6 +5,8 @@ import argparse import sys import platform +import itertools +import multiprocessing from cmark import CMark if __name__ == "__main__": @@ -17,6 +19,28 @@ cmark = CMark(prog=args.program, library_dir=args.library_dir) +def hash_collisions(): + REFMAP_SIZE = 16 + COUNT = 50000 + + def badhash(ref): + h = 0 + for c in ref: + a = (h << 6) & 0xFFFFFFFF + b = (h << 16) & 
0xFFFFFFFF + h = ord(c) + a + b - h + h = h & 0xFFFFFFFF + + return (h % REFMAP_SIZE) == 0 + + keys = ("x%d" % i for i in itertools.count()) + collisions = itertools.islice((k for k in keys if badhash(k)), COUNT) + bad_key = next(collisions) + + document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions) + + return document, re.compile("(<p>\[%s\]</p>\n){%d}" % (bad_key, COUNT-1)) + # list of pairs consisting of input and a regex that must match the output. pathological = { # note - some pythons have limit of 65535 for {num-matches} in re. @@ -58,32 +82,47 @@ re.compile("abc\ufffd?de\ufffd?")), "backticks": ("".join(map(lambda x: ("e" + "`" * x), range(1,10000))), - re.compile("^<p>[e`]*</p>\n$")) + re.compile("^<p>[e`]*</p>\n$")), + "reference collisions": hash_collisions() } whitespace_re = re.compile('/s+/') passed = 0 errored = 0 -failed = 0 +TIMEOUT = 5 -print("Testing pathological cases:") -for description in pathological: - (inp, regex) = pathological[description] +def run_test(inp, regex): [rc, actual, err] = cmark.to_html(inp) if rc != 0: - errored += 1 - print(description, '[ERRORED (return code %d)]' %rc) + print('[ERRORED (return code %d)]' % rc) print(err) + exit(1) elif regex.search(actual): - print(description, '[PASSED]') - passed += 1 + print('[PASSED]') else: - print(description, '[FAILED]') + print('[FAILED (mismatch)]') print(repr(actual)) - failed += 1 + exit(1) + +print("Testing pathological cases:") +for description in pathological: + (inp, regex) = pathological[description] + print(description, "... 
", end='') + sys.stdout.flush() + + p = multiprocessing.Process(target=run_test, args=(inp, regex)) + p.start() + p.join(TIMEOUT) + + if p.is_alive(): + p.terminate() + p.join() + print('[TIMED OUT]') + errored += 1 + elif p.exitcode != 0: + errored += 1 + else: + passed += 1 -print("%d passed, %d failed, %d errored" % (passed, failed, errored)) -if (failed == 0 and errored == 0): - exit(0) -else: - exit(1) +print("%d passed, %d errored" % (passed, errored)) +exit(errored) From 384cc9db4cd7a90f59c0751e58eb7b3023d38b85 Mon Sep 17 00:00:00 2001 From: Vicent Marti <tanoku@gmail.com> Date: Fri, 14 Jul 2017 17:55:20 +0200 Subject: [PATCH 073/218] references: Fix pathological quadatric behavior As explained on the previous commit, it is trivial to DoS the CMark parser by generating a document where all the link reference names hash to the same bucket in the hash table. This will cause the lookup process for each reference to take linear time on the amount of references in the document, and with enough link references to lookup, the end result is a pathological O(N^2) that causes medium-sized documents to finish parsing in 5+ minutes. To avoid this issue, we propose the present commit. Based on the fact that all reference lookup/resolution in a Markdown document is always performed as a last step during the parse process, we've reimplemented reference storage as follows: 1. New references are always inserted at the end of a linked list. This is an O(1) operation, and does not check whether an existing (duplicate) reference with the same label already exists in the document. 2. Upon the first call to `cmark_reference_lookup` (when it is expected that no further references will be added to the reference map), the linked list of references is written into a fixed-size array. 3. The fixed size array can then be efficiently sorted in-place in O(n log n). This operation only happens once. 
We perform this sort in a _stable_ manner to ensure that the earliest link reference in the document always has preference, as the spec dictates. To accomplish this, every reference is tagged with a generation number when initially inserted in the linked list. 4. The sorted array is then compacted in O(n). Since it was sorted in a stable way, the first reference for each label is preserved and the duplicates are removed, matching the spec. 5. We can now simply perform a binary search for the current `cmark_reference_lookup` query in O(log n). Any further lookup calls will also be O(log n), since the sorted references table only needs to be generated once. The resulting implementation is notably simple (as it uses standard library builtins `qsort` and `bsearch`), whilst performing better than the fixed size hash table in documents that have a high number of references and never becoming pathological regardless of the input. --- src/references.c | 108 ++++++++++++++++++++++++++--------------------- src/references.h | 8 ++-- 2 files changed, 63 insertions(+), 53 deletions(-) diff --git a/src/references.c b/src/references.c index 89f2dc8cb..941333949 100644 --- a/src/references.c +++ b/src/references.c @@ -5,15 +5,6 @@ #include "inlines.h" #include "chunk.h" -static unsigned int refhash(const unsigned char *link_ref) { - unsigned int hash = 0; - - while (*link_ref) - hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash; - - return hash; -} - static void reference_free(cmark_reference_map *map, cmark_reference *ref) { cmark_mem *mem = map->mem; if (ref != NULL) { @@ -53,21 +44,6 @@ static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) { return result; } -static void add_reference(cmark_reference_map *map, cmark_reference *ref) { - cmark_reference *t = ref->next = map->table[ref->hash % REFMAP_SIZE]; - - while (t) { - if (t->hash == ref->hash && !strcmp((char *)t->label, (char *)ref->label)) { - reference_free(map, ref); - return; - } - - t = 
t->next; - } - - map->table[ref->hash % REFMAP_SIZE] = ref; -} - void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, cmark_chunk *url, cmark_chunk *title) { cmark_reference *ref; @@ -77,64 +53,98 @@ void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, if (reflabel == NULL) return; + assert(map->sorted == NULL); + ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref)); ref->label = reflabel; - ref->hash = refhash(ref->label); ref->url = cmark_clean_url(map->mem, url); ref->title = cmark_clean_title(map->mem, title); - ref->next = NULL; + ref->age = map->size; + ref->next = map->refs; + + map->refs = ref; + map->size++; +} + +static int +labelcmp(const unsigned char *a, const unsigned char *b) { + return strcmp((const char *)a, (const char *)b); +} + +static int +refcmp(const void *p1, const void *p2) { + cmark_reference *r1 = *(cmark_reference **)p1; + cmark_reference *r2 = *(cmark_reference **)p2; + int res = labelcmp(r1->label, r2->label); + return res ? res : ((int)r1->age - (int)r2->age); +} + +static int +refsearch(const void *label, const void *p2) { + cmark_reference *ref = *(cmark_reference **)p2; + return labelcmp(label, ref->label); +} + +static void sort_references(cmark_reference_map *map) { + unsigned int i = 0, last = 0, size = map->size; + cmark_reference *r = map->refs, **sorted = NULL; + + sorted = map->mem->calloc(size, sizeof(cmark_reference *)); + while (r) { + sorted[i++] = r; + r = r->next; + } + + qsort(sorted, size, sizeof(cmark_reference *), refcmp); + + for (i = 1; i < size; i++) { + if (labelcmp(sorted[i]->label, sorted[last]->label) != 0) + sorted[++last] = sorted[i]; + } - add_reference(map, ref); + map->sorted = sorted; + map->size = last + 1; } // Returns reference if refmap contains a reference with matching // label, otherwise NULL. 
cmark_reference *cmark_reference_lookup(cmark_reference_map *map, cmark_chunk *label) { - cmark_reference *ref = NULL; + cmark_reference **ref = NULL; unsigned char *norm; - unsigned int hash; if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH) return NULL; - if (map == NULL) + if (map == NULL || !map->size) return NULL; norm = normalize_reference(map->mem, label); if (norm == NULL) return NULL; - hash = refhash(norm); - ref = map->table[hash % REFMAP_SIZE]; - - while (ref) { - if (ref->hash == hash && !strcmp((char *)ref->label, (char *)norm)) - break; - ref = ref->next; - } + if (!map->sorted) + sort_references(map); + ref = bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *), refsearch); map->mem->free(norm); - return ref; + return ref ? ref[0] : NULL; } void cmark_reference_map_free(cmark_reference_map *map) { - unsigned int i; + cmark_reference *ref; if (map == NULL) return; - for (i = 0; i < REFMAP_SIZE; ++i) { - cmark_reference *ref = map->table[i]; - cmark_reference *next; - - while (ref) { - next = ref->next; - reference_free(map, ref); - ref = next; - } + ref = map->refs; + while (ref) { + cmark_reference *next = ref->next; + reference_free(map, ref); + ref = next; } + map->mem->free(map->sorted); map->mem->free(map); } diff --git a/src/references.h b/src/references.h index f075bbbd9..0bbbd5f4b 100644 --- a/src/references.h +++ b/src/references.h @@ -8,21 +8,21 @@ extern "C" { #endif -#define REFMAP_SIZE 16 - struct cmark_reference { struct cmark_reference *next; unsigned char *label; cmark_chunk url; cmark_chunk title; - unsigned int hash; + unsigned int age; }; typedef struct cmark_reference cmark_reference; struct cmark_reference_map { cmark_mem *mem; - cmark_reference *table[REFMAP_SIZE]; + cmark_reference *refs; + cmark_reference **sorted; + unsigned int size; }; typedef struct cmark_reference_map cmark_reference_map; From 45ca3006dba605a5c5eb6098b6fc13e8aa662dd9 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <ashe@kivikakk.ee> Date: 
Mon, 17 Jul 2017 11:38:08 +1000 Subject: [PATCH 074/218] Fix pathological test runner on Windows --- test/pathological_tests.py | 63 +++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/test/pathological_tests.py b/test/pathological_tests.py index 6bbef7f98..38df70736 100644 --- a/test/pathological_tests.py +++ b/test/pathological_tests.py @@ -9,16 +9,6 @@ import multiprocessing from cmark import CMark -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Run cmark tests.') - parser.add_argument('--program', dest='program', nargs='?', default=None, - help='program to test') - parser.add_argument('--library-dir', dest='library_dir', nargs='?', - default=None, help='directory containing dynamic library') - args = parser.parse_args(sys.argv[1:]) - -cmark = CMark(prog=args.program, library_dir=args.library_dir) - def hash_collisions(): REFMAP_SIZE = 16 COUNT = 50000 @@ -92,6 +82,14 @@ def badhash(ref): TIMEOUT = 5 def run_test(inp, regex): + parser = argparse.ArgumentParser(description='Run cmark tests.') + parser.add_argument('--program', dest='program', nargs='?', default=None, + help='program to test') + parser.add_argument('--library-dir', dest='library_dir', nargs='?', + default=None, help='directory containing dynamic library') + args = parser.parse_args(sys.argv[1:]) + cmark = CMark(prog=args.program, library_dir=args.library_dir) + [rc, actual, err] = cmark.to_html(inp) if rc != 0: print('[ERRORED (return code %d)]' % rc) @@ -104,25 +102,26 @@ def run_test(inp, regex): print(repr(actual)) exit(1) -print("Testing pathological cases:") -for description in pathological: - (inp, regex) = pathological[description] - print(description, "... 
", end='') - sys.stdout.flush() - - p = multiprocessing.Process(target=run_test, args=(inp, regex)) - p.start() - p.join(TIMEOUT) - - if p.is_alive(): - p.terminate() - p.join() - print('[TIMED OUT]') - errored += 1 - elif p.exitcode != 0: - errored += 1 - else: - passed += 1 - -print("%d passed, %d errored" % (passed, errored)) -exit(errored) +if __name__ == '__main__': + print("Testing pathological cases:") + for description in pathological: + (inp, regex) = pathological[description] + print(description, "... ", end='') + sys.stdout.flush() + + p = multiprocessing.Process(target=run_test, args=(inp, regex)) + p.start() + p.join(TIMEOUT) + + if p.is_alive(): + p.terminate() + p.join() + print('[TIMED OUT]') + errored += 1 + elif p.exitcode != 0: + errored += 1 + else: + passed += 1 + + print("%d passed, %d errored" % (passed, errored)) + exit(errored) From 62166fe3b6b07068ed4c4207113e3c4b060ad4a8 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <ashe@kivikakk.ee> Date: Mon, 17 Jul 2017 11:39:09 +1000 Subject: [PATCH 075/218] Add casts for MSVC10 --- src/references.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/references.c b/src/references.c index 941333949..1648e9b24 100644 --- a/src/references.c +++ b/src/references.c @@ -82,14 +82,14 @@ refcmp(const void *p1, const void *p2) { static int refsearch(const void *label, const void *p2) { cmark_reference *ref = *(cmark_reference **)p2; - return labelcmp(label, ref->label); + return labelcmp((const unsigned char *)label, ref->label); } static void sort_references(cmark_reference_map *map) { unsigned int i = 0, last = 0, size = map->size; cmark_reference *r = map->refs, **sorted = NULL; - sorted = map->mem->calloc(size, sizeof(cmark_reference *)); + sorted = (cmark_reference **)map->mem->calloc(size, sizeof(cmark_reference *)); while (r) { sorted[i++] = r; r = r->next; @@ -126,7 +126,7 @@ cmark_reference *cmark_reference_lookup(cmark_reference_map *map, if (!map->sorted) sort_references(map); - 
ref = bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *), refsearch); + ref = (cmark_reference **)bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *), refsearch); map->mem->free(norm); return ref ? ref[0] : NULL; } From 2b158e135eb7c391dba8b380e76a485d284a7b66 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <ashe@kivikakk.ee> Date: Wed, 2 Aug 2017 12:23:30 +1000 Subject: [PATCH 076/218] Update to latest spec --- test/spec.txt | 114 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 88 insertions(+), 26 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index 5cceebb6c..babf3c8c9 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -1,7 +1,7 @@ --- title: GitHub Flavored Markdown Spec -version: 0.27 -date: '2017-2-20' +version: 0.28 +date: '2017-08-01' license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' ... @@ -27,10 +27,12 @@ GFM is converted to HTML to ensure security and consistency of the website. ## What is Markdown? Markdown is a plain text format for writing structured documents, -based on conventions used for indicating formatting in email and -usenet posts. It was developed in 2004 by John Gruber, who wrote -the first Markdown-to-HTML converter in Perl, and it soon became -ubiquitous. In the next decade, dozens of implementations were +based on conventions for indicating formatting in email +and usenet posts. It was developed by John Gruber (with +help from Aaron Swartz) and released in 2004 in the form of a +[syntax description](http://daringfireball.net/projects/markdown/syntax) +and a Perl script (`Markdown.pl`) for converting Markdown to +HTML. In the next decade, dozens of implementations were developed in many languages. Some extended the original Markdown syntax with conventions for footnotes, tables, and other document elements. Some allowed Markdown documents to be @@ -328,7 +330,7 @@ form feed (`U+000C`), or carriage return (`U+000D`). characters]. 
A [Unicode whitespace character](@) is -any code point in the Unicode `Zs` class, or a tab (`U+0009`), +any code point in the Unicode `Zs` general category, or a tab (`U+0009`), carriage return (`U+000D`), newline (`U+000A`), or form feed (`U+000C`). @@ -347,7 +349,7 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, A [punctuation character](@) is an [ASCII punctuation character] or anything in -the Unicode classes `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. +the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. ## Tabs @@ -418,7 +420,7 @@ as indentation with four spaces would: Normally the `>` that begins a block quote may be followed optionally by a space, which is not considered part of the content. In the following case `>` is followed by a tab, -which is treated as if it were expanded into spaces. +which is treated as if it were expanded into three spaces. Since one of these spaces is considered part of the delimiter, `foo` is considered to be indented six spaces inside the block quote context, so we get an indented @@ -497,7 +499,7 @@ We can think of a document as a sequence of quotations, lists, headings, rules, and code blocks. Some blocks (like block quotes and list items) contain other blocks; others (like headings and paragraphs) contain [inline](@) content---text, -links, emphasized text, images, code, and so on. +links, emphasized text, images, code spans, and so on. ## Precedence @@ -1659,6 +1661,15 @@ With tildes: </code></pre> ```````````````````````````````` +Fewer than three backticks is not enough: + +```````````````````````````````` example +`` +foo +`` +. +<p><code>foo</code></p> +```````````````````````````````` The closing code fence must use the same character as the opening fence: @@ -2047,6 +2058,37 @@ or [closing tag] (with any [tag name] other than `script`, or the end of the line.\ **End condition:** line is followed by a [blank line]. 
+HTML blocks continue until they are closed by their appropriate +[end condition], or the last line of the document or other [container block]. +This means any HTML **within an HTML block** that might otherwise be recognised +as a start condition will be ignored by the parser and passed through as-is, +without changing the parser's state. + +For instance, `<pre>` within a HTML block started by `<table>` will not affect +the parser state; as the HTML block was started in by start condition 6, it +will end at any blank line. This can be surprising: + +```````````````````````````````` example +<table><tr><td> +<pre> +**Hello**, + +_world_. +</pre> +</td></tr></table> +. +<table><tr><td> +<pre> +**Hello**, +<p><em>world</em>. +</pre></p> +</td></tr></table> +```````````````````````````````` + +In this case, the HTML block is terminated by the newline — the `**hello**` +text remains verbatim — and regular parsing resumes, with a paragraph, +emphasised `world` and inline and block HTML following. + All types of [HTML blocks] except type 7 may interrupt a paragraph. Blocks of type 7 may not interrupt a paragraph. (This restriction is intended to prevent unwanted interpretation @@ -3833,11 +3875,15 @@ The following rules define [list items]: If the list item is ordered, then it is also assigned a start number, based on the ordered list marker. - Exceptions: When the first list item in a [list] interrupts + Exceptions: + + 1. When the first list item in a [list] interrupts a paragraph---that is, when it starts on a line that would otherwise count as [paragraph continuation text]---then (a) the lines *Ls* must not begin with a blank line, and (b) if the list item is ordered, the start number must be 1. + 2. If any line is a [thematic break][thematic breaks] then + that line is not a list item. 
For example, let *Ls* be the lines @@ -6049,6 +6095,15 @@ we just have literal backticks: <p>`foo</p> ```````````````````````````````` +The following case also illustrates the need for opening and +closing backtick strings to be equal in length: + +```````````````````````````````` example +`foo``bar`` +. +<p>`foo<code>bar</code></p> +```````````````````````````````` + ## Emphasis and strong emphasis @@ -6098,19 +6153,20 @@ for efficient parsing strategies that do not backtrack. First, some definitions. A [delimiter run](@) is either a sequence of one or more `*` characters that is not preceded or -followed by a `*` character, or a sequence of one or more `_` -characters that is not preceded or followed by a `_` character. +followed by a non-backslash-escaped `*` character, or a sequence +of one or more `_` characters that is not preceded or followed by +a non-backslash-escaped `_` character. A [left-flanking delimiter run](@) is a [delimiter run] that is (a) not followed by [Unicode whitespace], -and (b) either not followed by a [punctuation character], or +and (b) not followed by a [punctuation character], or preceded by [Unicode whitespace] or a [punctuation character]. For purposes of this definition, the beginning and the end of the line count as Unicode whitespace. A [right-flanking delimiter run](@) is a [delimiter run] that is (a) not preceded by [Unicode whitespace], -and (b) either not preceded by a [punctuation character], or +and (b) not preceded by a [punctuation character], or followed by [Unicode whitespace] or a [punctuation character]. For purposes of this definition, the beginning and the end of the line count as Unicode whitespace. @@ -6189,7 +6245,7 @@ The following rules define emphasis and strong emphasis: 7. A double `**` [can close strong emphasis](@) iff it is part of a [right-flanking delimiter run]. -8. A double `__` [can close strong emphasis] +8. 
A double `__` [can close strong emphasis] iff it is part of a [right-flanking delimiter run] and either (a) not part of a [left-flanking delimiter run] or (b) part of a [left-flanking delimiter run] @@ -6230,7 +6286,7 @@ the following principles resolve ambiguity: `<em><em>...</em></em>`. 14. An interpretation `<em><strong>...</strong></em>` is always - preferred to `<strong><em>..</em></strong>`. + preferred to `<strong><em>...</em></strong>`. 15. When two potential emphasis or strong emphasis spans overlap, so that the second begins before the first ends and ends after @@ -7438,7 +7494,9 @@ A [link destination](@) consists of either - a nonempty sequence of characters that does not include ASCII space or control characters, and includes parentheses only if (a) they are backslash-escaped or (b) they are part of - a balanced pair of unescaped parentheses. + a balanced pair of unescaped parentheses. (Implementations + may impose limits on parentheses nesting to avoid performance + issues, but at least three levels of nesting should be supported.) A [link title](@) consists of either @@ -7544,7 +7602,7 @@ Parentheses inside the link destination may be escaped: <p><a href="(foo)">link</a></p> ```````````````````````````````` -Any number parentheses are allowed without escaping, as long as they are +Any number of parentheses are allowed without escaping, as long as they are balanced: ```````````````````````````````` example @@ -7850,13 +7908,16 @@ that [matches] a [link reference definition] elsewhere in the document. A [link label](@) begins with a left bracket (`[`) and ends with the first right bracket (`]`) that is not backslash-escaped. Between these brackets there must be at least one [non-whitespace character]. -Unescaped square bracket characters are not allowed in -[link labels]. A link label can have at most 999 -characters inside the square brackets. 
+Unescaped square bracket characters are not allowed inside the +opening and closing square brackets of [link labels]. A link +label can have at most 999 characters inside the square +brackets. One label [matches](@) another just in case their normalized forms are equal. To normalize a -label, perform the *Unicode case fold* and collapse consecutive internal +label, strip off the opening and closing brackets, +perform the *Unicode case fold*, strip leading and trailing +[whitespace] and collapse consecutive internal [whitespace] to a single space. If there are multiple matching reference link definitions, the one that comes first in the document is used. (It is desirable in such cases to emit a warning.) @@ -8609,11 +8670,11 @@ The link labels are case-insensitive: ```````````````````````````````` -If you just want bracketed text, you can backslash-escape the -opening `!` and `[`: +If you just want a literal `!` followed by bracketed text, you can +backslash-escape the opening `[`: ```````````````````````````````` example -\!\[foo] +!\[foo] [foo]: /url "title" . @@ -9840,3 +9901,4 @@ closers: After we're done, we remove all delimiters above `stack_bottom` from the delimiter stack. 
+ From 2731cb080b770531d043251c45311d637955a485 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <ashe@kivikakk.ee> Date: Mon, 7 Aug 2017 10:38:27 +1000 Subject: [PATCH 077/218] Add the idempotent core_extensions_ensure_registered --- extensions/core-extensions.c | 12 +++++++++++- extensions/core-extensions.h | 2 +- src/main.c | 2 +- test/cmark.py | 7 ++----- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 49bd8d42e..7bb9e1296 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -3,8 +3,9 @@ #include "strikethrough.h" #include "table.h" #include "tagfilter.h" +#include "registry.h" -int core_extensions_registration(cmark_plugin *plugin) { +static int core_extensions_registration(cmark_plugin *plugin) { cmark_plugin_register_syntax_extension(plugin, create_table_extension()); cmark_plugin_register_syntax_extension(plugin, create_strikethrough_extension()); @@ -12,3 +13,12 @@ int core_extensions_registration(cmark_plugin *plugin) { cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension()); return 1; } + +void core_extensions_ensure_registered(void) { + static int registered = 0; + + if (!registered) { + cmark_register_plugin(core_extensions_registration); + registered = 1; + } +} diff --git a/extensions/core-extensions.h b/extensions/core-extensions.h index 45f199443..aca255c08 100644 --- a/extensions/core-extensions.h +++ b/extensions/core-extensions.h @@ -9,7 +9,7 @@ extern "C" { #include "cmarkextensions_export.h" CMARKEXTENSIONS_EXPORT -int core_extensions_registration(cmark_plugin *plugin); +void core_extensions_ensure_registered(void); CMARKEXTENSIONS_EXPORT uint16_t cmarkextensions_get_table_columns(cmark_node *node); diff --git a/src/main.c b/src/main.c index 27424cf8c..5f075027a 100644 --- a/src/main.c +++ b/src/main.c @@ -110,7 +110,7 @@ int main(int argc, char *argv[]) { int options = CMARK_OPT_DEFAULT; int res = 1; - 
cmark_register_plugin(core_extensions_registration); + core_extensions_ensure_registered(); #if defined(_WIN32) && !defined(__CYGWIN__) _setmode(_fileno(stdin), _O_BINARY); diff --git a/test/cmark.py b/test/cmark.py index 2c382ed13..44d0dc8bf 100644 --- a/test/cmark.py +++ b/test/cmark.py @@ -12,10 +12,7 @@ def pipe_through_prog(prog, text): return [p1.returncode, result.decode('utf-8'), err] def parse(lib, extlib, text, extensions): - register_plugin = lib.cmark_register_plugin - register_plugin.argtypes = [c_void_p] - - core_extensions_registration = extlib.core_extensions_registration + core_extensions_ensure_registered = extlib.core_extensions_ensure_registered find_syntax_extension = lib.cmark_find_syntax_extension find_syntax_extension.restype = c_void_p @@ -35,7 +32,7 @@ def parse(lib, extlib, text, extensions): parser_finish.restype = c_void_p parser_finish.argtypes = [c_void_p] - register_plugin(core_extensions_registration) + core_extensions_ensure_registered() parser = parser_new(0) for e in set(extensions): From b9459ef1100902fbf2557601975e69cd79c60286 Mon Sep 17 00:00:00 2001 From: Ashe <kivikakk@github.com> Date: Wed, 9 Aug 2017 15:48:15 +1000 Subject: [PATCH 078/218] Inline sourcepos (#53) * Start working on sourcepos for inlines. * Add a sourcepos test case * Fix up some offsets * Record end_column * Fix a test sourcepos Just need to work out how to correctly increment line numbers over breaks now * Handle multi-line paras * Refactor * Another fix and another failing test * maintain separate block offset * Correct link offset calculations * Add image to test * Break some more tests! * Fix code length calculation * Consolidate end_column in text nodes * WIP! * Complete test for extensions * Improve test harness output * Strikethrough sourcepos * Start on table sourcepos * Table sourcepos WIP * Finish up table sourcepos * Fix up emphasis offset calc * Reference link sourcepos is the ref itself This is fine! 
* Add casts for MSVC * Use the more portable remove(3) * Windows :( * Link against dynamic exts in api_test * Include extensions DLL in PATH --- api_test/CMakeLists.txt | 3 +- api_test/harness.c | 27 ++++++ api_test/main.c | 182 ++++++++++++++++++++++++++++++++++++- extensions/autolink.c | 8 ++ extensions/strikethrough.c | 3 + extensions/table.c | 61 ++++++++----- src/cmark_extension_api.h | 6 ++ src/inlines.c | 127 ++++++++++++++++---------- src/iterator.c | 1 + test/CMakeLists.txt | 5 +- 10 files changed, 348 insertions(+), 75 deletions(-) diff --git a/api_test/CMakeLists.txt b/api_test/CMakeLists.txt index 039ce9294..5fe573db3 100644 --- a/api_test/CMakeLists.txt +++ b/api_test/CMakeLists.txt @@ -7,8 +7,9 @@ add_executable(api_test include_directories( ${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src + ${PROJECT_BINARY_DIR}/extensions ) -target_link_libraries(api_test libcmark-gfm) +target_link_libraries(api_test libcmark-gfmextensions libcmark-gfm) # Compiler flags if(MSVC) diff --git a/api_test/harness.c b/api_test/harness.c index f6fd0bc96..702dc9098 100644 --- a/api_test/harness.c +++ b/api_test/harness.c @@ -50,6 +50,21 @@ void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg, } } +#ifndef _WIN32 +#include <unistd.h> + +static char *write_tmp(char const *header, char const *data) { + char *name = strdup("/tmp/fileXXXXXX"); + int fd = mkstemp(name); + FILE *f = fdopen(fd, "w+"); + fputs(header, f); + fwrite(data, 1, strlen(data), f); + fclose(f); + return name; +} + +#endif + void STR_EQ(test_batch_runner *runner, const char *got, const char *expected, const char *msg, ...) 
{ int cond = strcmp(got, expected) == 0; @@ -60,8 +75,20 @@ void STR_EQ(test_batch_runner *runner, const char *got, const char *expected, va_end(ap); if (!cond) { +#ifndef _WIN32 + char *got_fn = write_tmp("actual\n", got); + char *expected_fn = write_tmp("expected\n", expected); + char buf[1024]; + snprintf(buf, sizeof(buf), "git diff --no-index %s %s", expected_fn, got_fn); + system(buf); + remove(got_fn); + remove(expected_fn); + free(got_fn); + free(expected_fn); +#else fprintf(stderr, " Got: \"%s\"\n", got); fprintf(stderr, " Expected: \"%s\"\n", expected); +#endif } } diff --git a/api_test/main.c b/api_test/main.c index c64ffb33c..c10601a9b 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -5,6 +5,7 @@ #define CMARK_NO_SHORT_NAMES #include "cmark.h" #include "node.h" +#include "../extensions/core-extensions.h" #include "harness.h" #include "cplusplus.h" @@ -551,9 +552,9 @@ static void render_xml(test_batch_runner *runner) { STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" "<paragraph sourcepos=\"1:1-1:9\">\n" - " <text>foo </text>\n" - " <emph>\n" - " <text>bar</text>\n" + " <text sourcepos=\"1:1-1:4\">foo </text>\n" + " <emph sourcepos=\"1:5-1:9\">\n" + " <text sourcepos=\"1:6-1:8\">bar</text>\n" " </emph>\n" "</paragraph>\n", "render first paragraph with source pos"); @@ -933,6 +934,178 @@ static void test_feed_across_line_ending(test_batch_runner *runner) { cmark_node_free(document); } +static void source_pos(test_batch_runner *runner) { + static const char markdown[] = + "Hi *there*.\n" + "\n" + "Hello &ldquo; <http://www.google.com>\n" + "there `hi` -- [okay](www.google.com (ok)).\n" + "\n" + "> 1. Okay.\n" + "> Sure.\n" + ">\n" + "> 2. 
Yes, okay.\n" + "> ![ok](hi \"yes\")\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" + "<document sourcepos=\"1:1-10:20\" xmlns=\"http://commonmark.org/xml/1.0\">\n" + " <paragraph sourcepos=\"1:1-1:11\">\n" + " <text sourcepos=\"1:1-1:3\">Hi </text>\n" + " <emph sourcepos=\"1:4-1:10\">\n" + " <text sourcepos=\"1:5-1:9\">there</text>\n" + " </emph>\n" + " <text sourcepos=\"1:11-1:11\">.</text>\n" + " </paragraph>\n" + " <paragraph sourcepos=\"3:1-4:42\">\n" + " <text sourcepos=\"3:1-3:14\">Hello “ </text>\n" + " <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n" + " <text sourcepos=\"3:16-3:36\">http://www.google.com</text>\n" + " </link>\n" + " <softbreak />\n" + " <text sourcepos=\"4:1-4:6\">there </text>\n" + " <code sourcepos=\"4:8-4:9\">hi</code>\n" + " <text sourcepos=\"4:11-4:14\"> -- </text>\n" + " <link sourcepos=\"4:15-4:41\" destination=\"www.google.com\" title=\"ok\">\n" + " <text sourcepos=\"4:16-4:19\">okay</text>\n" + " </link>\n" + " <text sourcepos=\"4:42-4:42\">.</text>\n" + " </paragraph>\n" + " <block_quote sourcepos=\"6:1-10:20\">\n" + " <list sourcepos=\"6:3-10:20\" type=\"ordered\" start=\"1\" delim=\"period\" tight=\"false\">\n" + " <item sourcepos=\"6:3-8:1\">\n" + " <paragraph sourcepos=\"6:6-7:10\">\n" + " <text sourcepos=\"6:6-6:10\">Okay.</text>\n" + " <softbreak />\n" + " <text sourcepos=\"7:6-7:10\">Sure.</text>\n" + " </paragraph>\n" + " </item>\n" + " <item sourcepos=\"9:3-10:20\">\n" + " <paragraph sourcepos=\"9:6-10:20\">\n" + " <text sourcepos=\"9:6-9:15\">Yes, okay.</text>\n" + " <softbreak />\n" + " <image sourcepos=\"10:6-10:20\" destination=\"hi\" title=\"yes\">\n" + " <text sourcepos=\"10:8-10:9\">ok</text>\n" + " </image>\n" + " </paragraph>\n" + 
" </item>\n" + " </list>\n" + " </block_quote>\n" + "</document>\n", + "sourcepos are as expected"); + free(xml); + cmark_node_free(doc); +} + +static void ext_source_pos(test_batch_runner *runner) { + static const char *extensions[3] = { + "strikethrough", + "table", + "autolink", + }; + + static const char markdown[] = + "Hi ~~friend~~.\n" + "\n" + "> www.github.com\n" + "\n" + "1. | a | b | *c* |\n" + " | - | - | --: |\n" + " | 1 | 2 | ~3~ |\n"; + + cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); + core_extensions_ensure_registered(); + + for (int i = 0; i < (int)(sizeof(extensions) / sizeof(*extensions)); ++i) { + cmark_syntax_extension *ext = cmark_find_syntax_extension(extensions[i]); + cmark_parser_attach_syntax_extension(parser, ext); + } + + cmark_parser_feed(parser, markdown, sizeof(markdown) - 1); + + cmark_node *doc = cmark_parser_finish(parser); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" + "<document sourcepos=\"1:1-7:18\" xmlns=\"http://commonmark.org/xml/1.0\">\n" + " <paragraph sourcepos=\"1:1-1:14\">\n" + " <text sourcepos=\"1:1-1:3\">Hi </text>\n" + " <strikethrough sourcepos=\"1:4-1:13\">\n" + " <text sourcepos=\"1:6-1:11\">friend</text>\n" + " </strikethrough>\n" + " <text sourcepos=\"1:14-1:14\">.</text>\n" + " </paragraph>\n" + " <block_quote sourcepos=\"3:1-3:16\">\n" + " <paragraph sourcepos=\"3:3-3:16\">\n" + " <link sourcepos=\"3:2-3:16\" destination=\"http://www.github.com\" title=\"\">\n" + " <text sourcepos=\"3:2-3:16\">www.github.com</text>\n" + " </link>\n" + " </paragraph>\n" + " </block_quote>\n" + " <list sourcepos=\"5:1-7:18\" type=\"ordered\" start=\"1\" delim=\"period\" tight=\"true\">\n" + " <item sourcepos=\"5:1-7:18\">\n" + " <table sourcepos=\"5:4-7:18\">\n" + " <table_header sourcepos=\"5:4-5:18\">\n" + " <table_cell sourcepos=\"5:6-5:7\">\n" + " <text 
sourcepos=\"5:6-5:6\">a</text>\n" + " </table_cell>\n" + " <table_cell sourcepos=\"5:10-5:11\">\n" + " <text sourcepos=\"5:10-5:10\">b</text>\n" + " </table_cell>\n" + " <table_cell sourcepos=\"5:14-5:17\">\n" + " <emph sourcepos=\"5:14-5:16\">\n" + " <text sourcepos=\"5:15-5:15\">c</text>\n" + " </emph>\n" + " </table_cell>\n" + " </table_header>\n" + " <table_row sourcepos=\"7:4-7:18\">\n" + " <table_cell sourcepos=\"7:6-7:7\">\n" + " <text sourcepos=\"7:6-7:6\">1</text>\n" + " </table_cell>\n" + " <table_cell sourcepos=\"7:10-7:11\">\n" + " <text sourcepos=\"7:10-7:10\">2</text>\n" + " </table_cell>\n" + " <table_cell sourcepos=\"7:14-7:17\">\n" + " <strikethrough sourcepos=\"7:14-7:16\">\n" + " <text sourcepos=\"7:15-7:15\">3</text>\n" + " </strikethrough>\n" + " </table_cell>\n" + " </table_row>\n" + " </table>\n" + " </item>\n" + " </list>\n" + "</document>\n", + "sourcepos are as expected"); + free(xml); + cmark_node_free(doc); +} + +static void ref_source_pos(test_batch_runner *runner) { + static const char markdown[] = + "Let's try [reference] links.\n" + "\n" + "[reference]: https://github.com (GitHub)\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" + "<document sourcepos=\"1:1-3:40\" xmlns=\"http://commonmark.org/xml/1.0\">\n" + " <paragraph sourcepos=\"1:1-1:28\">\n" + " <text sourcepos=\"1:1-1:10\">Let's try </text>\n" + " <link sourcepos=\"1:11-1:21\" destination=\"https://github.com\" title=\"GitHub\">\n" + " <text sourcepos=\"1:12-1:20\">reference</text>\n" + " </link>\n" + " <text sourcepos=\"1:22-1:28\"> links.</text>\n" + " </paragraph>\n" + "</document>\n", + "sourcepos are as expected"); + free(xml); + cmark_node_free(doc); +} + int main() { int retval; test_batch_runner *runner = 
test_batch_runner_new(); @@ -959,6 +1132,9 @@ int main() { test_cplusplus(runner); test_safe(runner); test_feed_across_line_ending(runner); + source_pos(runner); + ext_source_pos(runner); + ref_source_pos(runner); test_print_summary(runner); retval = test_ok(runner) ? 0 : 1; diff --git a/extensions/autolink.c b/extensions/autolink.c index 3d2a1853c..b204ccfd7 100644 --- a/extensions/autolink.c +++ b/extensions/autolink.c @@ -149,6 +149,7 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent, size_t max_rewind = cmark_inline_parser_get_offset(inline_parser); uint8_t *data = chunk->data + max_rewind; size_t size = chunk->len - max_rewind; + int start = cmark_inline_parser_get_column(inline_parser); size_t link_end; @@ -187,6 +188,13 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent, cmark_chunk_dup(chunk, (bufsize_t)max_rewind, (bufsize_t)link_end); cmark_node_append_child(node, text); + node->start_line = text->start_line = + node->end_line = text->end_line = + cmark_inline_parser_get_line(inline_parser); + + node->start_column = text->start_column = start - 1; + node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1; + return node; } diff --git a/extensions/strikethrough.c b/extensions/strikethrough.c index 802a6bb7a..3153723d5 100644 --- a/extensions/strikethrough.c +++ b/extensions/strikethrough.c @@ -23,6 +23,8 @@ static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser, res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); cmark_node_set_literal(res, buffer); + res->start_line = res->end_line = cmark_inline_parser_get_line(inline_parser); + res->start_column = cmark_inline_parser_get_column(inline_parser) - delims; if (left_flanking || right_flanking) { cmark_inline_parser_push_delimiter(inline_parser, character, left_flanking, @@ -58,6 +60,7 @@ static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser, tmp = next; } + strikethrough->end_column 
= closer->inl_text->start_column + closer->inl_text->as.literal.len - 1; cmark_node_free(closer->inl_text); delim = closer; diff --git a/extensions/table.c b/extensions/table.c index 27b469765..73de5031d 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -26,9 +26,16 @@ typedef struct { bool is_header; } node_table_row; +typedef struct { + cmark_strbuf *buf; + int start_offset, end_offset; +} node_cell; + static void free_table_cell(cmark_mem *mem, void *data) { - cmark_strbuf_free((cmark_strbuf *)data); - mem->free(data); + node_cell *cell = (node_cell *)data; + cmark_strbuf_free((cmark_strbuf *)cell->buf); + mem->free(cell->buf); + mem->free(cell); } static void free_table_row(cmark_mem *mem, table_row *row) { @@ -105,14 +112,13 @@ static table_row *row_from_string(cmark_syntax_extension *self, cmark_parser *parser, unsigned char *string, int len) { table_row *row = NULL; - bufsize_t cell_matched, pipe_matched, offset = 0; + bufsize_t cell_matched, pipe_matched, offset; row = (table_row *)parser->mem->calloc(1, sizeof(table_row)); row->n_columns = 0; row->cells = NULL; - if (len > 0 && string[0] == '|') - ++offset; + offset = scan_table_cell_end(string, len, 0); do { cell_matched = scan_table_cell(string, len, offset); @@ -122,8 +128,13 @@ static table_row *row_from_string(cmark_syntax_extension *self, cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset, cell_matched); cmark_strbuf_trim(cell_buf); + + node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell)); + cell->buf = cell_buf; + cell->start_offset = offset; + cell->end_offset = offset + cell_matched - 1; row->n_columns += 1; - row->cells = cmark_llist_append(parser->mem, row->cells, cell_buf); + row->cells = cmark_llist_append(parser->mem, row->cells, cell); } offset += cell_matched + pipe_matched; @@ -202,15 +213,14 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table)); - 
set_n_table_columns(parent_container, header_row->n_columns); uint8_t *alignments = (uint8_t *)parser->mem->calloc(header_row->n_columns, sizeof(uint8_t)); cmark_llist *it = marker_row->cells; for (i = 0; it; it = it->next, ++i) { - cmark_strbuf *node = (cmark_strbuf *)it->data; - bool left = node->ptr[0] == ':', right = node->ptr[node->size - 1] == ':'; + node_cell *node = (node_cell *)it->data; + bool left = node->buf->ptr[0] == ':', right = node->buf->ptr[node->buf->size - 1] == ':'; if (left && right) alignments[i] = 'c'; @@ -223,8 +233,10 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, table_header = cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW, - cmark_parser_get_offset(parser)); + parent_container->start_column); cmark_node_set_syntax_extension(table_header, self); + table_header->end_column = parent_container->start_column + (int)strlen(parent_string) - 2; + table_header->start_line = table_header->end_line = parent_container->start_line; table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row)); ntr->is_header = true; @@ -233,10 +245,12 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_llist *tmp; for (tmp = header_row->cells; tmp; tmp = tmp->next) { - cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data; + node_cell *cell = (node_cell *) tmp->data; cmark_node *header_cell = cmark_parser_add_child(parser, table_header, - CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); - cmark_node_set_string_content(header_cell, (char *) cell_buf->ptr); + CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset); + header_cell->start_line = header_cell->end_line = parent_container->start_line; + header_cell->end_column = parent_container->start_column + cell->end_offset; + cmark_node_set_string_content(header_cell, (char *) cell->buf->ptr); cmark_node_set_syntax_extension(header_cell, self); } } @@ -262,9 +276,9 @@ static 
cmark_node *try_opening_table_row(cmark_syntax_extension *self, table_row_block = cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW, - cmark_parser_get_offset(parser)); - + parent_container->start_column); cmark_node_set_syntax_extension(table_row_block, self); + table_row_block->end_column = parent_container->end_column; table_row_block->as.opaque = parser->mem->calloc(1, sizeof(node_table_row)); row = row_from_string(self, parser, input + cmark_parser_get_first_nonspace(parser), @@ -275,17 +289,18 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, int i, table_columns = get_n_table_columns(parent_container); for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = tmp->next, ++i) { - cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data; - cmark_node *cell = cmark_parser_add_child(parser, table_row_block, - CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); - cmark_node_set_string_content(cell, (char *) cell_buf->ptr); - cmark_node_set_syntax_extension(cell, self); + node_cell *cell = (node_cell *) tmp->data; + cmark_node *node = cmark_parser_add_child(parser, table_row_block, + CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset); + node->end_column = parent_container->start_column + cell->end_offset; + cmark_node_set_string_content(node, (char *) cell->buf->ptr); + cmark_node_set_syntax_extension(node, self); } for (; i < table_columns; ++i) { - cmark_node *cell = cmark_parser_add_child( - parser, table_row_block, CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); - cmark_node_set_syntax_extension(cell, self); + cmark_node *node = cmark_parser_add_child( + parser, table_row_block, CMARK_NODE_TABLE_CELL, 0); + cmark_node_set_syntax_extension(node, self); } } diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index 8a273b54e..f999d3338 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -666,6 +666,12 @@ void 
cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter CMARK_EXPORT delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser); +CMARK_EXPORT +int cmark_inline_parser_get_line(cmark_inline_parser *parser); + +CMARK_EXPORT +int cmark_inline_parser_get_column(cmark_inline_parser *parser); + /** Convenience function to scan a given delimiter. * * 'left_flanking' and 'right_flanking' will be set to true if they diff --git a/src/inlines.c b/src/inlines.c index e30c2affe..ab73f59ba 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -23,9 +23,9 @@ static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98"; static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; // Macros for creating various kinds of simple. -#define make_str(mem, s) make_literal(mem, CMARK_NODE_TEXT, s) -#define make_code(mem, s) make_literal(mem, CMARK_NODE_CODE, s) -#define make_raw_html(mem, s) make_literal(mem, CMARK_NODE_HTML_INLINE, s) +#define make_str(subj, sc, ec, s) make_literal(subj, CMARK_NODE_TEXT, sc, ec, s) +#define make_code(subj, sc, ec, s) make_literal(subj, CMARK_NODE_CODE, sc, ec, s) +#define make_raw_html(subj, sc, ec, s) make_literal(subj, CMARK_NODE_HTML_INLINE, sc, ec, s) #define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK) #define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK) #define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH) @@ -46,7 +46,10 @@ typedef struct bracket { typedef struct subject{ cmark_mem *mem; cmark_chunk input; + int line; bufsize_t pos; + int block_offset; + int column_offset; cmark_reference_map *refmap; delimiter *last_delim; bracket *last_bracket; @@ -63,17 +66,22 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options); -static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, - cmark_reference_map *refmap); +static void subject_from_buf(cmark_mem *mem, int line_number, 
int block_offset, subject *e, + cmark_strbuf *buffer, cmark_reference_map *refmap); static bufsize_t subject_find_special_char(subject *subj, int options); // Create an inline with a literal string value. -static CMARK_INLINE cmark_node *make_literal(cmark_mem *mem, cmark_node_type t, +static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t, + int start_column, int end_column, cmark_chunk s) { - cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e)); - cmark_strbuf_init(mem, &e->content, 0); + cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e)); + cmark_strbuf_init(subj->mem, &e->content, 0); e->type = (uint16_t)t; e->as.literal = s; + e->start_line = e->end_line = subj->line; + // columns are 1 based. + e->start_column = start_column + 1 + subj->column_offset + subj->block_offset; + e->end_column = end_column + 1 + subj->column_offset + subj->block_offset; return e; } @@ -86,14 +94,15 @@ static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) { } // Like make_str, but parses entities. 
-static cmark_node *make_str_with_entities(cmark_mem *mem, +static cmark_node *make_str_with_entities(subject *subj, + int start_column, int end_column, cmark_chunk *content) { - cmark_strbuf unescaped = CMARK_BUF_INIT(mem); + cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem); if (houdini_unescape_html(&unescaped, content->data, content->len)) { - return make_str(mem, cmark_chunk_buf_detach(&unescaped)); + return make_str(subj, start_column, end_column, cmark_chunk_buf_detach(&unescaped)); } else { - return make_str(mem, *content); + return make_str(subj, start_column, end_column, *content); } } @@ -131,23 +140,30 @@ static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url, return cmark_chunk_buf_detach(&buf); } -static CMARK_INLINE cmark_node *make_autolink(cmark_mem *mem, cmark_chunk url, - int is_email) { - cmark_node *link = make_simple(mem, CMARK_NODE_LINK); - link->as.link.url = cmark_clean_autolink(mem, &url, is_email); +static CMARK_INLINE cmark_node *make_autolink(subject *subj, + int start_column, int end_column, + cmark_chunk url, int is_email) { + cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK); + link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email); link->as.link.title = cmark_chunk_literal(""); - cmark_node_append_child(link, make_str_with_entities(mem, &url)); + link->start_line = link->end_line = subj->line; + link->start_column = start_column + 1; + link->end_column = end_column + 1; + cmark_node_append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url)); return link; } -static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, - cmark_reference_map *refmap) { +static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e, + cmark_strbuf *buffer, cmark_reference_map *refmap) { int i; e->mem = mem; e->input.data = buffer->ptr; e->input.len = buffer->size; e->input.alloc = 0; + e->line = line_number; e->pos = 0; + e->block_offset 
= block_offset; + e->column_offset = 0; e->refmap = refmap; e->last_delim = NULL; e->last_bracket = NULL; @@ -268,7 +284,7 @@ static cmark_node *handle_backticks(subject *subj) { if (endpos == 0) { // not found subj->pos = startpos; // rewind - return make_str(subj->mem, openticks); + return make_str(subj, subj->pos, subj->pos, openticks); } else { cmark_strbuf buf = CMARK_BUF_INIT(subj->mem); @@ -277,7 +293,7 @@ static cmark_node *handle_backticks(subject *subj) { cmark_strbuf_trim(&buf); cmark_strbuf_normalize_whitespace(&buf); - return make_code(subj->mem, cmark_chunk_buf_detach(&buf)); + return make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf)); } } @@ -434,7 +450,7 @@ static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) { contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims); } - inl_text = make_str(subj->mem, contents); + inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents); if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) { push_delimiter(subj, c, can_open, can_close, inl_text); @@ -450,7 +466,7 @@ static cmark_node *handle_hyphen(subject *subj, bool smart) { advance(subj); if (!smart || peek_char(subj) != '-') { - return make_str(subj->mem, cmark_chunk_literal("-")); + return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-")); } while (smart && peek_char(subj) == '-') { @@ -483,7 +499,7 @@ static cmark_node *handle_hyphen(subject *subj, bool smart) { cmark_strbuf_puts(&buf, ENDASH); } - return make_str(subj->mem, cmark_chunk_buf_detach(&buf)); + return make_str(subj, startpos, subj->pos - 1, cmark_chunk_buf_detach(&buf)); } // Assumes we have a period at the current position. 
@@ -493,12 +509,12 @@ static cmark_node *handle_period(subject *subj, bool smart) { advance(subj); if (peek_char(subj) == '.') { advance(subj); - return make_str(subj->mem, cmark_chunk_literal(ELLIPSES)); + return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES)); } else { - return make_str(subj->mem, cmark_chunk_literal("..")); + return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("..")); } } else { - return make_str(subj->mem, cmark_chunk_literal(".")); + return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal(".")); } } @@ -655,6 +671,10 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, } cmark_node_insert_after(opener_inl, emph); + emph->start_line = emph->end_line = subj->line; + emph->start_column = opener_inl->start_column + subj->column_offset; + emph->end_column = closer_inl->end_column + subj->column_offset; + // if opener has 0 characters, remove it and its associated inline if (opener_num_chars == 0) { cmark_node_free(opener_inl); @@ -681,11 +701,11 @@ static cmark_node *handle_backslash(cmark_parser *parser, subject *subj) { if ((parser->backslash_ispunct ? 
parser->backslash_ispunct : cmark_ispunct)(nextchar)) { // only ascii symbols and newline can be escaped advance(subj); - return make_str(subj->mem, cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); + return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); } else if (!is_eof(subj) && skip_line_end(subj)) { return make_linebreak(subj->mem); } else { - return make_str(subj->mem, cmark_chunk_literal("\\")); + return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\")); } } @@ -701,10 +721,10 @@ static cmark_node *handle_entity(subject *subj) { subj->input.len - subj->pos); if (len == 0) - return make_str(subj->mem, cmark_chunk_literal("&")); + return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&")); subj->pos += len; - return make_str(subj->mem, cmark_chunk_buf_detach(&ent)); + return make_str(subj, subj->pos - 1 - len, subj->pos - 1, cmark_chunk_buf_detach(&ent)); } // Clean a URL: remove surrounding whitespace and surrounding <>, @@ -767,7 +787,7 @@ static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) { contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - return make_autolink(subj->mem, contents, 0); + return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0); } // next try to match an email autolink @@ -776,7 +796,7 @@ static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) { contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - return make_autolink(subj->mem, contents, 1); + return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1); } // finally, try to match an html tag @@ -784,7 +804,7 @@ static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) { if (matchlen > 0) { contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); subj->pos += matchlen; - return make_raw_html(subj->mem, 
contents); + return make_raw_html(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents); } if (liberal_html_tag) { @@ -792,12 +812,12 @@ static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) { if (matchlen > 0) { contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); subj->pos += matchlen; - return make_raw_html(subj->mem, contents); + return make_raw_html(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents); } } // if nothing matches, just return the opening <: - return make_str(subj->mem, cmark_chunk_literal("<")); + return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<")); } // Parse a link label. Returns 1 if successful. @@ -912,13 +932,13 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { opener = subj->last_bracket; if (opener == NULL) { - return make_str(subj->mem, cmark_chunk_literal("]")); + return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]")); } if (!opener->active) { // take delimiter off stack pop_bracket(subj); - return make_str(subj->mem, cmark_chunk_literal("]")); + return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]")); } // If we got here, we matched a potential link/image text. @@ -996,12 +1016,15 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { // If we fall through to here, it means we didn't match a link: pop_bracket(subj); // remove this opener from delimiter list subj->pos = initial_pos; - return make_str(subj->mem, cmark_chunk_literal("]")); + return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]")); match: inl = make_simple(subj->mem, is_image ? 
CMARK_NODE_IMAGE : CMARK_NODE_LINK); inl->as.link.url = url; inl->as.link.title = title; + inl->start_line = inl->end_line = subj->line; + inl->start_column = opener->inl_text->start_column; + inl->end_column = subj->pos + subj->column_offset + subj->block_offset; cmark_node_insert_before(opener->inl_text, inl); // Add link text: tmp = opener->inl_text->next; @@ -1048,6 +1071,8 @@ static cmark_node *handle_newline(subject *subj) { if (peek_at(subj, subj->pos) == '\n') { advance(subj); } + ++subj->line; + subj->column_offset = -subj->pos; // skip spaces at beginning of line skip_spaces(subj); if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' && @@ -1133,7 +1158,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, cmark_node *new_inl = NULL; cmark_chunk contents; unsigned char c; - bufsize_t endpos; + bufsize_t startpos, endpos; c = peek_char(subj); if (c == 0) { return 0; @@ -1169,7 +1194,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, break; case '[': advance(subj); - new_inl = make_str(subj->mem, cmark_chunk_literal("[")); + new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("[")); push_bracket(subj, false, new_inl); break; case ']': @@ -1179,10 +1204,10 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, advance(subj); if (peek_char(subj) == '[') { advance(subj); - new_inl = make_str(subj->mem, cmark_chunk_literal("![")); + new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("![")); push_bracket(subj, true, new_inl); } else { - new_inl = make_str(subj->mem, cmark_chunk_literal("!")); + new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!")); } break; default: @@ -1192,6 +1217,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, endpos = subject_find_special_char(subj, options); contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); + startpos 
= subj->pos; subj->pos = endpos; // if we're at a newline, strip trailing spaces. @@ -1199,7 +1225,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, cmark_chunk_rtrim(&contents); } - new_inl = make_str(subj->mem, contents); + new_inl = make_str(subj, startpos, endpos - 1, contents); } if (new_inl != NULL) { cmark_node_append_child(parent, new_inl); @@ -1214,7 +1240,7 @@ void cmark_parse_inlines(cmark_parser *parser, cmark_reference_map *refmap, int options) { subject subj; - subject_from_buf(parser->mem, &subj, &parent->content, refmap); + subject_from_buf(parser->mem, parent->start_line, parent->start_column - 1, &subj, &parent->content, refmap); cmark_chunk_rtrim(&subj.input); while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options)) @@ -1253,7 +1279,8 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, bufsize_t matchlen = 0; bufsize_t beforetitle; - subject_from_buf(mem, &subj, input, NULL); + // TODO XXX + subject_from_buf(mem, -1, 0, &subj, input, NULL); // parse label: if (!link_label(&subj, &lab) || lab.len == 0) @@ -1425,6 +1452,10 @@ void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset) { parser->pos = offset; } +int cmark_inline_parser_get_column(cmark_inline_parser *parser) { + return parser->pos + 1 + parser->column_offset + parser->block_offset; +} + cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser) { return &parser->input; } @@ -1453,3 +1484,7 @@ void cmark_node_unput(cmark_node *node, int n) { delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser) { return parser->last_delim; } + +int cmark_inline_parser_get_line(cmark_inline_parser *parser) { + return parser->line; +} diff --git a/src/iterator.c b/src/iterator.c index 149a445e1..5557dff31 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -111,6 +111,7 @@ void cmark_consolidate_text_nodes(cmark_node *root) { while (tmp && tmp->type == CMARK_NODE_TEXT) { 
cmark_iter_next(iter); // advance pointer cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len); + cur->end_column = tmp->end_column; next = tmp->next; cmark_node_free(tmp); tmp = next; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 133d5cdc3..74347a62e 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,8 +14,9 @@ if (CMARK_SHARED) endif() if (WIN32) - file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/src WIN_DLL_DIR) - set(NEWPATH "${WIN_DLL_DIR};$ENV{PATH}") + file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/src WIN_SRC_DLL_DIR) + file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/extensions WIN_EXTENSIONS_DLL_DIR) + set(NEWPATH "${WIN_SRC_DLL_DIR};${WIN_EXTENSIONS_DLL_DIR};$ENV{PATH}") string(REPLACE ";" "\\;" NEWPATH "${NEWPATH}") set_tests_properties(api_test PROPERTIES ENVIRONMENT "PATH=${NEWPATH}") set(ROUNDTRIP "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.bat") From 7cf993f4b0d3ec87e1c28c5cbeef156e47a20c3c Mon Sep 17 00:00:00 2001 From: Yuki Izumi <ashe@kivikakk.ee> Date: Wed, 9 Aug 2017 16:07:28 +1000 Subject: [PATCH 079/218] Remove unneeded TODO --- src/inlines.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/inlines.c b/src/inlines.c index ab73f59ba..4faf873b1 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1279,7 +1279,6 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, bufsize_t matchlen = 0; bufsize_t beforetitle; - // TODO XXX subject_from_buf(mem, -1, 0, &subj, input, NULL); // parse label: From 8595dda9aff18a8dca239f64b47aa3a3feb83e6f Mon Sep 17 00:00:00 2001 From: Yuki Izumi <ashe@kivikakk.ee> Date: Wed, 9 Aug 2017 16:07:42 +1000 Subject: [PATCH 080/218] 0.28.0.gfm.5 --- CMakeLists.txt | 6 +++--- changelog.txt | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e16caa92c..2c39e3732 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,9 +17,9 @@ endif() set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) -set(PROJECT_VERSION_MINOR 27) 
-set(PROJECT_VERSION_PATCH 1) -set(PROJECT_VERSION_GFM 4) +set(PROJECT_VERSION_MINOR 28) +set(PROJECT_VERSION_PATCH 0) +set(PROJECT_VERSION_GFM 5) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 512602ae4..4a26aa691 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,11 @@ +[0.28.0.gfm.5] + + * Latest spec. + * Fix a typo in the spec (John Gardner). + * Fix quadratic behavior in reference lookups. + * Add `core_extensions_ensure_registered`. + * Add sourcepos information for inlines. + [0.27.1.gfm.4] * Fix regression with nested parentheses in link targets (#48). From fd7d177fcf7dbb65faaec36662b93542fa4512e7 Mon Sep 17 00:00:00 2001 From: Ashe <kivikakk@github.com> Date: Thu, 10 Aug 2017 13:52:20 +1000 Subject: [PATCH 081/218] Sourcepos fixes (#54) * Emphasis sourcepos broken when in block * internal_offset for blocks containing inlines * Really awkward newline handling * Count newlines in every inline that could contain * Document * Skip sourcepos tracking if disabled --- api_test/main.c | 26 +++++++++---------- extensions/table.c | 8 +++++- src/blocks.c | 1 + src/inlines.c | 65 +++++++++++++++++++++++++++++++++++++++------- src/node.h | 1 + 5 files changed, 78 insertions(+), 23 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index c10601a9b..057ee51ce 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -936,7 +936,7 @@ static void test_feed_across_line_ending(test_batch_runner *runner) { static void source_pos(test_batch_runner *runner) { static const char markdown[] = - "Hi *there*.\n" + "# Hi *there*.\n" "\n" "Hello &ldquo; <http://www.google.com>\n" "there `hi` -- [okay](www.google.com (ok)).\n" @@ -952,13 +952,13 @@ static void source_pos(test_batch_runner *runner) { STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" "<!DOCTYPE document 
SYSTEM \"CommonMark.dtd\">\n" "<document sourcepos=\"1:1-10:20\" xmlns=\"http://commonmark.org/xml/1.0\">\n" - " <paragraph sourcepos=\"1:1-1:11\">\n" - " <text sourcepos=\"1:1-1:3\">Hi </text>\n" - " <emph sourcepos=\"1:4-1:10\">\n" - " <text sourcepos=\"1:5-1:9\">there</text>\n" + " <heading sourcepos=\"1:1-1:13\" level=\"1\">\n" + " <text sourcepos=\"1:3-1:5\">Hi </text>\n" + " <emph sourcepos=\"1:6-1:12\">\n" + " <text sourcepos=\"1:7-1:11\">there</text>\n" " </emph>\n" - " <text sourcepos=\"1:11-1:11\">.</text>\n" - " </paragraph>\n" + " <text sourcepos=\"1:13-1:13\">.</text>\n" + " </heading>\n" " <paragraph sourcepos=\"3:1-4:42\">\n" " <text sourcepos=\"3:1-3:14\">Hello “ </text>\n" " <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n" @@ -1048,26 +1048,26 @@ static void ext_source_pos(test_batch_runner *runner) { " <item sourcepos=\"5:1-7:18\">\n" " <table sourcepos=\"5:4-7:18\">\n" " <table_header sourcepos=\"5:4-5:18\">\n" - " <table_cell sourcepos=\"5:6-5:7\">\n" + " <table_cell sourcepos=\"5:5-5:7\">\n" " <text sourcepos=\"5:6-5:6\">a</text>\n" " </table_cell>\n" - " <table_cell sourcepos=\"5:10-5:11\">\n" + " <table_cell sourcepos=\"5:9-5:11\">\n" " <text sourcepos=\"5:10-5:10\">b</text>\n" " </table_cell>\n" - " <table_cell sourcepos=\"5:14-5:17\">\n" + " <table_cell sourcepos=\"5:13-5:17\">\n" " <emph sourcepos=\"5:14-5:16\">\n" " <text sourcepos=\"5:15-5:15\">c</text>\n" " </emph>\n" " </table_cell>\n" " </table_header>\n" " <table_row sourcepos=\"7:4-7:18\">\n" - " <table_cell sourcepos=\"7:6-7:7\">\n" + " <table_cell sourcepos=\"7:5-7:7\">\n" " <text sourcepos=\"7:6-7:6\">1</text>\n" " </table_cell>\n" - " <table_cell sourcepos=\"7:10-7:11\">\n" + " <table_cell sourcepos=\"7:9-7:11\">\n" " <text sourcepos=\"7:10-7:10\">2</text>\n" " </table_cell>\n" - " <table_cell sourcepos=\"7:14-7:17\">\n" + " <table_cell sourcepos=\"7:13-7:17\">\n" " <strikethrough sourcepos=\"7:14-7:16\">\n" " <text 
sourcepos=\"7:15-7:15\">3</text>\n" " </strikethrough>\n" diff --git a/extensions/table.c b/extensions/table.c index 73de5031d..f6d3e7a07 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -28,7 +28,7 @@ typedef struct { typedef struct { cmark_strbuf *buf; - int start_offset, end_offset; + int start_offset, end_offset, internal_offset; } node_cell; static void free_table_cell(cmark_mem *mem, void *data) { @@ -133,6 +133,10 @@ static table_row *row_from_string(cmark_syntax_extension *self, cell->buf = cell_buf; cell->start_offset = offset; cell->end_offset = offset + cell_matched - 1; + while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') { + --cell->start_offset; + ++cell->internal_offset; + } row->n_columns += 1; row->cells = cmark_llist_append(parser->mem, row->cells, cell); } @@ -249,6 +253,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_node *header_cell = cmark_parser_add_child(parser, table_header, CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset); header_cell->start_line = header_cell->end_line = parent_container->start_line; + header_cell->internal_offset = cell->internal_offset; header_cell->end_column = parent_container->start_column + cell->end_offset; cmark_node_set_string_content(header_cell, (char *) cell->buf->ptr); cmark_node_set_syntax_extension(header_cell, self); @@ -292,6 +297,7 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, node_cell *cell = (node_cell *) tmp->data; cmark_node *node = cmark_parser_add_child(parser, table_row_block, CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset); + node->internal_offset = cell->internal_offset; node->end_column = parent_container->start_column + cell->end_offset; cmark_node_set_string_content(node, (char *) cell->buf->ptr); cmark_node_set_syntax_extension(node, self); diff --git a/src/blocks.c b/src/blocks.c index 0c2222056..723abcc32 100644 --- a/src/blocks.c +++ 
b/src/blocks.c @@ -980,6 +980,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->as.heading.level = level; (*container)->as.heading.setext = false; + (*container)->internal_offset = matched; } else if (!indented && (matched = scan_open_code_fence( input, parser->first_nonspace))) { diff --git a/src/inlines.c b/src/inlines.c index 4faf873b1..876bbde14 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -230,6 +230,47 @@ static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) { return cmark_chunk_dup(&subj->input, startpos, len); } +// Return the number of newlines in a given span of text in a subject. If +// the number is greater than zero, also return the number of characters +// between the last newline and the end of the span in `since_newline`. +static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) { + int nls = 0; + int since_nl = 0; + + while (len--) { + if (subj->input.data[from++] == '\n') { + ++nls; + since_nl = 0; + } else { + ++since_nl; + } + } + + if (!nls) + return 0; + + *since_newline = since_nl; + return nls; +} + +// Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and +// `column_offset` according to the number of newlines in a just-matched span +// of text in `subj`. +static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) { + if (!(options & CMARK_OPT_SOURCEPOS)) { + return; + } + + int since_newline; + int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline); + if (newlines) { + subj->line += newlines; + node->end_line += newlines; + node->end_column = since_newline; + subj->column_offset = -subj->pos + since_newline + extra; + } +} + // Try to process a backtick code span that began with a // span of ticks of length openticklength length (already // parsed). 
Return 0 if you don't find matching closing @@ -277,7 +318,7 @@ static bufsize_t scan_to_closing_backticks(subject *subj, // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. -static cmark_node *handle_backticks(subject *subj) { +static cmark_node *handle_backticks(subject *subj, int options) { cmark_chunk openticks = take_while(subj, isbacktick); bufsize_t startpos = subj->pos; bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len); @@ -293,7 +334,9 @@ static cmark_node *handle_backticks(subject *subj) { cmark_strbuf_trim(&buf); cmark_strbuf_normalize_whitespace(&buf); - return make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf)); + cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf)); + adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options); + return node; } } @@ -775,7 +818,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) { // Parse an autolink or HTML tag. // Assumes the subject has a '<' character at the current position. 
-static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) { +static cmark_node *handle_pointy_brace(subject *subj, int options) { bufsize_t matchlen = 0; cmark_chunk contents; @@ -804,15 +847,19 @@ static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) { if (matchlen > 0) { contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); subj->pos += matchlen; - return make_raw_html(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents); + cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents); + adjust_subj_node_newlines(subj, node, matchlen, 1, options); + return node; } - if (liberal_html_tag) { + if (options & CMARK_OPT_LIBERAL_HTML_TAG) { matchlen = scan_liberal_html_tag(&subj->input, subj->pos); if (matchlen > 0) { contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); subj->pos += matchlen; - return make_raw_html(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents); + cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents); + adjust_subj_node_newlines(subj, node, matchlen, 1, options); + return node; } } @@ -1169,7 +1216,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, new_inl = handle_newline(subj); break; case '`': - new_inl = handle_backticks(subj); + new_inl = handle_backticks(subj, options); break; case '\\': new_inl = handle_backslash(parser, subj); @@ -1178,7 +1225,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, new_inl = handle_entity(subj); break; case '<': - new_inl = handle_pointy_brace(subj, (options & CMARK_OPT_LIBERAL_HTML_TAG) != 0); + new_inl = handle_pointy_brace(subj, options); break; case '*': case '_': @@ -1240,7 +1287,7 @@ void cmark_parse_inlines(cmark_parser *parser, cmark_reference_map *refmap, int options) { subject subj; - subject_from_buf(parser->mem, parent->start_line, parent->start_column - 1, &subj, &parent->content, 
refmap); + subject_from_buf(parser->mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &parent->content, refmap); cmark_chunk_rtrim(&subj.input); while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options)) diff --git a/src/node.h b/src/node.h index e32814bcf..1d8aa50df 100644 --- a/src/node.h +++ b/src/node.h @@ -68,6 +68,7 @@ struct cmark_node { int start_column; int end_line; int end_column; + int internal_offset; uint16_t type; uint16_t flags; From 3a9081bcaaa5bd85880ca1ce769176944c2a81f6 Mon Sep 17 00:00:00 2001 From: Yuki Izumi <ashe@kivikakk.ee> Date: Thu, 10 Aug 2017 13:53:38 +1000 Subject: [PATCH 082/218] 0.28.0.gfm.6 --- CMakeLists.txt | 2 +- changelog.txt | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c39e3732..e013e9a01 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 0) -set(PROJECT_VERSION_GFM 5) +set(PROJECT_VERSION_GFM 6) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 4a26aa691..81cd04268 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,8 @@ +[0.28.0.gfm.6] + + * Fix inline sourcepos info when inlines span multiple lines, and in + ATX headings. + [0.28.0.gfm.5] * Latest spec. From 8eb09f8fb08165b0f1dde05a88d329397151d0ca Mon Sep 17 00:00:00 2001 From: Ashe <kivikakk@github.com> Date: Thu, 17 Aug 2017 11:04:37 +1000 Subject: [PATCH 083/218] Skip strikethroughs when considering emphasis (#55) * Add regression test to demo what we want with ~ * Feature complete hack * Generalize into SKIP_CHARS * define higher * Fewer terrible overruns * Zero zeroes required. 
--- src/inlines.c | 28 ++++++++++++++++++++-------- test/regression.txt | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index 876bbde14..7dc81b143 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -57,6 +57,9 @@ typedef struct subject{ bool scanned_for_backticks; } subject; +// Extensions may populate this. +static int8_t SKIP_CHARS[256]; + static CMARK_INLINE bool S_is_line_end_char(char c) { return (c == '\n' || c == '\r'); } @@ -345,7 +348,7 @@ static cmark_node *handle_backticks(subject *subj, int options) { static int scan_delims(subject *subj, unsigned char c, bool *can_open, bool *can_close) { int numdelims = 0; - bufsize_t before_char_pos; + bufsize_t before_char_pos, after_char_pos; int32_t after_char = 0; int32_t before_char = 0; int len; @@ -356,12 +359,12 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, } else { before_char_pos = subj->pos - 1; // walk back to the beginning of the UTF_8 sequence: - while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) { + while ((peek_at(subj, before_char_pos) >> 6 == 2 || SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) { before_char_pos -= 1; } len = cmark_utf8proc_iterate(subj->input.data + before_char_pos, subj->pos - before_char_pos, &before_char); - if (len == -1) { + if (len == -1 || (before_char < 256 && SKIP_CHARS[(unsigned char) before_char])) { before_char = 10; } } @@ -376,11 +379,20 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, } } - len = cmark_utf8proc_iterate(subj->input.data + subj->pos, - subj->input.len - subj->pos, &after_char); - if (len == -1) { + if (subj->pos == subj->input.len) { after_char = 10; + } else { + after_char_pos = subj->pos; + while (SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) { + after_char_pos += 1; + } + len = cmark_utf8proc_iterate(subj->input.data + after_char_pos, + 
subj->input.len - after_char_pos, &after_char); + if (len == -1 || (after_char < 256 && SKIP_CHARS[(unsigned char) after_char])) { + after_char = 10; + } } + left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) && (!cmark_utf8proc_is_punctuation(after_char) || cmark_utf8proc_is_space(before_char) || @@ -1174,11 +1186,11 @@ static bufsize_t subject_find_special_char(subject *subj, int options) { } void cmark_inlines_add_special_character(unsigned char c) { - SPECIAL_CHARS[c] = 1; + SPECIAL_CHARS[c] = SKIP_CHARS[c] = 1; } void cmark_inlines_remove_special_character(unsigned char c) { - SPECIAL_CHARS[c] = 0; + SPECIAL_CHARS[c] = SKIP_CHARS[c] = 0; } static cmark_node *try_extensions(cmark_parser *parser, diff --git a/test/regression.txt b/test/regression.txt index 18b7d79d7..1420475cd 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -92,3 +92,37 @@ Issue #192 - escaped spaces in link destination . <p>[a](te\ st)</p> ```````````````````````````````` + +Issue github/github#76615: multiple delimiter combinations gets sketchy + + +```````````````````````````````` example strikethrough +~~**_`this`_**~~ +~~***`this`***~~ +~~___`this`___~~ + +**_`this`_** +***`this`*** +___`this`___ + +~~**_this_**~~ +~~***this***~~ +~~___this___~~ + +**_this_** +***this*** +___this___ +. 
+<p><del><strong><em><code>this</code></em></strong></del><br /> +<del><em><strong><code>this</code></strong></em></del><br /> +<del><em><strong><code>this</code></strong></em></del></p> +<p><strong><em><code>this</code></em></strong><br /> +<em><strong><code>this</code></strong></em><br /> +<em><strong><code>this</code></strong></em></p> +<p><del><strong><em>this</em></strong></del><br /> +<del><em><strong>this</strong></em></del><br /> +<del><em><strong>this</strong></em></del></p> +<p><strong><em>this</em></strong><br /> +<em><strong>this</strong></em><br /> +<em><strong>this</strong></em></p> +```````````````````````````````` From 2667be942b419761aaca7cffcc9de3d247f3187e Mon Sep 17 00:00:00 2001 From: Yuki Izumi <ashe@kivikakk.ee> Date: Thu, 17 Aug 2017 11:07:33 +1000 Subject: [PATCH 084/218] 0.28.0.gfm.7 --- CMakeLists.txt | 2 +- changelog.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e013e9a01..205e5d42e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 0) -set(PROJECT_VERSION_GFM 6) +set(PROJECT_VERSION_GFM 7) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 81cd04268..d970b0a19 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,7 @@ +[0.28.0.gfm.7] + + * Strikethrough characters do not disturb regular emphasis processing. 
+ [0.28.0.gfm.6] * Fix inline sourcepos info when inlines span multiple lines, and in From fa84ac8a2f90dabe28aa4b423dfa75d68a8c1901 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Fri, 25 Aug 2017 14:57:59 +1000 Subject: [PATCH 085/218] Autolink should not cause : to be skipped --- extensions/strikethrough.c | 2 ++ src/blocks.c | 4 ++-- src/cmark_extension_api.h | 3 +++ src/inlines.c | 12 ++++++++---- src/inlines.h | 4 ++-- src/syntax_extension.c | 5 +++++ src/syntax_extension.h | 1 + 7 files changed, 23 insertions(+), 8 deletions(-) diff --git a/extensions/strikethrough.c b/extensions/strikethrough.c index 3153723d5..24e3ba11f 100644 --- a/extensions/strikethrough.c +++ b/extensions/strikethrough.c @@ -150,5 +150,7 @@ cmark_syntax_extension *create_strikethrough_extension(void) { special_chars = cmark_llist_append(mem, special_chars, (void *)'~'); cmark_syntax_extension_set_special_inline_chars(ext, special_chars); + cmark_syntax_extension_set_emphasis(ext, true); + return ext; } diff --git a/src/blocks.c b/src/blocks.c index 723abcc32..017ff454b 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -394,9 +394,9 @@ void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add) for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) { unsigned char c = (unsigned char)(size_t)tmp_char->data; if (add) - cmark_inlines_add_special_character(c); + cmark_inlines_add_special_character(c, ext->emphasis); else - cmark_inlines_remove_special_character(c); + cmark_inlines_remove_special_character(c, ext->emphasis); } } } diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index f999d3338..7e76d6932 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -269,6 +269,9 @@ cmark_syntax_extension *cmark_syntax_extension_new (const char *name); CMARK_EXPORT cmark_node_type cmark_syntax_extension_add_node(int is_inline); +CMARK_EXPORT +void 
cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension, bool emphasis); + /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT diff --git a/src/inlines.c b/src/inlines.c index 7dc81b143..4424ecce5 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1185,12 +1185,16 @@ static bufsize_t subject_find_special_char(subject *subj, int options) { return subj->input.len; } -void cmark_inlines_add_special_character(unsigned char c) { - SPECIAL_CHARS[c] = SKIP_CHARS[c] = 1; +void cmark_inlines_add_special_character(unsigned char c, bool emphasis) { + SPECIAL_CHARS[c] = 1; + if (emphasis) + SKIP_CHARS[c] = 1; } -void cmark_inlines_remove_special_character(unsigned char c) { - SPECIAL_CHARS[c] = SKIP_CHARS[c] = 0; +void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) { + SPECIAL_CHARS[c] = 0; + if (emphasis) + SKIP_CHARS[c] = 0; } static cmark_node *try_extensions(cmark_parser *parser, diff --git a/src/inlines.h b/src/inlines.h index 0d8305c2f..284ccc3cf 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -19,8 +19,8 @@ void cmark_parse_inlines(cmark_parser *parser, bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, cmark_reference_map *refmap); -void cmark_inlines_add_special_character(unsigned char c); -void cmark_inlines_remove_special_character(unsigned char c); +void cmark_inlines_add_special_character(unsigned char c, bool emphasis); +void cmark_inlines_remove_special_character(unsigned char c, bool emphasis); #ifdef __cplusplus } diff --git a/src/syntax_extension.c b/src/syntax_extension.c index ee86b66d2..1492dad34 100644 --- a/src/syntax_extension.c +++ b/src/syntax_extension.c @@ -36,6 +36,11 @@ cmark_node_type cmark_syntax_extension_add_node(int is_inline) { return *ref = (cmark_node_type) ((int) *ref + 1); } +void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension, + bool emphasis) { + extension->emphasis = emphasis; +} + void 
cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension, cmark_open_block_func func) { extension->try_opening_block = func; diff --git a/src/syntax_extension.h b/src/syntax_extension.h index 6fcf109d2..0414f83b1 100644 --- a/src/syntax_extension.h +++ b/src/syntax_extension.h @@ -12,6 +12,7 @@ struct cmark_syntax_extension { cmark_llist * special_inline_chars; char * name; void * priv; + bool emphasis; cmark_free_func free_function; cmark_get_type_string_func get_type_string_func; cmark_can_contain_func can_contain_func; From 28232aa2d569cb0760fd2ae359ce7dab4046ca84 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Fri, 25 Aug 2017 15:06:21 +1000 Subject: [PATCH 086/218] 0.28.0.gfm.8 --- CMakeLists.txt | 2 +- changelog.txt | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 205e5d42e..359cdc04a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 0) -set(PROJECT_VERSION_GFM 7) +set(PROJECT_VERSION_GFM 8) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 0cb6f94d0..f3b05abfc 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,8 @@ +[0.28.0.gfm.8] + + * Fix bug where autolink would cause `:` to be skipped in emphasis + processing. + [0.28.0.gfm.7] * Strikethrough characters do not disturb regular emphasis processing. From 820372d95d6b6fdb83ace15564b40822e6e58546 Mon Sep 17 00:00:00 2001 From: Ashe Connor <kivikakk@github.com> Date: Wed, 6 Sep 2017 11:12:46 +1000 Subject: [PATCH 087/218] Recursive chevrons are bananas (#49) * Add some pathological regression tests * Fix new pathological case * Cleanup. 
* Give it a second, for Travis --- api_test/main.c | 217 +++++++++++------------------------------------- src/inlines.c | 2 +- 2 files changed, 50 insertions(+), 169 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index 057ee51ce..69d1ba15d 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -934,176 +934,59 @@ static void test_feed_across_line_ending(test_batch_runner *runner) { cmark_node_free(document); } -static void source_pos(test_batch_runner *runner) { - static const char markdown[] = - "# Hi *there*.\n" - "\n" - "Hello &ldquo; <http://www.google.com>\n" - "there `hi` -- [okay](www.google.com (ok)).\n" - "\n" - "> 1. Okay.\n" - "> Sure.\n" - ">\n" - "> 2. Yes, okay.\n" - "> ![ok](hi \"yes\")\n"; - - cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); - char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); - STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" - "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" - "<document sourcepos=\"1:1-10:20\" xmlns=\"http://commonmark.org/xml/1.0\">\n" - " <heading sourcepos=\"1:1-1:13\" level=\"1\">\n" - " <text sourcepos=\"1:3-1:5\">Hi </text>\n" - " <emph sourcepos=\"1:6-1:12\">\n" - " <text sourcepos=\"1:7-1:11\">there</text>\n" - " </emph>\n" - " <text sourcepos=\"1:13-1:13\">.</text>\n" - " </heading>\n" - " <paragraph sourcepos=\"3:1-4:42\">\n" - " <text sourcepos=\"3:1-3:14\">Hello “ </text>\n" - " <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n" - " <text sourcepos=\"3:16-3:36\">http://www.google.com</text>\n" - " </link>\n" - " <softbreak />\n" - " <text sourcepos=\"4:1-4:6\">there </text>\n" - " <code sourcepos=\"4:8-4:9\">hi</code>\n" - " <text sourcepos=\"4:11-4:14\"> -- </text>\n" - " <link sourcepos=\"4:15-4:41\" destination=\"www.google.com\" title=\"ok\">\n" - " <text sourcepos=\"4:16-4:19\">okay</text>\n" - " </link>\n" - " <text sourcepos=\"4:42-4:42\">.</text>\n" - " 
</paragraph>\n" - " <block_quote sourcepos=\"6:1-10:20\">\n" - " <list sourcepos=\"6:3-10:20\" type=\"ordered\" start=\"1\" delim=\"period\" tight=\"false\">\n" - " <item sourcepos=\"6:3-8:1\">\n" - " <paragraph sourcepos=\"6:6-7:10\">\n" - " <text sourcepos=\"6:6-6:10\">Okay.</text>\n" - " <softbreak />\n" - " <text sourcepos=\"7:6-7:10\">Sure.</text>\n" - " </paragraph>\n" - " </item>\n" - " <item sourcepos=\"9:3-10:20\">\n" - " <paragraph sourcepos=\"9:6-10:20\">\n" - " <text sourcepos=\"9:6-9:15\">Yes, okay.</text>\n" - " <softbreak />\n" - " <image sourcepos=\"10:6-10:20\" destination=\"hi\" title=\"yes\">\n" - " <text sourcepos=\"10:8-10:9\">ok</text>\n" - " </image>\n" - " </paragraph>\n" - " </item>\n" - " </list>\n" - " </block_quote>\n" - "</document>\n", - "sourcepos are as expected"); - free(xml); - cmark_node_free(doc); -} - -static void ext_source_pos(test_batch_runner *runner) { - static const char *extensions[3] = { - "strikethrough", - "table", - "autolink", - }; - - static const char markdown[] = - "Hi ~~friend~~.\n" - "\n" - "> www.github.com\n" - "\n" - "1. | a | b | *c* |\n" - " | - | - | --: |\n" - " | 1 | 2 | ~3~ |\n"; - - cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); - core_extensions_ensure_registered(); +#if !defined(_WIN32) || defined(__CYGWIN__) +# include <sys/time.h> +static struct timeval _before, _after; +static int _timing; +# define START_TIMING() \ + gettimeofday(&_before, NULL) + +# define END_TIMING() \ + do { \ + gettimeofday(&_after, NULL); \ + _timing = (_after.tv_sec - _before.tv_sec) * 1000 + (_after.tv_usec - _before.tv_usec) / 1000; \ + } while (0) + +# define TIMING _timing +#else +# define START_TIMING() +# define END_TIMING() +# define TIMING 0 +#endif + +static void test_pathological_regressions(test_batch_runner *runner) { + { + // I don't care what the output is, so long as it doesn't take too long. 
+ char path[] = "[a](b"; + char *input = (char *)calloc(1, (sizeof(path) - 1) * 50000); + for (int i = 0; i < 50000; ++i) + memcpy(input + i * (sizeof(path) - 1), path, sizeof(path) - 1); + + START_TIMING(); + char *html = cmark_markdown_to_html(input, (sizeof(path) - 1) * 50000, + CMARK_OPT_VALIDATE_UTF8); + END_TIMING(); + free(html); + free(input); - for (int i = 0; i < (int)(sizeof(extensions) / sizeof(*extensions)); ++i) { - cmark_syntax_extension *ext = cmark_find_syntax_extension(extensions[i]); - cmark_parser_attach_syntax_extension(parser, ext); + OK(runner, TIMING < 1000, "takes less than 1000ms to run"); } - cmark_parser_feed(parser, markdown, sizeof(markdown) - 1); - - cmark_node *doc = cmark_parser_finish(parser); - char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); - STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" - "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" - "<document sourcepos=\"1:1-7:18\" xmlns=\"http://commonmark.org/xml/1.0\">\n" - " <paragraph sourcepos=\"1:1-1:14\">\n" - " <text sourcepos=\"1:1-1:3\">Hi </text>\n" - " <strikethrough sourcepos=\"1:4-1:13\">\n" - " <text sourcepos=\"1:6-1:11\">friend</text>\n" - " </strikethrough>\n" - " <text sourcepos=\"1:14-1:14\">.</text>\n" - " </paragraph>\n" - " <block_quote sourcepos=\"3:1-3:16\">\n" - " <paragraph sourcepos=\"3:3-3:16\">\n" - " <link sourcepos=\"3:2-3:16\" destination=\"http://www.github.com\" title=\"\">\n" - " <text sourcepos=\"3:2-3:16\">www.github.com</text>\n" - " </link>\n" - " </paragraph>\n" - " </block_quote>\n" - " <list sourcepos=\"5:1-7:18\" type=\"ordered\" start=\"1\" delim=\"period\" tight=\"true\">\n" - " <item sourcepos=\"5:1-7:18\">\n" - " <table sourcepos=\"5:4-7:18\">\n" - " <table_header sourcepos=\"5:4-5:18\">\n" - " <table_cell sourcepos=\"5:5-5:7\">\n" - " <text sourcepos=\"5:6-5:6\">a</text>\n" - " </table_cell>\n" - " <table_cell sourcepos=\"5:9-5:11\">\n" - " <text sourcepos=\"5:10-5:10\">b</text>\n" - " 
</table_cell>\n" - " <table_cell sourcepos=\"5:13-5:17\">\n" - " <emph sourcepos=\"5:14-5:16\">\n" - " <text sourcepos=\"5:15-5:15\">c</text>\n" - " </emph>\n" - " </table_cell>\n" - " </table_header>\n" - " <table_row sourcepos=\"7:4-7:18\">\n" - " <table_cell sourcepos=\"7:5-7:7\">\n" - " <text sourcepos=\"7:6-7:6\">1</text>\n" - " </table_cell>\n" - " <table_cell sourcepos=\"7:9-7:11\">\n" - " <text sourcepos=\"7:10-7:10\">2</text>\n" - " </table_cell>\n" - " <table_cell sourcepos=\"7:13-7:17\">\n" - " <strikethrough sourcepos=\"7:14-7:16\">\n" - " <text sourcepos=\"7:15-7:15\">3</text>\n" - " </strikethrough>\n" - " </table_cell>\n" - " </table_row>\n" - " </table>\n" - " </item>\n" - " </list>\n" - "</document>\n", - "sourcepos are as expected"); - free(xml); - cmark_node_free(doc); -} + { + char path[] = "[a](<b"; + char *input = (char *)calloc(1, (sizeof(path) - 1) * 50000); + for (int i = 0; i < 50000; ++i) + memcpy(input + i * (sizeof(path) - 1), path, sizeof(path) - 1); -static void ref_source_pos(test_batch_runner *runner) { - static const char markdown[] = - "Let's try [reference] links.\n" - "\n" - "[reference]: https://github.com (GitHub)\n"; + START_TIMING(); + char *html = cmark_markdown_to_html(input, (sizeof(path) - 1) * 50000, + CMARK_OPT_VALIDATE_UTF8); + END_TIMING(); + free(html); + free(input); - cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); - char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); - STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" - "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" - "<document sourcepos=\"1:1-3:40\" xmlns=\"http://commonmark.org/xml/1.0\">\n" - " <paragraph sourcepos=\"1:1-1:28\">\n" - " <text sourcepos=\"1:1-1:10\">Let's try </text>\n" - " <link sourcepos=\"1:11-1:21\" destination=\"https://github.com\" title=\"GitHub\">\n" - " <text sourcepos=\"1:12-1:20\">reference</text>\n" - " </link>\n" - " <text 
sourcepos=\"1:22-1:28\"> links.</text>\n" - " </paragraph>\n" - "</document>\n", - "sourcepos are as expected"); - free(xml); - cmark_node_free(doc); + OK(runner, TIMING < 1000, "takes less than 1000ms to run"); + } } int main() { @@ -1132,9 +1015,7 @@ int main() { test_cplusplus(runner); test_safe(runner); test_feed_across_line_ending(runner); - source_pos(runner); - ext_source_pos(runner); - ref_source_pos(runner); + test_pathological_regressions(runner); test_print_summary(runner); retval = test_ok(runner) ? 0 : 1; diff --git a/src/inlines.c b/src/inlines.c index 69a7bbdf3..6a66348f8 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -119,7 +119,7 @@ static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) { c.data = (unsigned char *)mem->calloc(len + 1, 1); c.alloc = 1; if (len) - memcpy(c.data, src->data, len); + memcpy(c.data, src->data, len); c.data[len] = '\0'; return c; From 150239ca7cc246eaff950169f2a9bdaab1add15a Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Wed, 6 Sep 2017 11:17:08 +1000 Subject: [PATCH 088/218] 0.28.0.gfm.9 --- CMakeLists.txt | 2 +- changelog.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 359cdc04a..179b79d41 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 0) -set(PROJECT_VERSION_GFM 8) +set(PROJECT_VERSION_GFM 9) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index f3b05abfc..67e562192 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,7 @@ +[0.28.0.gfm.9] + + * Fix denial of service parsing nested links (#49). 
+ [0.28.0.gfm.8] * Fix bug where autolink would cause `:` to be skipped in emphasis From 63f17beaa7658900b63f9e6cb0364894bb394f49 Mon Sep 17 00:00:00 2001 From: Vicent Marti <tanoku@gmail.com> Date: Thu, 7 Sep 2017 08:06:11 -0700 Subject: [PATCH 089/218] blocks: Fix quadratic behavior in `finalize` --- src/blocks.c | 12 ++++++++---- src/inlines.c | 13 ++++++------- src/inlines.h | 2 +- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index 017ff454b..30f55e4ff 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -281,17 +281,21 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { switch (S_type(b)) { case CMARK_NODE_PARAGRAPH: - while (cmark_strbuf_at(node_content, 0) == '[' && - (pos = cmark_parse_reference_inline(parser->mem, node_content, - parser->refmap))) { + { + cmark_chunk chunk = {node_content->ptr, node_content->size, 0}; + while (chunk.len && chunk.data[0] == '[' && + (pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) { - cmark_strbuf_drop(node_content, pos); + chunk.data += pos; + chunk.len -= pos; } + cmark_strbuf_drop(node_content, (node_content->size - chunk.len)); if (is_blank(node_content, 0)) { // remove blank node (former reference def) cmark_node_free(b); } break; + } case CMARK_NODE_CODE_BLOCK: if (!b->as.code.fenced) { // indented code diff --git a/src/inlines.c b/src/inlines.c index 6a66348f8..676728837 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -70,7 +70,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options); static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e, - cmark_strbuf *buffer, cmark_reference_map *refmap); + cmark_chunk *buffer, cmark_reference_map *refmap); static bufsize_t subject_find_special_char(subject *subj, int options); // Create an inline with a literal string value. 
@@ -157,12 +157,10 @@ static CMARK_INLINE cmark_node *make_autolink(subject *subj, } static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e, - cmark_strbuf *buffer, cmark_reference_map *refmap) { + cmark_chunk *chunk, cmark_reference_map *refmap) { int i; e->mem = mem; - e->input.data = buffer->ptr; - e->input.len = buffer->size; - e->input.alloc = 0; + e->input = *chunk; e->line = line_number; e->pos = 0; e->block_offset = block_offset; @@ -1322,7 +1320,8 @@ void cmark_parse_inlines(cmark_parser *parser, cmark_reference_map *refmap, int options) { subject subj; - subject_from_buf(parser->mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &parent->content, refmap); + cmark_chunk content = {parent->content.ptr, parent->content.size, 0}; + subject_from_buf(parser->mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap); cmark_chunk_rtrim(&subj.input); while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options)) @@ -1350,7 +1349,7 @@ static void spnl(subject *subj) { // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. 
-bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, +bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, cmark_reference_map *refmap) { subject subj; diff --git a/src/inlines.h b/src/inlines.h index 284ccc3cf..0438f65a4 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -16,7 +16,7 @@ void cmark_parse_inlines(cmark_parser *parser, cmark_reference_map *refmap, int options); -bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, +bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, cmark_reference_map *refmap); void cmark_inlines_add_special_character(unsigned char c, bool emphasis); From d946b7c7f8a75ba71898202f21878eb35ae594f8 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Fri, 8 Sep 2017 13:45:59 +1000 Subject: [PATCH 090/218] 0.28.0.gfm.10 --- CMakeLists.txt | 2 +- changelog.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 179b79d41..1b461a5a6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 0) -set(PROJECT_VERSION_GFM 9) +set(PROJECT_VERSION_GFM 10) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 67e562192..bef3f2b7c 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,7 @@ +[0.28.0.gfm.10] + + * Fix denial of service parsing references. + [0.28.0.gfm.9] * Fix denial of service parsing nested links (#49). 
From 3afbcad536c761b406b03135d481cd68221a8b64 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Tue, 12 Sep 2017 08:35:47 +1000 Subject: [PATCH 091/218] No empty <tbody> --- extensions/table.c | 7 ++++--- test/extensions.txt | 12 ++++-------- test/spec.txt | 15 +++++++++++++++ 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/extensions/table.c b/extensions/table.c index f6d3e7a07..6b50959c5 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -577,6 +577,10 @@ static void html_render(cmark_syntax_extension *extension, table_state->in_table_header = 1; cmark_strbuf_puts(html, "<thead>"); cmark_html_render_cr(html); + } else if (!table_state->need_closing_table_body) { + cmark_strbuf_puts(html, "<tbody>"); + cmark_html_render_cr(html); + table_state->need_closing_table_body = 1; } cmark_strbuf_puts(html, "<tr"); cmark_html_render_sourcepos(node, html, options); @@ -587,9 +591,6 @@ static void html_render(cmark_syntax_extension *extension, if (((node_table_row *)node->as.opaque)->is_header) { cmark_html_render_cr(html); cmark_strbuf_puts(html, "</thead>"); - cmark_html_render_cr(html); - cmark_strbuf_puts(html, "<tbody>"); - table_state->need_closing_table_body = 1; table_state->in_table_header = false; } } diff --git a/test/extensions.txt b/test/extensions.txt index 4340b618f..3c783cf34 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -96,23 +96,20 @@ Here we demonstrate some edge cases about what is and isn't a table. <th>Just enough table</th> <th>to be considered table</th> </tr> -</thead> -<tbody></tbody></table> +</thead></table> <p>| ---- | --- |</p> <table> <thead> <tr> <th>x</th> </tr> -</thead> -<tbody></tbody></table> +</thead></table> <table> <thead> <tr> <th>xyz</th> </tr> -</thead> -<tbody></tbody></table> +</thead></table> ```````````````````````````````` A "simpler" table, GFM style: @@ -297,8 +294,7 @@ This shouldn't assert. 
<tr> <th>a</th> </tr> -</thead> -<tbody></tbody></table> +</thead></table> ```````````````````````````````` ### Escaping diff --git a/test/spec.txt b/test/spec.txt index babf3c8c9..670fe58ab 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -3399,6 +3399,21 @@ cells are inserted. If there are greater, the excess is ignored: </tr></tbody></table> ```````````````````````````````` +If there are no rows in the body, no `<tbody>` is generated in HTML output: + +```````````````````````````````` example table +| abc | def | +| --- | --- | +. +<table> +<thead> +<tr> +<th>abc</th> +<th>def</th> +</tr> +</thead></table> +```````````````````````````````` + </div> # Container blocks From e3f3a27ba1762f7acfc9881369fee737f8f73386 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Tue, 12 Sep 2017 08:37:44 +1000 Subject: [PATCH 092/218] 0.28.0.gfm.11 --- CMakeLists.txt | 2 +- changelog.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b461a5a6..1b8a5e8fd 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 0) -set(PROJECT_VERSION_GFM 10) +set(PROJECT_VERSION_GFM 11) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index bef3f2b7c..52037767a 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,7 @@ +[0.28.0.gfm.11] + + * Do not output empty `<tbody>` in table extension. + [0.28.0.gfm.10] * Fix denial of service parsing references. From 07fe00f38e78846bc0760cb02a3b1441dffc9b6d Mon Sep 17 00:00:00 2001 From: Ashe Connor <kivikakk@github.com> Date: Tue, 3 Oct 2017 13:13:31 +1100 Subject: [PATCH 093/218] Period in email must precede alnum (#58) Fixes #57. 
--- extensions/autolink.c | 2 +- test/extensions.txt | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/extensions/autolink.c b/extensions/autolink.c index b204ccfd7..03a135f17 100644 --- a/extensions/autolink.c +++ b/extensions/autolink.c @@ -319,7 +319,7 @@ static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset) if (c == '@') nb++; - else if (c == '.' && link_end < size - 1) + else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1])) np++; else if (c != '-' && c != '_') break; diff --git a/test/extensions.txt b/test/extensions.txt index 3c783cf34..c8c7440dd 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -523,6 +523,19 @@ This shouldn't crash everything: (_A_@_.A <IGNORE> ```````````````````````````````` +```````````````````````````````` example +These should not link: + +* @a.b.c@. x +* n@. b +. +<p>These should not link:</p> +<ul> +<li>@a.b.c@. x</li> +<li>n@. b</li> +</ul> +```````````````````````````````` + ## HTML tag filter From 1781c950683b0ef8f5288b2d35e17e9dd88b7245 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 6 Nov 2017 11:09:06 +1100 Subject: [PATCH 094/218] feature test macros in harness --- api_test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api_test/CMakeLists.txt b/api_test/CMakeLists.txt index 5fe573db3..cf4709a23 100644 --- a/api_test/CMakeLists.txt +++ b/api_test/CMakeLists.txt @@ -22,5 +22,5 @@ if(MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4706 /D_CRT_SECURE_NO_WARNINGS") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP") elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -std=c99 -pedantic") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -std=c99 -pedantic -D_BSD_SOURCE -D_POSIX_SOURCE") endif() From 37804e6bc0e9800535fbb4890fe969cd27b79ee8 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 6 
Nov 2017 11:53:50 +1100 Subject: [PATCH 095/218] Fix install EXPORT target --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 54be80279..21c9f5cf0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -165,7 +165,7 @@ if(CMARK_SHARED OR CMARK_STATIC) DESTINATION include ) - install(EXPORT cmark-gfm DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmark-gfm) + install(EXPORT cmark-gfm DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake) endif() # Feature tests From 8d71a16d5df1eb32daf2b244f9c8117678313013 Mon Sep 17 00:00:00 2001 From: Ashe Connor <kivikakk@github.com> Date: Mon, 6 Nov 2017 12:21:10 +1100 Subject: [PATCH 096/218] Shift includes around for proper header install (#63) * Shift includes around for proper header install * msvc fixes --- extensions/core-extensions.c | 1 + extensions/core-extensions.h | 1 + extensions/strikethrough.c | 3 ++- extensions/table.c | 1 + src/blocks.c | 2 +- src/buffer.h | 2 -- src/chunk.h | 2 +- src/cmark.h | 3 +++ src/cmark_extension_api.h | 16 +++++++++------- src/html.c | 1 + src/plaintext.c | 1 + src/syntax_extension.c | 5 +++-- src/syntax_extension.h | 1 + 13 files changed, 25 insertions(+), 14 deletions(-) diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 7bb9e1296..4659ab177 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -4,6 +4,7 @@ #include "table.h" #include "tagfilter.h" #include "registry.h" +#include "plugin.h" static int core_extensions_registration(cmark_plugin *plugin) { cmark_plugin_register_syntax_extension(plugin, create_table_extension()); diff --git a/extensions/core-extensions.h b/extensions/core-extensions.h index aca255c08..e466b1e1d 100644 --- a/extensions/core-extensions.h +++ b/extensions/core-extensions.h @@ -7,6 +7,7 @@ extern "C" { #include <cmark_extension_api.h> #include "cmarkextensions_export.h" +#include <stdint.h> CMARKEXTENSIONS_EXPORT void 
core_extensions_ensure_registered(void); diff --git a/extensions/strikethrough.c b/extensions/strikethrough.c index 24e3ba11f..aa7550479 100644 --- a/extensions/strikethrough.c +++ b/extensions/strikethrough.c @@ -1,5 +1,6 @@ #include "strikethrough.h" #include <parser.h> +#include <render.h> cmark_node_type CMARK_NODE_STRIKETHROUGH; @@ -150,7 +151,7 @@ cmark_syntax_extension *create_strikethrough_extension(void) { special_chars = cmark_llist_append(mem, special_chars, (void *)'~'); cmark_syntax_extension_set_special_inline_chars(ext, special_chars); - cmark_syntax_extension_set_emphasis(ext, true); + cmark_syntax_extension_set_emphasis(ext, 1); return ext; } diff --git a/extensions/table.c b/extensions/table.c index 6b50959c5..3b7ae828f 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -4,6 +4,7 @@ #include <parser.h> #include <references.h> #include <string.h> +#include <render.h> #include "ext_scanners.h" #include "strikethrough.h" diff --git a/src/blocks.c b/src/blocks.c index 30f55e4ff..8e29a1d40 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -389,7 +389,7 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent, return child; } -void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add) { +void cmark_manage_extensions_special_characters(cmark_parser *parser, int add) { cmark_llist *tmp_ext; for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) { diff --git a/src/buffer.h b/src/buffer.h index 90fa7df71..35af9c390 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -13,8 +13,6 @@ extern "C" { #endif -typedef int32_t bufsize_t; - typedef struct { cmark_mem *mem; unsigned char *ptr; diff --git a/src/chunk.h b/src/chunk.h index f0a1c6c69..c6a68687c 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -12,7 +12,7 @@ #define CMARK_CHUNK_EMPTY \ { NULL, 0, 0 } -typedef struct { +typedef struct cmark_chunk { unsigned char *data; bufsize_t len; bufsize_t alloc; // also implies a NULL-terminated string diff 
--git a/src/cmark.h b/src/cmark.h index d98e00e88..e3f9d0aeb 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -2,6 +2,7 @@ #define CMARK_CMARK_H #include <stdio.h> +#include <stdint.h> #include "cmark_export.h" #include "cmark_version.h" @@ -773,6 +774,8 @@ const char *cmark_version_string(void); #define PAREN_DELIM CMARK_PAREN_DELIM #endif +typedef int32_t bufsize_t; + #ifdef __cplusplus } #endif diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index 7e76d6932..470e6294e 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -6,8 +6,10 @@ extern "C" { #endif #include <cmark.h> -#include <render.h> -#include <buffer.h> + +struct cmark_renderer; +struct cmark_html_renderer; +struct cmark_chunk; /** * ## Extension Support @@ -227,7 +229,7 @@ typedef int (*cmark_contains_inlines_func) (cmark_syntax_extension *extension, cmark_node *node); typedef void (*cmark_common_render_func) (cmark_syntax_extension *extension, - cmark_renderer *renderer, + struct cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options); @@ -237,7 +239,7 @@ typedef int (*cmark_commonmark_escape_func) (cmark_syntax_extension *extension, int c); typedef void (*cmark_html_render_func) (cmark_syntax_extension *extension, - cmark_html_renderer *renderer, + struct cmark_html_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options); @@ -270,7 +272,7 @@ CMARK_EXPORT cmark_node_type cmark_syntax_extension_add_node(int is_inline); CMARK_EXPORT -void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension, bool emphasis); +void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension, int emphasis); /** See the documentation for 'cmark_syntax_extension' */ @@ -614,7 +616,7 @@ void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset); * Use cmark_inline_parser_get_offset to get our current position in the chunk. 
*/ CMARK_EXPORT -cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser); +struct cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser); /** Returns 1 if the inline parser is currently in a bracket; pass 1 for 'image' * if you want to know about an image-type bracket, 0 for link-type. */ @@ -699,7 +701,7 @@ int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, int *punct_after); CMARK_EXPORT -void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add); +void cmark_manage_extensions_special_characters(cmark_parser *parser, int add); CMARK_EXPORT cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser); diff --git a/src/html.c b/src/html.c index aaf2b7402..ce23c6a9e 100644 --- a/src/html.c +++ b/src/html.c @@ -9,6 +9,7 @@ #include "scanners.h" #include "syntax_extension.h" #include "html.h" +#include "render.h" // Functions to convert cmark_nodes to HTML strings. diff --git a/src/plaintext.c b/src/plaintext.c index a274827c9..910adf466 100644 --- a/src/plaintext.c +++ b/src/plaintext.c @@ -1,5 +1,6 @@ #include "node.h" #include "syntax_extension.h" +#include "render.h" #define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping) #define LIT(s) renderer->out(renderer, node, s, false, LITERAL) diff --git a/src/syntax_extension.c b/src/syntax_extension.c index 1492dad34..d079efade 100644 --- a/src/syntax_extension.c +++ b/src/syntax_extension.c @@ -1,4 +1,5 @@ #include <stdlib.h> +#include <assert.h> #include "cmark.h" #include "syntax_extension.h" @@ -37,8 +38,8 @@ cmark_node_type cmark_syntax_extension_add_node(int is_inline) { } void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension, - bool emphasis) { - extension->emphasis = emphasis; + int emphasis) { + extension->emphasis = emphasis == 1; } void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension, diff --git a/src/syntax_extension.h b/src/syntax_extension.h index 
0414f83b1..f51f1f4ca 100644 --- a/src/syntax_extension.h +++ b/src/syntax_extension.h @@ -3,6 +3,7 @@ #include "cmark.h" #include "cmark_extension_api.h" +#include "config.h" struct cmark_syntax_extension { cmark_match_block_func last_block_matches; From 9188bdfadbb613fda5c1b039faf2ef8ee9cbcc2a Mon Sep 17 00:00:00 2001 From: killa123 <killa123@126.com> Date: Thu, 9 Nov 2017 11:21:41 +0800 Subject: [PATCH 097/218] add node.js wrapper (#46) --- README.md | 5 +++-- wrappers/wrapper.js | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 wrappers/wrapper.js diff --git a/README.md b/README.md index 5857d4252..40ef6af4c 100644 --- a/README.md +++ b/README.md @@ -65,8 +65,9 @@ There are also libraries that wrap `libcmark` for [Lua](https://github.com/jgm/cmark-lua), [Perl](https://metacpan.org/release/CommonMark), [Python](https://pypi.python.org/pypi/paka.cmark), -[R](https://cran.r-project.org/package=commonmark) and -[Scala](https://github.com/sparsetech/cmark-scala). +[R](https://cran.r-project.org/package=commonmark), +[Scala](https://github.com/sparsetech/cmark-scala) and +[Node.js](https://github.com/killa123/node-cmark). Installing ---------- diff --git a/wrappers/wrapper.js b/wrappers/wrapper.js new file mode 100644 index 000000000..0a9b8b5be --- /dev/null +++ b/wrappers/wrapper.js @@ -0,0 +1,6 @@ + +const cmark = require('node-cmark'); + +const markdown = '# h1 title'; + +cmark.markdown2html(markdown); From 59f723304ffc3b84f781431a320dffa2dea03fdd Mon Sep 17 00:00:00 2001 From: Ashe Connor <kivikakk@github.com> Date: Thu, 16 Nov 2017 11:31:16 +1100 Subject: [PATCH 098/218] Footnotes (#64) * Add baseline test * Some preliminary work. 
* cont'd * Add footnote reference * Start postprocessing * MVP: tests pass * commonmark footnote out * Factor out reference/footnote maps * fix a memory leak & some asserts * We don't assert/check snprintf elsewhere * Remove bad linear search, extend test case * cleanup * man page update * add footnotes as option * bugfix (found in comrak first!) * Shift static var into renderer struct --- man/man3/cmark-gfm.3 | 42 ++++++++- src/CMakeLists.txt | 4 + src/blocks.c | 116 ++++++++++++++++++++++++- src/cmark.c | 4 +- src/cmark.h | 6 ++ src/commonmark.c | 23 +++++ src/footnotes.c | 40 +++++++++ src/footnotes.h | 25 ++++++ src/html.c | 56 +++++++++++- src/inlines.c | 35 ++++++-- src/inlines.h | 4 +- src/latex.c | 5 ++ src/main.c | 3 + src/man.c | 5 ++ src/map.c | 122 ++++++++++++++++++++++++++ src/map.h | 42 +++++++++ src/node.c | 3 + src/parser.h | 2 +- src/references.c | 136 +++-------------------------- src/references.h | 25 ++---- src/render.c | 3 +- src/render.h | 3 + src/scanners.c | 200 ++++++++++++++++++++++++++++++++++++------- src/scanners.h | 2 + src/scanners.re | 10 +++ test/CMakeLists.txt | 9 +- test/extensions.txt | 59 +++++++++++++ 27 files changed, 788 insertions(+), 196 deletions(-) create mode 100644 src/footnotes.c create mode 100644 src/footnotes.h create mode 100644 src/map.c create mode 100644 src/map.h diff --git a/man/man3/cmark-gfm.3 b/man/man3/cmark-gfm.3 index 471658d30..2d85c3783 100644 --- a/man/man3/cmark-gfm.3 +++ b/man/man3/cmark-gfm.3 @@ -1,4 +1,4 @@ -.TH cmark-gfm 3 "April 03, 2017" "LOCAL" "Library Functions Manual" +.TH cmark-gfm 3 "November 09, 2017" "LOCAL" "Library Functions Manual" .SH NAME .PP @@ -40,6 +40,7 @@ typedef enum { CMARK_NODE_PARAGRAPH = CMARK_NODE_TYPE_BLOCK | 0x0008, CMARK_NODE_HEADING = CMARK_NODE_TYPE_BLOCK | 0x0009, CMARK_NODE_THEMATIC_BREAK = CMARK_NODE_TYPE_BLOCK | 0x000a, + CMARK_NODE_FOOTNOTE_DEFINITION = CMARK_NODE_TYPE_BLOCK | 0x000b, /* Inline */ CMARK_NODE_TEXT = CMARK_NODE_TYPE_INLINE | 0x0001, @@ -52,6 
+53,7 @@ typedef enum { CMARK_NODE_STRONG = CMARK_NODE_TYPE_INLINE | 0x0008, CMARK_NODE_LINK = CMARK_NODE_TYPE_INLINE | 0x0009, CMARK_NODE_IMAGE = CMARK_NODE_TYPE_INLINE | 0x000a, + CMARK_NODE_FOOTNOTE_REFERENCE = CMARK_NODE_TYPE_INLINE | 0x000b, } cmark_node_type; .RE \f[] @@ -780,6 +782,20 @@ responsibility to free the returned buffer. As for \f[I]cmark_render_commonmark\f[], but specifying the allocator to use for the resulting string. +.PP +\fIchar *\f[] \fBcmark_render_plaintext\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) + +.PP +Render a \f[I]node\f[] tree as a plain text document. It is the caller's +responsibility to free the returned buffer. + +.PP +\fIchar *\f[] \fBcmark_render_plaintext_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[]) + +.PP +As for \f[I]cmark_render_plaintext\f[], but specifying the allocator to +use for the resulting string. + .PP \fIchar *\f[] \fBcmark_render_latex\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[]) @@ -917,6 +933,30 @@ dashes. .PP Use GitHub\-style tags for code blocks instead of . +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_LIBERAL_HTML_TAG (1 << 12) +.RE +\f[] +.fi + +.PP +Be liberal in interpreting inline HTML tags. + +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_FOOTNOTES (1 << 13) +.RE +\f[] +.fi + +.PP +Parse footnotes. 
+ .SS Version information diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 21c9f5cf0..6c2ae5bca 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,6 +15,8 @@ set(HEADERS iterator.h chunk.h references.h + footnotes.h + map.h utf8.h scanners.h inlines.h @@ -36,6 +38,8 @@ set(LIBRARY_SOURCES utf8.c buffer.c references.c + footnotes.c + map.c render.c man.c xml.c diff --git a/src/blocks.c b/src/blocks.c index 8e29a1d40..fb5dcf2d0 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -21,6 +21,7 @@ #include "inlines.h" #include "houdini.h" #include "buffer.h" +#include "footnotes.h" #define CODE_INDENT 4 #define TAB_STOP 4 @@ -97,7 +98,7 @@ static void cmark_parser_dispose(cmark_parser *parser) { cmark_node_free(parser->root); if (parser->refmap) - cmark_reference_map_free(parser->refmap); + cmark_map_free(parser->refmap); } static void cmark_parser_reset(cmark_parser *parser) { @@ -408,7 +409,7 @@ void cmark_manage_extensions_special_characters(cmark_parser *parser, int add) { // Walk through node and all children, recursively, parsing // string content into inline content where appropriate. static void process_inlines(cmark_parser *parser, - cmark_reference_map *refmap, int options) { + cmark_map *refmap, int options) { cmark_iter *iter = cmark_iter_new(parser->root); cmark_node *cur; cmark_event_type ev_type; @@ -429,6 +430,84 @@ static void process_inlines(cmark_parser *parser, cmark_iter_free(iter); } +static int sort_footnote_by_ix(const void *_a, const void *_b) { + cmark_footnote *a = *(cmark_footnote **)_a; + cmark_footnote *b = *(cmark_footnote **)_b; + return (int)a->ix - (int)b->ix; +} + +static void process_footnotes(cmark_parser *parser) { + // * Collect definitions in a map. + // * Iterate the references in the document in order, assigning indices to + // definitions in the order they're seen. + // * Write out the footnotes at the bottom of the document in index order. 
+ + cmark_map *map = cmark_footnote_map_new(parser->mem); + + cmark_iter *iter = cmark_iter_new(parser->root); + cmark_node *cur; + cmark_event_type ev_type; + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_DEFINITION) { + cmark_node_unlink(cur); + cmark_footnote_create(map, cur); + } + } + + cmark_iter_free(iter); + iter = cmark_iter_new(parser->root); + unsigned int ix = 0; + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_REFERENCE) { + cmark_footnote *footnote = (cmark_footnote *)cmark_map_lookup(map, &cur->as.literal); + if (footnote) { + if (!footnote->ix) + footnote->ix = ++ix; + + char n[32]; + snprintf(n, sizeof(n), "%d", footnote->ix); + cmark_chunk_free(parser->mem, &cur->as.literal); + cmark_strbuf buf = CMARK_BUF_INIT(parser->mem); + cmark_strbuf_puts(&buf, n); + + cur->as.literal = cmark_chunk_buf_detach(&buf); + } else { + cmark_node *text = (cmark_node *)parser->mem->calloc(1, sizeof(*text)); + cmark_strbuf_init(parser->mem, &text->content, 0); + text->type = (uint16_t) CMARK_NODE_TEXT; + + cmark_strbuf buf = CMARK_BUF_INIT(parser->mem); + cmark_strbuf_puts(&buf, "[^"); + cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len); + cmark_strbuf_putc(&buf, ']'); + + text->as.literal = cmark_chunk_buf_detach(&buf); + cmark_node_insert_after(cur, text); + cmark_node_free(cur); + } + } + } + + cmark_iter_free(iter); + + if (map->sorted) { + qsort(map->sorted, map->size, sizeof(cmark_map_entry *), sort_footnote_by_ix); + for (unsigned int i = 0; i < map->size; ++i) { + cmark_footnote *footnote = (cmark_footnote *)map->sorted[i]; + if (!footnote->ix) + continue; + cmark_node_append_child(parser->root, footnote->node); + footnote->node = NULL; + } + } + + cmark_map_free(map); +} + // Attempts to parse a 
list item marker (bullet or enumerated). // On success, returns length of the marker, and populates // data with the details. On failure, returns 0. @@ -533,6 +612,8 @@ static cmark_node *finalize_document(cmark_parser *parser) { finalize(parser, parser->root); process_inlines(parser, parser->refmap, parser->options); + if (parser->options & CMARK_OPT_FOOTNOTES) + process_footnotes(parser); return parser->root; } @@ -759,6 +840,18 @@ static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input) { return res; } +static bool parse_footnote_definition_block_prefix(cmark_parser *parser, cmark_chunk *input, + cmark_node *container) { + if (parser->indent >= 4) { + S_advance_offset(parser, input, 4, true); + return true; + } else if (input->len > 0 && (input->data[0] == '\n' || (input->data[0] == '\r' && input->data[1] == '\n'))) { + return true; + } + + return false; +} + static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input, cmark_node *container) { bool res = false; @@ -913,6 +1006,10 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input, if (parser->blank) goto done; break; + case CMARK_NODE_FOOTNOTE_DEFINITION: + if (!parse_footnote_definition_block_prefix(parser, input, container)) + goto done; + break; default: break; } @@ -1024,6 +1121,21 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK, parser->first_nonspace + 1); S_advance_offset(parser, input, input->len - 1 - parser->offset, false); + } else if (!indented && + parser->options & CMARK_OPT_FOOTNOTES && + (matched = scan_footnote_definition(input, parser->first_nonspace))) { + cmark_chunk c = cmark_chunk_dup(input, parser->first_nonspace + 2, matched - 2); + cmark_chunk_to_cstr(parser->mem, &c); + + while (c.data[c.len - 1] != ']') + --c.len; + --c.len; + + S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false); + 
*container = add_child(parser, *container, CMARK_NODE_FOOTNOTE_DEFINITION, parser->first_nonspace + matched + 1); + (*container)->as.literal = c; + + (*container)->internal_offset = matched; } else if ((!indented || cont_type == CMARK_NODE_LIST) && (matched = parse_list_marker( parser->mem, input, parser->first_nonspace, diff --git a/src/cmark.c b/src/cmark.c index dd013d2ce..f6b4cdf77 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -7,8 +7,8 @@ #include "cmark.h" #include "buffer.h" -cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK; -cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE; +cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_FOOTNOTE_DEFINITION; +cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_FOOTNOTE_REFERENCE; int cmark_version() { return CMARK_VERSION; } diff --git a/src/cmark.h b/src/cmark.h index e3f9d0aeb..ba2febcc6 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -52,6 +52,7 @@ typedef enum { CMARK_NODE_PARAGRAPH = CMARK_NODE_TYPE_BLOCK | 0x0008, CMARK_NODE_HEADING = CMARK_NODE_TYPE_BLOCK | 0x0009, CMARK_NODE_THEMATIC_BREAK = CMARK_NODE_TYPE_BLOCK | 0x000a, + CMARK_NODE_FOOTNOTE_DEFINITION = CMARK_NODE_TYPE_BLOCK | 0x000b, /* Inline */ CMARK_NODE_TEXT = CMARK_NODE_TYPE_INLINE | 0x0001, @@ -64,6 +65,7 @@ typedef enum { CMARK_NODE_STRONG = CMARK_NODE_TYPE_INLINE | 0x0008, CMARK_NODE_LINK = CMARK_NODE_TYPE_INLINE | 0x0009, CMARK_NODE_IMAGE = CMARK_NODE_TYPE_INLINE | 0x000a, + CMARK_NODE_FOOTNOTE_REFERENCE = CMARK_NODE_TYPE_INLINE | 0x000b, } cmark_node_type; extern cmark_node_type CMARK_NODE_LAST_BLOCK; @@ -718,6 +720,10 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar */ #define CMARK_OPT_LIBERAL_HTML_TAG (1 << 12) +/** Parse footnotes. 
+ */ +#define CMARK_OPT_FOOTNOTES (1 << 13) + /** * ## Version information */ diff --git a/src/commonmark.c b/src/commonmark.c index 8063acb65..c0ba7ae5d 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -463,6 +463,29 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_FOOTNOTE_REFERENCE: + if (entering) { + LIT("[^"); + OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL); + LIT("]"); + } + break; + + case CMARK_NODE_FOOTNOTE_DEFINITION: + if (entering) { + renderer->footnote_ix += 1; + LIT("[^"); + char n[32]; + snprintf(n, sizeof(n), "%d", renderer->footnote_ix); + OUT(n, false, LITERAL); + LIT("]:\n"); + + cmark_strbuf_puts(renderer->prefix, " "); + } else { + cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4); + } + break; + default: assert(false); break; diff --git a/src/footnotes.c b/src/footnotes.c new file mode 100644 index 000000000..ccd452cc1 --- /dev/null +++ b/src/footnotes.c @@ -0,0 +1,40 @@ +#include "cmark.h" +#include "parser.h" +#include "footnotes.h" +#include "inlines.h" +#include "chunk.h" + +static void footnote_free(cmark_map *map, cmark_map_entry *_ref) { + cmark_footnote *ref = (cmark_footnote *)_ref; + cmark_mem *mem = map->mem; + if (ref != NULL) { + mem->free(ref->entry.label); + if (ref->node) + cmark_node_free(ref->node); + mem->free(ref); + } +} + +void cmark_footnote_create(cmark_map *map, cmark_node *node) { + cmark_footnote *ref; + unsigned char *reflabel = normalize_map_label(map->mem, &node->as.literal); + + /* empty footnote name, or composed from only whitespace */ + if (reflabel == NULL) + return; + + assert(map->sorted == NULL); + + ref = (cmark_footnote *)map->mem->calloc(1, sizeof(*ref)); + ref->entry.label = reflabel; + ref->node = node; + ref->entry.age = map->size; + ref->entry.next = map->refs; + + map->refs = (cmark_map_entry *)ref; + map->size++; +} + +cmark_map *cmark_footnote_map_new(cmark_mem *mem) { + return 
cmark_map_new(mem, footnote_free); +} diff --git a/src/footnotes.h b/src/footnotes.h new file mode 100644 index 000000000..43dd64ff6 --- /dev/null +++ b/src/footnotes.h @@ -0,0 +1,25 @@ +#ifndef CMARK_FOOTNOTES_H +#define CMARK_FOOTNOTES_H + +#include "map.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct cmark_footnote { + cmark_map_entry entry; + cmark_node *node; + unsigned int ix; +}; + +typedef struct cmark_footnote cmark_footnote; + +void cmark_footnote_create(cmark_map *map, cmark_node *node); +cmark_map *cmark_footnote_map_new(cmark_mem *mem); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/html.c b/src/html.c index ce23c6a9e..136f9e090 100644 --- a/src/html.c +++ b/src/html.c @@ -59,6 +59,20 @@ static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size cmark_strbuf_put(html, data, (bufsize_t)len); } +static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html) { + if (renderer->written_footnote_ix >= renderer->footnote_ix) + return false; + renderer->written_footnote_ix = renderer->footnote_ix; + + cmark_strbuf_puts(html, "<a href=\"#fnref"); + char n[32]; + snprintf(n, sizeof(n), "%d", renderer->footnote_ix); + cmark_strbuf_puts(html, n); + cmark_strbuf_puts(html, "\" class=\"footnote-backref\">↩</a>"); + + return true; +} + static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { cmark_node *parent; @@ -249,6 +263,10 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, cmark_html_render_sourcepos(node, html, options); cmark_strbuf_putc(html, '>'); } else { + if (parent->type == CMARK_NODE_FOOTNOTE_DEFINITION && node->next == NULL) { + cmark_strbuf_putc(html, ' '); + S_put_footnote_backref(renderer, html); + } cmark_strbuf_puts(html, "</p>\n"); } } @@ -363,6 +381,37 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_FOOTNOTE_DEFINITION: + if (entering) { + 
if (renderer->footnote_ix == 0) { + cmark_strbuf_puts(html, "<section class=\"footnotes\">\n<ol>\n"); + } + ++renderer->footnote_ix; + cmark_strbuf_puts(html, "<li id=\"fn"); + char n[32]; + snprintf(n, sizeof(n), "%d", renderer->footnote_ix); + cmark_strbuf_puts(html, n); + cmark_strbuf_puts(html, "\">\n"); + } else { + if (S_put_footnote_backref(renderer, html)) { + cmark_strbuf_putc(html, '\n'); + } + cmark_strbuf_puts(html, "</li>\n"); + } + break; + + case CMARK_NODE_FOOTNOTE_REFERENCE: + if (entering) { + cmark_strbuf_puts(html, "<sup class=\"footnote-ref\"><a href=\"#fn"); + cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); + cmark_strbuf_puts(html, "\" id=\"fnref"); + cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); + cmark_strbuf_puts(html, "\">["); + cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); + cmark_strbuf_puts(html, "]</a></sup>"); + } + break; + default: assert(false); break; @@ -380,7 +429,7 @@ char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *ext cmark_strbuf html = CMARK_BUF_INIT(mem); cmark_event_type ev_type; cmark_node *cur; - cmark_html_renderer renderer = {&html, NULL, NULL, NULL}; + cmark_html_renderer renderer = {&html, NULL, NULL, 0, 0, NULL}; cmark_iter *iter = cmark_iter_new(root); for (; extensions; extensions = extensions->next) @@ -394,6 +443,11 @@ char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *ext cur = cmark_iter_get_node(iter); S_render_node(&renderer, cur, ev_type, options); } + + if (renderer.footnote_ix) { + cmark_strbuf_puts(&html, "</ol>\n</section>\n"); + } + result = (char *)cmark_strbuf_detach(&html); cmark_llist_free(mem, renderer.filter_extensions); diff --git a/src/inlines.c b/src/inlines.c index cf0a306b9..9685983b0 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -50,7 +50,7 @@ typedef struct subject{ bufsize_t pos; int block_offset; int column_offset; - cmark_reference_map *refmap; + cmark_map 
*refmap; delimiter *last_delim; bracket *last_bracket; bufsize_t backticks[MAXBACKTICKS + 1]; @@ -70,7 +70,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options); static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e, - cmark_chunk *buffer, cmark_reference_map *refmap); + cmark_chunk *buffer, cmark_map *refmap); static bufsize_t subject_find_special_char(subject *subj, int options); // Create an inline with a literal string value. @@ -157,7 +157,7 @@ static CMARK_INLINE cmark_node *make_autolink(subject *subj, } static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e, - cmark_chunk *chunk, cmark_reference_map *refmap) { + cmark_chunk *chunk, cmark_map *refmap) { int i; e->mem = mem; e->input = *chunk; @@ -1076,7 +1076,7 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { } if (found_label) { - ref = cmark_reference_lookup(subj->refmap, &raw_label); + ref = (cmark_reference *)cmark_map_lookup(subj->refmap, &raw_label); cmark_chunk_free(subj->mem, &raw_label); } @@ -1089,7 +1089,28 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { } noMatch: - // If we fall through to here, it means we didn't match a link: + // If we fall through to here, it means we didn't match a link. + // What if we're a footnote link? 
+ if (parser->options & CMARK_OPT_FOOTNOTES && + opener->inl_text->next && + opener->inl_text->next->type == CMARK_NODE_TEXT && + !opener->inl_text->next->next) { + cmark_chunk *literal = &opener->inl_text->next->as.literal; + if (literal->len > 1 && literal->data[0] == '^') { + inl = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE); + inl->as.literal = cmark_chunk_dup(literal, 1, literal->len - 1); + inl->start_line = inl->end_line = subj->line; + inl->start_column = opener->inl_text->start_column; + inl->end_column = subj->pos + subj->column_offset + subj->block_offset; + cmark_node_insert_before(opener->inl_text, inl); + cmark_node_free(opener->inl_text->next); + cmark_node_free(opener->inl_text); + process_emphasis(parser, subj, opener->previous_delimiter); + pop_bracket(subj); + return NULL; + } + } + pop_bracket(subj); // remove this opener from delimiter list subj->pos = initial_pos; return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]")); @@ -1317,7 +1338,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, // Parse inlines from parent's string_content, adding as children of parent. void cmark_parse_inlines(cmark_parser *parser, cmark_node *parent, - cmark_reference_map *refmap, + cmark_map *refmap, int options) { subject subj; cmark_chunk content = {parent->content.ptr, parent->content.size, 0}; @@ -1350,7 +1371,7 @@ static void spnl(subject *subj) { // Return 0 if no reference found, otherwise position of subject // after reference is parsed. 
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, - cmark_reference_map *refmap) { + cmark_map *refmap) { subject subj; cmark_chunk lab; diff --git a/src/inlines.h b/src/inlines.h index 0438f65a4..8c8174c64 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -13,11 +13,11 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); CMARK_EXPORT void cmark_parse_inlines(cmark_parser *parser, cmark_node *parent, - cmark_reference_map *refmap, + cmark_map *refmap, int options); bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, - cmark_reference_map *refmap); + cmark_map *refmap); void cmark_inlines_add_special_character(unsigned char c, bool emphasis); void cmark_inlines_remove_special_character(unsigned char c, bool emphasis); diff --git a/src/latex.c b/src/latex.c index 618f78a49..1767cee39 100644 --- a/src/latex.c +++ b/src/latex.c @@ -444,6 +444,11 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_FOOTNOTE_DEFINITION: + case CMARK_NODE_FOOTNOTE_REFERENCE: + // TODO + break; + default: assert(false); break; diff --git a/src/main.c b/src/main.c index 4a46537c1..8d972d738 100644 --- a/src/main.c +++ b/src/main.c @@ -41,6 +41,7 @@ void print_usage() { printf(" --smart Use smart punctuation\n"); printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n"); printf(" --github-pre-lang Use GitHub-style <pre lang> for code blocks\n"); + printf(" --footnotes Parse footnotes\n"); printf(" --extension, -e EXTENSION_NAME Specify an extension name to use\n"); printf(" --list-extensions List available extensions and quit\n"); printf(" --help, -h Print usage information\n"); @@ -138,6 +139,8 @@ int main(int argc, char *argv[]) { options |= CMARK_OPT_SMART; } else if (strcmp(argv[i], "--github-pre-lang") == 0) { options |= CMARK_OPT_GITHUB_PRE_LANG; + } else if (strcmp(argv[i], "--footnotes") == 0) { + options |= CMARK_OPT_FOOTNOTES; } else if (strcmp(argv[i], 
"--safe") == 0) { options |= CMARK_OPT_SAFE; } else if (strcmp(argv[i], "--validate-utf8") == 0) { diff --git a/src/man.c b/src/man.c index 2b52ad5b2..1e1a69d26 100644 --- a/src/man.c +++ b/src/man.c @@ -256,6 +256,11 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_FOOTNOTE_DEFINITION: + case CMARK_NODE_FOOTNOTE_REFERENCE: + // TODO + break; + default: assert(false); break; diff --git a/src/map.c b/src/map.c new file mode 100644 index 000000000..9a418dfd4 --- /dev/null +++ b/src/map.c @@ -0,0 +1,122 @@ +#include "map.h" +#include "utf8.h" +#include "parser.h" + +// normalize map label: collapse internal whitespace to single space, +// remove leading/trailing whitespace, case fold +// Return NULL if the label is actually empty (i.e. composed solely from +// whitespace) +unsigned char *normalize_map_label(cmark_mem *mem, cmark_chunk *ref) { + cmark_strbuf normalized = CMARK_BUF_INIT(mem); + unsigned char *result; + + if (ref == NULL) + return NULL; + + if (ref->len == 0) + return NULL; + + cmark_utf8proc_case_fold(&normalized, ref->data, ref->len); + cmark_strbuf_trim(&normalized); + cmark_strbuf_normalize_whitespace(&normalized); + + result = cmark_strbuf_detach(&normalized); + assert(result); + + if (result[0] == '\0') { + mem->free(result); + return NULL; + } + + return result; +} + +static int +labelcmp(const unsigned char *a, const unsigned char *b) { + return strcmp((const char *)a, (const char *)b); +} + +static int +refcmp(const void *p1, const void *p2) { + cmark_map_entry *r1 = *(cmark_map_entry **)p1; + cmark_map_entry *r2 = *(cmark_map_entry **)p2; + int res = labelcmp(r1->label, r2->label); + return res ? 
res : ((int)r1->age - (int)r2->age); +} + +static int +refsearch(const void *label, const void *p2) { + cmark_map_entry *ref = *(cmark_map_entry **)p2; + return labelcmp((const unsigned char *)label, ref->label); +} + +static void sort_map(cmark_map *map) { + unsigned int i = 0, last = 0, size = map->size; + cmark_map_entry *r = map->refs, **sorted = NULL; + + sorted = (cmark_map_entry **)map->mem->calloc(size, sizeof(cmark_map_entry *)); + while (r) { + sorted[i++] = r; + r = r->next; + } + + qsort(sorted, size, sizeof(cmark_map_entry *), refcmp); + + for (i = 1; i < size; i++) { + if (labelcmp(sorted[i]->label, sorted[last]->label) != 0) + sorted[++last] = sorted[i]; + } + + map->sorted = sorted; + map->size = last + 1; +} + +cmark_map_entry *cmark_map_lookup(cmark_map *map, cmark_chunk *label) { + cmark_map_entry **ref = NULL; + unsigned char *norm; + + if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH) + return NULL; + + if (map == NULL || !map->size) + return NULL; + + norm = normalize_map_label(map->mem, label); + if (norm == NULL) + return NULL; + + if (!map->sorted) + sort_map(map); + + ref = (cmark_map_entry **)bsearch(norm, map->sorted, map->size, sizeof(cmark_map_entry *), refsearch); + map->mem->free(norm); + + if (!ref) + return NULL; + + return ref[0]; +} + +void cmark_map_free(cmark_map *map) { + cmark_map_entry *ref; + + if (map == NULL) + return; + + ref = map->refs; + while (ref) { + cmark_map_entry *next = ref->next; + map->free(map, ref); + ref = next; + } + + map->mem->free(map->sorted); + map->mem->free(map); +} + +cmark_map *cmark_map_new(cmark_mem *mem, cmark_map_free_f free) { + cmark_map *map = (cmark_map *)mem->calloc(1, sizeof(cmark_map)); + map->mem = mem; + map->free = free; + return map; +} diff --git a/src/map.h b/src/map.h new file mode 100644 index 000000000..00307be66 --- /dev/null +++ b/src/map.h @@ -0,0 +1,42 @@ +#ifndef CMARK_MAP_H +#define CMARK_MAP_H + +#include "memory.h" +#include "chunk.h" + +#ifdef __cplusplus 
+extern "C" { +#endif + +struct cmark_map_entry { + struct cmark_map_entry *next; + unsigned char *label; + unsigned int age; +}; + +typedef struct cmark_map_entry cmark_map_entry; + +struct cmark_map; + +typedef void (*cmark_map_free_f)(struct cmark_map *, cmark_map_entry *); + +struct cmark_map { + cmark_mem *mem; + cmark_map_entry *refs; + cmark_map_entry **sorted; + unsigned int size; + cmark_map_free_f free; +}; + +typedef struct cmark_map cmark_map; + +unsigned char *normalize_map_label(cmark_mem *mem, cmark_chunk *ref); +cmark_map *cmark_map_new(cmark_mem *mem, cmark_map_free_f free); +void cmark_map_free(cmark_map *map); +cmark_map_entry *cmark_map_lookup(cmark_map *map, cmark_chunk *label); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/node.c b/src/node.c index bcf322fd2..48389ab48 100644 --- a/src/node.c +++ b/src/node.c @@ -21,6 +21,7 @@ bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type) { switch (node->type) { case CMARK_NODE_DOCUMENT: case CMARK_NODE_BLOCK_QUOTE: + case CMARK_NODE_FOOTNOTE_DEFINITION: case CMARK_NODE_ITEM: return CMARK_NODE_TYPE_BLOCK_P(child_type) && child_type != CMARK_NODE_ITEM; @@ -108,6 +109,8 @@ static void free_node_as(cmark_node *node) { case CMARK_NODE_HTML_INLINE: case CMARK_NODE_CODE: case CMARK_NODE_HTML_BLOCK: + case CMARK_NODE_FOOTNOTE_REFERENCE: + case CMARK_NODE_FOOTNOTE_DEFINITION: cmark_chunk_free(NODE_MEM(node), &node->as.literal); break; case CMARK_NODE_LINK: diff --git a/src/parser.h b/src/parser.h index 89c4209f4..f87e30948 100644 --- a/src/parser.h +++ b/src/parser.h @@ -15,7 +15,7 @@ extern "C" { struct cmark_parser { struct cmark_mem *mem; /* A hashtable of urls in the current document for cross-references */ - struct cmark_reference_map *refmap; + struct cmark_map *refmap; /* The root node of the parser, always a CMARK_NODE_DOCUMENT */ struct cmark_node *root; /* The last open block after a line is fully processed */ diff --git a/src/references.c b/src/references.c 
index 1648e9b24..2cd4b4435 100644 --- a/src/references.c +++ b/src/references.c @@ -1,53 +1,24 @@ #include "cmark.h" -#include "utf8.h" #include "parser.h" #include "references.h" #include "inlines.h" #include "chunk.h" -static void reference_free(cmark_reference_map *map, cmark_reference *ref) { +static void reference_free(cmark_map *map, cmark_map_entry *_ref) { + cmark_reference *ref = (cmark_reference *)_ref; cmark_mem *mem = map->mem; if (ref != NULL) { - mem->free(ref->label); + mem->free(ref->entry.label); cmark_chunk_free(mem, &ref->url); cmark_chunk_free(mem, &ref->title); mem->free(ref); } } -// normalize reference: collapse internal whitespace to single space, -// remove leading/trailing whitespace, case fold -// Return NULL if the reference name is actually empty (i.e. composed -// solely from whitespace) -static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) { - cmark_strbuf normalized = CMARK_BUF_INIT(mem); - unsigned char *result; - - if (ref == NULL) - return NULL; - - if (ref->len == 0) - return NULL; - - cmark_utf8proc_case_fold(&normalized, ref->data, ref->len); - cmark_strbuf_trim(&normalized); - cmark_strbuf_normalize_whitespace(&normalized); - - result = cmark_strbuf_detach(&normalized); - assert(result); - - if (result[0] == '\0') { - mem->free(result); - return NULL; - } - - return result; -} - -void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, +void cmark_reference_create(cmark_map *map, cmark_chunk *label, cmark_chunk *url, cmark_chunk *title) { cmark_reference *ref; - unsigned char *reflabel = normalize_reference(map->mem, label); + unsigned char *reflabel = normalize_map_label(map->mem, label); /* empty reference name, or composed from only whitespace */ if (reflabel == NULL) @@ -56,101 +27,16 @@ void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, assert(map->sorted == NULL); ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref)); - ref->label = reflabel; + 
ref->entry.label = reflabel; ref->url = cmark_clean_url(map->mem, url); ref->title = cmark_clean_title(map->mem, title); - ref->age = map->size; - ref->next = map->refs; + ref->entry.age = map->size; + ref->entry.next = map->refs; - map->refs = ref; + map->refs = (cmark_map_entry *)ref; map->size++; } -static int -labelcmp(const unsigned char *a, const unsigned char *b) { - return strcmp((const char *)a, (const char *)b); -} - -static int -refcmp(const void *p1, const void *p2) { - cmark_reference *r1 = *(cmark_reference **)p1; - cmark_reference *r2 = *(cmark_reference **)p2; - int res = labelcmp(r1->label, r2->label); - return res ? res : ((int)r1->age - (int)r2->age); -} - -static int -refsearch(const void *label, const void *p2) { - cmark_reference *ref = *(cmark_reference **)p2; - return labelcmp((const unsigned char *)label, ref->label); -} - -static void sort_references(cmark_reference_map *map) { - unsigned int i = 0, last = 0, size = map->size; - cmark_reference *r = map->refs, **sorted = NULL; - - sorted = (cmark_reference **)map->mem->calloc(size, sizeof(cmark_reference *)); - while (r) { - sorted[i++] = r; - r = r->next; - } - - qsort(sorted, size, sizeof(cmark_reference *), refcmp); - - for (i = 1; i < size; i++) { - if (labelcmp(sorted[i]->label, sorted[last]->label) != 0) - sorted[++last] = sorted[i]; - } - - map->sorted = sorted; - map->size = last + 1; -} - -// Returns reference if refmap contains a reference with matching -// label, otherwise NULL. 
-cmark_reference *cmark_reference_lookup(cmark_reference_map *map, - cmark_chunk *label) { - cmark_reference **ref = NULL; - unsigned char *norm; - - if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH) - return NULL; - - if (map == NULL || !map->size) - return NULL; - - norm = normalize_reference(map->mem, label); - if (norm == NULL) - return NULL; - - if (!map->sorted) - sort_references(map); - - ref = (cmark_reference **)bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *), refsearch); - map->mem->free(norm); - return ref ? ref[0] : NULL; -} - -void cmark_reference_map_free(cmark_reference_map *map) { - cmark_reference *ref; - - if (map == NULL) - return; - - ref = map->refs; - while (ref) { - cmark_reference *next = ref->next; - reference_free(map, ref); - ref = next; - } - - map->mem->free(map->sorted); - map->mem->free(map); -} - -cmark_reference_map *cmark_reference_map_new(cmark_mem *mem) { - cmark_reference_map *map = - (cmark_reference_map *)mem->calloc(1, sizeof(cmark_reference_map)); - map->mem = mem; - return map; +cmark_map *cmark_reference_map_new(cmark_mem *mem) { + return cmark_map_new(mem, reference_free); } diff --git a/src/references.h b/src/references.h index 0bbbd5f4b..def944dc7 100644 --- a/src/references.h +++ b/src/references.h @@ -1,38 +1,23 @@ #ifndef CMARK_REFERENCES_H #define CMARK_REFERENCES_H -#include "memory.h" -#include "chunk.h" +#include "map.h" #ifdef __cplusplus extern "C" { #endif struct cmark_reference { - struct cmark_reference *next; - unsigned char *label; + cmark_map_entry entry; cmark_chunk url; cmark_chunk title; - unsigned int age; }; typedef struct cmark_reference cmark_reference; -struct cmark_reference_map { - cmark_mem *mem; - cmark_reference *refs; - cmark_reference **sorted; - unsigned int size; -}; - -typedef struct cmark_reference_map cmark_reference_map; - -cmark_reference_map *cmark_reference_map_new(cmark_mem *mem); -void cmark_reference_map_free(cmark_reference_map *map); -cmark_reference 
*cmark_reference_lookup(cmark_reference_map *map, - cmark_chunk *label); -extern void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, - cmark_chunk *url, cmark_chunk *title); +void cmark_reference_create(cmark_map *map, cmark_chunk *label, + cmark_chunk *url, cmark_chunk *title); +cmark_map *cmark_reference_map_new(cmark_mem *mem); #ifdef __cplusplus } diff --git a/src/render.c b/src/render.c index e731748a5..7f0902462 100644 --- a/src/render.c +++ b/src/render.c @@ -174,7 +174,8 @@ char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, cmark_renderer renderer = {mem, &buf, &pref, 0, width, 0, 0, true, true, false, - false, outc, S_cr, S_blankline, S_out}; + false, outc, S_cr, S_blankline, S_out, + 0}; while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); diff --git a/src/render.h b/src/render.h index 36c48206c..32005eabb 100644 --- a/src/render.h +++ b/src/render.h @@ -28,6 +28,7 @@ struct cmark_renderer { void (*cr)(struct cmark_renderer *); void (*blankline)(struct cmark_renderer *); void (*out)(struct cmark_renderer *, cmark_node *, const char *, bool, cmark_escaping); + unsigned int footnote_ix; }; typedef struct cmark_renderer cmark_renderer; @@ -36,6 +37,8 @@ struct cmark_html_renderer { cmark_strbuf *html; cmark_node *plain; cmark_llist *filter_extensions; + unsigned int footnote_ix; + unsigned int written_footnote_ix; void *opaque; }; diff --git a/src/scanners.c b/src/scanners.c index 27fd99f90..4c569951d 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -7887,45 +7887,35 @@ bufsize_t _scan_html_tag(const unsigned char *p) { unsigned char yych; static const unsigned char yybm[] = { /* table 1 .. 
8: 0 */ - 0, 239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238, + 0, 239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, - 239, 239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239, - 239, 239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, - 239, 239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255, + 239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239, 239, + 239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239, + 255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, - 239, 239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, + 239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* table 9 .. 
11: 256 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 160, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 128, 0, 0, 0, 0, 0, 0, 160, 160, 160, 160, 160, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 160, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 128, 0, + 0, 0, 0, 0, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 0, 0, 0, 0, 128, 0, 160, + 160, 0, 0, 0, 0, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, + 160, 160, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; yych = *p; if (yych <= '>') { @@ -18057,3 +18047,149 @@ bufsize_t _scan_dangerous_url(const unsigned char *p) { goto yy1188; } } + +// Scans a footnote definition opening. 
+bufsize_t _scan_footnote_definition(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 64, 64, 64, 64, 64, 64, 64, 64, 128, 0, 64, 64, 0, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych == '[') + goto yy1235; + ++p; + yy1234 : { return 0; } + yy1235: + yych = *(marker = ++p); + if (yych != '^') + goto yy1234; + yych = *++p; + if (yych != ']') + goto yy1239; + yy1237: + p = marker; + goto yy1234; + yy1238: + ++p; + yych = *p; + yy1239: + if (yybm[0 + yych] & 64) { + goto yy1238; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= ' ') + goto yy1237; + if (yych <= ']') + goto yy1247; + goto yy1237; + } else { + if (yych <= 0xDF) + goto yy1240; + if (yych <= 0xE0) + goto yy1241; + goto yy1242; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy1246; + if (yych <= 0xEF) + goto yy1242; + goto yy1243; + } else { + if (yych <= 0xF3) + goto yy1244; + if (yych <= 0xF4) + goto yy1245; + goto yy1237; + } + } + yy1240: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy1237; + if (yych <= 0xBF) + goto 
yy1238; + goto yy1237; + yy1241: + ++p; + yych = *p; + if (yych <= 0x9F) + goto yy1237; + if (yych <= 0xBF) + goto yy1240; + goto yy1237; + yy1242: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy1237; + if (yych <= 0xBF) + goto yy1240; + goto yy1237; + yy1243: + ++p; + yych = *p; + if (yych <= 0x8F) + goto yy1237; + if (yych <= 0xBF) + goto yy1242; + goto yy1237; + yy1244: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy1237; + if (yych <= 0xBF) + goto yy1242; + goto yy1237; + yy1245: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy1237; + if (yych <= 0x8F) + goto yy1242; + goto yy1237; + yy1246: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy1237; + if (yych <= 0x9F) + goto yy1240; + goto yy1237; + yy1247: + yych = *++p; + if (yych != ':') + goto yy1237; + yy1248: + ++p; + yych = *p; + if (yybm[0 + yych] & 128) { + goto yy1248; + } + { return (bufsize_t)(p - start); } + } +} diff --git a/src/scanners.h b/src/scanners.h index d54d9d272..ffdbaeb06 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -31,6 +31,7 @@ bufsize_t _scan_open_code_fence(const unsigned char *p); bufsize_t _scan_close_code_fence(const unsigned char *p); bufsize_t _scan_entity(const unsigned char *p); bufsize_t _scan_dangerous_url(const unsigned char *p); +bufsize_t _scan_footnote_definition(const unsigned char *p); #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n) #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n) @@ -54,6 +55,7 @@ bufsize_t _scan_dangerous_url(const unsigned char *p); #define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n) #define scan_entity(c, n) _scan_at(&_scan_entity, c, n) #define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n) +#define scan_footnote_definition(c, n) _scan_at(&_scan_footnote_definition, c, n) #ifdef __cplusplus } diff --git a/src/scanners.re b/src/scanners.re index 739dd5314..5af8b7b18 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -329,3 +329,13 @@ bufsize_t _scan_dangerous_url(const 
unsigned char *p) */ } +// Scans a footnote definition opening. +bufsize_t _scan_footnote_definition(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + '[^' ([^\] \r\n\x00\t]+) ']:' [ \t]* { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 74347a62e..e33d77b9c 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -64,14 +64,19 @@ IF (PYTHONINTERP_FOUND) ) add_test(extensions_executable - ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" "--extensions" "table strikethrough autolink tagfilter" + ${PYTHON_EXECUTABLE} + "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" + "--no-normalize" + "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" + "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --footnotes" + "--extensions" "table strikethrough autolink tagfilter" ) add_test(roundtrip_extensions_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" - "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" + "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --footnotes" "--extensions" "table strikethrough autolink tagfilter" ) diff --git a/test/extensions.txt b/test/extensions.txt index c8c7440dd..b62145c3b 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -579,6 +579,65 @@ Even with {"x":"y"} or 1 > 2 or whatever. Even **markdown**. <!--thistoo--> ```````````````````````````````` +## Footnotes + +```````````````````````````````` example +This is some text.[^1]. Other text.[^footnote]. + +Here's a thing[^other-note]. + +And another thing[^codeblock-note]. + +This doesn't have a referent[^nope]. 
+ + +[^other-note]: no code block here (spaces are stripped away) + +[^codeblock-note]: + this is now a code block (8 spaces indentation) + +[^1]: Some *bolded* footnote definition. + +Hi! + +[^footnote]: + > Blockquotes can be in a footnote. + + as well as code blocks + + or, naturally, simple paragraphs. + +[^unused]: This is unused. +. +<p>This is some text.<sup class="footnote-ref"><a href="#fn1" id="fnref1">[1]</a></sup>. Other text.<sup class="footnote-ref"><a href="#fn2" id="fnref2">[2]</a></sup>.</p> +<p>Here's a thing<sup class="footnote-ref"><a href="#fn3" id="fnref3">[3]</a></sup>.</p> +<p>And another thing<sup class="footnote-ref"><a href="#fn4" id="fnref4">[4]</a></sup>.</p> +<p>This doesn't have a referent[^nope].</p> +<p>Hi!</p> +<section class="footnotes"> +<ol> +<li id="fn1"> +<p>Some <em>bolded</em> footnote definition. <a href="#fnref1" class="footnote-backref">↩</a></p> +</li> +<li id="fn2"> +<blockquote> +<p>Blockquotes can be in a footnote.</p> +</blockquote> +<pre><code>as well as code blocks +</code></pre> +<p>or, naturally, simple paragraphs. 
<a href="#fnref2" class="footnote-backref">↩</a></p> +</li> +<li id="fn3"> +<p>no code block here (spaces are stripped away) <a href="#fnref3" class="footnote-backref">↩</a></p> +</li> +<li id="fn4"> +<pre><code>this is now a code block (8 spaces indentation) +</code></pre> +<a href="#fnref4" class="footnote-backref">↩</a> +</li> +</ol> +</section> +```````````````````````````````` ## Interop From 57b35673b74ea692e37e2930f05e5afdbbedd05e Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Thu, 16 Nov 2017 14:09:53 +1100 Subject: [PATCH 099/218] FOOTNOTE_REFERENCE has text content --- src/node.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/node.c b/src/node.c index 48389ab48..3f94834cf 100644 --- a/src/node.c +++ b/src/node.c @@ -322,6 +322,7 @@ const char *cmark_node_get_literal(cmark_node *node) { case CMARK_NODE_TEXT: case CMARK_NODE_HTML_INLINE: case CMARK_NODE_CODE: + case CMARK_NODE_FOOTNOTE_REFERENCE: return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.literal); case CMARK_NODE_CODE_BLOCK: @@ -344,6 +345,7 @@ int cmark_node_set_literal(cmark_node *node, const char *content) { case CMARK_NODE_TEXT: case CMARK_NODE_HTML_INLINE: case CMARK_NODE_CODE: + case CMARK_NODE_FOOTNOTE_REFERENCE: cmark_chunk_set_cstr(NODE_MEM(node), &node->as.literal, content); return 1; From 6b101e33ba1637e294076c46c69cd6a262c7539f Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 27 Nov 2017 10:43:13 +1100 Subject: [PATCH 100/218] Footnote fix per kivikakk/comrak#44 --- src/inlines.c | 12 ++++++++---- test/extensions.txt | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index 9685983b0..759739234 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -176,11 +176,15 @@ static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, static CMARK_INLINE int isbacktick(int c) { return (c == '`'); } -static CMARK_INLINE unsigned char peek_char(subject *subj) { +static 
CMARK_INLINE unsigned char peek_char_n(subject *subj, bufsize_t n) { // NULL bytes should have been stripped out by now. If they're // present, it's a programming error: - assert(!(subj->pos < subj->input.len && subj->input.data[subj->pos] == 0)); - return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0; + assert(!(subj->pos + n < subj->input.len && subj->input.data[subj->pos + n] == 0)); + return (subj->pos + n < subj->input.len) ? subj->input.data[subj->pos + n] : 0; +} + +static CMARK_INLINE unsigned char peek_char(subject *subj) { + return peek_char_n(subj, 0); } static CMARK_INLINE unsigned char peek_at(subject *subj, bufsize_t pos) { @@ -1303,7 +1307,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, break; case '!': advance(subj); - if (peek_char(subj) == '[') { + if (peek_char(subj) == '[' && peek_char_n(subj, 1) != '^') { advance(subj); new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("![")); push_bracket(subj, true, new_inl); diff --git a/test/extensions.txt b/test/extensions.txt index b62145c3b..3ad67d0ad 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -582,7 +582,7 @@ Even with {"x":"y"} or 1 > 2 or whatever. Even **markdown**. ## Footnotes ```````````````````````````````` example -This is some text.[^1]. Other text.[^footnote]. +This is some text![^1]. Other text.[^footnote]. Here's a thing[^other-note]. @@ -609,7 +609,7 @@ Hi! [^unused]: This is unused. . -<p>This is some text.<sup class="footnote-ref"><a href="#fn1" id="fnref1">[1]</a></sup>. Other text.<sup class="footnote-ref"><a href="#fn2" id="fnref2">[2]</a></sup>.</p> +<p>This is some text!<sup class="footnote-ref"><a href="#fn1" id="fnref1">[1]</a></sup>. 
Other text.<sup class="footnote-ref"><a href="#fn2" id="fnref2">[2]</a></sup>.</p> <p>Here's a thing<sup class="footnote-ref"><a href="#fn3" id="fnref3">[3]</a></sup>.</p> <p>And another thing<sup class="footnote-ref"><a href="#fn4" id="fnref4">[4]</a></sup>.</p> <p>This doesn't have a referent[^nope].</p> From 0c6d1c4baa5b4152717b5b4d72463ec1d1910bf4 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Tue, 12 Dec 2017 11:32:36 +1100 Subject: [PATCH 101/218] ASCII clean source --- api_test/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api_test/main.c b/api_test/main.c index d1e18dcf9..d27e7cfad 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1015,7 +1015,7 @@ static void source_pos(test_batch_runner *runner) { " <text sourcepos=\"1:13-1:13\">.</text>\n" " </heading>\n" " <paragraph sourcepos=\"3:1-4:42\">\n" - " <text sourcepos=\"3:1-3:14\">Hello “ </text>\n" + " <text sourcepos=\"3:1-3:14\">Hello \xe2\x80\x9c </text>\n" " <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n" " <text sourcepos=\"3:16-3:36\">http://www.google.com</text>\n" " </link>\n" From e97b324e5ec61e1615fd7e462dd65a6abfb4166b Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 8 Jan 2018 08:04:36 +1100 Subject: [PATCH 102/218] Add -lcmark-gfmextensions to libcmark-gfm.pc.in --- src/libcmark-gfm.pc.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libcmark-gfm.pc.in b/src/libcmark-gfm.pc.in index 6bf2cd974..4aeca7c53 100644 --- a/src/libcmark-gfm.pc.in +++ b/src/libcmark-gfm.pc.in @@ -6,5 +6,5 @@ includedir=@CMAKE_INSTALL_PREFIX@/include Name: libcmark-gfm Description: CommonMark parsing, rendering, and manipulation with GitHub Flavored Markdown extensions Version: @PROJECT_VERSION@ -Libs: -L${libdir} -lcmark-gfm +Libs: -L${libdir} -lcmark-gfm -lcmark-gfmextensions Cflags: -I${includedir} From 6df666d71c16a34a3c415a3e19af0cc77500ae07 Mon Sep 17 00:00:00 2001 From: Ashe 
Connor <ashe@kivikakk.ee> Date: Mon, 8 Jan 2018 09:08:57 +1100 Subject: [PATCH 103/218] Fix extensions with static only --- extensions/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 218b39bfe..4c2a57a5e 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -64,6 +64,11 @@ if (CMARK_STATIC) VERSION ${PROJECT_VERSION}) endif(MSVC) + if (NOT CMARK_SHARED) + generate_export_header(${STATICLIBRARY} + BASE_NAME cmarkextensions) + endif() + list(APPEND CMARK_INSTALL ${STATICLIBRARY}) endif() From f0b8686bb1c50443f087be982fa406e02f9b5502 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 8 Jan 2018 09:49:30 +1100 Subject: [PATCH 104/218] Build static on Windows again Otherwise we have to aggregate DLLs for tests to work. --- Makefile.nmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile.nmake b/Makefile.nmake index ecfd4f5f5..cb35b3df7 100644 --- a/Makefile.nmake +++ b/Makefile.nmake @@ -16,6 +16,8 @@ $(BUILDDIR)/CMakeFiles: -G "$(GENERATOR)" \ -D CMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -D CMAKE_INSTALL_PREFIX=$(INSTALLDIR) \ + -D CMARK_STATIC=ON \ + -D CMARK_SHARED=OFF \ .. && \ cd .. 
From 7ef1ebcf432863fdc2702de61c34ee561ae2f093 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 8 Jan 2018 09:04:09 +1100 Subject: [PATCH 105/218] 0.28.3.gfm.12 --- CMakeLists.txt | 2 +- changelog.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 61689d8bc..5813243f1 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 3) -set(PROJECT_VERSION_GFM 11) +set(PROJECT_VERSION_GFM 12) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 7e59890f4..3d62a9bed 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,7 @@ +[0.28.3.gfm.12] + + * Various security and bug fixes. + [0.28.3] * Include GNUInstallDirs in src/CMakeLists.txt (Nick Wellnhofer, #240). From 9137851accb62e8ebf23269234b231a1eedc1367 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Thu, 25 Jan 2018 18:15:13 +1100 Subject: [PATCH 106/218] Add CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE. Closes #71. 
--- extensions/strikethrough.c | 3 ++- man/man3/cmark-gfm.3 | 15 ++++++++++++++- src/cmark.h | 5 +++++ src/main.c | 4 ++++ 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/extensions/strikethrough.c b/extensions/strikethrough.c index aa7550479..0d9418090 100644 --- a/extensions/strikethrough.c +++ b/extensions/strikethrough.c @@ -27,7 +27,8 @@ static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser, res->start_line = res->end_line = cmark_inline_parser_get_line(inline_parser); res->start_column = cmark_inline_parser_get_column(inline_parser) - delims; - if (left_flanking || right_flanking) { + if ((left_flanking || right_flanking) && + (!(parser->options & CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE) || delims == 2)) { cmark_inline_parser_push_delimiter(inline_parser, character, left_flanking, right_flanking, res); } diff --git a/man/man3/cmark-gfm.3 b/man/man3/cmark-gfm.3 index 2d85c3783..abdebe89d 100644 --- a/man/man3/cmark-gfm.3 +++ b/man/man3/cmark-gfm.3 @@ -1,4 +1,4 @@ -.TH cmark-gfm 3 "November 09, 2017" "LOCAL" "Library Functions Manual" +.TH cmark-gfm 3 "January 25, 2018" "LOCAL" "Library Functions Manual" .SH NAME .PP @@ -957,6 +957,19 @@ Be liberal in interpreting inline HTML tags. .PP Parse footnotes. +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE (1 << 14) +.RE +\f[] +.fi + +.PP +Only parse strikethroughs if surrounded by exactly 2 tildes. Gives some +compatibility with redcarpet. + .SS Version information diff --git a/src/cmark.h b/src/cmark.h index ba2febcc6..cf21b42a7 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -724,6 +724,11 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar */ #define CMARK_OPT_FOOTNOTES (1 << 13) +/** Only parse strikethroughs if surrounded by exactly 2 tildes. + * Gives some compatibility with redcarpet. 
+ */ +#define CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE (1 << 14) + /** * ## Version information */ diff --git a/src/main.c b/src/main.c index 8d972d738..c29616e06 100644 --- a/src/main.c +++ b/src/main.c @@ -44,6 +44,8 @@ void print_usage() { printf(" --footnotes Parse footnotes\n"); printf(" --extension, -e EXTENSION_NAME Specify an extension name to use\n"); printf(" --list-extensions List available extensions and quit\n"); + printf(" --strikethrough-double-tilde Only parse strikethrough (if enabled)\n"); + printf(" with two tildes\n"); printf(" --help, -h Print usage information\n"); printf(" --version Print version\n"); } @@ -129,6 +131,8 @@ int main(int argc, char *argv[]) { } else if (strcmp(argv[i], "--list-extensions") == 0) { print_extensions(); goto success; + } else if (strcmp(argv[i], "--strikethrough-double-tilde") == 0) { + options |= CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE; } else if (strcmp(argv[i], "--sourcepos") == 0) { options |= CMARK_OPT_SOURCEPOS; } else if (strcmp(argv[i], "--hardbreaks") == 0) { From 7ddfb10caf9634395b0cca6733e6319ba4fbf29c Mon Sep 17 00:00:00 2001 From: FUJI Goro <gfuji@cpan.org> Date: Tue, 20 Feb 2018 13:15:44 +0900 Subject: [PATCH 107/218] add CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES (#86) (amend https://github.com/github/cmark/pull/85) --- extensions/table.c | 18 +++++++++++++++--- man/man3/cmark-gfm.3 | 14 +++++++++++++- src/cmark.h | 4 ++++ src/main.c | 4 ++++ 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/extensions/table.c b/extensions/table.c index 3b7ae828f..add277166 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -542,6 +542,18 @@ static void man_render(cmark_syntax_extension *extension, } } +static void html_table_add_align(cmark_strbuf* html, const char* align, int options) { + if (options & CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES) { + cmark_strbuf_puts(html, " style=\"text-align: "); + cmark_strbuf_puts(html, align); + cmark_strbuf_puts(html, "\""); + } else { + cmark_strbuf_puts(html, " 
align=\""); + cmark_strbuf_puts(html, align); + cmark_strbuf_puts(html, "\""); + } +} + struct html_table_state { unsigned need_closing_table_body : 1; unsigned in_table_header : 1; @@ -611,9 +623,9 @@ static void html_render(cmark_syntax_extension *extension, break; switch (alignments[i]) { - case 'l': cmark_strbuf_puts(html, " align=\"left\""); break; - case 'c': cmark_strbuf_puts(html, " align=\"center\""); break; - case 'r': cmark_strbuf_puts(html, " align=\"right\""); break; + case 'l': html_table_add_align(html, "left", options); break; + case 'c': html_table_add_align(html, "center", options); break; + case 'r': html_table_add_align(html, "right", options); break; } cmark_html_render_sourcepos(node, html, options); diff --git a/man/man3/cmark-gfm.3 b/man/man3/cmark-gfm.3 index abdebe89d..772c26544 100644 --- a/man/man3/cmark-gfm.3 +++ b/man/man3/cmark-gfm.3 @@ -1,4 +1,4 @@ -.TH cmark-gfm 3 "January 25, 2018" "LOCAL" "Library Functions Manual" +.TH cmark-gfm 3 "February 20, 2018" "LOCAL" "Library Functions Manual" .SH NAME .PP @@ -970,6 +970,18 @@ Parse footnotes. Only parse strikethroughs if surrounded by exactly 2 tildes. Gives some compatibility with redcarpet. +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES (1 << 15) +.RE +\f[] +.fi + +.PP +Use style attributes to align table cells instead of align attributes. + .SS Version information diff --git a/src/cmark.h b/src/cmark.h index cf21b42a7..6526c60e6 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -729,6 +729,10 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar */ #define CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE (1 << 14) +/** Use style attributes to align table cells instead of align attributes. 
+ */ +#define CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES (1 << 15) + /** * ## Version information */ diff --git a/src/main.c b/src/main.c index c29616e06..cedeca6a8 100644 --- a/src/main.c +++ b/src/main.c @@ -46,6 +46,8 @@ void print_usage() { printf(" --list-extensions List available extensions and quit\n"); printf(" --strikethrough-double-tilde Only parse strikethrough (if enabled)\n"); printf(" with two tildes\n"); + printf(" --table-prefer-style-attributes Use style attributes to align table cells\n" + " instead of align attributes.\n"); printf(" --help, -h Print usage information\n"); printf(" --version Print version\n"); } @@ -131,6 +133,8 @@ int main(int argc, char *argv[]) { } else if (strcmp(argv[i], "--list-extensions") == 0) { print_extensions(); goto success; + } else if (strcmp(argv[i], "--table-prefer-style-attributes") == 0) { + options |= CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES; } else if (strcmp(argv[i], "--strikethrough-double-tilde") == 0) { options |= CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE; } else if (strcmp(argv[i], "--sourcepos") == 0) { From b6098ab6752a30377b4ad89b68aaf965b3b0f17a Mon Sep 17 00:00:00 2001 From: FUJI Goro <gfuji@cpan.org> Date: Tue, 20 Feb 2018 16:29:13 +0900 Subject: [PATCH 108/218] add tests for --table-prefer-style-attributes (#87) --- test/CMakeLists.txt | 8 +++++ ...tensions-table-prefer-style-attributes.txt | 36 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 test/extensions-table-prefer-style-attributes.txt diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d5efeadf1..c3cac145c 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -81,6 +81,14 @@ IF (PYTHONINTERP_FOUND) "--extensions" "table strikethrough autolink tagfilter" ) + add_test(option_table_prefer_style_attributes + ${PYTHON_EXECUTABLE} + "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" + "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions-table-prefer-style-attributes.txt" + "--program" 
"${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --footnotes --table-prefer-style-attributes" + "--extensions" "table strikethrough autolink tagfilter" + ) + add_test(regressiontest_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" diff --git a/test/extensions-table-prefer-style-attributes.txt b/test/extensions-table-prefer-style-attributes.txt new file mode 100644 index 000000000..09cb7299a --- /dev/null +++ b/test/extensions-table-prefer-style-attributes.txt @@ -0,0 +1,36 @@ +--- +title: Extensions test with --table-prefer-style-attributes +author: FUJI Goro +version: 0.1 +date: '2018-02-20' +license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' +... + +## Tables + +Table alignment: + +```````````````````````````````` example +aaa | bbb | ccc | ddd | eee +:-- | --- | :-: | --- | --: +fff | ggg | hhh | iii | jjj +. +<table> +<thead> +<tr> +<th style="text-align: left">aaa</th> +<th>bbb</th> +<th style="text-align: center">ccc</th> +<th>ddd</th> +<th style="text-align: right">eee</th> +</tr> +</thead> +<tbody> +<tr> +<td style="text-align: left">fff</td> +<td>ggg</td> +<td style="text-align: center">hhh</td> +<td>iii</td> +<td style="text-align: right">jjj</td> +</tr></tbody></table> +```````````````````````````````` From bf28ef6e7e949157ab85a5e4a3ea9107e17497b9 Mon Sep 17 00:00:00 2001 From: Michael Camilleri <mike@inqk.net> Date: Tue, 13 Mar 2018 07:30:34 +0900 Subject: [PATCH 109/218] Remove square brackets when rendering HTML for footnotes (#90) --- src/html.c | 4 ++-- test/extensions.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/html.c b/src/html.c index 136f9e090..e2718c895 100644 --- a/src/html.c +++ b/src/html.c @@ -406,9 +406,9 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); cmark_strbuf_puts(html, "\" id=\"fnref"); cmark_strbuf_put(html, 
node->as.literal.data, node->as.literal.len); - cmark_strbuf_puts(html, "\">["); + cmark_strbuf_puts(html, "\">"); cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); - cmark_strbuf_puts(html, "]</a></sup>"); + cmark_strbuf_puts(html, "</a></sup>"); } break; diff --git a/test/extensions.txt b/test/extensions.txt index 3ad67d0ad..6c8baefa1 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -609,9 +609,9 @@ Hi! [^unused]: This is unused. . -<p>This is some text!<sup class="footnote-ref"><a href="#fn1" id="fnref1">[1]</a></sup>. Other text.<sup class="footnote-ref"><a href="#fn2" id="fnref2">[2]</a></sup>.</p> -<p>Here's a thing<sup class="footnote-ref"><a href="#fn3" id="fnref3">[3]</a></sup>.</p> -<p>And another thing<sup class="footnote-ref"><a href="#fn4" id="fnref4">[4]</a></sup>.</p> +<p>This is some text!<sup class="footnote-ref"><a href="#fn1" id="fnref1">1</a></sup>. Other text.<sup class="footnote-ref"><a href="#fn2" id="fnref2">2</a></sup>.</p> +<p>Here's a thing<sup class="footnote-ref"><a href="#fn3" id="fnref3">3</a></sup>.</p> +<p>And another thing<sup class="footnote-ref"><a href="#fn4" id="fnref4">4</a></sup>.</p> <p>This doesn't have a referent[^nope].</p> <p>Hi!</p> <section class="footnotes"> From b7a3b7494ca0a39b80edd88a88d186ebbc48670e Mon Sep 17 00:00:00 2001 From: Phil Turnbull <philipturnbull@github.com> Date: Wed, 20 Jun 2018 02:13:54 -0400 Subject: [PATCH 110/218] Handle deeply nested lists (#95) * Optimize S_find_first_nonspace. We were needlessly redoing things we'd already done. Now we skip the work if the first nonspace is greater than the current offset. This fixes pathological slowdown with deeply nested lists (#255). For N = 3000, the time goes from over 17s to about 0.7s. Thanks to @mity for diagnosing the problem. * pathological_tests.py: added test for deeply nested lists. * pathological_tests.py: make tests run faster. 
- commented out the (already ignored) "many references" test, which times out - reduced the iterations for a couple other tests --- src/blocks.c | 33 +++++++++++++++++++-------------- test/pathological_tests.py | 19 +++++++++++-------- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index 68d514fc6..377cc0e64 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -755,22 +755,24 @@ static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) { char c; int chars_to_tab = TAB_STOP - (parser->column % TAB_STOP); - parser->first_nonspace = parser->offset; - parser->first_nonspace_column = parser->column; - while ((c = peek_at(input, parser->first_nonspace))) { - if (c == ' ') { - parser->first_nonspace += 1; - parser->first_nonspace_column += 1; - chars_to_tab = chars_to_tab - 1; - if (chars_to_tab == 0) { + if (parser->first_nonspace <= parser->offset) { + parser->first_nonspace = parser->offset; + parser->first_nonspace_column = parser->column; + while ((c = peek_at(input, parser->first_nonspace))) { + if (c == ' ') { + parser->first_nonspace += 1; + parser->first_nonspace_column += 1; + chars_to_tab = chars_to_tab - 1; + if (chars_to_tab == 0) { + chars_to_tab = TAB_STOP; + } + } else if (c == '\t') { + parser->first_nonspace += 1; + parser->first_nonspace_column += chars_to_tab; chars_to_tab = TAB_STOP; + } else { + break; } - } else if (c == '\t') { - parser->first_nonspace += 1; - parser->first_nonspace_column += chars_to_tab; - chars_to_tab = TAB_STOP; - } else { - break; } } @@ -1373,6 +1375,9 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, parser->offset = 0; parser->column = 0; + parser->first_nonspace = 0; + parser->first_nonspace_column = 0; + parser->indent = 0; parser->blank = false; parser->partially_consumed_tab = false; diff --git a/test/pathological_tests.py b/test/pathological_tests.py index 9552b3996..ffe3bf9d6 100644 --- a/test/pathological_tests.py +++ 
b/test/pathological_tests.py @@ -69,21 +69,24 @@ def badhash(ref): "nested block quotes": ((("> " * 50000) + "a"), re.compile("(<blockquote>\n){50000}")), + "deeply nested lists": + ("".join(map(lambda x: (" " * x + "* a\n"), range(0,1000))), + re.compile("<ul>\n(<li>a\n<ul>\n){999}<li>a</li>\n</ul>\n(</li>\n</ul>\n){999}")), "U+0000 in input": ("abc\u0000de\u0000", re.compile("abc\ufffd?de\ufffd?")), "backticks": - ("".join(map(lambda x: ("e" + "`" * x), range(1,10000))), + ("".join(map(lambda x: ("e" + "`" * x), range(1,5000))), re.compile("^<p>[e`]*</p>\n$")), "unclosed links A": - ("[a](<b" * 50000, - re.compile("(\[a\]\(&lt;b){50000}")), + ("[a](<b" * 30000, + re.compile("(\[a\]\(&lt;b){30000}")), "unclosed links B": - ("[a](b" * 50000, - re.compile("(\[a\]\(b){50000}")), - "many references": - ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,50000 * 16))) + "[0] " * 50000, - re.compile("(\[0\] ){49999}")), + ("[a](b" * 30000, + re.compile("(\[a\]\(b){30000}")), +# "many references": +# ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000, +# re.compile("(\[0\] ){4999}")), "reference collisions": hash_collisions() } From 7b4a497c6b56f87604495b3b7093b8473a0b6068 Mon Sep 17 00:00:00 2001 From: Minghao Liu <molikto@gmail.com> Date: Wed, 20 Jun 2018 14:14:48 +0800 Subject: [PATCH 111/218] Expose `cmark_node_type CMARK_NODE_TABLE` etc., make XCode happy with imported headers. 
(#96) * no message * no message --- extensions/core-extensions.h | 2 +- extensions/table.h | 4 ++++ src/cmark_extension_api.h | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/extensions/core-extensions.h b/extensions/core-extensions.h index e466b1e1d..a9fd0cb28 100644 --- a/extensions/core-extensions.h +++ b/extensions/core-extensions.h @@ -5,7 +5,7 @@ extern "C" { #endif -#include <cmark_extension_api.h> +#include "cmark_extension_api.h" #include "cmarkextensions_export.h" #include <stdint.h> diff --git a/extensions/table.h b/extensions/table.h index ff630b25e..85bb1a4c7 100644 --- a/extensions/table.h +++ b/extensions/table.h @@ -3,6 +3,10 @@ #include "core-extensions.h" + +extern cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, + CMARK_NODE_TABLE_CELL; + cmark_syntax_extension *create_table_extension(void); #endif diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index 470e6294e..57ff229b4 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -5,7 +5,7 @@ extern "C" { #endif -#include <cmark.h> +#include "cmark.h" struct cmark_renderer; struct cmark_html_renderer; From 907dcdf91cd7d845423db2e9d89a8fa656aaf365 Mon Sep 17 00:00:00 2001 From: Joachim Nilsson <troglobit@gmail.com> Date: Wed, 20 Jun 2018 08:41:37 +0200 Subject: [PATCH 112/218] Debian packaging (#97) * Initial debian/ packaging, from official Debian package Signed-off-by: Joachim Nilsson <joachim.nilsson@westermo.se> * Convert build to cmark-gfm and update rules file to build static Signed-off-by: Joachim Nilsson <joachim.nilsson@westermo.se> --- debian/changelog | 11 ++++ debian/cmark-gfm.install | 2 + debian/compat | 1 + debian/control | 19 ++++++ debian/copyright | 128 +++++++++++++++++++++++++++++++++++++++ debian/rules | 14 +++++ debian/source/format | 1 + debian/source/options | 1 + debian/watch | 4 ++ 9 files changed, 181 insertions(+) create mode 100644 debian/changelog create mode 100644 debian/cmark-gfm.install create mode 
100644 debian/compat create mode 100644 debian/control create mode 100644 debian/copyright create mode 100755 debian/rules create mode 100644 debian/source/format create mode 100644 debian/source/options create mode 100644 debian/watch diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 000000000..de68ad759 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,11 @@ +cmark-gfm (0.28.3.gfm.12-wmo1) unstable; urgency=medium + + * Repackage for GitHub flavor, native GIT package + + -- Joachim Nilsson <joachim.nilsson@westermo.se> Mon, 04 Jun 2018 11:02:12 +0200 + +cmark (0.26.1-1) unstable; urgency=low + + * Initial release (closes: #833682) + + -- Peter Eisentraut <petere@debian.org> Mon, 05 Sep 2016 01:53:18 +0000 diff --git a/debian/cmark-gfm.install b/debian/cmark-gfm.install new file mode 100644 index 000000000..63c303b03 --- /dev/null +++ b/debian/cmark-gfm.install @@ -0,0 +1,2 @@ +usr/bin/ +usr/share/man/man1/ diff --git a/debian/compat b/debian/compat new file mode 100644 index 000000000..ec635144f --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +9 diff --git a/debian/control b/debian/control new file mode 100644 index 000000000..f81b2feaa --- /dev/null +++ b/debian/control @@ -0,0 +1,19 @@ +Source: cmark-gfm +Build-Depends: cmake, debhelper (>= 9) +Homepage: https://github.com/github/cmark +Maintainer: Joachim Nilsson <joachim.nilsson@westermo.se> +Priority: optional +Section: text +Standards-Version: 3.9.8 + +Package: cmark-gfm +Architecture: any +Depends: ${misc:Depends}, ${shlibs:Depends} +Description: CommonMark parsing and rendering program, GitHub flavor + cmark is the C reference implementation of CommonMark, a rationalized + version of Markdown syntax with a spec. This package provides a + command-line program (cmark) for parsing and rendering CommonMark + documents. + . + This is the GitHub flavored Markdown with extensions for org-mode style + tables, auto-linking, and more. 
diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 000000000..e2cdefb47 --- /dev/null +++ b/debian/copyright @@ -0,0 +1,128 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Contact: https://github.com/commonmark/cmark +Source: https://github.com/commonmark/cmark/releases + +Files: * +Copyright: 2014, John MacFarlane +License: BSD-2-clause + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + . + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + . + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + . + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +Files: FindAsan.cmake +Copyright: 2013, Matthew Arsenault +License: Expat + +Files: bench/statistics.py +Copyright: 2013, Steven D'Aprano <steve+python@pearwood.info> +License: Apache + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + . + http://www.apache.org/licenses/LICENSE-2.0 + . + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Files: data/CaseFolding-3.2.0.txt +Copyright: 1991-2016 Unicode, Inc. All rights reserved. +License: Unicode + Distributed under the Terms of Use in http://www.unicode.org/copyright.html. + . + Permission is hereby granted, free of charge, to any person obtaining + a copy of the Unicode data files and any associated documentation + (the "Data Files") or Unicode software and any associated documentation + (the "Software") to deal in the Data Files or Software + without restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, and/or sell copies of + the Data Files or Software, and to permit persons to whom the Data Files + or Software are furnished to do so, provided that either + (a) this copyright and permission notice appear with all copies + of the Data Files or Software, or + (b) this copyright and permission notice appear in associated + Documentation. + . + THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF + ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS + NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL + DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THE DATA FILES OR SOFTWARE. + . + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, + use or other dealings in these Data Files or Software without prior + written authorization of the copyright holder. +Comment: + The original file is at + <http://www.unicode.org/Public/3.2-Update/CaseFolding-3.2.0.txt>. + The license applies per + <http://www.unicode.org/copyright.html#License>. + +Files: src/houdini.h src/houdini_href_e.c src/houdini_html_e.c src/houdini_html_u.c +Copyright: 2012, Vicent Martí +License: Expat + +Files: src/buffer.h src/buffer.c src/chunk.h +Copyright: 2012, Github, Inc. +License: Expat + +Files: src/utf8.c src/utf8.c +Copyright: 2009, Public Software Group e. V., Berlin, Germany +License: Expat + +Files: test/normalize.py +Copyright: 2013, Karl Dubost +License: Expat + +Files: test/spec.txt +Copyright: 2014-15, John MacFarlane +License: CC-BY-SA + Released under the Creative Commons CC-BY-SA 4.0 license: + <http://creativecommons.org/licenses/by-sa/4.0/>. + +License: Expat + Permission is hereby granted, free of charge, to any person obtaining a copy of + this software and associated documentation files (the "Software"), to deal in + the Software without restriction, including without limitation the rights to + use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is furnished to do + so, subject to the following conditions: + . 
+ The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + . + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. diff --git a/debian/rules b/debian/rules new file mode 100755 index 000000000..ee0a94eaf --- /dev/null +++ b/debian/rules @@ -0,0 +1,14 @@ +#!/usr/bin/make -f +#export DH_VERBOSE=1 + +%: + dh $@ --buildsystem=cmake --parallel + +override_dh_auto_configure: + dh_auto_configure -- -DCMARK_SHARED=OFF -DCMAKE_INSTALL_PREFIX=/usr + +override_dh_auto_install: + dh_auto_install --destdir=debian/tmp + +override_dh_strip: + dh_strip --no-automatic-dbgsym diff --git a/debian/source/format b/debian/source/format new file mode 100644 index 000000000..89ae9db8f --- /dev/null +++ b/debian/source/format @@ -0,0 +1 @@ +3.0 (native) diff --git a/debian/source/options b/debian/source/options new file mode 100644 index 000000000..ec7126d16 --- /dev/null +++ b/debian/source/options @@ -0,0 +1 @@ +extend-diff-ignore = __pycache__ diff --git a/debian/watch b/debian/watch new file mode 100644 index 000000000..db876cbfb --- /dev/null +++ b/debian/watch @@ -0,0 +1,4 @@ +version=4 +opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%cmark-$1.tar.gz%" \ + https://github.com/github/cmark/releases \ + (?:.*?/)?v?(\d[\d.]*)\.tar\.gz debian uupdate From 02bc37d08878ca9139814ec5ad4e5a02a9c0ec8a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 11 Jun 2018 11:10:13 -0700 Subject: [PATCH 113/218] Removed meta from list of block tags. Added regression test. 
See commonmark/CommonMark#527. --- src/scanners.c | 27884 ++++++++++++++++-------------------------- src/scanners.re | 2 +- test/regression.txt | 15 +- 3 files changed, 10565 insertions(+), 17336 deletions(-) diff --git a/src/scanners.c b/src/scanners.c index 4a3e5eacd..5a4264826 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -1,17478 +1,10694 @@ -/* Generated by re2c 0.15.3 */ +/* Generated by re2c 1.0.3 */ #include <stdlib.h> #include "chunk.h" #include "scanners.h" -bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, - bufsize_t offset) { - bufsize_t res; - unsigned char *ptr = (unsigned char *)c->data; +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) +{ + bufsize_t res; + unsigned char *ptr = (unsigned char *)c->data; - if (ptr == NULL || offset > c->len) { - return 0; - } else { - unsigned char lim = ptr[c->len]; + if (ptr == NULL || offset > c->len) { + return 0; + } else { + unsigned char lim = ptr[c->len]; - ptr[c->len] = '\0'; - res = scanner(ptr + offset); - ptr[c->len] = lim; - } + ptr[c->len] = '\0'; + res = scanner(ptr + offset); + ptr[c->len] = lim; + } - return res; + return res; } + + // Try to match a scheme including colon. 
-bufsize_t _scan_scheme(const unsigned char *p) { +bufsize_t _scan_scheme(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - yych = *p; - if (yych <= '@') - goto yy2; - if (yych <= 'Z') - goto yy4; - if (yych <= '`') - goto yy2; - if (yych <= 'z') - goto yy4; - yy2: - ++p; - yy3 : { return 0; } - yy4: - yych = *(marker = ++p); - if (yych <= '/') { - if (yych <= '+') { - if (yych <= '*') - goto yy3; - } else { - if (yych <= ',') - goto yy3; - if (yych >= '/') - goto yy3; - } - } else { - if (yych <= 'Z') { - if (yych <= '9') - goto yy5; - if (yych <= '@') - goto yy3; - } else { - if (yych <= '`') - goto yy3; - if (yych >= '{') - goto yy3; - } - } - yy5: - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych == '+') - goto yy9; - } else { - if (yych != '/') - goto yy9; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych >= 'A') - goto yy9; - } else { - if (yych <= '`') - goto yy6; - if (yych <= 'z') - goto yy9; - } - } - yy6: - p = marker; - goto yy3; - yy7: - ++p; - { return (bufsize_t)(p - start); } - yy9: - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if 
(yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - 
goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto 
yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - 
if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if (yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy6; - } else { - if (yych == '/') - goto yy6; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy7; - if 
(yych <= '@') - goto yy6; - } else { - if (yych <= '`') - goto yy6; - if (yych >= '{') - goto yy6; - } - } - ++p; - if ((yych = *p) == ':') - goto yy7; - goto yy6; - } +{ + unsigned char yych; + yych = *p; + if (yych <= '@') goto yy2; + if (yych <= 'Z') goto yy4; + if (yych <= '`') goto yy2; + if (yych <= 'z') goto yy4; +yy2: + ++p; +yy3: + { return 0; } +yy4: + yych = *(marker = ++p); + if (yych <= '/') { + if (yych <= '+') { + if (yych <= '*') goto yy3; + } else { + if (yych <= ',') goto yy3; + if (yych >= '/') goto yy3; + } + } else { + if (yych <= 'Z') { + if (yych <= '9') goto yy5; + if (yych <= '@') goto yy3; + } else { + if (yych <= '`') goto yy3; + if (yych >= '{') goto yy3; + } + } +yy5: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych == '+') goto yy7; + } else { + if (yych != '/') goto yy7; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych >= 'A') goto yy7; + } else { + if (yych <= '`') goto yy6; + if (yych <= 'z') goto yy7; + } + } +yy6: + p = marker; + goto yy3; +yy7: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych == '+') goto yy10; + goto yy6; + } else { + if (yych == '/') goto yy6; + goto yy10; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + goto yy10; + } else { + if (yych <= '`') goto yy6; + if (yych <= 'z') goto yy10; + goto yy6; + } + } +yy8: + ++p; + { return (bufsize_t)(p - start); } +yy10: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto 
yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= 
':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { 
+ if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + 
if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy6; + } else { + if (yych == '/') goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy8; + if (yych <= '@') goto yy6; + } else { + if (yych <= '`') goto yy6; + if (yych >= '{') goto yy6; + } + } + yych = *++p; + if (yych == ':') goto yy8; + goto yy6; +} + } // Try to match URI autolink after first <, returning number of chars matched. 
-bufsize_t _scan_autolink_uri(const unsigned char *p) { +bufsize_t _scan_autolink_uri(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - static const unsigned char yybm[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 0, 128, 0, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *p; - if (yych <= '@') - goto yy41; - if (yych <= 'Z') - goto yy43; - if (yych <= '`') - goto yy41; - if (yych <= 'z') - goto yy43; - yy41: - ++p; - yy42 : { return 0; } - yy43: - yych = *(marker = ++p); - if (yych <= '/') { - if (yych <= '+') { - if (yych <= '*') - goto yy42; - } else { - if (yych <= ',') - goto yy42; - if (yych >= '/') - goto yy42; - } - } else { - if (yych <= 'Z') { - if (yych <= '9') - goto yy44; - if (yych <= '@') - goto yy42; - } else { - if (yych <= '`') - goto yy42; - if (yych >= '{') - goto yy42; - } - } - yy44: - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych == '+') - goto yy48; - } else { - if (yych != '/') - goto yy48; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - 
if (yych >= 'A') - goto yy48; - } else { - if (yych <= '`') - goto yy45; - if (yych <= 'z') - goto yy48; - } - } - yy45: - p = marker; - goto yy42; - yy46: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy46; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '<') - goto yy45; - if (yych <= '>') - goto yy85; - goto yy45; - } else { - if (yych <= 0xDF) - goto yy78; - if (yych <= 0xE0) - goto yy79; - goto yy80; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy84; - if (yych <= 0xEF) - goto yy80; - goto yy81; - } else { - if (yych <= 0xF3) - goto yy82; - if (yych <= 0xF4) - goto yy83; - goto yy45; - } - } - yy48: - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto 
yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - 
goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - 
goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - 
} else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych <= '9') { - if (yych <= ',') { - if (yych != '+') - goto yy45; - } else { - if (yych == '/') - goto yy45; - } - } else { - if (yych <= 'Z') { - if (yych <= ':') - goto yy46; - if (yych <= '@') - goto yy45; - } else { - if (yych <= '`') - goto yy45; - if (yych >= '{') - goto yy45; - } - } - yych = *++p; - if (yych == ':') - goto yy46; - goto yy45; - yy78: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy45; - if (yych <= 0xBF) - goto yy46; - goto yy45; - yy79: - ++p; - yych = *p; - if 
(yych <= 0x9F) - goto yy45; - if (yych <= 0xBF) - goto yy78; - goto yy45; - yy80: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy45; - if (yych <= 0xBF) - goto yy78; - goto yy45; - yy81: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy45; - if (yych <= 0xBF) - goto yy80; - goto yy45; - yy82: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy45; - if (yych <= 0xBF) - goto yy80; - goto yy45; - yy83: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy45; - if (yych <= 0x8F) - goto yy80; - goto yy45; - yy84: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy45; - if (yych <= 0x9F) - goto yy78; - goto yy45; - yy85: - ++p; - { return (bufsize_t)(p - start); } - } +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 128, 0, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= '@') goto yy41; + if (yych <= 'Z') goto yy43; + if (yych <= '`') goto yy41; + if (yych <= 'z') goto yy43; +yy41: + ++p; +yy42: + { return 0; } +yy43: + yych = *(marker = ++p); + if (yych <= '/') { + if (yych <= '+') { + 
if (yych <= '*') goto yy42; + } else { + if (yych <= ',') goto yy42; + if (yych >= '/') goto yy42; + } + } else { + if (yych <= 'Z') { + if (yych <= '9') goto yy44; + if (yych <= '@') goto yy42; + } else { + if (yych <= '`') goto yy42; + if (yych >= '{') goto yy42; + } + } +yy44: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych == '+') goto yy46; + } else { + if (yych != '/') goto yy46; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych >= 'A') goto yy46; + } else { + if (yych <= '`') goto yy45; + if (yych <= 'z') goto yy46; + } + } +yy45: + p = marker; + goto yy42; +yy46: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych == '+') goto yy49; + goto yy45; + } else { + if (yych == '/') goto yy45; + goto yy49; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + goto yy49; + } else { + if (yych <= '`') goto yy45; + if (yych <= 'z') goto yy49; + goto yy45; + } + } +yy47: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy47; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '<') goto yy45; + if (yych <= '>') goto yy50; + goto yy45; + } else { + if (yych <= 0xDF) goto yy52; + if (yych <= 0xE0) goto yy53; + goto yy54; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy55; + if (yych <= 0xEF) goto yy54; + goto yy56; + } else { + if (yych <= 0xF3) goto yy57; + if (yych <= 0xF4) goto yy58; + goto yy45; + } + } +yy49: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych == '+') goto yy59; + goto yy45; + } else { + if (yych == '/') goto yy45; + goto yy59; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + goto yy59; + } else { + if (yych <= '`') goto yy45; + if (yych <= 'z') goto yy59; + goto yy45; + } + } +yy50: + ++p; + { return (bufsize_t)(p - start); } +yy52: + yych = *++p; + if (yych <= 0x7F) goto yy45; + if (yych <= 0xBF) goto yy47; + goto yy45; +yy53: + yych = *++p; + if (yych 
<= 0x9F) goto yy45; + if (yych <= 0xBF) goto yy52; + goto yy45; +yy54: + yych = *++p; + if (yych <= 0x7F) goto yy45; + if (yych <= 0xBF) goto yy52; + goto yy45; +yy55: + yych = *++p; + if (yych <= 0x7F) goto yy45; + if (yych <= 0x9F) goto yy52; + goto yy45; +yy56: + yych = *++p; + if (yych <= 0x8F) goto yy45; + if (yych <= 0xBF) goto yy54; + goto yy45; +yy57: + yych = *++p; + if (yych <= 0x7F) goto yy45; + if (yych <= 0xBF) goto yy54; + goto yy45; +yy58: + yych = *++p; + if (yych <= 0x7F) goto yy45; + if (yych <= 0x8F) goto yy54; + goto yy45; +yy59: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } 
else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if 
(yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') 
goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') 
{ + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') goto yy45; + } else { + if (yych == '/') goto yy45; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') goto yy47; + if (yych <= '@') goto yy45; + } else { + if (yych <= '`') goto yy45; + if (yych >= '{') goto yy45; + } + } + yych = *++p; + if (yych == ':') goto yy47; + goto yy45; +} + } // Try to match email autolink after first <, returning num of chars matched. -bufsize_t _scan_autolink_email(const unsigned char *p) { +bufsize_t _scan_autolink_email(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - static const unsigned char yybm[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 128, 0, 128, 128, 128, 128, 128, 0, 0, - 128, 128, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 0, 0, 0, 128, 0, 128, 0, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *p; - if (yych <= '9') { - if (yych <= '\'') { - if (yych == '!') - goto yy91; - if (yych >= '#') - goto yy91; - } else { - if (yych <= ')') - goto yy89; - if (yych != ',') - goto yy91; - } - } else { - if (yych <= '?') { - if (yych == '=') - goto yy91; - if (yych >= '?') - goto yy91; - } else { - if (yych <= 'Z') { - if (yych >= 'A') - goto yy91; - } else { - if (yych <= ']') - goto yy89; - if (yych <= '~') - goto yy91; - } - } - } - yy89: - ++p; - yy90 : { return 0; } - yy91: - yych = *(marker = ++p); - if (yych <= ',') { - if (yych <= '"') { - if (yych == '!') - goto yy95; - goto yy90; - } else { - if (yych <= '\'') - goto yy95; - if (yych <= ')') - goto yy90; - if (yych <= '+') - goto yy95; - goto yy90; - } - } else { - if (yych <= '>') { - if (yych <= '9') - goto yy95; - if (yych == '=') - goto yy95; - goto yy90; - } else { - if (yych <= 'Z') - goto yy95; - if (yych <= ']') - goto yy90; - if (yych <= '~') - goto yy95; - goto yy90; - } - } - yy92: - yych = *++p; - if (yych <= '@') { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy96; - } else { - if (yych <= 'Z') - goto yy96; - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy96; - } - yy93: - p = marker; - goto yy90; - yy94: - ++p; - yych = *p; - yy95: - if (yybm[0 + yych] & 128) { - goto yy94; - } - if (yych <= '>') - goto yy93; - if (yych <= '@') - goto yy92; - goto yy93; - yy96: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy98; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych >= ':') - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych 
<= '-') - goto yy227; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy226; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy226; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy226; - goto yy93; - } - } - yy98: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy227; - if (yych <= '/') - goto yy93; - goto yy226; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - goto yy226; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy226; - goto yy93; - } - } - yy99: - ++p; - yych = *p; - if (yych <= '@') { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy102; - goto yy93; - } else { - if (yych <= 'Z') - goto yy102; - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy102; - goto yy93; - } - yy100: - ++p; - { return (bufsize_t)(p - start); } - yy102: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy104; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych >= ':') - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy106; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy105; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy105; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy105; - goto yy93; - } - } - yy104: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy106; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - 
goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy105: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy108; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy107; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy107; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy107; - goto yy93; - } - } - yy106: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy108; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy107: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy110; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy109; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy109; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy109; - goto yy93; - } - } - yy108: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy110; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy109: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy112; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy111; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy111; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy111; - goto yy93; - } - } - yy110: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - 
goto yy112; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy111: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy114; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy113; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy113; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy113; - goto yy93; - } - } - yy112: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy114; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy113: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy116; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy115; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy115; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy115; - goto yy93; - } - } - yy114: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy116; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy115: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy118; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy117; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy117; - } else { - if (yych <= '`') - 
goto yy93; - if (yych <= 'z') - goto yy117; - goto yy93; - } - } - yy116: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy118; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy117: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy120; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy119; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy119; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy119; - goto yy93; - } - } - yy118: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy120; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy119: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy122; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy121; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy121; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy121; - goto yy93; - } - } - yy120: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy122; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy121: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy124; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy123; - goto yy93; - } - } 
else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy123; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy123; - goto yy93; - } - } - yy122: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy124; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy123: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy126; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy125; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy125; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy125; - goto yy93; - } - } - yy124: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy126; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy125: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy128; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy127; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy127; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy127; - goto yy93; - } - } - yy126: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy128; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy127: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if 
(yych <= '-') - goto yy130; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy129; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy129; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy129; - goto yy93; - } - } - yy128: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy130; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy129: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy132; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy131; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy131; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy131; - goto yy93; - } - } - yy130: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy132; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy131: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy134; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy133; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy133; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy133; - goto yy93; - } - } - yy132: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy134; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych 
>= '{') - goto yy93; - } - } - yy133: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy136; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy135; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy135; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy135; - goto yy93; - } - } - yy134: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy136; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy135: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy138; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy137; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy137; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy137; - goto yy93; - } - } - yy136: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy138; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy137: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy140; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy139; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy139; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy139; - goto yy93; - } - } - yy138: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy140; - if (yych <= 
'/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy139: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy142; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy141; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy141; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy141; - goto yy93; - } - } - yy140: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy142; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy141: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy144; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy143; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy143; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy143; - goto yy93; - } - } - yy142: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy144; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy143: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy146; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy145; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy145; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 
'z') - goto yy145; - goto yy93; - } - } - yy144: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy146; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy145: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy148; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy147; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy147; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy147; - goto yy93; - } - } - yy146: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy148; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy147: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy150; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy149; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy149; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy149; - goto yy93; - } - } - yy148: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy150; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy149: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy152; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy151; - goto yy93; - } - } else { - if (yych <= 'Z') { 
- if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy151; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy151; - goto yy93; - } - } - yy150: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy152; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy151: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy154; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy153; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy153; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy153; - goto yy93; - } - } - yy152: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy154; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy153: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy156; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy155; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy155; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy155; - goto yy93; - } - } - yy154: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy156; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy155: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy158; - 
goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy157; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy157; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy157; - goto yy93; - } - } - yy156: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy158; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy157: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy160; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy159; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy159; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy159; - goto yy93; - } - } - yy158: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy160; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy159: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy162; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy161; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy161; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy161; - goto yy93; - } - } - yy160: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy162; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - 
yy161: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy164; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy163; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy163; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy163; - goto yy93; - } - } - yy162: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy164; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy163: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy166; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy165; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy165; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy165; - goto yy93; - } - } - yy164: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy166; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy165: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy168; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy167; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy167; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy167; - goto yy93; - } - } - yy166: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy168; - if (yych <= '/') - goto yy93; - } else { - 
if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy167: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy170; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy169; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy169; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy169; - goto yy93; - } - } - yy168: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy170; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy169: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy172; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy171; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy171; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy171; - goto yy93; - } - } - yy170: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy172; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy171: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy174; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy173; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy173; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy173; - goto yy93; 
- } - } - yy172: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy174; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy173: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy176; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy175; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy175; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy175; - goto yy93; - } - } - yy174: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy176; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy175: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy178; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy177; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy177; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy177; - goto yy93; - } - } - yy176: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy178; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy177: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy180; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy179; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto 
yy100; - if (yych <= '@') - goto yy93; - goto yy179; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy179; - goto yy93; - } - } - yy178: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy180; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy179: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy182; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy181; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy181; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy181; - goto yy93; - } - } - yy180: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy182; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy181: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy184; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy183; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy183; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy183; - goto yy93; - } - } - yy182: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy184; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy183: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy186; - goto yy99; - } else { - 
if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy185; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy185; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy185; - goto yy93; - } - } - yy184: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy186; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy185: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy188; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy187; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy187; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy187; - goto yy93; - } - } - yy186: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy188; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy187: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy190; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy189; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy189; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy189; - goto yy93; - } - } - yy188: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy190; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy189: - ++p; - yych = 
*p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy192; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy191; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy191; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy191; - goto yy93; - } - } - yy190: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy192; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy191: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy194; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy193; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy193; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy193; - goto yy93; - } - } - yy192: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy194; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy193: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy196; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy195; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy195; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy195; - goto yy93; - } - } - yy194: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy196; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - 
if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy195: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy198; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy197; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy197; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy197; - goto yy93; - } - } - yy196: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy198; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy197: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy200; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy199; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy199; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy199; - goto yy93; - } - } - yy198: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy200; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy199: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy202; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy201; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy201; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy201; - goto yy93; - } - } - yy200: - 
++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy202; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy201: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy204; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy203; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy203; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy203; - goto yy93; - } - } - yy202: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy204; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy203: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy206; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy205; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy205; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy205; - goto yy93; - } - } - yy204: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy206; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy205: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy208; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy207; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= 
'@') - goto yy93; - goto yy207; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy207; - goto yy93; - } - } - yy206: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy208; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy207: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy210; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy209; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy209; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy209; - goto yy93; - } - } - yy208: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy210; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy209: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy212; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy211; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy211; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy211; - goto yy93; - } - } - yy210: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy212; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy211: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy214; - goto yy99; - } else { - if (yych <= '/') - 
goto yy93; - if (yych <= '9') - goto yy213; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy213; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy213; - goto yy93; - } - } - yy212: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy214; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy213: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy216; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy215; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy215; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy215; - goto yy93; - } - } - yy214: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy216; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy215: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy218; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy217; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy217; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy217; - goto yy93; - } - } - yy216: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy218; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy217: - ++p; - yych = *p; - if (yych <= 
'=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy220; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy219; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy219; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy219; - goto yy93; - } - } - yy218: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy220; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy219: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy222; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy221; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy221; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy221; - goto yy93; - } - } - yy220: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy222; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy221: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy224; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy223; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy223; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy223; - goto yy93; - } - } - yy222: - ++p; - yych = *p; - if (yych <= '9') { - if (yych == '-') - goto yy224; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - 
goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy223: - ++p; - yych = *p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= '-') - goto yy93; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy225; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy225; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy225; - goto yy93; - } - } - yy224: - ++p; - yych = *p; - if (yych <= '@') { - if (yych <= '/') - goto yy93; - if (yych >= ':') - goto yy93; - } else { - if (yych <= 'Z') - goto yy225; - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - yy225: - ++p; - yych = *p; - if (yych == '.') - goto yy99; - if (yych == '>') - goto yy100; - goto yy93; - yy226: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy229; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy228; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy228; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy228; - goto yy93; - } - } - yy227: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy229; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy228: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy231; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy230; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy230; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy230; - goto yy93; - } - } - yy229: 
- yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy231; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy230: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy233; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy232; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy232; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy232; - goto yy93; - } - } - yy231: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy233; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy232: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy235; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy234; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy234; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy234; - goto yy93; - } - } - yy233: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy235; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy234: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy237; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy236; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy236; 
- } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy236; - goto yy93; - } - } - yy235: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy237; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy236: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy239; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy238; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy238; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy238; - goto yy93; - } - } - yy237: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy239; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy238: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy241; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy240; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy240; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy240; - goto yy93; - } - } - yy239: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy241; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy240: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy243; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy242; - goto yy93; - } - } 
else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy242; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy242; - goto yy93; - } - } - yy241: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy243; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy242: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy245; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy244; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy244; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy244; - goto yy93; - } - } - yy243: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy245; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy244: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy247; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy246; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy246; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy246; - goto yy93; - } - } - yy245: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy247; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy246: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy249; - 
goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy248; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy248; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy248; - goto yy93; - } - } - yy247: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy249; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy248: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy251; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy250; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy250; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy250; - goto yy93; - } - } - yy249: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy251; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy250: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy253; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy252; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy252; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy252; - goto yy93; - } - } - yy251: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy253; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy252: - yych = *++p; - 
if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy255; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy254; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy254; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy254; - goto yy93; - } - } - yy253: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy255; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy254: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy257; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy256; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy256; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy256; - goto yy93; - } - } - yy255: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy257; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy256: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy259; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy258; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy258; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy258; - goto yy93; - } - } - yy257: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy259; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - 
} else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy258: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy261; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy260; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy260; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy260; - goto yy93; - } - } - yy259: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy261; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy260: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy263; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy262; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy262; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy262; - goto yy93; - } - } - yy261: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy263; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy262: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy265; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy264; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy264; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy264; - goto yy93; - } - } - yy263: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - 
goto yy265; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy264: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy267; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy266; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy266; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy266; - goto yy93; - } - } - yy265: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy267; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy266: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy269; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy268; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy268; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy268; - goto yy93; - } - } - yy267: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy269; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy268: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy271; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy270; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy270; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 
'z') - goto yy270; - goto yy93; - } - } - yy269: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy271; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy270: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy273; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy272; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy272; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy272; - goto yy93; - } - } - yy271: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy273; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy272: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy275; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy274; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy274; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy274; - goto yy93; - } - } - yy273: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy275; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy274: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy277; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy276; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto 
yy100; - if (yych <= '@') - goto yy93; - goto yy276; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy276; - goto yy93; - } - } - yy275: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy277; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy276: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy279; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy278; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy278; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy278; - goto yy93; - } - } - yy277: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy279; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy278: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy281; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy280; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy280; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy280; - goto yy93; - } - } - yy279: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy281; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy280: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy283; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - 
if (yych <= '9') - goto yy282; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy282; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy282; - goto yy93; - } - } - yy281: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy283; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy282: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy285; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy284; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy284; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy284; - goto yy93; - } - } - yy283: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy285; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy284: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy287; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy286; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy286; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy286; - goto yy93; - } - } - yy285: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy287; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy286: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= 
',') - goto yy93; - if (yych <= '-') - goto yy289; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy288; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy288; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy288; - goto yy93; - } - } - yy287: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy289; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy288: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy291; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy290; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy290; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy290; - goto yy93; - } - } - yy289: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy291; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy290: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy293; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy292; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy292; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy292; - goto yy93; - } - } - yy291: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy293; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= 
'{') - goto yy93; - } - } - yy292: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy295; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy294; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy294; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy294; - goto yy93; - } - } - yy293: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy295; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy294: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy297; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy296; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy296; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy296; - goto yy93; - } - } - yy295: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy297; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy296: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy299; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy298; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy298; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy298; - goto yy93; - } - } - yy297: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy299; - if (yych <= '/') - goto yy93; - } else { - 
if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy298: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy301; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy300; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy300; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy300; - goto yy93; - } - } - yy299: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy301; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy300: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy303; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy302; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy302; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy302; - goto yy93; - } - } - yy301: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy303; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy302: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy305; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy304; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy304; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy304; - goto yy93; - } - } - yy303: - yych 
= *++p; - if (yych <= '9') { - if (yych == '-') - goto yy305; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy304: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy307; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy306; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy306; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy306; - goto yy93; - } - } - yy305: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy307; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy306: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy309; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy308; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy308; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy308; - goto yy93; - } - } - yy307: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy309; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy308: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy311; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy310; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy310; - } 
else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy310; - goto yy93; - } - } - yy309: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy311; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy310: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy313; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy312; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy312; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy312; - goto yy93; - } - } - yy311: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy313; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy312: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy315; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy314; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy314; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy314; - goto yy93; - } - } - yy313: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy315; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy314: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy317; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy316; - goto yy93; - } - } else 
{ - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy316; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy316; - goto yy93; - } - } - yy315: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy317; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy316: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy319; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy318; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy318; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy318; - goto yy93; - } - } - yy317: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy319; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy318: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy321; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy320; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy320; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy320; - goto yy93; - } - } - yy319: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy321; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy320: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy323; - goto 
yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy322; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy322; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy322; - goto yy93; - } - } - yy321: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy323; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy322: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy325; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy324; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy324; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy324; - goto yy93; - } - } - yy323: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy325; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy324: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy327; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy326; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy326; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy326; - goto yy93; - } - } - yy325: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy327; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy326: - yych = *++p; - if 
(yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy329; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy328; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy328; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy328; - goto yy93; - } - } - yy327: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy329; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy328: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy331; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy330; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy330; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy330; - goto yy93; - } - } - yy329: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy331; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy330: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy333; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy332; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy332; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy332; - goto yy93; - } - } - yy331: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy333; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } 
else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy332: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy335; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy334; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy334; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy334; - goto yy93; - } - } - yy333: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy335; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy334: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy337; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy336; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy336; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy336; - goto yy93; - } - } - yy335: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy337; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy336: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy339; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy338; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy338; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy338; - goto yy93; - } - } - yy337: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto 
yy339; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy338: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy341; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy340; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy340; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy340; - goto yy93; - } - } - yy339: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy341; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy340: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy343; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy342; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy342; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy342; - goto yy93; - } - } - yy341: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy343; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy342: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= ',') - goto yy93; - if (yych <= '-') - goto yy345; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy344; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy344; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') 
- goto yy344; - goto yy93; - } - } - yy343: - yych = *++p; - if (yych <= '9') { - if (yych == '-') - goto yy345; - if (yych <= '/') - goto yy93; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy93; - } else { - if (yych <= '`') - goto yy93; - if (yych >= '{') - goto yy93; - } - } - yy344: - yych = *++p; - if (yych <= '=') { - if (yych <= '.') { - if (yych <= '-') - goto yy93; - goto yy99; - } else { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy225; - goto yy93; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy100; - if (yych <= '@') - goto yy93; - goto yy225; - } else { - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy225; - goto yy93; - } - } - yy345: - ++p; - if ((yych = *p) <= '@') { - if (yych <= '/') - goto yy93; - if (yych <= '9') - goto yy225; - goto yy93; - } else { - if (yych <= 'Z') - goto yy225; - if (yych <= '`') - goto yy93; - if (yych <= 'z') - goto yy225; - goto yy93; - } - } +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 0, 128, 128, 128, 128, 128, + 0, 0, 128, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 128, 0, 128, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= '9') { + if (yych <= '\'') { + if (yych == '!') goto yy91; + if (yych >= '#') goto yy91; + } else { + if (yych <= ')') goto yy89; + if (yych != ',') goto yy91; + } + } else { + if (yych <= '?') { + if (yych == '=') goto yy91; + if (yych >= '?') goto yy91; + } else { + if (yych <= 'Z') { + if (yych >= 'A') goto yy91; + } else { + if (yych <= ']') goto yy89; + if (yych <= '~') goto yy91; + } + } + } +yy89: + ++p; +yy90: + { return 0; } +yy91: + yych = *(marker = ++p); + if (yych <= ',') { + if (yych <= '"') { + if (yych == '!') goto yy93; + goto yy90; + } else { + if (yych <= '\'') goto yy93; + if (yych <= ')') goto yy90; + if (yych <= '+') goto yy93; + goto yy90; + } + } else { + if (yych <= '>') { + if (yych <= '9') goto yy93; + if (yych == '=') goto yy93; + goto yy90; + } else { + if (yych <= 'Z') goto yy93; + if (yych <= ']') goto yy90; + if (yych <= '~') goto yy93; + goto yy90; + } + } +yy92: + yych = *++p; +yy93: + if (yybm[0+yych] & 128) { + goto yy92; + } + if (yych <= '>') goto yy94; + if (yych <= '@') goto yy95; +yy94: + p = marker; + goto yy90; +yy95: + yych = *++p; + if (yych <= '@') { + if (yych <= '/') goto yy94; + if (yych >= ':') goto yy94; + } else { + if (yych <= 'Z') goto yy96; + if (yych <= '`') goto yy94; + if (yych >= '{') goto yy94; + } +yy96: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy98; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy98; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy98; + goto yy94; + } + } + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy101; + if (yych <= '/') goto yy94; + goto yy102; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy102; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy102; + 
goto yy94; + } + } +yy98: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych <= '-') goto yy101; + goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy102; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy102; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy102; + goto yy94; + } + } +yy99: + ++p; + { return (bufsize_t)(p - start); } +yy101: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy103; + if (yych <= '/') goto yy94; + goto yy104; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy104; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy104; + goto yy94; + } + } +yy102: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy104; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy104; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy104; + goto yy94; + } + } +yy103: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy105; + if (yych <= '/') goto yy94; + goto yy106; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy106; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy106; + goto yy94; + } + } +yy104: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy106; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy106; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy106; + goto yy94; + } + } +yy105: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy107; + if (yych <= '/') goto 
yy94; + goto yy108; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy108; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy108; + goto yy94; + } + } +yy106: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy108; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy108; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy108; + goto yy94; + } + } +yy107: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy109; + if (yych <= '/') goto yy94; + goto yy110; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy110; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy110; + goto yy94; + } + } +yy108: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy110; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy110; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy110; + goto yy94; + } + } +yy109: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy111; + if (yych <= '/') goto yy94; + goto yy112; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy112; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy112; + goto yy94; + } + } +yy110: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy112; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy112; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy112; + goto yy94; + } 
+ } +yy111: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy113; + if (yych <= '/') goto yy94; + goto yy114; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy114; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy114; + goto yy94; + } + } +yy112: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy114; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy114; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy114; + goto yy94; + } + } +yy113: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy115; + if (yych <= '/') goto yy94; + goto yy116; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy116; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy116; + goto yy94; + } + } +yy114: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy116; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy116; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy116; + goto yy94; + } + } +yy115: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy117; + if (yych <= '/') goto yy94; + goto yy118; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy118; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy118; + goto yy94; + } + } +yy116: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy118; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; 
+ goto yy118; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy118; + goto yy94; + } + } +yy117: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy119; + if (yych <= '/') goto yy94; + goto yy120; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy120; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy120; + goto yy94; + } + } +yy118: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy120; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy120; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy120; + goto yy94; + } + } +yy119: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy121; + if (yych <= '/') goto yy94; + goto yy122; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy122; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy122; + goto yy94; + } + } +yy120: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy122; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy122; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy122; + goto yy94; + } + } +yy121: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy123; + if (yych <= '/') goto yy94; + goto yy124; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy124; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy124; + goto yy94; + } + } +yy122: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy124; + 
goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy124; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy124; + goto yy94; + } + } +yy123: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy125; + if (yych <= '/') goto yy94; + goto yy126; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy126; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy126; + goto yy94; + } + } +yy124: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy126; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy126; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy126; + goto yy94; + } + } +yy125: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy127; + if (yych <= '/') goto yy94; + goto yy128; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy128; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy128; + goto yy94; + } + } +yy126: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy128; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy128; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy128; + goto yy94; + } + } +yy127: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy129; + if (yych <= '/') goto yy94; + goto yy130; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy130; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy130; + goto yy94; + } + } +yy128: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; 
+ if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy130; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy130; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy130; + goto yy94; + } + } +yy129: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy131; + if (yych <= '/') goto yy94; + goto yy132; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy132; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy132; + goto yy94; + } + } +yy130: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy132; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy132; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy132; + goto yy94; + } + } +yy131: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy133; + if (yych <= '/') goto yy94; + goto yy134; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy134; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy134; + goto yy94; + } + } +yy132: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy134; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy134; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy134; + goto yy94; + } + } +yy133: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy135; + if (yych <= '/') goto yy94; + goto yy136; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy136; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy136; + goto yy94; 
+ } + } +yy134: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy136; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy136; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy136; + goto yy94; + } + } +yy135: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy137; + if (yych <= '/') goto yy94; + goto yy138; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy138; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy138; + goto yy94; + } + } +yy136: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy138; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy138; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy138; + goto yy94; + } + } +yy137: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy139; + if (yych <= '/') goto yy94; + goto yy140; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy140; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy140; + goto yy94; + } + } +yy138: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy140; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy140; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy140; + goto yy94; + } + } +yy139: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy141; + if (yych <= '/') goto yy94; + goto yy142; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto 
yy94; + goto yy142; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy142; + goto yy94; + } + } +yy140: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy142; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy142; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy142; + goto yy94; + } + } +yy141: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy143; + if (yych <= '/') goto yy94; + goto yy144; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy144; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy144; + goto yy94; + } + } +yy142: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy144; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy144; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy144; + goto yy94; + } + } +yy143: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy145; + if (yych <= '/') goto yy94; + goto yy146; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy146; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy146; + goto yy94; + } + } +yy144: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy146; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy146; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy146; + goto yy94; + } + } +yy145: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto 
yy147; + if (yych <= '/') goto yy94; + goto yy148; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy148; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy148; + goto yy94; + } + } +yy146: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy148; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy148; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy148; + goto yy94; + } + } +yy147: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy149; + if (yych <= '/') goto yy94; + goto yy150; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy150; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy150; + goto yy94; + } + } +yy148: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy150; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy150; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy150; + goto yy94; + } + } +yy149: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy151; + if (yych <= '/') goto yy94; + goto yy152; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy152; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy152; + goto yy94; + } + } +yy150: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy152; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy152; + } else { + if (yych <= '`') goto yy94; + if (yych <= 
'z') goto yy152; + goto yy94; + } + } +yy151: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy153; + if (yych <= '/') goto yy94; + goto yy154; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy154; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy154; + goto yy94; + } + } +yy152: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy154; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy154; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy154; + goto yy94; + } + } +yy153: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy155; + if (yych <= '/') goto yy94; + goto yy156; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy156; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy156; + goto yy94; + } + } +yy154: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy156; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy156; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy156; + goto yy94; + } + } +yy155: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy157; + if (yych <= '/') goto yy94; + goto yy158; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy158; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy158; + goto yy94; + } + } +yy156: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy158; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto 
yy99; + if (yych <= '@') goto yy94; + goto yy158; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy158; + goto yy94; + } + } +yy157: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy159; + if (yych <= '/') goto yy94; + goto yy160; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy160; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy160; + goto yy94; + } + } +yy158: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy160; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy160; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy160; + goto yy94; + } + } +yy159: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy161; + if (yych <= '/') goto yy94; + goto yy162; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy162; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy162; + goto yy94; + } + } +yy160: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy162; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy162; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy162; + goto yy94; + } + } +yy161: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy163; + if (yych <= '/') goto yy94; + goto yy164; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy164; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy164; + goto yy94; + } + } +yy162: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; 
+ if (yych <= '9') goto yy164; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy164; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy164; + goto yy94; + } + } +yy163: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy165; + if (yych <= '/') goto yy94; + goto yy166; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy166; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy166; + goto yy94; + } + } +yy164: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy166; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy166; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy166; + goto yy94; + } + } +yy165: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy167; + if (yych <= '/') goto yy94; + goto yy168; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy168; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy168; + goto yy94; + } + } +yy166: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy168; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy168; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy168; + goto yy94; + } + } +yy167: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy169; + if (yych <= '/') goto yy94; + goto yy170; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy170; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy170; + goto yy94; + } + } +yy168: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') 
{ + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy170; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy170; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy170; + goto yy94; + } + } +yy169: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy171; + if (yych <= '/') goto yy94; + goto yy172; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy172; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy172; + goto yy94; + } + } +yy170: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy172; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy172; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy172; + goto yy94; + } + } +yy171: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy173; + if (yych <= '/') goto yy94; + goto yy174; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy174; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy174; + goto yy94; + } + } +yy172: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy174; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy174; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy174; + goto yy94; + } + } +yy173: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy175; + if (yych <= '/') goto yy94; + goto yy176; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy176; + } else { + if (yych <= '`') goto yy94; + if (yych 
<= 'z') goto yy176; + goto yy94; + } + } +yy174: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy176; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy176; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy176; + goto yy94; + } + } +yy175: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy177; + if (yych <= '/') goto yy94; + goto yy178; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy178; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy178; + goto yy94; + } + } +yy176: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy178; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy178; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy178; + goto yy94; + } + } +yy177: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy179; + if (yych <= '/') goto yy94; + goto yy180; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy180; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy180; + goto yy94; + } + } +yy178: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy180; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy180; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy180; + goto yy94; + } + } +yy179: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy181; + if (yych <= '/') goto yy94; + goto yy182; + } else { + if (yych <= 
'Z') { + if (yych <= '@') goto yy94; + goto yy182; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy182; + goto yy94; + } + } +yy180: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy182; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy182; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy182; + goto yy94; + } + } +yy181: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy183; + if (yych <= '/') goto yy94; + goto yy184; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy184; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy184; + goto yy94; + } + } +yy182: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy184; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy184; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy184; + goto yy94; + } + } +yy183: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy185; + if (yych <= '/') goto yy94; + goto yy186; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy186; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy186; + goto yy94; + } + } +yy184: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy186; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy186; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy186; + goto yy94; + } + } +yy185: + yych = *++p; + if (yych <= 
'9') { + if (yych == '-') goto yy187; + if (yych <= '/') goto yy94; + goto yy188; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy188; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy188; + goto yy94; + } + } +yy186: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy188; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy188; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy188; + goto yy94; + } + } +yy187: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy189; + if (yych <= '/') goto yy94; + goto yy190; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy190; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy190; + goto yy94; + } + } +yy188: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy190; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy190; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy190; + goto yy94; + } + } +yy189: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy191; + if (yych <= '/') goto yy94; + goto yy192; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy192; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy192; + goto yy94; + } + } +yy190: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy192; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy192; + } else { + if (yych <= 
'`') goto yy94; + if (yych <= 'z') goto yy192; + goto yy94; + } + } +yy191: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy193; + if (yych <= '/') goto yy94; + goto yy194; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy194; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy194; + goto yy94; + } + } +yy192: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy194; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy194; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy194; + goto yy94; + } + } +yy193: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy195; + if (yych <= '/') goto yy94; + goto yy196; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy196; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy196; + goto yy94; + } + } +yy194: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy196; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy196; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy196; + goto yy94; + } + } +yy195: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy197; + if (yych <= '/') goto yy94; + goto yy198; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy198; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy198; + goto yy94; + } + } +yy196: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy198; + goto yy94; + } + } else { + if (yych <= 
'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy198; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy198; + goto yy94; + } + } +yy197: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy199; + if (yych <= '/') goto yy94; + goto yy200; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy200; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy200; + goto yy94; + } + } +yy198: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy200; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy200; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy200; + goto yy94; + } + } +yy199: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy201; + if (yych <= '/') goto yy94; + goto yy202; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy202; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy202; + goto yy94; + } + } +yy200: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy202; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy202; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy202; + goto yy94; + } + } +yy201: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy203; + if (yych <= '/') goto yy94; + goto yy204; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy204; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy204; + goto yy94; + } + } +yy202: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { 
+ if (yych <= '/') goto yy94; + if (yych <= '9') goto yy204; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy204; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy204; + goto yy94; + } + } +yy203: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy205; + if (yych <= '/') goto yy94; + goto yy206; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy206; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy206; + goto yy94; + } + } +yy204: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy206; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy206; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy206; + goto yy94; + } + } +yy205: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy207; + if (yych <= '/') goto yy94; + goto yy208; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy208; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy208; + goto yy94; + } + } +yy206: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy208; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy208; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy208; + goto yy94; + } + } +yy207: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy209; + if (yych <= '/') goto yy94; + goto yy210; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy210; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy210; + goto yy94; + } + } +yy208: + yych = *++p; + if 
(yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy210; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy210; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy210; + goto yy94; + } + } +yy209: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy211; + if (yych <= '/') goto yy94; + goto yy212; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy212; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy212; + goto yy94; + } + } +yy210: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy212; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy212; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy212; + goto yy94; + } + } +yy211: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy213; + if (yych <= '/') goto yy94; + goto yy214; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy214; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy214; + goto yy94; + } + } +yy212: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy214; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy214; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy214; + goto yy94; + } + } +yy213: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy215; + if (yych <= '/') goto yy94; + goto yy216; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy216; + } else { + if 
(yych <= '`') goto yy94; + if (yych <= 'z') goto yy216; + goto yy94; + } + } +yy214: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy216; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy216; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy216; + goto yy94; + } + } +yy215: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy217; + if (yych <= '/') goto yy94; + goto yy218; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy218; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy218; + goto yy94; + } + } +yy216: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy218; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy218; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy218; + goto yy94; + } + } +yy217: + yych = *++p; + if (yych <= '9') { + if (yych == '-') goto yy219; + if (yych <= '/') goto yy94; + goto yy220; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy94; + goto yy220; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy220; + goto yy94; + } + } +yy218: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') goto yy94; + if (yych >= '.') goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy220; + goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + goto yy220; + } else { + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy220; + goto yy94; + } + } +yy219: + yych = *++p; + if (yych <= '@') { + if (yych <= '/') goto yy94; + if (yych <= '9') goto yy221; + 
goto yy94; + } else { + if (yych <= 'Z') goto yy221; + if (yych <= '`') goto yy94; + if (yych <= 'z') goto yy221; + goto yy94; + } +yy220: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= '-') goto yy94; + goto yy95; + } else { + if (yych <= '/') goto yy94; + if (yych >= ':') goto yy94; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy99; + if (yych <= '@') goto yy94; + } else { + if (yych <= '`') goto yy94; + if (yych >= '{') goto yy94; + } + } +yy221: + yych = *++p; + if (yych == '.') goto yy95; + if (yych == '>') goto yy99; + goto yy94; +} + } // Try to match an HTML tag after first <, returning num of chars matched. -bufsize_t _scan_html_tag(const unsigned char *p) { +bufsize_t _scan_html_tag(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - static const unsigned char yybm[] = { - /* table 1 .. 8: 0 */ - 0, 239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238, - 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, - 239, 239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239, - 239, 239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, - 239, 239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239, - 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, - 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, - 239, 239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - /* table 9 
.. 11: 256 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 160, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 128, 0, 0, 0, 0, 0, 0, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 0, 0, 0, 0, 128, 0, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *p; - if (yych <= '>') { - if (yych <= '!') { - if (yych >= '!') - goto yy350; - } else { - if (yych == '/') - goto yy351; - } - } else { - if (yych <= 'Z') { - if (yych <= '?') - goto yy352; - if (yych >= 'A') - goto yy353; - } else { - if (yych <= '`') - goto yy348; - if (yych <= 'z') - goto yy353; - } - } - yy348: - ++p; - yy349 : { return 0; } - yy350: - yych = *(marker = ++p); - if (yybm[0 + yych] & 16) { - goto yy418; - } - if (yych == '-') - goto yy420; - if (yych <= '@') - goto yy349; - if (yych <= '[') - goto yy417; - goto yy349; - yy351: - yych = *(marker = ++p); - if (yych <= '@') - goto yy349; - if (yych <= 'Z') - goto yy413; - if (yych <= '`') - goto yy349; - if (yych <= 'z') - goto yy413; - goto yy349; - yy352: - yych = *(marker = ++p); - if (yych <= 0x00) - goto yy349; - if (yych <= 0x7F) - goto yy397; - if (yych <= 0xC1) - goto yy349; - if (yych <= 0xF4) - goto yy397; - goto yy349; - yy353: - yych = *(marker = ++p); - if (yybm[256 + yych] & 64) { - goto yy357; - } - if (yych <= '=') { - if (yych 
<= '.') { - if (yych != '-') - goto yy349; - } else { - if (yych <= '/') - goto yy359; - if (yych >= ':') - goto yy349; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy360; - if (yych <= '@') - goto yy349; - } else { - if (yych <= '`') - goto yy349; - if (yych >= '{') - goto yy349; - } - } - yy354: - ++p; - yych = *p; - if (yybm[256 + yych] & 64) { - goto yy357; - } - if (yych <= '=') { - if (yych <= '.') { - if (yych == '-') - goto yy354; - } else { - if (yych <= '/') - goto yy359; - if (yych <= '9') - goto yy354; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy360; - if (yych >= 'A') - goto yy354; - } else { - if (yych <= '`') - goto yy356; - if (yych <= 'z') - goto yy354; - } - } - yy356: - p = marker; - goto yy349; - yy357: - ++p; - yych = *p; - if (yybm[256 + yych] & 64) { - goto yy357; - } - if (yych <= '>') { - if (yych <= '9') { - if (yych != '/') - goto yy356; - } else { - if (yych <= ':') - goto yy362; - if (yych <= '=') - goto yy356; - goto yy360; - } - } else { - if (yych <= '^') { - if (yych <= '@') - goto yy356; - if (yych <= 'Z') - goto yy362; - goto yy356; - } else { - if (yych == '`') - goto yy356; - if (yych <= 'z') - goto yy362; - goto yy356; - } - } - yy359: - yych = *++p; - if (yych != '>') - goto yy356; - yy360: - ++p; - { return (bufsize_t)(p - start); } - yy362: - ++p; - yych = *p; - if (yybm[256 + yych] & 128) { - goto yy362; - } - if (yych <= ',') { - if (yych <= '\r') { - if (yych <= 0x08) - goto yy356; - } else { - if (yych != ' ') - goto yy356; - } - } else { - if (yych <= '<') { - if (yych <= '/') - goto yy359; - goto yy356; - } else { - if (yych <= '=') - goto yy366; - if (yych <= '>') - goto yy360; - goto yy356; - } - } - yy364: - ++p; - yych = *p; - if (yych <= '<') { - if (yych <= ' ') { - if (yych <= 0x08) - goto yy356; - if (yych <= '\r') - goto yy364; - if (yych <= 0x1F) - goto yy356; - goto yy364; - } else { - if (yych <= '/') { - if (yych <= '.') - goto yy356; - goto yy359; - } else { - if (yych 
== ':') - goto yy362; - goto yy356; - } - } - } else { - if (yych <= 'Z') { - if (yych <= '=') - goto yy366; - if (yych <= '>') - goto yy360; - if (yych <= '@') - goto yy356; - goto yy362; - } else { - if (yych <= '_') { - if (yych <= '^') - goto yy356; - goto yy362; - } else { - if (yych <= '`') - goto yy356; - if (yych <= 'z') - goto yy362; - goto yy356; - } - } - } - yy366: - ++p; - yych = *p; - if (yybm[0 + yych] & 1) { - goto yy368; - } - if (yych <= 0xE0) { - if (yych <= '"') { - if (yych <= 0x00) - goto yy356; - if (yych <= ' ') - goto yy366; - goto yy379; - } else { - if (yych <= '\'') - goto yy377; - if (yych <= 0xC1) - goto yy356; - if (yych <= 0xDF) - goto yy370; - goto yy371; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy376; - goto yy372; - } else { - if (yych <= 0xF0) - goto yy373; - if (yych <= 0xF3) - goto yy374; - if (yych <= 0xF4) - goto yy375; - goto yy356; - } - } - yy368: - ++p; - yych = *p; - if (yybm[0 + yych] & 1) { - goto yy368; - } - if (yych <= 0xE0) { - if (yych <= '=') { - if (yych <= 0x00) - goto yy356; - if (yych <= ' ') - goto yy357; - goto yy356; - } else { - if (yych <= '>') - goto yy360; - if (yych <= 0xC1) - goto yy356; - if (yych >= 0xE0) - goto yy371; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy376; - goto yy372; - } else { - if (yych <= 0xF0) - goto yy373; - if (yych <= 0xF3) - goto yy374; - if (yych <= 0xF4) - goto yy375; - goto yy356; - } - } - yy370: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy368; - goto yy356; - yy371: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy356; - if (yych <= 0xBF) - goto yy370; - goto yy356; - yy372: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy370; - goto yy356; - yy373: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy356; - if (yych <= 0xBF) - goto yy372; - goto yy356; - yy374: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy372; - goto 
yy356; - yy375: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x8F) - goto yy372; - goto yy356; - yy376: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x9F) - goto yy370; - goto yy356; - yy377: - ++p; - yych = *p; - if (yybm[0 + yych] & 2) { - goto yy377; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x00) - goto yy356; - if (yych <= '\'') - goto yy388; - goto yy356; - } else { - if (yych <= 0xDF) - goto yy389; - if (yych <= 0xE0) - goto yy390; - goto yy391; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy395; - if (yych <= 0xEF) - goto yy391; - goto yy392; - } else { - if (yych <= 0xF3) - goto yy393; - if (yych <= 0xF4) - goto yy394; - goto yy356; - } - } - yy379: - ++p; - yych = *p; - if (yybm[0 + yych] & 4) { - goto yy379; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x00) - goto yy356; - if (yych <= '"') - goto yy388; - goto yy356; - } else { - if (yych <= 0xDF) - goto yy381; - if (yych <= 0xE0) - goto yy382; - goto yy383; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy387; - if (yych <= 0xEF) - goto yy383; - goto yy384; - } else { - if (yych <= 0xF3) - goto yy385; - if (yych <= 0xF4) - goto yy386; - goto yy356; - } - } - yy381: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy379; - goto yy356; - yy382: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy356; - if (yych <= 0xBF) - goto yy381; - goto yy356; - yy383: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy381; - goto yy356; - yy384: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy356; - if (yych <= 0xBF) - goto yy383; - goto yy356; - yy385: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy383; - goto yy356; - yy386: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x8F) - goto yy383; - goto yy356; - yy387: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 
0x9F) - goto yy381; - goto yy356; - yy388: - ++p; - yych = *p; - if (yybm[256 + yych] & 64) { - goto yy357; - } - if (yych == '/') - goto yy359; - if (yych == '>') - goto yy360; - goto yy356; - yy389: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy377; - goto yy356; - yy390: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy356; - if (yych <= 0xBF) - goto yy389; - goto yy356; - yy391: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy389; - goto yy356; - yy392: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy356; - if (yych <= 0xBF) - goto yy391; - goto yy356; - yy393: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy391; - goto yy356; - yy394: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x8F) - goto yy391; - goto yy356; - yy395: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x9F) - goto yy389; - goto yy356; - yy396: - ++p; - yych = *p; - yy397: - if (yybm[0 + yych] & 8) { - goto yy396; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x00) - goto yy356; - if (yych <= '?') - goto yy405; - goto yy356; - } else { - if (yych <= 0xDF) - goto yy398; - if (yych <= 0xE0) - goto yy399; - goto yy400; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy404; - if (yych <= 0xEF) - goto yy400; - goto yy401; - } else { - if (yych <= 0xF3) - goto yy402; - if (yych <= 0xF4) - goto yy403; - goto yy356; - } - } - yy398: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy396; - goto yy356; - yy399: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy356; - if (yych <= 0xBF) - goto yy398; - goto yy356; - yy400: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy398; - goto yy356; - yy401: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy356; - if (yych <= 0xBF) - goto yy400; - goto yy356; - yy402: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - 
if (yych <= 0xBF) - goto yy400; - goto yy356; - yy403: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x8F) - goto yy400; - goto yy356; - yy404: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x9F) - goto yy398; - goto yy356; - yy405: - ++p; - yych = *p; - if (yych <= 0xE0) { - if (yych <= '>') { - if (yych <= 0x00) - goto yy356; - if (yych <= '=') - goto yy396; - goto yy360; - } else { - if (yych <= 0x7F) - goto yy396; - if (yych <= 0xC1) - goto yy356; - if (yych >= 0xE0) - goto yy407; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy412; - goto yy408; - } else { - if (yych <= 0xF0) - goto yy409; - if (yych <= 0xF3) - goto yy410; - if (yych <= 0xF4) - goto yy411; - goto yy356; - } - } - yy406: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy396; - goto yy356; - yy407: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy356; - if (yych <= 0xBF) - goto yy406; - goto yy356; - yy408: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy406; - goto yy356; - yy409: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy356; - if (yych <= 0xBF) - goto yy408; - goto yy356; - yy410: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy408; - goto yy356; - yy411: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x8F) - goto yy408; - goto yy356; - yy412: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x9F) - goto yy406; - goto yy356; - yy413: - ++p; - yych = *p; - if (yych <= '/') { - if (yych <= 0x1F) { - if (yych <= 0x08) - goto yy356; - if (yych >= 0x0E) - goto yy356; - } else { - if (yych <= ' ') - goto yy415; - if (yych == '-') - goto yy413; - goto yy356; - } - } else { - if (yych <= '@') { - if (yych <= '9') - goto yy413; - if (yych == '>') - goto yy360; - goto yy356; - } else { - if (yych <= 'Z') - goto yy413; - if (yych <= '`') - goto yy356; - if (yych <= 'z') - goto yy413; - goto 
yy356; - } - } - yy415: - ++p; - yych = *p; - if (yych <= 0x1F) { - if (yych <= 0x08) - goto yy356; - if (yych <= '\r') - goto yy415; - goto yy356; - } else { - if (yych <= ' ') - goto yy415; - if (yych == '>') - goto yy360; - goto yy356; - } - yy417: - yych = *++p; - if (yych == 'C') - goto yy453; - if (yych == 'c') - goto yy453; - goto yy356; - yy418: - ++p; - yych = *p; - if (yybm[0 + yych] & 16) { - goto yy418; - } - if (yych <= 0x08) - goto yy356; - if (yych <= '\r') - goto yy442; - if (yych == ' ') - goto yy442; - goto yy356; - yy420: - yych = *++p; - if (yych != '-') - goto yy356; - yych = *++p; - if (yych <= 0xE0) { - if (yych <= '=') { - if (yych != '-') - goto yy424; - } else { - if (yych <= '>') - goto yy356; - if (yych <= 0xC1) - goto yy424; - if (yych <= 0xDF) - goto yy425; - goto yy426; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy431; - goto yy427; - } else { - if (yych <= 0xF0) - goto yy428; - if (yych <= 0xF3) - goto yy429; - if (yych <= 0xF4) - goto yy430; - goto yy424; - } - } - yych = *++p; - if (yych <= 0xE0) { - if (yych <= '=') { - if (yych == '-') - goto yy441; - goto yy424; - } else { - if (yych <= '>') - goto yy356; - if (yych <= 0xC1) - goto yy424; - if (yych <= 0xDF) - goto yy425; - goto yy426; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy431; - goto yy427; - } else { - if (yych <= 0xF0) - goto yy428; - if (yych <= 0xF3) - goto yy429; - if (yych <= 0xF4) - goto yy430; - goto yy424; - } - } - yy423: - ++p; - yych = *p; - yy424: - if (yybm[0 + yych] & 32) { - goto yy423; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x00) - goto yy356; - if (yych <= '-') - goto yy432; - goto yy356; - } else { - if (yych <= 0xDF) - goto yy433; - if (yych <= 0xE0) - goto yy434; - goto yy435; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy439; - if (yych <= 0xEF) - goto yy435; - goto yy436; - } else { - if (yych <= 0xF3) - goto yy437; - if (yych <= 0xF4) - goto yy438; - goto yy356; - 
} - } - yy425: - yych = *++p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy423; - goto yy356; - yy426: - yych = *++p; - if (yych <= 0x9F) - goto yy356; - if (yych <= 0xBF) - goto yy425; - goto yy356; - yy427: - yych = *++p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy425; - goto yy356; - yy428: - yych = *++p; - if (yych <= 0x8F) - goto yy356; - if (yych <= 0xBF) - goto yy427; - goto yy356; - yy429: - yych = *++p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy427; - goto yy356; - yy430: - yych = *++p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x8F) - goto yy427; - goto yy356; - yy431: - yych = *++p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x9F) - goto yy425; - goto yy356; - yy432: - ++p; - yych = *p; - if (yybm[0 + yych] & 32) { - goto yy423; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x00) - goto yy356; - if (yych <= '-') - goto yy440; - goto yy356; - } else { - if (yych <= 0xDF) - goto yy433; - if (yych <= 0xE0) - goto yy434; - goto yy435; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy439; - if (yych <= 0xEF) - goto yy435; - goto yy436; - } else { - if (yych <= 0xF3) - goto yy437; - if (yych <= 0xF4) - goto yy438; - goto yy356; - } - } - yy433: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy423; - goto yy356; - yy434: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy356; - if (yych <= 0xBF) - goto yy433; - goto yy356; - yy435: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy433; - goto yy356; - yy436: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy356; - if (yych <= 0xBF) - goto yy435; - goto yy356; - yy437: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy435; - goto yy356; - yy438: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x8F) - goto yy435; - goto yy356; - yy439: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if 
(yych <= 0x9F) - goto yy433; - goto yy356; - yy440: - yych = *++p; - if (yych == '>') - goto yy360; - goto yy356; - yy441: - yych = *++p; - if (yych == '>') - goto yy360; - goto yy356; - yy442: - ++p; - yych = *p; - if (yych <= 0xC1) { - if (yych <= 0x1F) { - if (yych <= 0x00) - goto yy356; - if (yych <= 0x08) - goto yy444; - if (yych <= '\r') - goto yy442; - } else { - if (yych <= '=') { - if (yych <= ' ') - goto yy442; - } else { - if (yych <= '>') - goto yy360; - if (yych >= 0x80) - goto yy356; - } - } - } else { - if (yych <= 0xED) { - if (yych <= 0xDF) - goto yy446; - if (yych <= 0xE0) - goto yy447; - if (yych <= 0xEC) - goto yy448; - goto yy452; - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy448; - goto yy449; - } else { - if (yych <= 0xF3) - goto yy450; - if (yych <= 0xF4) - goto yy451; - goto yy356; - } - } - } - yy444: - ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy444; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x00) - goto yy356; - if (yych <= '>') - goto yy360; - goto yy356; - } else { - if (yych <= 0xDF) - goto yy446; - if (yych <= 0xE0) - goto yy447; - goto yy448; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy452; - if (yych <= 0xEF) - goto yy448; - goto yy449; - } else { - if (yych <= 0xF3) - goto yy450; - if (yych <= 0xF4) - goto yy451; - goto yy356; - } - } - yy446: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy444; - goto yy356; - yy447: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy356; - if (yych <= 0xBF) - goto yy446; - goto yy356; - yy448: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy446; - goto yy356; - yy449: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy356; - if (yych <= 0xBF) - goto yy448; - goto yy356; - yy450: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy448; - goto yy356; - yy451: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x8F) - 
goto yy448; - goto yy356; - yy452: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x9F) - goto yy446; - goto yy356; - yy453: - yych = *++p; - if (yych == 'D') - goto yy454; - if (yych != 'd') - goto yy356; - yy454: - yych = *++p; - if (yych == 'A') - goto yy455; - if (yych != 'a') - goto yy356; - yy455: - yych = *++p; - if (yych == 'T') - goto yy456; - if (yych != 't') - goto yy356; - yy456: - yych = *++p; - if (yych == 'A') - goto yy457; - if (yych != 'a') - goto yy356; - yy457: - yych = *++p; - if (yych != '[') - goto yy356; - yy458: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy458; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x00) - goto yy356; - if (yych <= ']') - goto yy467; - goto yy356; - } else { - if (yych <= 0xDF) - goto yy460; - if (yych <= 0xE0) - goto yy461; - goto yy462; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy466; - if (yych <= 0xEF) - goto yy462; - goto yy463; - } else { - if (yych <= 0xF3) - goto yy464; - if (yych <= 0xF4) - goto yy465; - goto yy356; - } - } - yy460: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy458; - goto yy356; - yy461: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy356; - if (yych <= 0xBF) - goto yy460; - goto yy356; - yy462: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy460; - goto yy356; - yy463: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy356; - if (yych <= 0xBF) - goto yy462; - goto yy356; - yy464: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy462; - goto yy356; - yy465: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x8F) - goto yy462; - goto yy356; - yy466: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x9F) - goto yy460; - goto yy356; - yy467: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy458; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x00) - goto 
yy356; - if (yych >= '^') - goto yy356; - } else { - if (yych <= 0xDF) - goto yy469; - if (yych <= 0xE0) - goto yy470; - goto yy471; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy475; - if (yych <= 0xEF) - goto yy471; - goto yy472; - } else { - if (yych <= 0xF3) - goto yy473; - if (yych <= 0xF4) - goto yy474; - goto yy356; - } - } - ++p; - yych = *p; - if (yych <= 0xE0) { - if (yych <= '>') { - if (yych <= 0x00) - goto yy356; - if (yych <= '=') - goto yy458; - goto yy360; - } else { - if (yych <= 0x7F) - goto yy458; - if (yych <= 0xC1) - goto yy356; - if (yych <= 0xDF) - goto yy476; - goto yy477; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy482; - goto yy478; - } else { - if (yych <= 0xF0) - goto yy479; - if (yych <= 0xF3) - goto yy480; - if (yych <= 0xF4) - goto yy481; - goto yy356; - } - } - yy469: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy458; - goto yy356; - yy470: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy356; - if (yych <= 0xBF) - goto yy469; - goto yy356; - yy471: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy469; - goto yy356; - yy472: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy356; - if (yych <= 0xBF) - goto yy471; - goto yy356; - yy473: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy471; - goto yy356; - yy474: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x8F) - goto yy471; - goto yy356; - yy475: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x9F) - goto yy469; - goto yy356; - yy476: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy458; - goto yy356; - yy477: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy356; - if (yych <= 0xBF) - goto yy476; - goto yy356; - yy478: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy476; - goto yy356; - yy479: - ++p; - yych = *p; - if (yych <= 
0x8F) - goto yy356; - if (yych <= 0xBF) - goto yy478; - goto yy356; - yy480: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0xBF) - goto yy478; - goto yy356; - yy481: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x8F) - goto yy478; - goto yy356; - yy482: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy356; - if (yych <= 0x9F) - goto yy476; - goto yy356; - } +{ + unsigned char yych; + static const unsigned char yybm[] = { + /* table 1 .. 8: 0 */ + 0, 250, 250, 250, 250, 250, 250, 250, + 250, 235, 235, 235, 235, 235, 250, 250, + 250, 250, 250, 250, 250, 250, 250, 250, + 250, 250, 250, 250, 250, 250, 250, 250, + 235, 250, 202, 250, 250, 250, 250, 170, + 250, 250, 250, 250, 250, 246, 254, 250, + 254, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 250, 234, 234, 232, 250, + 250, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 250, 250, 122, 250, 254, + 234, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 250, 250, 250, 250, 250, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + /* table 9 .. 
11: 256 */ + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 192, 128, 128, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 128, 128, 128, 128, 128, 0, + 128, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 128, 128, 128, 128, 128, + 128, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 128, 128, 128, 128, 128, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= '>') { + if (yych <= '!') { + if (yych >= '!') goto yy226; + } else { + if (yych == '/') goto yy227; + } + } else { + if (yych <= 'Z') { + if (yych <= '?') goto yy228; + if (yych >= 'A') goto yy229; + } else { + if (yych <= '`') goto yy224; + if (yych <= 'z') goto yy229; + } + } +yy224: + ++p; +yy225: + { return 0; } +yy226: + yych = *(marker = ++p); + if (yybm[256+yych] & 32) { + goto yy232; + } + if (yych == '-') goto yy230; + if (yych <= '@') goto yy225; + if (yych <= '[') goto yy234; + goto yy225; +yy227: + yych = *(marker = ++p); + if (yych <= '@') goto yy225; + if (yych <= 'Z') goto yy235; + if (yych <= '`') goto yy225; + if (yych <= 'z') goto yy235; + goto yy225; +yy228: + yych = *(marker = ++p); + if (yych <= 0x00) goto yy225; + if (yych <= 0x7F) goto yy238; + if (yych <= 0xC1) goto yy225; + if (yych <= 0xF4) goto yy238; + goto yy225; +yy229: + yych = 
*(marker = ++p); + if (yych <= '.') { + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy225; + if (yych <= '\r') goto yy250; + goto yy225; + } else { + if (yych <= ' ') goto yy250; + if (yych == '-') goto yy250; + goto yy225; + } + } else { + if (yych <= '@') { + if (yych <= '9') goto yy250; + if (yych == '>') goto yy250; + goto yy225; + } else { + if (yych <= 'Z') goto yy250; + if (yych <= '`') goto yy225; + if (yych <= 'z') goto yy250; + goto yy225; + } + } +yy230: + yych = *++p; + if (yych == '-') goto yy254; +yy231: + p = marker; + goto yy225; +yy232: + yych = *++p; + if (yybm[256+yych] & 32) { + goto yy232; + } + if (yych <= 0x08) goto yy231; + if (yych <= '\r') goto yy255; + if (yych == ' ') goto yy255; + goto yy231; +yy234: + yych = *++p; + if (yych == 'C') goto yy257; + if (yych == 'c') goto yy257; + goto yy231; +yy235: + yych = *++p; + if (yybm[256+yych] & 64) { + goto yy235; + } + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy231; + if (yych <= '\r') goto yy258; + goto yy231; + } else { + if (yych <= ' ') goto yy258; + if (yych == '>') goto yy252; + goto yy231; + } +yy237: + yych = *++p; +yy238: + if (yybm[256+yych] & 128) { + goto yy237; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) goto yy231; + if (yych >= '@') goto yy231; + } else { + if (yych <= 0xDF) goto yy240; + if (yych <= 0xE0) goto yy241; + goto yy242; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy243; + if (yych <= 0xEF) goto yy242; + goto yy244; + } else { + if (yych <= 0xF3) goto yy245; + if (yych <= 0xF4) goto yy246; + goto yy231; + } + } + yych = *++p; + if (yych <= 0xE0) { + if (yych <= '>') { + if (yych <= 0x00) goto yy231; + if (yych <= '=') goto yy237; + goto yy252; + } else { + if (yych <= 0x7F) goto yy237; + if (yych <= 0xC1) goto yy231; + if (yych >= 0xE0) goto yy241; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy243; + goto yy242; + } else { + if (yych <= 0xF0) goto yy244; + if (yych <= 0xF3) goto yy245; + if (yych <= 0xF4) 
goto yy246; + goto yy231; + } + } +yy240: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy237; + goto yy231; +yy241: + yych = *++p; + if (yych <= 0x9F) goto yy231; + if (yych <= 0xBF) goto yy240; + goto yy231; +yy242: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy240; + goto yy231; +yy243: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0x9F) goto yy240; + goto yy231; +yy244: + yych = *++p; + if (yych <= 0x8F) goto yy231; + if (yych <= 0xBF) goto yy242; + goto yy231; +yy245: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy242; + goto yy231; +yy246: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0x8F) goto yy242; + goto yy231; +yy247: + yych = *++p; + if (yybm[0+yych] & 1) { + goto yy247; + } + if (yych <= '>') { + if (yych <= '9') { + if (yych == '/') goto yy251; + goto yy231; + } else { + if (yych <= ':') goto yy260; + if (yych <= '=') goto yy231; + goto yy252; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy231; + if (yych <= 'Z') goto yy260; + goto yy231; + } else { + if (yych == '`') goto yy231; + if (yych <= 'z') goto yy260; + goto yy231; + } + } +yy249: + yych = *++p; +yy250: + if (yybm[0+yych] & 1) { + goto yy247; + } + if (yych <= '=') { + if (yych <= '.') { + if (yych == '-') goto yy249; + goto yy231; + } else { + if (yych <= '/') goto yy251; + if (yych <= '9') goto yy249; + goto yy231; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy252; + if (yych <= '@') goto yy231; + goto yy249; + } else { + if (yych <= '`') goto yy231; + if (yych <= 'z') goto yy249; + goto yy231; + } + } +yy251: + yych = *++p; + if (yych != '>') goto yy231; +yy252: + ++p; + { return (bufsize_t)(p - start); } +yy254: + yych = *++p; + if (yych == '-') goto yy264; + if (yych == '>') goto yy231; + goto yy263; +yy255: + yych = *++p; + if (yybm[0+yych] & 2) { + goto yy255; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) goto yy231; + if 
(yych <= '>') goto yy252; + goto yy231; + } else { + if (yych <= 0xDF) goto yy272; + if (yych <= 0xE0) goto yy273; + goto yy274; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy275; + if (yych <= 0xEF) goto yy274; + goto yy276; + } else { + if (yych <= 0xF3) goto yy277; + if (yych <= 0xF4) goto yy278; + goto yy231; + } + } +yy257: + yych = *++p; + if (yych == 'D') goto yy279; + if (yych == 'd') goto yy279; + goto yy231; +yy258: + yych = *++p; + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy231; + if (yych <= '\r') goto yy258; + goto yy231; + } else { + if (yych <= ' ') goto yy258; + if (yych == '>') goto yy252; + goto yy231; + } +yy260: + yych = *++p; + if (yybm[0+yych] & 4) { + goto yy260; + } + if (yych <= ',') { + if (yych <= '\r') { + if (yych <= 0x08) goto yy231; + goto yy280; + } else { + if (yych == ' ') goto yy280; + goto yy231; + } + } else { + if (yych <= '<') { + if (yych <= '/') goto yy251; + goto yy231; + } else { + if (yych <= '=') goto yy282; + if (yych <= '>') goto yy252; + goto yy231; + } + } +yy262: + yych = *++p; +yy263: + if (yybm[0+yych] & 8) { + goto yy262; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) goto yy231; + if (yych <= '-') goto yy284; + goto yy231; + } else { + if (yych <= 0xDF) goto yy265; + if (yych <= 0xE0) goto yy266; + goto yy267; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy268; + if (yych <= 0xEF) goto yy267; + goto yy269; + } else { + if (yych <= 0xF3) goto yy270; + if (yych <= 0xF4) goto yy271; + goto yy231; + } + } +yy264: + yych = *++p; + if (yych == '-') goto yy251; + if (yych == '>') goto yy231; + goto yy263; +yy265: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy262; + goto yy231; +yy266: + yych = *++p; + if (yych <= 0x9F) goto yy231; + if (yych <= 0xBF) goto yy265; + goto yy231; +yy267: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy265; + goto yy231; +yy268: + yych = *++p; + if (yych <= 0x7F) goto yy231; + 
if (yych <= 0x9F) goto yy265; + goto yy231; +yy269: + yych = *++p; + if (yych <= 0x8F) goto yy231; + if (yych <= 0xBF) goto yy267; + goto yy231; +yy270: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy267; + goto yy231; +yy271: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0x8F) goto yy267; + goto yy231; +yy272: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy255; + goto yy231; +yy273: + yych = *++p; + if (yych <= 0x9F) goto yy231; + if (yych <= 0xBF) goto yy272; + goto yy231; +yy274: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy272; + goto yy231; +yy275: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0x9F) goto yy272; + goto yy231; +yy276: + yych = *++p; + if (yych <= 0x8F) goto yy231; + if (yych <= 0xBF) goto yy274; + goto yy231; +yy277: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy274; + goto yy231; +yy278: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0x8F) goto yy274; + goto yy231; +yy279: + yych = *++p; + if (yych == 'A') goto yy285; + if (yych == 'a') goto yy285; + goto yy231; +yy280: + yych = *++p; + if (yych <= '<') { + if (yych <= ' ') { + if (yych <= 0x08) goto yy231; + if (yych <= '\r') goto yy280; + if (yych <= 0x1F) goto yy231; + goto yy280; + } else { + if (yych <= '/') { + if (yych <= '.') goto yy231; + goto yy251; + } else { + if (yych == ':') goto yy260; + goto yy231; + } + } + } else { + if (yych <= 'Z') { + if (yych <= '=') goto yy282; + if (yych <= '>') goto yy252; + if (yych <= '@') goto yy231; + goto yy260; + } else { + if (yych <= '_') { + if (yych <= '^') goto yy231; + goto yy260; + } else { + if (yych <= '`') goto yy231; + if (yych <= 'z') goto yy260; + goto yy231; + } + } + } +yy282: + yych = *++p; + if (yybm[0+yych] & 16) { + goto yy286; + } + if (yych <= 0xE0) { + if (yych <= '"') { + if (yych <= 0x00) goto yy231; + if (yych <= ' ') goto yy282; + goto yy288; + } else { + if (yych <= 
'\'') goto yy290; + if (yych <= 0xC1) goto yy231; + if (yych <= 0xDF) goto yy292; + goto yy293; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy295; + goto yy294; + } else { + if (yych <= 0xF0) goto yy296; + if (yych <= 0xF3) goto yy297; + if (yych <= 0xF4) goto yy298; + goto yy231; + } + } +yy284: + yych = *++p; + if (yybm[0+yych] & 8) { + goto yy262; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) goto yy231; + if (yych <= '-') goto yy251; + goto yy231; + } else { + if (yych <= 0xDF) goto yy265; + if (yych <= 0xE0) goto yy266; + goto yy267; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy268; + if (yych <= 0xEF) goto yy267; + goto yy269; + } else { + if (yych <= 0xF3) goto yy270; + if (yych <= 0xF4) goto yy271; + goto yy231; + } + } +yy285: + yych = *++p; + if (yych == 'T') goto yy299; + if (yych == 't') goto yy299; + goto yy231; +yy286: + yych = *++p; + if (yybm[0+yych] & 16) { + goto yy286; + } + if (yych <= 0xE0) { + if (yych <= '=') { + if (yych <= 0x00) goto yy231; + if (yych <= ' ') goto yy247; + goto yy231; + } else { + if (yych <= '>') goto yy252; + if (yych <= 0xC1) goto yy231; + if (yych <= 0xDF) goto yy292; + goto yy293; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy295; + goto yy294; + } else { + if (yych <= 0xF0) goto yy296; + if (yych <= 0xF3) goto yy297; + if (yych <= 0xF4) goto yy298; + goto yy231; + } + } +yy288: + yych = *++p; + if (yybm[0+yych] & 32) { + goto yy288; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) goto yy231; + if (yych <= '"') goto yy300; + goto yy231; + } else { + if (yych <= 0xDF) goto yy301; + if (yych <= 0xE0) goto yy302; + goto yy303; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy304; + if (yych <= 0xEF) goto yy303; + goto yy305; + } else { + if (yych <= 0xF3) goto yy306; + if (yych <= 0xF4) goto yy307; + goto yy231; + } + } +yy290: + yych = *++p; + if (yybm[0+yych] & 64) { + goto yy290; + } + if (yych <= 0xEC) { + 
if (yych <= 0xC1) { + if (yych <= 0x00) goto yy231; + if (yych <= '\'') goto yy300; + goto yy231; + } else { + if (yych <= 0xDF) goto yy308; + if (yych <= 0xE0) goto yy309; + goto yy310; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy311; + if (yych <= 0xEF) goto yy310; + goto yy312; + } else { + if (yych <= 0xF3) goto yy313; + if (yych <= 0xF4) goto yy314; + goto yy231; + } + } +yy292: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy286; + goto yy231; +yy293: + yych = *++p; + if (yych <= 0x9F) goto yy231; + if (yych <= 0xBF) goto yy292; + goto yy231; +yy294: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy292; + goto yy231; +yy295: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0x9F) goto yy292; + goto yy231; +yy296: + yych = *++p; + if (yych <= 0x8F) goto yy231; + if (yych <= 0xBF) goto yy294; + goto yy231; +yy297: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy294; + goto yy231; +yy298: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0x8F) goto yy294; + goto yy231; +yy299: + yych = *++p; + if (yych == 'A') goto yy315; + if (yych == 'a') goto yy315; + goto yy231; +yy300: + yych = *++p; + if (yybm[0+yych] & 1) { + goto yy247; + } + if (yych == '/') goto yy251; + if (yych == '>') goto yy252; + goto yy231; +yy301: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy288; + goto yy231; +yy302: + yych = *++p; + if (yych <= 0x9F) goto yy231; + if (yych <= 0xBF) goto yy301; + goto yy231; +yy303: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy301; + goto yy231; +yy304: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0x9F) goto yy301; + goto yy231; +yy305: + yych = *++p; + if (yych <= 0x8F) goto yy231; + if (yych <= 0xBF) goto yy303; + goto yy231; +yy306: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy303; + goto yy231; +yy307: + yych = *++p; + if (yych <= 0x7F) 
goto yy231; + if (yych <= 0x8F) goto yy303; + goto yy231; +yy308: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy290; + goto yy231; +yy309: + yych = *++p; + if (yych <= 0x9F) goto yy231; + if (yych <= 0xBF) goto yy308; + goto yy231; +yy310: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy308; + goto yy231; +yy311: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0x9F) goto yy308; + goto yy231; +yy312: + yych = *++p; + if (yych <= 0x8F) goto yy231; + if (yych <= 0xBF) goto yy310; + goto yy231; +yy313: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy310; + goto yy231; +yy314: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0x8F) goto yy310; + goto yy231; +yy315: + yych = *++p; + if (yych != '[') goto yy231; +yy316: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy316; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) goto yy231; + if (yych >= '^') goto yy231; + } else { + if (yych <= 0xDF) goto yy319; + if (yych <= 0xE0) goto yy320; + goto yy321; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy322; + if (yych <= 0xEF) goto yy321; + goto yy323; + } else { + if (yych <= 0xF3) goto yy324; + if (yych <= 0xF4) goto yy325; + goto yy231; + } + } + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy316; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) goto yy231; + if (yych <= ']') goto yy326; + goto yy231; + } else { + if (yych <= 0xDF) goto yy319; + if (yych <= 0xE0) goto yy320; + goto yy321; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy322; + if (yych <= 0xEF) goto yy321; + goto yy323; + } else { + if (yych <= 0xF3) goto yy324; + if (yych <= 0xF4) goto yy325; + goto yy231; + } + } +yy319: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy316; + goto yy231; +yy320: + yych = *++p; + if (yych <= 0x9F) goto yy231; + if (yych <= 0xBF) goto yy319; + goto yy231; +yy321: + yych = 
*++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy319; + goto yy231; +yy322: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0x9F) goto yy319; + goto yy231; +yy323: + yych = *++p; + if (yych <= 0x8F) goto yy231; + if (yych <= 0xBF) goto yy321; + goto yy231; +yy324: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0xBF) goto yy321; + goto yy231; +yy325: + yych = *++p; + if (yych <= 0x7F) goto yy231; + if (yych <= 0x8F) goto yy321; + goto yy231; +yy326: + yych = *++p; + if (yych <= 0xE0) { + if (yych <= '>') { + if (yych <= 0x00) goto yy231; + if (yych <= '=') goto yy316; + goto yy252; + } else { + if (yych <= 0x7F) goto yy316; + if (yych <= 0xC1) goto yy231; + if (yych <= 0xDF) goto yy319; + goto yy320; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy322; + goto yy321; + } else { + if (yych <= 0xF0) goto yy323; + if (yych <= 0xF3) goto yy324; + if (yych <= 0xF4) goto yy325; + goto yy231; + } + } } -// Try to (liberally) match an HTML tag after first <, returning num of chars -// matched. -bufsize_t _scan_liberal_html_tag(const unsigned char *p) { +} + +// Try to (liberally) match an HTML tag after first <, returning num of chars matched. 
+bufsize_t _scan_liberal_html_tag(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - unsigned int yyaccept = 0; - static const unsigned char yybm[] = { - 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *p; - if (yych <= 0xE0) { - if (yych <= '\n') { - if (yych <= 0x00) - goto yy485; - if (yych <= '\t') - goto yy487; - } else { - if (yych <= 0x7F) - goto yy487; - if (yych <= 0xC1) - goto yy485; - if (yych <= 0xDF) - goto yy488; - goto yy489; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy491; - goto yy490; - } else { - if (yych <= 0xF0) - goto yy492; - if (yych <= 0xF3) - goto yy493; - if (yych <= 0xF4) - goto yy494; - } - } - yy485: - ++p; - yy486 : { return 0; } - yy487: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= '\n') { - if (yych <= 0x00) - goto yy486; - if (yych <= '\t') - goto yy499; - goto yy486; - } else { - if (yych <= 0x7F) - goto yy499; - if (yych <= 0xC1) - goto yy486; - if (yych <= 0xF4) - goto yy499; - goto yy486; - } - yy488: - yyaccept = 0; - yych = *(marker = ++p); - if (yych 
<= 0x7F) - goto yy486; - if (yych <= 0xBF) - goto yy498; - goto yy486; - yy489: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x9F) - goto yy486; - if (yych <= 0xBF) - goto yy497; - goto yy486; - yy490: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy486; - if (yych <= 0xBF) - goto yy497; - goto yy486; - yy491: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy486; - if (yych <= 0x9F) - goto yy497; - goto yy486; - yy492: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x8F) - goto yy486; - if (yych <= 0xBF) - goto yy495; - goto yy486; - yy493: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy486; - if (yych <= 0xBF) - goto yy495; - goto yy486; - yy494: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy486; - if (yych >= 0x90) - goto yy486; - yy495: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy496; - if (yych <= 0xBF) - goto yy497; - yy496: - p = marker; - if (yyaccept == 0) { - goto yy486; - } else { - goto yy502; - } - yy497: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy496; - if (yych >= 0xC0) - goto yy496; - yy498: - ++p; - yych = *p; - yy499: - if (yybm[0 + yych] & 64) { - goto yy498; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\n') - goto yy496; - if (yych >= '?') - goto yy496; - } else { - if (yych <= 0xDF) - goto yy497; - if (yych <= 0xE0) - goto yy503; - goto yy495; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy507; - if (yych <= 0xEF) - goto yy495; - goto yy504; - } else { - if (yych <= 0xF3) - goto yy505; - if (yych <= 0xF4) - goto yy506; - goto yy496; - } - } - yy500: - yyaccept = 1; - marker = ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy498; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\n') - goto yy502; - if (yych <= '>') - goto yy500; - } else { - if (yych <= 0xDF) - goto yy497; - if (yych <= 0xE0) - goto yy503; - goto yy495; - } - } else { - if (yych <= 0xF0) { - if (yych 
<= 0xED) - goto yy507; - if (yych <= 0xEF) - goto yy495; - goto yy504; - } else { - if (yych <= 0xF3) - goto yy505; - if (yych <= 0xF4) - goto yy506; - } - } - yy502 : { return (bufsize_t)(p - start); } - yy503: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy496; - if (yych <= 0xBF) - goto yy497; - goto yy496; - yy504: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy496; - if (yych <= 0xBF) - goto yy495; - goto yy496; - yy505: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy496; - if (yych <= 0xBF) - goto yy495; - goto yy496; - yy506: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy496; - if (yych <= 0x8F) - goto yy495; - goto yy496; - yy507: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy496; - if (yych <= 0x9F) - goto yy497; - goto yy496; - } +{ + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 128, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= 0xE0) { + if (yych <= '\n') { + if (yych <= 0x00) goto yy329; + if (yych <= '\t') goto yy331; + } else { + if (yych <= 0x7F) goto yy331; + if (yych <= 
0xC1) goto yy329; + if (yych <= 0xDF) goto yy332; + goto yy333; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy335; + goto yy334; + } else { + if (yych <= 0xF0) goto yy336; + if (yych <= 0xF3) goto yy337; + if (yych <= 0xF4) goto yy338; + } + } +yy329: + ++p; +yy330: + { return 0; } +yy331: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych <= 0x00) goto yy330; + if (yych <= '\t') goto yy340; + goto yy330; + } else { + if (yych <= 0x7F) goto yy340; + if (yych <= 0xC1) goto yy330; + if (yych <= 0xF4) goto yy340; + goto yy330; + } +yy332: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy330; + if (yych <= 0xBF) goto yy339; + goto yy330; +yy333: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x9F) goto yy330; + if (yych <= 0xBF) goto yy345; + goto yy330; +yy334: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy330; + if (yych <= 0xBF) goto yy345; + goto yy330; +yy335: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy330; + if (yych <= 0x9F) goto yy345; + goto yy330; +yy336: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x8F) goto yy330; + if (yych <= 0xBF) goto yy347; + goto yy330; +yy337: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy330; + if (yych <= 0xBF) goto yy347; + goto yy330; +yy338: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy330; + if (yych <= 0x8F) goto yy347; + goto yy330; +yy339: + yych = *++p; +yy340: + if (yybm[0+yych] & 64) { + goto yy339; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy341; + if (yych <= '>') goto yy342; + } else { + if (yych <= 0xDF) goto yy345; + if (yych <= 0xE0) goto yy346; + goto yy347; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy348; + if (yych <= 0xEF) goto yy347; + goto yy349; + } else { + if (yych <= 0xF3) goto yy350; + if (yych <= 0xF4) goto yy351; + } + } +yy341: + p = marker; + if (yyaccept == 0) { + goto 
yy330; + } else { + goto yy344; + } +yy342: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0+yych] & 64) { + goto yy339; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy344; + if (yych <= '>') goto yy342; + } else { + if (yych <= 0xDF) goto yy345; + if (yych <= 0xE0) goto yy346; + goto yy347; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy348; + if (yych <= 0xEF) goto yy347; + goto yy349; + } else { + if (yych <= 0xF3) goto yy350; + if (yych <= 0xF4) goto yy351; + } + } +yy344: + { return (bufsize_t)(p - start); } +yy345: + yych = *++p; + if (yych <= 0x7F) goto yy341; + if (yych <= 0xBF) goto yy339; + goto yy341; +yy346: + yych = *++p; + if (yych <= 0x9F) goto yy341; + if (yych <= 0xBF) goto yy345; + goto yy341; +yy347: + yych = *++p; + if (yych <= 0x7F) goto yy341; + if (yych <= 0xBF) goto yy345; + goto yy341; +yy348: + yych = *++p; + if (yych <= 0x7F) goto yy341; + if (yych <= 0x9F) goto yy345; + goto yy341; +yy349: + yych = *++p; + if (yych <= 0x8F) goto yy341; + if (yych <= 0xBF) goto yy347; + goto yy341; +yy350: + yych = *++p; + if (yych <= 0x7F) goto yy341; + if (yych <= 0xBF) goto yy347; + goto yy341; +yy351: + yych = *++p; + if (yych <= 0x7F) goto yy341; + if (yych <= 0x8F) goto yy347; + goto yy341; +} + } // Try to match an HTML block tag start line, returning // an integer code for the type of block (1-6, matching the spec). // #7 is handled by a separate function, below. 
-bufsize_t _scan_html_block_start(const unsigned char *p) { +bufsize_t _scan_html_block_start(const unsigned char *p) +{ const unsigned char *marker = NULL; - { - unsigned char yych; - yych = *p; - if (yych == '<') - goto yy512; - ++p; - yy511 : { return 0; } - yy512: - yych = *(marker = ++p); - switch (yych) { - case '!': - goto yy530; - case '/': - goto yy513; - case '?': - goto yy531; - case 'A': - case 'a': - goto yy516; - case 'B': - case 'b': - goto yy517; - case 'C': - case 'c': - goto yy518; - case 'D': - case 'd': - goto yy519; - case 'F': - case 'f': - goto yy520; - case 'H': - case 'h': - goto yy521; - case 'I': - case 'i': - goto yy522; - case 'L': - case 'l': - goto yy523; - case 'M': - case 'm': - goto yy524; - case 'N': - case 'n': - goto yy525; - case 'O': - case 'o': - goto yy526; - case 'P': - case 'p': - goto yy515; - case 'S': - case 's': - goto yy527; - case 'T': - case 't': - goto yy528; - case 'U': - case 'u': - goto yy529; - default: - goto yy511; - } - yy513: - yych = *++p; - switch (yych) { - case 'A': - case 'a': - goto yy516; - case 'B': - case 'b': - goto yy517; - case 'C': - case 'c': - goto yy518; - case 'D': - case 'd': - goto yy519; - case 'F': - case 'f': - goto yy520; - case 'H': - case 'h': - goto yy521; - case 'I': - case 'i': - goto yy522; - case 'L': - case 'l': - goto yy523; - case 'M': - case 'm': - goto yy524; - case 'N': - case 'n': - goto yy525; - case 'O': - case 'o': - goto yy526; - case 'P': - case 'p': - goto yy715; - case 'S': - case 's': - goto yy716; - case 'T': - case 't': - goto yy528; - case 'U': - case 'u': - goto yy529; - default: - goto yy514; - } - yy514: - p = marker; - goto yy511; - yy515: - yych = *++p; - if (yych <= '>') { - if (yych <= ' ') { - if (yych <= 0x08) - goto yy514; - if (yych <= '\r') - goto yy547; - if (yych <= 0x1F) - goto yy514; - goto yy547; - } else { - if (yych == '/') - goto yy549; - if (yych <= '=') - goto yy514; - goto yy547; - } - } else { - if (yych <= 'R') { - if (yych == 'A') - 
goto yy712; - if (yych <= 'Q') - goto yy514; - goto yy711; - } else { - if (yych <= 'a') { - if (yych <= '`') - goto yy514; - goto yy712; - } else { - if (yych == 'r') - goto yy711; - goto yy514; - } - } - } - yy516: - yych = *++p; - if (yych <= 'S') { - if (yych <= 'D') { - if (yych <= 'C') - goto yy514; - goto yy700; - } else { - if (yych <= 'Q') - goto yy514; - if (yych <= 'R') - goto yy699; - goto yy698; - } - } else { - if (yych <= 'q') { - if (yych == 'd') - goto yy700; - goto yy514; - } else { - if (yych <= 'r') - goto yy699; - if (yych <= 's') - goto yy698; - goto yy514; - } - } - yy517: - yych = *++p; - if (yych <= 'O') { - if (yych <= 'K') { - if (yych == 'A') - goto yy684; - goto yy514; - } else { - if (yych <= 'L') - goto yy683; - if (yych <= 'N') - goto yy514; - goto yy682; - } - } else { - if (yych <= 'k') { - if (yych == 'a') - goto yy684; - goto yy514; - } else { - if (yych <= 'l') - goto yy683; - if (yych == 'o') - goto yy682; - goto yy514; - } - } - yy518: - yych = *++p; - if (yych <= 'O') { - if (yych <= 'D') { - if (yych == 'A') - goto yy669; - goto yy514; - } else { - if (yych <= 'E') - goto yy668; - if (yych <= 'N') - goto yy514; - goto yy667; - } - } else { - if (yych <= 'd') { - if (yych == 'a') - goto yy669; - goto yy514; - } else { - if (yych <= 'e') - goto yy668; - if (yych == 'o') - goto yy667; - goto yy514; - } - } - yy519: - yych = *++p; - switch (yych) { - case 'D': - case 'L': - case 'T': - case 'd': - case 'l': - case 't': - goto yy546; - case 'E': - case 'e': - goto yy659; - case 'I': - case 'i': - goto yy658; - default: - goto yy514; - } - yy520: - yych = *++p; - if (yych <= 'R') { - if (yych <= 'N') { - if (yych == 'I') - goto yy634; - goto yy514; - } else { - if (yych <= 'O') - goto yy633; - if (yych <= 'Q') - goto yy514; - goto yy632; - } - } else { - if (yych <= 'n') { - if (yych == 'i') - goto yy634; - goto yy514; - } else { - if (yych <= 'o') - goto yy633; - if (yych == 'r') - goto yy632; - goto yy514; - } - } - yy521: - 
yych = *++p; - if (yych <= 'S') { - if (yych <= 'D') { - if (yych <= '0') - goto yy514; - if (yych <= '6') - goto yy546; - goto yy514; - } else { - if (yych <= 'E') - goto yy627; - if (yych == 'R') - goto yy546; - goto yy514; - } - } else { - if (yych <= 'q') { - if (yych <= 'T') - goto yy626; - if (yych == 'e') - goto yy627; - goto yy514; - } else { - if (yych <= 'r') - goto yy546; - if (yych == 't') - goto yy626; - goto yy514; - } - } - yy522: - yych = *++p; - if (yych == 'F') - goto yy622; - if (yych == 'f') - goto yy622; - goto yy514; - yy523: - yych = *++p; - if (yych <= 'I') { - if (yych == 'E') - goto yy617; - if (yych <= 'H') - goto yy514; - goto yy616; - } else { - if (yych <= 'e') { - if (yych <= 'd') - goto yy514; - goto yy617; - } else { - if (yych == 'i') - goto yy616; - goto yy514; - } - } - yy524: - yych = *++p; - if (yych <= 'E') { - if (yych == 'A') - goto yy608; - if (yych <= 'D') - goto yy514; - goto yy607; - } else { - if (yych <= 'a') { - if (yych <= '`') - goto yy514; - goto yy608; - } else { - if (yych == 'e') - goto yy607; - goto yy514; - } - } - yy525: - yych = *++p; - if (yych <= 'O') { - if (yych == 'A') - goto yy601; - if (yych <= 'N') - goto yy514; - goto yy600; - } else { - if (yych <= 'a') { - if (yych <= '`') - goto yy514; - goto yy601; - } else { - if (yych == 'o') - goto yy600; - goto yy514; - } - } - yy526: - yych = *++p; - if (yych <= 'P') { - if (yych == 'L') - goto yy546; - if (yych <= 'O') - goto yy514; - goto yy592; - } else { - if (yych <= 'l') { - if (yych <= 'k') - goto yy514; - goto yy546; - } else { - if (yych == 'p') - goto yy592; - goto yy514; - } - } - yy527: - yych = *++p; - switch (yych) { - case 'C': - case 'c': - goto yy569; - case 'E': - case 'e': - goto yy572; - case 'O': - case 'o': - goto yy571; - case 'T': - case 't': - goto yy568; - case 'U': - case 'u': - goto yy570; - default: - goto yy514; - } - yy528: - yych = *++p; - switch (yych) { - case 'A': - case 'a': - goto yy555; - case 'B': - case 'b': - goto 
yy554; - case 'D': - case 'd': - goto yy546; - case 'F': - case 'f': - goto yy553; - case 'H': - case 'h': - goto yy552; - case 'I': - case 'i': - goto yy551; - case 'R': - case 'r': - goto yy550; - default: - goto yy514; - } - yy529: - yych = *++p; - if (yych == 'L') - goto yy546; - if (yych == 'l') - goto yy546; - goto yy514; - yy530: - yych = *++p; - if (yych <= '@') { - if (yych == '-') - goto yy533; - goto yy514; - } else { - if (yych <= 'Z') - goto yy534; - if (yych <= '[') - goto yy536; - goto yy514; - } - yy531: - ++p; - { return 3; } - yy533: - yych = *++p; - if (yych == '-') - goto yy544; - goto yy514; - yy534: - ++p; - { return 4; } - yy536: - yych = *++p; - if (yych == 'C') - goto yy537; - if (yych != 'c') - goto yy514; - yy537: - yych = *++p; - if (yych == 'D') - goto yy538; - if (yych != 'd') - goto yy514; - yy538: - yych = *++p; - if (yych == 'A') - goto yy539; - if (yych != 'a') - goto yy514; - yy539: - yych = *++p; - if (yych == 'T') - goto yy540; - if (yych != 't') - goto yy514; - yy540: - yych = *++p; - if (yych == 'A') - goto yy541; - if (yych != 'a') - goto yy514; - yy541: - yych = *++p; - if (yych != '[') - goto yy514; - ++p; - { return 5; } - yy544: - ++p; - { return 2; } - yy546: - yych = *++p; - if (yych <= ' ') { - if (yych <= 0x08) - goto yy514; - if (yych <= '\r') - goto yy547; - if (yych <= 0x1F) - goto yy514; - } else { - if (yych <= '/') { - if (yych <= '.') - goto yy514; - goto yy549; - } else { - if (yych != '>') - goto yy514; - } - } - yy547: - ++p; - { return 6; } - yy549: - yych = *++p; - if (yych == '>') - goto yy547; - goto yy514; - yy550: - yych = *++p; - if (yych <= '/') { - if (yych <= 0x1F) { - if (yych <= 0x08) - goto yy514; - if (yych <= '\r') - goto yy547; - goto yy514; - } else { - if (yych <= ' ') - goto yy547; - if (yych <= '.') - goto yy514; - goto yy549; - } - } else { - if (yych <= '@') { - if (yych == '>') - goto yy547; - goto yy514; - } else { - if (yych <= 'A') - goto yy566; - if (yych == 'a') - goto yy566; - 
goto yy514; - } - } - yy551: - yych = *++p; - if (yych == 'T') - goto yy564; - if (yych == 't') - goto yy564; - goto yy514; - yy552: - yych = *++p; - if (yych <= '/') { - if (yych <= 0x1F) { - if (yych <= 0x08) - goto yy514; - if (yych <= '\r') - goto yy547; - goto yy514; - } else { - if (yych <= ' ') - goto yy547; - if (yych <= '.') - goto yy514; - goto yy549; - } - } else { - if (yych <= 'D') { - if (yych == '>') - goto yy547; - goto yy514; - } else { - if (yych <= 'E') - goto yy562; - if (yych == 'e') - goto yy562; - goto yy514; - } - } - yy553: - yych = *++p; - if (yych == 'O') - goto yy560; - if (yych == 'o') - goto yy560; - goto yy514; - yy554: - yych = *++p; - if (yych == 'O') - goto yy558; - if (yych == 'o') - goto yy558; - goto yy514; - yy555: - yych = *++p; - if (yych == 'B') - goto yy556; - if (yych != 'b') - goto yy514; - yy556: - yych = *++p; - if (yych == 'L') - goto yy557; - if (yych != 'l') - goto yy514; - yy557: - yych = *++p; - if (yych == 'E') - goto yy546; - if (yych == 'e') - goto yy546; - goto yy514; - yy558: - yych = *++p; - if (yych == 'D') - goto yy559; - if (yych != 'd') - goto yy514; - yy559: - yych = *++p; - if (yych == 'Y') - goto yy546; - if (yych == 'y') - goto yy546; - goto yy514; - yy560: - yych = *++p; - if (yych == 'O') - goto yy561; - if (yych != 'o') - goto yy514; - yy561: - yych = *++p; - if (yych == 'T') - goto yy546; - if (yych == 't') - goto yy546; - goto yy514; - yy562: - yych = *++p; - if (yych == 'A') - goto yy563; - if (yych != 'a') - goto yy514; - yy563: - yych = *++p; - if (yych == 'D') - goto yy546; - if (yych == 'd') - goto yy546; - goto yy514; - yy564: - yych = *++p; - if (yych == 'L') - goto yy565; - if (yych != 'l') - goto yy514; - yy565: - yych = *++p; - if (yych == 'E') - goto yy546; - if (yych == 'e') - goto yy546; - goto yy514; - yy566: - yych = *++p; - if (yych == 'C') - goto yy567; - if (yych != 'c') - goto yy514; - yy567: - yych = *++p; - if (yych == 'K') - goto yy546; - if (yych == 'k') - goto yy546; - 
goto yy514; - yy568: - yych = *++p; - if (yych == 'Y') - goto yy590; - if (yych == 'y') - goto yy590; - goto yy514; - yy569: - yych = *++p; - if (yych == 'R') - goto yy584; - if (yych == 'r') - goto yy584; - goto yy514; - yy570: - yych = *++p; - if (yych == 'M') - goto yy580; - if (yych == 'm') - goto yy580; - goto yy514; - yy571: - yych = *++p; - if (yych == 'U') - goto yy577; - if (yych == 'u') - goto yy577; - goto yy514; - yy572: - yych = *++p; - if (yych == 'C') - goto yy573; - if (yych != 'c') - goto yy514; - yy573: - yych = *++p; - if (yych == 'T') - goto yy574; - if (yych != 't') - goto yy514; - yy574: - yych = *++p; - if (yych == 'I') - goto yy575; - if (yych != 'i') - goto yy514; - yy575: - yych = *++p; - if (yych == 'O') - goto yy576; - if (yych != 'o') - goto yy514; - yy576: - yych = *++p; - if (yych == 'N') - goto yy546; - if (yych == 'n') - goto yy546; - goto yy514; - yy577: - yych = *++p; - if (yych == 'R') - goto yy578; - if (yych != 'r') - goto yy514; - yy578: - yych = *++p; - if (yych == 'C') - goto yy579; - if (yych != 'c') - goto yy514; - yy579: - yych = *++p; - if (yych == 'E') - goto yy546; - if (yych == 'e') - goto yy546; - goto yy514; - yy580: - yych = *++p; - if (yych == 'M') - goto yy581; - if (yych != 'm') - goto yy514; - yy581: - yych = *++p; - if (yych == 'A') - goto yy582; - if (yych != 'a') - goto yy514; - yy582: - yych = *++p; - if (yych == 'R') - goto yy583; - if (yych != 'r') - goto yy514; - yy583: - yych = *++p; - if (yych == 'Y') - goto yy546; - if (yych == 'y') - goto yy546; - goto yy514; - yy584: - yych = *++p; - if (yych == 'I') - goto yy585; - if (yych != 'i') - goto yy514; - yy585: - yych = *++p; - if (yych == 'P') - goto yy586; - if (yych != 'p') - goto yy514; - yy586: - yych = *++p; - if (yych == 'T') - goto yy587; - if (yych != 't') - goto yy514; - yy587: - yych = *++p; - if (yych <= 0x1F) { - if (yych <= 0x08) - goto yy514; - if (yych >= 0x0E) - goto yy514; - } else { - if (yych <= ' ') - goto yy588; - if (yych != '>') - 
goto yy514; - } - yy588: - ++p; - { return 1; } - yy590: - yych = *++p; - if (yych == 'L') - goto yy591; - if (yych != 'l') - goto yy514; - yy591: - yych = *++p; - if (yych == 'E') - goto yy587; - if (yych == 'e') - goto yy587; - goto yy514; - yy592: - yych = *++p; - if (yych == 'T') - goto yy593; - if (yych != 't') - goto yy514; - yy593: - yych = *++p; - if (yych <= 'I') { - if (yych == 'G') - goto yy595; - if (yych <= 'H') - goto yy514; - } else { - if (yych <= 'g') { - if (yych <= 'f') - goto yy514; - goto yy595; - } else { - if (yych != 'i') - goto yy514; - } - } - yych = *++p; - if (yych == 'O') - goto yy599; - if (yych == 'o') - goto yy599; - goto yy514; - yy595: - yych = *++p; - if (yych == 'R') - goto yy596; - if (yych != 'r') - goto yy514; - yy596: - yych = *++p; - if (yych == 'O') - goto yy597; - if (yych != 'o') - goto yy514; - yy597: - yych = *++p; - if (yych == 'U') - goto yy598; - if (yych != 'u') - goto yy514; - yy598: - yych = *++p; - if (yych == 'P') - goto yy546; - if (yych == 'p') - goto yy546; - goto yy514; - yy599: - yych = *++p; - if (yych == 'N') - goto yy546; - if (yych == 'n') - goto yy546; - goto yy514; - yy600: - yych = *++p; - if (yych == 'F') - goto yy602; - if (yych == 'f') - goto yy602; - goto yy514; - yy601: - yych = *++p; - if (yych == 'V') - goto yy546; - if (yych == 'v') - goto yy546; - goto yy514; - yy602: - yych = *++p; - if (yych == 'R') - goto yy603; - if (yych != 'r') - goto yy514; - yy603: - yych = *++p; - if (yych == 'A') - goto yy604; - if (yych != 'a') - goto yy514; - yy604: - yych = *++p; - if (yych == 'M') - goto yy605; - if (yych != 'm') - goto yy514; - yy605: - yych = *++p; - if (yych == 'E') - goto yy606; - if (yych != 'e') - goto yy514; - yy606: - yych = *++p; - if (yych == 'S') - goto yy546; - if (yych == 's') - goto yy546; - goto yy514; - yy607: - yych = *++p; - if (yych <= 'T') { - if (yych == 'N') - goto yy610; - if (yych <= 'S') - goto yy514; - goto yy611; - } else { - if (yych <= 'n') { - if (yych <= 'm') - 
goto yy514; - goto yy610; - } else { - if (yych == 't') - goto yy611; - goto yy514; - } - } - yy608: - yych = *++p; - if (yych == 'I') - goto yy609; - if (yych != 'i') - goto yy514; - yy609: - yych = *++p; - if (yych == 'N') - goto yy546; - if (yych == 'n') - goto yy546; - goto yy514; - yy610: - yych = *++p; - if (yych == 'U') - goto yy612; - if (yych == 'u') - goto yy612; - goto yy514; - yy611: - yych = *++p; - if (yych == 'A') - goto yy546; - if (yych == 'a') - goto yy546; - goto yy514; - yy612: - yych = *++p; - if (yych <= '/') { - if (yych <= 0x1F) { - if (yych <= 0x08) - goto yy514; - if (yych <= '\r') - goto yy547; - goto yy514; - } else { - if (yych <= ' ') - goto yy547; - if (yych <= '.') - goto yy514; - goto yy549; - } - } else { - if (yych <= 'H') { - if (yych == '>') - goto yy547; - goto yy514; - } else { - if (yych <= 'I') - goto yy613; - if (yych != 'i') - goto yy514; - } - } - yy613: - yych = *++p; - if (yych == 'T') - goto yy614; - if (yych != 't') - goto yy514; - yy614: - yych = *++p; - if (yych == 'E') - goto yy615; - if (yych != 'e') - goto yy514; - yy615: - yych = *++p; - if (yych == 'M') - goto yy546; - if (yych == 'm') - goto yy546; - goto yy514; - yy616: - yych = *++p; - if (yych <= '/') { - if (yych <= 0x1F) { - if (yych <= 0x08) - goto yy514; - if (yych <= '\r') - goto yy547; - goto yy514; - } else { - if (yych <= ' ') - goto yy547; - if (yych <= '.') - goto yy514; - goto yy549; - } - } else { - if (yych <= 'M') { - if (yych == '>') - goto yy547; - goto yy514; - } else { - if (yych <= 'N') - goto yy621; - if (yych == 'n') - goto yy621; - goto yy514; - } - } - yy617: - yych = *++p; - if (yych == 'G') - goto yy618; - if (yych != 'g') - goto yy514; - yy618: - yych = *++p; - if (yych == 'E') - goto yy619; - if (yych != 'e') - goto yy514; - yy619: - yych = *++p; - if (yych == 'N') - goto yy620; - if (yych != 'n') - goto yy514; - yy620: - yych = *++p; - if (yych == 'D') - goto yy546; - if (yych == 'd') - goto yy546; - goto yy514; - yy621: - yych = 
*++p; - if (yych == 'K') - goto yy546; - if (yych == 'k') - goto yy546; - goto yy514; - yy622: - yych = *++p; - if (yych == 'R') - goto yy623; - if (yych != 'r') - goto yy514; - yy623: - yych = *++p; - if (yych == 'A') - goto yy624; - if (yych != 'a') - goto yy514; - yy624: - yych = *++p; - if (yych == 'M') - goto yy625; - if (yych != 'm') - goto yy514; - yy625: - yych = *++p; - if (yych == 'E') - goto yy546; - if (yych == 'e') - goto yy546; - goto yy514; - yy626: - yych = *++p; - if (yych == 'M') - goto yy631; - if (yych == 'm') - goto yy631; - goto yy514; - yy627: - yych = *++p; - if (yych == 'A') - goto yy628; - if (yych != 'a') - goto yy514; - yy628: - yych = *++p; - if (yych == 'D') - goto yy629; - if (yych != 'd') - goto yy514; - yy629: - yych = *++p; - if (yych <= '/') { - if (yych <= 0x1F) { - if (yych <= 0x08) - goto yy514; - if (yych <= '\r') - goto yy547; - goto yy514; - } else { - if (yych <= ' ') - goto yy547; - if (yych <= '.') - goto yy514; - goto yy549; - } - } else { - if (yych <= 'D') { - if (yych == '>') - goto yy547; - goto yy514; - } else { - if (yych <= 'E') - goto yy630; - if (yych != 'e') - goto yy514; - } - } - yy630: - yych = *++p; - if (yych == 'R') - goto yy546; - if (yych == 'r') - goto yy546; - goto yy514; - yy631: - yych = *++p; - if (yych == 'L') - goto yy546; - if (yych == 'l') - goto yy546; - goto yy514; - yy632: - yych = *++p; - if (yych == 'A') - goto yy653; - if (yych == 'a') - goto yy653; - goto yy514; - yy633: - yych = *++p; - if (yych <= 'R') { - if (yych == 'O') - goto yy649; - if (yych <= 'Q') - goto yy514; - goto yy650; - } else { - if (yych <= 'o') { - if (yych <= 'n') - goto yy514; - goto yy649; - } else { - if (yych == 'r') - goto yy650; - goto yy514; - } - } - yy634: - yych = *++p; - if (yych <= 'G') { - if (yych == 'E') - goto yy635; - if (yych <= 'F') - goto yy514; - goto yy636; - } else { - if (yych <= 'e') { - if (yych <= 'd') - goto yy514; - } else { - if (yych == 'g') - goto yy636; - goto yy514; - } - } - yy635: 
- yych = *++p; - if (yych == 'L') - goto yy645; - if (yych == 'l') - goto yy645; - goto yy514; - yy636: - yych = *++p; - if (yych <= 'U') { - if (yych == 'C') - goto yy638; - if (yych <= 'T') - goto yy514; - } else { - if (yych <= 'c') { - if (yych <= 'b') - goto yy514; - goto yy638; - } else { - if (yych != 'u') - goto yy514; - } - } - yych = *++p; - if (yych == 'R') - goto yy644; - if (yych == 'r') - goto yy644; - goto yy514; - yy638: - yych = *++p; - if (yych == 'A') - goto yy639; - if (yych != 'a') - goto yy514; - yy639: - yych = *++p; - if (yych == 'P') - goto yy640; - if (yych != 'p') - goto yy514; - yy640: - yych = *++p; - if (yych == 'T') - goto yy641; - if (yych != 't') - goto yy514; - yy641: - yych = *++p; - if (yych == 'I') - goto yy642; - if (yych != 'i') - goto yy514; - yy642: - yych = *++p; - if (yych == 'O') - goto yy643; - if (yych != 'o') - goto yy514; - yy643: - yych = *++p; - if (yych == 'N') - goto yy546; - if (yych == 'n') - goto yy546; - goto yy514; - yy644: - yych = *++p; - if (yych == 'E') - goto yy546; - if (yych == 'e') - goto yy546; - goto yy514; - yy645: - yych = *++p; - if (yych == 'D') - goto yy646; - if (yych != 'd') - goto yy514; - yy646: - yych = *++p; - if (yych == 'S') - goto yy647; - if (yych != 's') - goto yy514; - yy647: - yych = *++p; - if (yych == 'E') - goto yy648; - if (yych != 'e') - goto yy514; - yy648: - yych = *++p; - if (yych == 'T') - goto yy546; - if (yych == 't') - goto yy546; - goto yy514; - yy649: - yych = *++p; - if (yych == 'T') - goto yy651; - if (yych == 't') - goto yy651; - goto yy514; - yy650: - yych = *++p; - if (yych == 'M') - goto yy546; - if (yych == 'm') - goto yy546; - goto yy514; - yy651: - yych = *++p; - if (yych == 'E') - goto yy652; - if (yych != 'e') - goto yy514; - yy652: - yych = *++p; - if (yych == 'R') - goto yy546; - if (yych == 'r') - goto yy546; - goto yy514; - yy653: - yych = *++p; - if (yych == 'M') - goto yy654; - if (yych != 'm') - goto yy514; - yy654: - yych = *++p; - if (yych == 'E') 
- goto yy655; - if (yych != 'e') - goto yy514; - yy655: - yych = *++p; - if (yych <= '/') { - if (yych <= 0x1F) { - if (yych <= 0x08) - goto yy514; - if (yych <= '\r') - goto yy547; - goto yy514; - } else { - if (yych <= ' ') - goto yy547; - if (yych <= '.') - goto yy514; - goto yy549; - } - } else { - if (yych <= 'R') { - if (yych == '>') - goto yy547; - goto yy514; - } else { - if (yych <= 'S') - goto yy656; - if (yych != 's') - goto yy514; - } - } - yy656: - yych = *++p; - if (yych == 'E') - goto yy657; - if (yych != 'e') - goto yy514; - yy657: - yych = *++p; - if (yych == 'T') - goto yy546; - if (yych == 't') - goto yy546; - goto yy514; - yy658: - yych = *++p; - if (yych <= 'V') { - if (yych <= 'Q') { - if (yych == 'A') - goto yy664; - goto yy514; - } else { - if (yych <= 'R') - goto yy546; - if (yych <= 'U') - goto yy514; - goto yy546; - } - } else { - if (yych <= 'q') { - if (yych == 'a') - goto yy664; - goto yy514; - } else { - if (yych <= 'r') - goto yy546; - if (yych == 'v') - goto yy546; - goto yy514; - } - } - yy659: - yych = *++p; - if (yych == 'T') - goto yy660; - if (yych != 't') - goto yy514; - yy660: - yych = *++p; - if (yych == 'A') - goto yy661; - if (yych != 'a') - goto yy514; - yy661: - yych = *++p; - if (yych == 'I') - goto yy662; - if (yych != 'i') - goto yy514; - yy662: - yych = *++p; - if (yych == 'L') - goto yy663; - if (yych != 'l') - goto yy514; - yy663: - yych = *++p; - if (yych == 'S') - goto yy546; - if (yych == 's') - goto yy546; - goto yy514; - yy664: - yych = *++p; - if (yych == 'L') - goto yy665; - if (yych != 'l') - goto yy514; - yy665: - yych = *++p; - if (yych == 'O') - goto yy666; - if (yych != 'o') - goto yy514; - yy666: - yych = *++p; - if (yych == 'G') - goto yy546; - if (yych == 'g') - goto yy546; - goto yy514; - yy667: - yych = *++p; - if (yych == 'L') - goto yy677; - if (yych == 'l') - goto yy677; - goto yy514; - yy668: - yych = *++p; - if (yych == 'N') - goto yy674; - if (yych == 'n') - goto yy674; - goto yy514; - yy669: 
- yych = *++p; - if (yych == 'P') - goto yy670; - if (yych != 'p') - goto yy514; - yy670: - yych = *++p; - if (yych == 'T') - goto yy671; - if (yych != 't') - goto yy514; - yy671: - yych = *++p; - if (yych == 'I') - goto yy672; - if (yych != 'i') - goto yy514; - yy672: - yych = *++p; - if (yych == 'O') - goto yy673; - if (yych != 'o') - goto yy514; - yy673: - yych = *++p; - if (yych == 'N') - goto yy546; - if (yych == 'n') - goto yy546; - goto yy514; - yy674: - yych = *++p; - if (yych == 'T') - goto yy675; - if (yych != 't') - goto yy514; - yy675: - yych = *++p; - if (yych == 'E') - goto yy676; - if (yych != 'e') - goto yy514; - yy676: - yych = *++p; - if (yych == 'R') - goto yy546; - if (yych == 'r') - goto yy546; - goto yy514; - yy677: - yych = *++p; - if (yych <= '/') { - if (yych <= 0x1F) { - if (yych <= 0x08) - goto yy514; - if (yych <= '\r') - goto yy547; - goto yy514; - } else { - if (yych <= ' ') - goto yy547; - if (yych <= '.') - goto yy514; - goto yy549; - } - } else { - if (yych <= 'F') { - if (yych == '>') - goto yy547; - goto yy514; - } else { - if (yych <= 'G') - goto yy678; - if (yych != 'g') - goto yy514; - } - } - yy678: - yych = *++p; - if (yych == 'R') - goto yy679; - if (yych != 'r') - goto yy514; - yy679: - yych = *++p; - if (yych == 'O') - goto yy680; - if (yych != 'o') - goto yy514; - yy680: - yych = *++p; - if (yych == 'U') - goto yy681; - if (yych != 'u') - goto yy514; - yy681: - yych = *++p; - if (yych == 'P') - goto yy546; - if (yych == 'p') - goto yy546; - goto yy514; - yy682: - yych = *++p; - if (yych == 'D') - goto yy697; - if (yych == 'd') - goto yy697; - goto yy514; - yy683: - yych = *++p; - if (yych == 'O') - goto yy690; - if (yych == 'o') - goto yy690; - goto yy514; - yy684: - yych = *++p; - if (yych == 'S') - goto yy685; - if (yych != 's') - goto yy514; - yy685: - yych = *++p; - if (yych == 'E') - goto yy686; - if (yych != 'e') - goto yy514; - yy686: - yych = *++p; - if (yych <= '/') { - if (yych <= 0x1F) { - if (yych <= 0x08) - 
goto yy514; - if (yych <= '\r') - goto yy547; - goto yy514; - } else { - if (yych <= ' ') - goto yy547; - if (yych <= '.') - goto yy514; - goto yy549; - } - } else { - if (yych <= 'E') { - if (yych == '>') - goto yy547; - goto yy514; - } else { - if (yych <= 'F') - goto yy687; - if (yych != 'f') - goto yy514; - } - } - yy687: - yych = *++p; - if (yych == 'O') - goto yy688; - if (yych != 'o') - goto yy514; - yy688: - yych = *++p; - if (yych == 'N') - goto yy689; - if (yych != 'n') - goto yy514; - yy689: - yych = *++p; - if (yych == 'T') - goto yy546; - if (yych == 't') - goto yy546; - goto yy514; - yy690: - yych = *++p; - if (yych == 'C') - goto yy691; - if (yych != 'c') - goto yy514; - yy691: - yych = *++p; - if (yych == 'K') - goto yy692; - if (yych != 'k') - goto yy514; - yy692: - yych = *++p; - if (yych == 'Q') - goto yy693; - if (yych != 'q') - goto yy514; - yy693: - yych = *++p; - if (yych == 'U') - goto yy694; - if (yych != 'u') - goto yy514; - yy694: - yych = *++p; - if (yych == 'O') - goto yy695; - if (yych != 'o') - goto yy514; - yy695: - yych = *++p; - if (yych == 'T') - goto yy696; - if (yych != 't') - goto yy514; - yy696: - yych = *++p; - if (yych == 'E') - goto yy546; - if (yych == 'e') - goto yy546; - goto yy514; - yy697: - yych = *++p; - if (yych == 'Y') - goto yy546; - if (yych == 'y') - goto yy546; - goto yy514; - yy698: - yych = *++p; - if (yych == 'I') - goto yy709; - if (yych == 'i') - goto yy709; - goto yy514; - yy699: - yych = *++p; - if (yych == 'T') - goto yy705; - if (yych == 't') - goto yy705; - goto yy514; - yy700: - yych = *++p; - if (yych == 'D') - goto yy701; - if (yych != 'd') - goto yy514; - yy701: - yych = *++p; - if (yych == 'R') - goto yy702; - if (yych != 'r') - goto yy514; - yy702: - yych = *++p; - if (yych == 'E') - goto yy703; - if (yych != 'e') - goto yy514; - yy703: - yych = *++p; - if (yych == 'S') - goto yy704; - if (yych != 's') - goto yy514; - yy704: - yych = *++p; - if (yych == 'S') - goto yy546; - if (yych == 's') - 
goto yy546; - goto yy514; - yy705: - yych = *++p; - if (yych == 'I') - goto yy706; - if (yych != 'i') - goto yy514; - yy706: - yych = *++p; - if (yych == 'C') - goto yy707; - if (yych != 'c') - goto yy514; - yy707: - yych = *++p; - if (yych == 'L') - goto yy708; - if (yych != 'l') - goto yy514; - yy708: - yych = *++p; - if (yych == 'E') - goto yy546; - if (yych == 'e') - goto yy546; - goto yy514; - yy709: - yych = *++p; - if (yych == 'D') - goto yy710; - if (yych != 'd') - goto yy514; - yy710: - yych = *++p; - if (yych == 'E') - goto yy546; - if (yych == 'e') - goto yy546; - goto yy514; - yy711: - yych = *++p; - if (yych == 'E') - goto yy587; - if (yych == 'e') - goto yy587; - goto yy514; - yy712: - yych = *++p; - if (yych == 'R') - goto yy713; - if (yych != 'r') - goto yy514; - yy713: - yych = *++p; - if (yych == 'A') - goto yy714; - if (yych != 'a') - goto yy514; - yy714: - yych = *++p; - if (yych == 'M') - goto yy546; - if (yych == 'm') - goto yy546; - goto yy514; - yy715: - yych = *++p; - if (yych <= '/') { - if (yych <= 0x1F) { - if (yych <= 0x08) - goto yy514; - if (yych <= '\r') - goto yy547; - goto yy514; - } else { - if (yych <= ' ') - goto yy547; - if (yych <= '.') - goto yy514; - goto yy549; - } - } else { - if (yych <= '@') { - if (yych == '>') - goto yy547; - goto yy514; - } else { - if (yych <= 'A') - goto yy712; - if (yych == 'a') - goto yy712; - goto yy514; - } - } - yy716: - ++p; - if ((yych = *p) <= 'U') { - if (yych <= 'N') { - if (yych == 'E') - goto yy572; - goto yy514; - } else { - if (yych <= 'O') - goto yy571; - if (yych <= 'T') - goto yy514; - goto yy570; - } - } else { - if (yych <= 'n') { - if (yych == 'e') - goto yy572; - goto yy514; - } else { - if (yych <= 'o') - goto yy571; - if (yych == 'u') - goto yy570; - goto yy514; - } - } - } +{ + unsigned char yych; + yych = *p; + if (yych == '<') goto yy356; + ++p; +yy355: + { return 0; } +yy356: + yych = *(marker = ++p); + switch (yych) { + case '!': goto yy357; + case '/': goto yy359; + case 
'?': goto yy360; + case 'A': + case 'a': goto yy362; + case 'B': + case 'b': goto yy363; + case 'C': + case 'c': goto yy364; + case 'D': + case 'd': goto yy365; + case 'F': + case 'f': goto yy366; + case 'H': + case 'h': goto yy367; + case 'I': + case 'i': goto yy368; + case 'L': + case 'l': goto yy369; + case 'M': + case 'm': goto yy370; + case 'N': + case 'n': goto yy371; + case 'O': + case 'o': goto yy372; + case 'P': + case 'p': goto yy373; + case 'S': + case 's': goto yy374; + case 'T': + case 't': goto yy375; + case 'U': + case 'u': goto yy376; + default: goto yy355; + } +yy357: + yych = *++p; + if (yych <= '@') { + if (yych == '-') goto yy377; + } else { + if (yych <= 'Z') goto yy378; + if (yych <= '[') goto yy380; + } +yy358: + p = marker; + goto yy355; +yy359: + yych = *++p; + switch (yych) { + case 'A': + case 'a': goto yy362; + case 'B': + case 'b': goto yy363; + case 'C': + case 'c': goto yy364; + case 'D': + case 'd': goto yy365; + case 'F': + case 'f': goto yy366; + case 'H': + case 'h': goto yy367; + case 'I': + case 'i': goto yy368; + case 'L': + case 'l': goto yy369; + case 'M': + case 'm': goto yy370; + case 'N': + case 'n': goto yy371; + case 'O': + case 'o': goto yy372; + case 'P': + case 'p': goto yy381; + case 'S': + case 's': goto yy382; + case 'T': + case 't': goto yy375; + case 'U': + case 'u': goto yy376; + default: goto yy358; + } +yy360: + ++p; + { return 3; } +yy362: + yych = *++p; + if (yych <= 'S') { + if (yych <= 'D') { + if (yych <= 'C') goto yy358; + goto yy383; + } else { + if (yych <= 'Q') goto yy358; + if (yych <= 'R') goto yy384; + goto yy385; + } + } else { + if (yych <= 'q') { + if (yych == 'd') goto yy383; + goto yy358; + } else { + if (yych <= 'r') goto yy384; + if (yych <= 's') goto yy385; + goto yy358; + } + } +yy363: + yych = *++p; + if (yych <= 'O') { + if (yych <= 'K') { + if (yych == 'A') goto yy386; + goto yy358; + } else { + if (yych <= 'L') goto yy387; + if (yych <= 'N') goto yy358; + goto yy388; + } + } else { + 
if (yych <= 'k') { + if (yych == 'a') goto yy386; + goto yy358; + } else { + if (yych <= 'l') goto yy387; + if (yych == 'o') goto yy388; + goto yy358; + } + } +yy364: + yych = *++p; + if (yych <= 'O') { + if (yych <= 'D') { + if (yych == 'A') goto yy389; + goto yy358; + } else { + if (yych <= 'E') goto yy390; + if (yych <= 'N') goto yy358; + goto yy391; + } + } else { + if (yych <= 'd') { + if (yych == 'a') goto yy389; + goto yy358; + } else { + if (yych <= 'e') goto yy390; + if (yych == 'o') goto yy391; + goto yy358; + } + } +yy365: + yych = *++p; + switch (yych) { + case 'D': + case 'L': + case 'T': + case 'd': + case 'l': + case 't': goto yy392; + case 'E': + case 'e': goto yy393; + case 'I': + case 'i': goto yy394; + default: goto yy358; + } +yy366: + yych = *++p; + if (yych <= 'R') { + if (yych <= 'N') { + if (yych == 'I') goto yy395; + goto yy358; + } else { + if (yych <= 'O') goto yy396; + if (yych <= 'Q') goto yy358; + goto yy397; + } + } else { + if (yych <= 'n') { + if (yych == 'i') goto yy395; + goto yy358; + } else { + if (yych <= 'o') goto yy396; + if (yych == 'r') goto yy397; + goto yy358; + } + } +yy367: + yych = *++p; + if (yych <= 'S') { + if (yych <= 'D') { + if (yych <= '0') goto yy358; + if (yych <= '6') goto yy392; + goto yy358; + } else { + if (yych <= 'E') goto yy398; + if (yych == 'R') goto yy392; + goto yy358; + } + } else { + if (yych <= 'q') { + if (yych <= 'T') goto yy399; + if (yych == 'e') goto yy398; + goto yy358; + } else { + if (yych <= 'r') goto yy392; + if (yych == 't') goto yy399; + goto yy358; + } + } +yy368: + yych = *++p; + if (yych == 'F') goto yy400; + if (yych == 'f') goto yy400; + goto yy358; +yy369: + yych = *++p; + if (yych <= 'I') { + if (yych == 'E') goto yy401; + if (yych <= 'H') goto yy358; + goto yy402; + } else { + if (yych <= 'e') { + if (yych <= 'd') goto yy358; + goto yy401; + } else { + if (yych == 'i') goto yy402; + goto yy358; + } + } +yy370: + yych = *++p; + if (yych <= 'E') { + if (yych == 'A') goto yy403; 
+ if (yych <= 'D') goto yy358; + goto yy404; + } else { + if (yych <= 'a') { + if (yych <= '`') goto yy358; + goto yy403; + } else { + if (yych == 'e') goto yy404; + goto yy358; + } + } +yy371: + yych = *++p; + if (yych <= 'O') { + if (yych == 'A') goto yy405; + if (yych <= 'N') goto yy358; + goto yy406; + } else { + if (yych <= 'a') { + if (yych <= '`') goto yy358; + goto yy405; + } else { + if (yych == 'o') goto yy406; + goto yy358; + } + } +yy372: + yych = *++p; + if (yych <= 'P') { + if (yych == 'L') goto yy392; + if (yych <= 'O') goto yy358; + goto yy407; + } else { + if (yych <= 'l') { + if (yych <= 'k') goto yy358; + goto yy392; + } else { + if (yych == 'p') goto yy407; + goto yy358; + } + } +yy373: + yych = *++p; + if (yych <= '>') { + if (yych <= ' ') { + if (yych <= 0x08) goto yy358; + if (yych <= '\r') goto yy408; + if (yych <= 0x1F) goto yy358; + goto yy408; + } else { + if (yych == '/') goto yy410; + if (yych <= '=') goto yy358; + goto yy408; + } + } else { + if (yych <= 'R') { + if (yych == 'A') goto yy411; + if (yych <= 'Q') goto yy358; + goto yy412; + } else { + if (yych <= 'a') { + if (yych <= '`') goto yy358; + goto yy411; + } else { + if (yych == 'r') goto yy412; + goto yy358; + } + } + } +yy374: + yych = *++p; + switch (yych) { + case 'C': + case 'c': goto yy413; + case 'E': + case 'e': goto yy414; + case 'O': + case 'o': goto yy415; + case 'T': + case 't': goto yy416; + case 'U': + case 'u': goto yy417; + default: goto yy358; + } +yy375: + yych = *++p; + switch (yych) { + case 'A': + case 'a': goto yy418; + case 'B': + case 'b': goto yy419; + case 'D': + case 'd': goto yy392; + case 'F': + case 'f': goto yy420; + case 'H': + case 'h': goto yy421; + case 'I': + case 'i': goto yy422; + case 'R': + case 'r': goto yy423; + default: goto yy358; + } +yy376: + yych = *++p; + if (yych == 'L') goto yy392; + if (yych == 'l') goto yy392; + goto yy358; +yy377: + yych = *++p; + if (yych == '-') goto yy424; + goto yy358; +yy378: + ++p; + { return 4; } 
+yy380: + yych = *++p; + if (yych == 'C') goto yy426; + if (yych == 'c') goto yy426; + goto yy358; +yy381: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy358; + if (yych <= '\r') goto yy408; + goto yy358; + } else { + if (yych <= ' ') goto yy408; + if (yych <= '.') goto yy358; + goto yy410; + } + } else { + if (yych <= '@') { + if (yych == '>') goto yy408; + goto yy358; + } else { + if (yych <= 'A') goto yy411; + if (yych == 'a') goto yy411; + goto yy358; + } + } +yy382: + yych = *++p; + if (yych <= 'U') { + if (yych <= 'N') { + if (yych == 'E') goto yy414; + goto yy358; + } else { + if (yych <= 'O') goto yy415; + if (yych <= 'T') goto yy358; + goto yy417; + } + } else { + if (yych <= 'n') { + if (yych == 'e') goto yy414; + goto yy358; + } else { + if (yych <= 'o') goto yy415; + if (yych == 'u') goto yy417; + goto yy358; + } + } +yy383: + yych = *++p; + if (yych == 'D') goto yy427; + if (yych == 'd') goto yy427; + goto yy358; +yy384: + yych = *++p; + if (yych == 'T') goto yy428; + if (yych == 't') goto yy428; + goto yy358; +yy385: + yych = *++p; + if (yych == 'I') goto yy429; + if (yych == 'i') goto yy429; + goto yy358; +yy386: + yych = *++p; + if (yych == 'S') goto yy430; + if (yych == 's') goto yy430; + goto yy358; +yy387: + yych = *++p; + if (yych == 'O') goto yy431; + if (yych == 'o') goto yy431; + goto yy358; +yy388: + yych = *++p; + if (yych == 'D') goto yy432; + if (yych == 'd') goto yy432; + goto yy358; +yy389: + yych = *++p; + if (yych == 'P') goto yy433; + if (yych == 'p') goto yy433; + goto yy358; +yy390: + yych = *++p; + if (yych == 'N') goto yy434; + if (yych == 'n') goto yy434; + goto yy358; +yy391: + yych = *++p; + if (yych == 'L') goto yy435; + if (yych == 'l') goto yy435; + goto yy358; +yy392: + yych = *++p; + if (yych <= ' ') { + if (yych <= 0x08) goto yy358; + if (yych <= '\r') goto yy408; + if (yych <= 0x1F) goto yy358; + goto yy408; + } else { + if (yych <= '/') { + if (yych <= '.') goto yy358; + goto 
yy410; + } else { + if (yych == '>') goto yy408; + goto yy358; + } + } +yy393: + yych = *++p; + if (yych == 'T') goto yy436; + if (yych == 't') goto yy436; + goto yy358; +yy394: + yych = *++p; + if (yych <= 'V') { + if (yych <= 'Q') { + if (yych == 'A') goto yy437; + goto yy358; + } else { + if (yych <= 'R') goto yy392; + if (yych <= 'U') goto yy358; + goto yy392; + } + } else { + if (yych <= 'q') { + if (yych == 'a') goto yy437; + goto yy358; + } else { + if (yych <= 'r') goto yy392; + if (yych == 'v') goto yy392; + goto yy358; + } + } +yy395: + yych = *++p; + if (yych <= 'G') { + if (yych == 'E') goto yy438; + if (yych <= 'F') goto yy358; + goto yy439; + } else { + if (yych <= 'e') { + if (yych <= 'd') goto yy358; + goto yy438; + } else { + if (yych == 'g') goto yy439; + goto yy358; + } + } +yy396: + yych = *++p; + if (yych <= 'R') { + if (yych == 'O') goto yy434; + if (yych <= 'Q') goto yy358; + goto yy440; + } else { + if (yych <= 'o') { + if (yych <= 'n') goto yy358; + goto yy434; + } else { + if (yych == 'r') goto yy440; + goto yy358; + } + } +yy397: + yych = *++p; + if (yych == 'A') goto yy441; + if (yych == 'a') goto yy441; + goto yy358; +yy398: + yych = *++p; + if (yych == 'A') goto yy442; + if (yych == 'a') goto yy442; + goto yy358; +yy399: + yych = *++p; + if (yych == 'M') goto yy376; + if (yych == 'm') goto yy376; + goto yy358; +yy400: + yych = *++p; + if (yych == 'R') goto yy443; + if (yych == 'r') goto yy443; + goto yy358; +yy401: + yych = *++p; + if (yych == 'G') goto yy444; + if (yych == 'g') goto yy444; + goto yy358; +yy402: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy358; + if (yych <= '\r') goto yy408; + goto yy358; + } else { + if (yych <= ' ') goto yy408; + if (yych <= '.') goto yy358; + goto yy410; + } + } else { + if (yych <= 'M') { + if (yych == '>') goto yy408; + goto yy358; + } else { + if (yych <= 'N') goto yy445; + if (yych == 'n') goto yy445; + goto yy358; + } + } +yy403: + yych = *++p; + if 
(yych == 'I') goto yy446; + if (yych == 'i') goto yy446; + goto yy358; +yy404: + yych = *++p; + if (yych == 'N') goto yy447; + if (yych == 'n') goto yy447; + goto yy358; +yy405: + yych = *++p; + if (yych == 'V') goto yy392; + if (yych == 'v') goto yy392; + goto yy358; +yy406: + yych = *++p; + if (yych == 'F') goto yy448; + if (yych == 'f') goto yy448; + goto yy358; +yy407: + yych = *++p; + if (yych == 'T') goto yy449; + if (yych == 't') goto yy449; + goto yy358; +yy408: + ++p; + { return 6; } +yy410: + yych = *++p; + if (yych == '>') goto yy408; + goto yy358; +yy411: + yych = *++p; + if (yych == 'R') goto yy450; + if (yych == 'r') goto yy450; + goto yy358; +yy412: + yych = *++p; + if (yych == 'E') goto yy451; + if (yych == 'e') goto yy451; + goto yy358; +yy413: + yych = *++p; + if (yych == 'R') goto yy452; + if (yych == 'r') goto yy452; + goto yy358; +yy414: + yych = *++p; + if (yych == 'C') goto yy433; + if (yych == 'c') goto yy433; + goto yy358; +yy415: + yych = *++p; + if (yych == 'U') goto yy453; + if (yych == 'u') goto yy453; + goto yy358; +yy416: + yych = *++p; + if (yych == 'Y') goto yy454; + if (yych == 'y') goto yy454; + goto yy358; +yy417: + yych = *++p; + if (yych == 'M') goto yy455; + if (yych == 'm') goto yy455; + goto yy358; +yy418: + yych = *++p; + if (yych == 'B') goto yy456; + if (yych == 'b') goto yy456; + goto yy358; +yy419: + yych = *++p; + if (yych == 'O') goto yy388; + if (yych == 'o') goto yy388; + goto yy358; +yy420: + yych = *++p; + if (yych == 'O') goto yy457; + if (yych == 'o') goto yy457; + goto yy358; +yy421: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy358; + if (yych <= '\r') goto yy408; + goto yy358; + } else { + if (yych <= ' ') goto yy408; + if (yych <= '.') goto yy358; + goto yy410; + } + } else { + if (yych <= 'D') { + if (yych == '>') goto yy408; + goto yy358; + } else { + if (yych <= 'E') goto yy458; + if (yych == 'e') goto yy458; + goto yy358; + } + } +yy422: + yych = *++p; + if (yych == 
'T') goto yy456; + if (yych == 't') goto yy456; + goto yy358; +yy423: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy358; + if (yych <= '\r') goto yy408; + goto yy358; + } else { + if (yych <= ' ') goto yy408; + if (yych <= '.') goto yy358; + goto yy410; + } + } else { + if (yych <= '@') { + if (yych == '>') goto yy408; + goto yy358; + } else { + if (yych <= 'A') goto yy459; + if (yych == 'a') goto yy459; + goto yy358; + } + } +yy424: + ++p; + { return 2; } +yy426: + yych = *++p; + if (yych == 'D') goto yy460; + if (yych == 'd') goto yy460; + goto yy358; +yy427: + yych = *++p; + if (yych == 'R') goto yy461; + if (yych == 'r') goto yy461; + goto yy358; +yy428: + yych = *++p; + if (yych == 'I') goto yy462; + if (yych == 'i') goto yy462; + goto yy358; +yy429: + yych = *++p; + if (yych == 'D') goto yy463; + if (yych == 'd') goto yy463; + goto yy358; +yy430: + yych = *++p; + if (yych == 'E') goto yy464; + if (yych == 'e') goto yy464; + goto yy358; +yy431: + yych = *++p; + if (yych == 'C') goto yy465; + if (yych == 'c') goto yy465; + goto yy358; +yy432: + yych = *++p; + if (yych == 'Y') goto yy392; + if (yych == 'y') goto yy392; + goto yy358; +yy433: + yych = *++p; + if (yych == 'T') goto yy466; + if (yych == 't') goto yy466; + goto yy358; +yy434: + yych = *++p; + if (yych == 'T') goto yy467; + if (yych == 't') goto yy467; + goto yy358; +yy435: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy358; + if (yych <= '\r') goto yy408; + goto yy358; + } else { + if (yych <= ' ') goto yy408; + if (yych <= '.') goto yy358; + goto yy410; + } + } else { + if (yych <= 'F') { + if (yych == '>') goto yy408; + goto yy358; + } else { + if (yych <= 'G') goto yy468; + if (yych == 'g') goto yy468; + goto yy358; + } + } +yy436: + yych = *++p; + if (yych == 'A') goto yy469; + if (yych == 'a') goto yy469; + goto yy358; +yy437: + yych = *++p; + if (yych == 'L') goto yy470; + if (yych == 'l') goto yy470; + goto yy358; 
+yy438: + yych = *++p; + if (yych == 'L') goto yy471; + if (yych == 'l') goto yy471; + goto yy358; +yy439: + yych = *++p; + if (yych <= 'U') { + if (yych == 'C') goto yy472; + if (yych <= 'T') goto yy358; + goto yy473; + } else { + if (yych <= 'c') { + if (yych <= 'b') goto yy358; + goto yy472; + } else { + if (yych == 'u') goto yy473; + goto yy358; + } + } +yy440: + yych = *++p; + if (yych == 'M') goto yy392; + if (yych == 'm') goto yy392; + goto yy358; +yy441: + yych = *++p; + if (yych == 'M') goto yy474; + if (yych == 'm') goto yy474; + goto yy358; +yy442: + yych = *++p; + if (yych == 'D') goto yy475; + if (yych == 'd') goto yy475; + goto yy358; +yy443: + yych = *++p; + if (yych == 'A') goto yy476; + if (yych == 'a') goto yy476; + goto yy358; +yy444: + yych = *++p; + if (yych == 'E') goto yy477; + if (yych == 'e') goto yy477; + goto yy358; +yy445: + yych = *++p; + if (yych == 'K') goto yy392; + if (yych == 'k') goto yy392; + goto yy358; +yy446: + yych = *++p; + if (yych == 'N') goto yy392; + if (yych == 'n') goto yy392; + goto yy358; +yy447: + yych = *++p; + if (yych == 'U') goto yy478; + if (yych == 'u') goto yy478; + goto yy358; +yy448: + yych = *++p; + if (yych == 'R') goto yy479; + if (yych == 'r') goto yy479; + goto yy358; +yy449: + yych = *++p; + if (yych <= 'I') { + if (yych == 'G') goto yy468; + if (yych <= 'H') goto yy358; + goto yy480; + } else { + if (yych <= 'g') { + if (yych <= 'f') goto yy358; + goto yy468; + } else { + if (yych == 'i') goto yy480; + goto yy358; + } + } +yy450: + yych = *++p; + if (yych == 'A') goto yy440; + if (yych == 'a') goto yy440; + goto yy358; +yy451: + yych = *++p; + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy358; + if (yych <= '\r') goto yy481; + goto yy358; + } else { + if (yych <= ' ') goto yy481; + if (yych == '>') goto yy481; + goto yy358; + } +yy452: + yych = *++p; + if (yych == 'I') goto yy483; + if (yych == 'i') goto yy483; + goto yy358; +yy453: + yych = *++p; + if (yych == 'R') goto yy484; + if (yych == 'r') 
goto yy484; + goto yy358; +yy454: + yych = *++p; + if (yych == 'L') goto yy412; + if (yych == 'l') goto yy412; + goto yy358; +yy455: + yych = *++p; + if (yych == 'M') goto yy485; + if (yych == 'm') goto yy485; + goto yy358; +yy456: + yych = *++p; + if (yych == 'L') goto yy463; + if (yych == 'l') goto yy463; + goto yy358; +yy457: + yych = *++p; + if (yych == 'O') goto yy486; + if (yych == 'o') goto yy486; + goto yy358; +yy458: + yych = *++p; + if (yych == 'A') goto yy487; + if (yych == 'a') goto yy487; + goto yy358; +yy459: + yych = *++p; + if (yych == 'C') goto yy445; + if (yych == 'c') goto yy445; + goto yy358; +yy460: + yych = *++p; + if (yych == 'A') goto yy488; + if (yych == 'a') goto yy488; + goto yy358; +yy461: + yych = *++p; + if (yych == 'E') goto yy489; + if (yych == 'e') goto yy489; + goto yy358; +yy462: + yych = *++p; + if (yych == 'C') goto yy456; + if (yych == 'c') goto yy456; + goto yy358; +yy463: + yych = *++p; + if (yych == 'E') goto yy392; + if (yych == 'e') goto yy392; + goto yy358; +yy464: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy358; + if (yych <= '\r') goto yy408; + goto yy358; + } else { + if (yych <= ' ') goto yy408; + if (yych <= '.') goto yy358; + goto yy410; + } + } else { + if (yych <= 'E') { + if (yych == '>') goto yy408; + goto yy358; + } else { + if (yych <= 'F') goto yy490; + if (yych == 'f') goto yy490; + goto yy358; + } + } +yy465: + yych = *++p; + if (yych == 'K') goto yy491; + if (yych == 'k') goto yy491; + goto yy358; +yy466: + yych = *++p; + if (yych == 'I') goto yy480; + if (yych == 'i') goto yy480; + goto yy358; +yy467: + yych = *++p; + if (yych == 'E') goto yy492; + if (yych == 'e') goto yy492; + goto yy358; +yy468: + yych = *++p; + if (yych == 'R') goto yy493; + if (yych == 'r') goto yy493; + goto yy358; +yy469: + yych = *++p; + if (yych == 'I') goto yy494; + if (yych == 'i') goto yy494; + goto yy358; +yy470: + yych = *++p; + if (yych == 'O') goto yy495; + if (yych == 'o') goto 
yy495; + goto yy358; +yy471: + yych = *++p; + if (yych == 'D') goto yy496; + if (yych == 'd') goto yy496; + goto yy358; +yy472: + yych = *++p; + if (yych == 'A') goto yy389; + if (yych == 'a') goto yy389; + goto yy358; +yy473: + yych = *++p; + if (yych == 'R') goto yy463; + if (yych == 'r') goto yy463; + goto yy358; +yy474: + yych = *++p; + if (yych == 'E') goto yy497; + if (yych == 'e') goto yy497; + goto yy358; +yy475: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy358; + if (yych <= '\r') goto yy408; + goto yy358; + } else { + if (yych <= ' ') goto yy408; + if (yych <= '.') goto yy358; + goto yy410; + } + } else { + if (yych <= 'D') { + if (yych == '>') goto yy408; + goto yy358; + } else { + if (yych <= 'E') goto yy492; + if (yych == 'e') goto yy492; + goto yy358; + } + } +yy476: + yych = *++p; + if (yych == 'M') goto yy463; + if (yych == 'm') goto yy463; + goto yy358; +yy477: + yych = *++p; + if (yych == 'N') goto yy487; + if (yych == 'n') goto yy487; + goto yy358; +yy478: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy358; + if (yych <= '\r') goto yy408; + goto yy358; + } else { + if (yych <= ' ') goto yy408; + if (yych <= '.') goto yy358; + goto yy410; + } + } else { + if (yych <= 'H') { + if (yych == '>') goto yy408; + goto yy358; + } else { + if (yych <= 'I') goto yy498; + if (yych == 'i') goto yy498; + goto yy358; + } + } +yy479: + yych = *++p; + if (yych == 'A') goto yy499; + if (yych == 'a') goto yy499; + goto yy358; +yy480: + yych = *++p; + if (yych == 'O') goto yy446; + if (yych == 'o') goto yy446; + goto yy358; +yy481: + ++p; + { return 1; } +yy483: + yych = *++p; + if (yych == 'P') goto yy500; + if (yych == 'p') goto yy500; + goto yy358; +yy484: + yych = *++p; + if (yych == 'C') goto yy463; + if (yych == 'c') goto yy463; + goto yy358; +yy485: + yych = *++p; + if (yych == 'A') goto yy501; + if (yych == 'a') goto yy501; + goto yy358; +yy486: + yych = *++p; + if (yych == 'T') 
goto yy392; + if (yych == 't') goto yy392; + goto yy358; +yy487: + yych = *++p; + if (yych == 'D') goto yy392; + if (yych == 'd') goto yy392; + goto yy358; +yy488: + yych = *++p; + if (yych == 'T') goto yy502; + if (yych == 't') goto yy502; + goto yy358; +yy489: + yych = *++p; + if (yych == 'S') goto yy503; + if (yych == 's') goto yy503; + goto yy358; +yy490: + yych = *++p; + if (yych == 'O') goto yy504; + if (yych == 'o') goto yy504; + goto yy358; +yy491: + yych = *++p; + if (yych == 'Q') goto yy505; + if (yych == 'q') goto yy505; + goto yy358; +yy492: + yych = *++p; + if (yych == 'R') goto yy392; + if (yych == 'r') goto yy392; + goto yy358; +yy493: + yych = *++p; + if (yych == 'O') goto yy506; + if (yych == 'o') goto yy506; + goto yy358; +yy494: + yych = *++p; + if (yych == 'L') goto yy503; + if (yych == 'l') goto yy503; + goto yy358; +yy495: + yych = *++p; + if (yych == 'G') goto yy392; + if (yych == 'g') goto yy392; + goto yy358; +yy496: + yych = *++p; + if (yych == 'S') goto yy507; + if (yych == 's') goto yy507; + goto yy358; +yy497: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy358; + if (yych <= '\r') goto yy408; + goto yy358; + } else { + if (yych <= ' ') goto yy408; + if (yych <= '.') goto yy358; + goto yy410; + } + } else { + if (yych <= 'R') { + if (yych == '>') goto yy408; + goto yy358; + } else { + if (yych <= 'S') goto yy507; + if (yych == 's') goto yy507; + goto yy358; + } + } +yy498: + yych = *++p; + if (yych == 'T') goto yy508; + if (yych == 't') goto yy508; + goto yy358; +yy499: + yych = *++p; + if (yych == 'M') goto yy509; + if (yych == 'm') goto yy509; + goto yy358; +yy500: + yych = *++p; + if (yych == 'T') goto yy451; + if (yych == 't') goto yy451; + goto yy358; +yy501: + yych = *++p; + if (yych == 'R') goto yy432; + if (yych == 'r') goto yy432; + goto yy358; +yy502: + yych = *++p; + if (yych == 'A') goto yy510; + if (yych == 'a') goto yy510; + goto yy358; +yy503: + yych = *++p; + if (yych == 'S') goto 
yy392; + if (yych == 's') goto yy392; + goto yy358; +yy504: + yych = *++p; + if (yych == 'N') goto yy486; + if (yych == 'n') goto yy486; + goto yy358; +yy505: + yych = *++p; + if (yych == 'U') goto yy511; + if (yych == 'u') goto yy511; + goto yy358; +yy506: + yych = *++p; + if (yych == 'U') goto yy512; + if (yych == 'u') goto yy512; + goto yy358; +yy507: + yych = *++p; + if (yych == 'E') goto yy486; + if (yych == 'e') goto yy486; + goto yy358; +yy508: + yych = *++p; + if (yych == 'E') goto yy440; + if (yych == 'e') goto yy440; + goto yy358; +yy509: + yych = *++p; + if (yych == 'E') goto yy503; + if (yych == 'e') goto yy503; + goto yy358; +yy510: + yych = *++p; + if (yych == '[') goto yy513; + goto yy358; +yy511: + yych = *++p; + if (yych == 'O') goto yy515; + if (yych == 'o') goto yy515; + goto yy358; +yy512: + yych = *++p; + if (yych == 'P') goto yy392; + if (yych == 'p') goto yy392; + goto yy358; +yy513: + ++p; + { return 5; } +yy515: + yych = *++p; + if (yych == 'T') goto yy463; + if (yych == 't') goto yy463; + goto yy358; +} + } // Try to match an HTML block tag start line of type 7, returning // 7 if successful, 0 if not. 
-bufsize_t _scan_html_block_start_7(const unsigned char *p) { +bufsize_t _scan_html_block_start_7(const unsigned char *p) +{ const unsigned char *marker = NULL; - { - unsigned char yych; - unsigned int yyaccept = 0; - static const unsigned char yybm[] = { - 0, 224, 224, 224, 224, 224, 224, 224, 224, 198, 202, 194, 198, 194, - 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 198, 224, 64, 224, 224, 224, 224, 128, 224, 224, - 224, 224, 224, 241, 240, 224, 241, 241, 241, 241, 241, 241, 241, 241, - 241, 241, 240, 224, 192, 192, 192, 224, 224, 241, 241, 241, 241, 241, - 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, - 241, 241, 241, 241, 241, 241, 241, 224, 224, 224, 224, 240, 192, 241, - 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, - 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 224, 224, 224, - 224, 224, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *p; - if (yych == '<') - goto yy721; - ++p; - yy720 : { return 0; } - yy721: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= '@') { - if (yych == '/') - goto yy725; - goto yy720; - } else { - if (yych <= 'Z') - goto yy722; - if (yych <= '`') - goto yy720; - if (yych >= '{') - goto yy720; - } - yy722: - ++p; - yych = *p; - if (yybm[0 + yych] & 1) { - goto yy722; - } - if (yych <= ' ') { - if (yych <= 0x08) - goto yy724; - if (yych <= '\r') - goto yy736; - if (yych >= ' ') - goto yy736; - } else { - if (yych <= '/') { - if (yych >= '/') - goto yy738; - } else { - if (yych == '>') - goto yy730; - } - } - yy724: - p = marker; - if (yyaccept == 0) { - goto 
yy720; - } else { - goto yy734; - } - yy725: - yych = *++p; - if (yych <= '@') - goto yy724; - if (yych <= 'Z') - goto yy726; - if (yych <= '`') - goto yy724; - if (yych >= '{') - goto yy724; - yy726: - ++p; - yych = *p; - if (yybm[0 + yych] & 2) { - goto yy728; - } - if (yych <= '=') { - if (yych <= '-') { - if (yych <= ',') - goto yy724; - goto yy726; - } else { - if (yych <= '/') - goto yy724; - if (yych <= '9') - goto yy726; - goto yy724; - } - } else { - if (yych <= 'Z') { - if (yych <= '>') - goto yy730; - if (yych <= '@') - goto yy724; - goto yy726; - } else { - if (yych <= '`') - goto yy724; - if (yych <= 'z') - goto yy726; - goto yy724; - } - } - yy728: - ++p; - yych = *p; - if (yybm[0 + yych] & 2) { - goto yy728; - } - if (yych != '>') - goto yy724; - yy730: - ++p; - yych = *p; - if (yybm[0 + yych] & 4) { - goto yy730; - } - if (yych <= 0x08) - goto yy724; - if (yych <= '\n') - goto yy732; - if (yych <= '\v') - goto yy724; - if (yych <= '\r') - goto yy735; - goto yy724; - yy732: - yyaccept = 1; - marker = ++p; - yych = *p; - if (yybm[0 + yych] & 4) { - goto yy730; - } - if (yych <= 0x08) - goto yy734; - if (yych <= '\n') - goto yy732; - if (yych <= '\v') - goto yy734; - if (yych <= '\r') - goto yy735; - yy734 : { return 7; } - yy735: - yych = *++p; - goto yy734; - yy736: - ++p; - yych = *p; - if (yych <= ':') { - if (yych <= ' ') { - if (yych <= 0x08) - goto yy724; - if (yych <= '\r') - goto yy736; - if (yych <= 0x1F) - goto yy724; - goto yy736; - } else { - if (yych == '/') - goto yy738; - if (yych <= '9') - goto yy724; - goto yy739; - } - } else { - if (yych <= 'Z') { - if (yych == '>') - goto yy730; - if (yych <= '@') - goto yy724; - goto yy739; - } else { - if (yych <= '_') { - if (yych <= '^') - goto yy724; - goto yy739; - } else { - if (yych <= '`') - goto yy724; - if (yych <= 'z') - goto yy739; - goto yy724; - } - } - } - yy738: - yych = *++p; - if (yych == '>') - goto yy730; - goto yy724; - yy739: - ++p; - yych = *p; - if (yybm[0 + yych] & 16) { - 
goto yy739; - } - if (yych <= ',') { - if (yych <= '\r') { - if (yych <= 0x08) - goto yy724; - } else { - if (yych != ' ') - goto yy724; - } - } else { - if (yych <= '<') { - if (yych <= '/') - goto yy738; - goto yy724; - } else { - if (yych <= '=') - goto yy743; - if (yych <= '>') - goto yy730; - goto yy724; - } - } - yy741: - ++p; - yych = *p; - if (yych <= '<') { - if (yych <= ' ') { - if (yych <= 0x08) - goto yy724; - if (yych <= '\r') - goto yy741; - if (yych <= 0x1F) - goto yy724; - goto yy741; - } else { - if (yych <= '/') { - if (yych <= '.') - goto yy724; - goto yy738; - } else { - if (yych == ':') - goto yy739; - goto yy724; - } - } - } else { - if (yych <= 'Z') { - if (yych <= '=') - goto yy743; - if (yych <= '>') - goto yy730; - if (yych <= '@') - goto yy724; - goto yy739; - } else { - if (yych <= '_') { - if (yych <= '^') - goto yy724; - goto yy739; - } else { - if (yych <= '`') - goto yy724; - if (yych <= 'z') - goto yy739; - goto yy724; - } - } - } - yy743: - ++p; - yych = *p; - if (yybm[0 + yych] & 32) { - goto yy745; - } - if (yych <= 0xE0) { - if (yych <= '"') { - if (yych <= 0x00) - goto yy724; - if (yych <= ' ') - goto yy743; - goto yy756; - } else { - if (yych <= '\'') - goto yy754; - if (yych <= 0xC1) - goto yy724; - if (yych <= 0xDF) - goto yy747; - goto yy748; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy753; - goto yy749; - } else { - if (yych <= 0xF0) - goto yy750; - if (yych <= 0xF3) - goto yy751; - if (yych <= 0xF4) - goto yy752; - goto yy724; - } - } - yy745: - ++p; - yych = *p; - if (yybm[0 + yych] & 32) { - goto yy745; - } - if (yych <= 0xE0) { - if (yych <= '=') { - if (yych <= 0x00) - goto yy724; - if (yych <= ' ') - goto yy736; - goto yy724; - } else { - if (yych <= '>') - goto yy730; - if (yych <= 0xC1) - goto yy724; - if (yych >= 0xE0) - goto yy748; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy753; - goto yy749; - } else { - if (yych <= 0xF0) - goto yy750; - if (yych <= 0xF3) - goto 
yy751; - if (yych <= 0xF4) - goto yy752; - goto yy724; - } - } - yy747: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0xBF) - goto yy745; - goto yy724; - yy748: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy724; - if (yych <= 0xBF) - goto yy747; - goto yy724; - yy749: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0xBF) - goto yy747; - goto yy724; - yy750: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy724; - if (yych <= 0xBF) - goto yy749; - goto yy724; - yy751: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0xBF) - goto yy749; - goto yy724; - yy752: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0x8F) - goto yy749; - goto yy724; - yy753: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0x9F) - goto yy747; - goto yy724; - yy754: - ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy754; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x00) - goto yy724; - if (yych <= '\'') - goto yy765; - goto yy724; - } else { - if (yych <= 0xDF) - goto yy766; - if (yych <= 0xE0) - goto yy767; - goto yy768; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy772; - if (yych <= 0xEF) - goto yy768; - goto yy769; - } else { - if (yych <= 0xF3) - goto yy770; - if (yych <= 0xF4) - goto yy771; - goto yy724; - } - } - yy756: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy756; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x00) - goto yy724; - if (yych <= '"') - goto yy765; - goto yy724; - } else { - if (yych <= 0xDF) - goto yy758; - if (yych <= 0xE0) - goto yy759; - goto yy760; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy764; - if (yych <= 0xEF) - goto yy760; - goto yy761; - } else { - if (yych <= 0xF3) - goto yy762; - if (yych <= 0xF4) - goto yy763; - goto yy724; - } - } - yy758: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0xBF) - goto yy756; - goto yy724; - yy759: - ++p; - 
yych = *p; - if (yych <= 0x9F) - goto yy724; - if (yych <= 0xBF) - goto yy758; - goto yy724; - yy760: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0xBF) - goto yy758; - goto yy724; - yy761: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy724; - if (yych <= 0xBF) - goto yy760; - goto yy724; - yy762: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0xBF) - goto yy760; - goto yy724; - yy763: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0x8F) - goto yy760; - goto yy724; - yy764: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0x9F) - goto yy758; - goto yy724; - yy765: - ++p; - yych = *p; - if (yych <= ' ') { - if (yych <= 0x08) - goto yy724; - if (yych <= '\r') - goto yy736; - if (yych <= 0x1F) - goto yy724; - goto yy736; - } else { - if (yych <= '/') { - if (yych <= '.') - goto yy724; - goto yy738; - } else { - if (yych == '>') - goto yy730; - goto yy724; - } - } - yy766: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0xBF) - goto yy754; - goto yy724; - yy767: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy724; - if (yych <= 0xBF) - goto yy766; - goto yy724; - yy768: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0xBF) - goto yy766; - goto yy724; - yy769: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy724; - if (yych <= 0xBF) - goto yy768; - goto yy724; - yy770: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0xBF) - goto yy768; - goto yy724; - yy771: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0x8F) - goto yy768; - goto yy724; - yy772: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy724; - if (yych <= 0x9F) - goto yy766; - goto yy724; - } +{ + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 224, 224, 224, 224, 224, 224, 224, + 224, 198, 210, 194, 198, 194, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, 
+ 198, 224, 128, 224, 224, 224, 224, 64, + 224, 224, 224, 224, 224, 233, 232, 224, + 233, 233, 233, 233, 233, 233, 233, 233, + 233, 233, 232, 224, 192, 192, 192, 224, + 224, 233, 233, 233, 233, 233, 233, 233, + 233, 233, 233, 233, 233, 233, 233, 233, + 233, 233, 233, 233, 233, 233, 233, 233, + 233, 233, 233, 224, 224, 224, 224, 232, + 192, 233, 233, 233, 233, 233, 233, 233, + 233, 233, 233, 233, 233, 233, 233, 233, + 233, 233, 233, 233, 233, 233, 233, 233, + 233, 233, 233, 224, 224, 224, 224, 224, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych == '<') goto yy520; + ++p; +yy519: + { return 0; } +yy520: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '@') { + if (yych != '/') goto yy519; + } else { + if (yych <= 'Z') goto yy523; + if (yych <= '`') goto yy519; + if (yych <= 'z') goto yy523; + goto yy519; + } + yych = *++p; + if (yych <= '@') goto yy522; + if (yych <= 'Z') goto yy525; + if (yych <= '`') goto yy522; + if (yych <= 'z') goto yy525; +yy522: + p = marker; + if (yyaccept == 0) { + goto yy519; + } else { + goto yy538; + } +yy523: + yych = *++p; + if (yybm[0+yych] & 2) { + goto yy527; + } + if (yych <= '=') { + if (yych <= '.') { + if (yych == '-') goto yy523; + goto yy522; + } else { + if (yych <= '/') goto yy529; + if (yych <= '9') goto yy523; + goto yy522; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') goto yy530; + if (yych <= '@') goto yy522; + goto yy523; + } else { + if (yych <= '`') goto yy522; + if (yych <= 'z') goto yy523; + goto yy522; + } + } +yy525: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy522; + 
if (yych <= '\r') goto yy532; + goto yy522; + } else { + if (yych <= ' ') goto yy532; + if (yych == '-') goto yy525; + goto yy522; + } + } else { + if (yych <= '@') { + if (yych <= '9') goto yy525; + if (yych == '>') goto yy530; + goto yy522; + } else { + if (yych <= 'Z') goto yy525; + if (yych <= '`') goto yy522; + if (yych <= 'z') goto yy525; + goto yy522; + } + } +yy527: + yych = *++p; + if (yybm[0+yych] & 2) { + goto yy527; + } + if (yych <= '>') { + if (yych <= '9') { + if (yych != '/') goto yy522; + } else { + if (yych <= ':') goto yy534; + if (yych <= '=') goto yy522; + goto yy530; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy522; + if (yych <= 'Z') goto yy534; + goto yy522; + } else { + if (yych == '`') goto yy522; + if (yych <= 'z') goto yy534; + goto yy522; + } + } +yy529: + yych = *++p; + if (yych != '>') goto yy522; +yy530: + yych = *++p; + if (yybm[0+yych] & 4) { + goto yy530; + } + if (yych <= 0x08) goto yy522; + if (yych <= '\n') goto yy536; + if (yych <= '\v') goto yy522; + if (yych <= '\r') goto yy539; + goto yy522; +yy532: + yych = *++p; + if (yych <= 0x1F) { + if (yych <= 0x08) goto yy522; + if (yych <= '\r') goto yy532; + goto yy522; + } else { + if (yych <= ' ') goto yy532; + if (yych == '>') goto yy530; + goto yy522; + } +yy534: + yych = *++p; + if (yybm[0+yych] & 8) { + goto yy534; + } + if (yych <= ',') { + if (yych <= '\r') { + if (yych <= 0x08) goto yy522; + goto yy540; + } else { + if (yych == ' ') goto yy540; + goto yy522; + } + } else { + if (yych <= '<') { + if (yych <= '/') goto yy529; + goto yy522; + } else { + if (yych <= '=') goto yy542; + if (yych <= '>') goto yy530; + goto yy522; + } + } +yy536: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0+yych] & 4) { + goto yy530; + } + if (yych <= 0x08) goto yy538; + if (yych <= '\n') goto yy536; + if (yych <= '\v') goto yy538; + if (yych <= '\r') goto yy539; +yy538: + { return 7; } +yy539: + ++p; + goto yy538; +yy540: + yych = *++p; + if (yych <= '<') { + if (yych 
<= ' ') { + if (yych <= 0x08) goto yy522; + if (yych <= '\r') goto yy540; + if (yych <= 0x1F) goto yy522; + goto yy540; + } else { + if (yych <= '/') { + if (yych <= '.') goto yy522; + goto yy529; + } else { + if (yych == ':') goto yy534; + goto yy522; + } + } + } else { + if (yych <= 'Z') { + if (yych <= '=') goto yy542; + if (yych <= '>') goto yy530; + if (yych <= '@') goto yy522; + goto yy534; + } else { + if (yych <= '_') { + if (yych <= '^') goto yy522; + goto yy534; + } else { + if (yych <= '`') goto yy522; + if (yych <= 'z') goto yy534; + goto yy522; + } + } + } +yy542: + yych = *++p; + if (yybm[0+yych] & 32) { + goto yy544; + } + if (yych <= 0xE0) { + if (yych <= '"') { + if (yych <= 0x00) goto yy522; + if (yych <= ' ') goto yy542; + goto yy546; + } else { + if (yych <= '\'') goto yy548; + if (yych <= 0xC1) goto yy522; + if (yych <= 0xDF) goto yy550; + goto yy551; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy553; + goto yy552; + } else { + if (yych <= 0xF0) goto yy554; + if (yych <= 0xF3) goto yy555; + if (yych <= 0xF4) goto yy556; + goto yy522; + } + } +yy544: + yych = *++p; + if (yybm[0+yych] & 32) { + goto yy544; + } + if (yych <= 0xE0) { + if (yych <= '=') { + if (yych <= 0x00) goto yy522; + if (yych <= ' ') goto yy527; + goto yy522; + } else { + if (yych <= '>') goto yy530; + if (yych <= 0xC1) goto yy522; + if (yych <= 0xDF) goto yy550; + goto yy551; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy553; + goto yy552; + } else { + if (yych <= 0xF0) goto yy554; + if (yych <= 0xF3) goto yy555; + if (yych <= 0xF4) goto yy556; + goto yy522; + } + } +yy546: + yych = *++p; + if (yybm[0+yych] & 64) { + goto yy546; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) goto yy522; + if (yych <= '"') goto yy557; + goto yy522; + } else { + if (yych <= 0xDF) goto yy558; + if (yych <= 0xE0) goto yy559; + goto yy560; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy561; + if (yych <= 0xEF) goto yy560; + 
goto yy562; + } else { + if (yych <= 0xF3) goto yy563; + if (yych <= 0xF4) goto yy564; + goto yy522; + } + } +yy548: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy548; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) goto yy522; + if (yych <= '\'') goto yy557; + goto yy522; + } else { + if (yych <= 0xDF) goto yy565; + if (yych <= 0xE0) goto yy566; + goto yy567; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy568; + if (yych <= 0xEF) goto yy567; + goto yy569; + } else { + if (yych <= 0xF3) goto yy570; + if (yych <= 0xF4) goto yy571; + goto yy522; + } + } +yy550: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0xBF) goto yy544; + goto yy522; +yy551: + yych = *++p; + if (yych <= 0x9F) goto yy522; + if (yych <= 0xBF) goto yy550; + goto yy522; +yy552: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0xBF) goto yy550; + goto yy522; +yy553: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0x9F) goto yy550; + goto yy522; +yy554: + yych = *++p; + if (yych <= 0x8F) goto yy522; + if (yych <= 0xBF) goto yy552; + goto yy522; +yy555: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0xBF) goto yy552; + goto yy522; +yy556: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0x8F) goto yy552; + goto yy522; +yy557: + yych = *++p; + if (yybm[0+yych] & 2) { + goto yy527; + } + if (yych == '/') goto yy529; + if (yych == '>') goto yy530; + goto yy522; +yy558: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0xBF) goto yy546; + goto yy522; +yy559: + yych = *++p; + if (yych <= 0x9F) goto yy522; + if (yych <= 0xBF) goto yy558; + goto yy522; +yy560: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0xBF) goto yy558; + goto yy522; +yy561: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0x9F) goto yy558; + goto yy522; +yy562: + yych = *++p; + if (yych <= 0x8F) goto yy522; + if (yych <= 0xBF) goto yy560; + goto yy522; +yy563: + yych = *++p; + if (yych <= 0x7F) 
goto yy522; + if (yych <= 0xBF) goto yy560; + goto yy522; +yy564: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0x8F) goto yy560; + goto yy522; +yy565: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0xBF) goto yy548; + goto yy522; +yy566: + yych = *++p; + if (yych <= 0x9F) goto yy522; + if (yych <= 0xBF) goto yy565; + goto yy522; +yy567: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0xBF) goto yy565; + goto yy522; +yy568: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0x9F) goto yy565; + goto yy522; +yy569: + yych = *++p; + if (yych <= 0x8F) goto yy522; + if (yych <= 0xBF) goto yy567; + goto yy522; +yy570: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0xBF) goto yy567; + goto yy522; +yy571: + yych = *++p; + if (yych <= 0x7F) goto yy522; + if (yych <= 0x8F) goto yy567; + goto yy522; +} + } // Try to match an HTML block end line of type 1 -bufsize_t _scan_html_block_end_1(const unsigned char *p) { +bufsize_t _scan_html_block_end_1(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - unsigned int yyaccept = 0; - static const unsigned char yybm[] = { - 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *p; - if (yych <= 0xDF) { - if (yych <= ';') { - if (yych <= 0x00) - goto yy775; - if (yych != '\n') - goto yy777; - } else { - if (yych <= '<') - goto yy778; - if (yych <= 0x7F) - goto yy777; - if (yych >= 0xC2) - goto yy779; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy780; - if (yych == 0xED) - goto yy782; - goto yy781; - } else { - if (yych <= 0xF0) - goto yy783; - if (yych <= 0xF3) - goto yy784; - if (yych <= 0xF4) - goto yy785; - } - } - yy775: - ++p; - yy776 : { return 0; } - yy777: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= '\n') { - if (yych <= 0x00) - goto yy776; - if (yych <= '\t') - goto yy790; - goto yy776; - } else { - if (yych <= 0x7F) - goto yy790; - if (yych <= 0xC1) - goto yy776; - if (yych <= 0xF4) - goto yy790; - goto yy776; - } - yy778: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= '.') { - if (yych <= 0x00) - goto yy776; - if (yych == '\n') - goto yy776; - goto yy790; - } else { - if (yych <= 0x7F) { - if (yych <= '/') - goto yy798; - goto yy790; - } else { - if (yych <= 0xC1) - goto yy776; - if (yych <= 0xF4) - goto yy790; - goto yy776; - } - } - yy779: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy776; - if (yych <= 0xBF) - goto yy789; - goto yy776; - yy780: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x9F) - goto yy776; - if (yych <= 0xBF) - goto yy788; - goto yy776; - yy781: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy776; - if (yych <= 0xBF) - goto yy788; - goto yy776; - yy782: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy776; - if (yych <= 0x9F) - goto yy788; - goto yy776; - yy783: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x8F) - goto yy776; - if (yych <= 0xBF) - goto yy786; - goto yy776; - yy784: - yyaccept = 0; - yych = *(marker = ++p); - if 
(yych <= 0x7F) - goto yy776; - if (yych <= 0xBF) - goto yy786; - goto yy776; - yy785: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy776; - if (yych >= 0x90) - goto yy776; - yy786: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy787; - if (yych <= 0xBF) - goto yy788; - yy787: - p = marker; - if (yyaccept == 0) { - goto yy776; - } else { - goto yy804; - } - yy788: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy787; - if (yych >= 0xC0) - goto yy787; - yy789: - ++p; - yych = *p; - yy790: - if (yybm[0 + yych] & 64) { - goto yy789; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\n') - goto yy787; - if (yych >= '=') - goto yy787; - } else { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - goto yy786; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy797; - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - yy791: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy791; - } - if (yych <= 0xDF) { - if (yych <= '.') { - if (yych <= 0x00) - goto yy787; - if (yych == '\n') - goto yy787; - goto yy789; - } else { - if (yych <= '/') - goto yy798; - if (yych <= 0x7F) - goto yy789; - if (yych <= 0xC1) - goto yy787; - goto yy788; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy793; - if (yych == 0xED) - goto yy797; - goto yy786; - } else { - if (yych <= 0xF0) - goto yy794; - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - yy793: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy787; - if (yych <= 0xBF) - goto yy788; - goto yy787; - yy794: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy787; - if (yych <= 0xBF) - goto yy786; - goto yy787; - yy795: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy787; - if (yych <= 0xBF) - goto yy786; - goto yy787; - yy796: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy787; - if (yych <= 0x8F) - goto yy786; 
- goto yy787; - yy797: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy787; - if (yych <= 0x9F) - goto yy788; - goto yy787; - yy798: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy791; - } - if (yych <= 's') { - if (yych <= 'P') { - if (yych <= '\t') { - if (yych <= 0x00) - goto yy787; - goto yy789; - } else { - if (yych <= '\n') - goto yy787; - if (yych <= 'O') - goto yy789; - goto yy800; - } - } else { - if (yych <= 'o') { - if (yych != 'S') - goto yy789; - } else { - if (yych <= 'p') - goto yy800; - if (yych <= 'r') - goto yy789; - } - } - } else { - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x7F) - goto yy789; - goto yy787; - } else { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - goto yy786; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy797; - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - } - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy791; - } - if (yych <= 't') { - if (yych <= 'C') { - if (yych <= '\t') { - if (yych <= 0x00) - goto yy787; - goto yy789; - } else { - if (yych <= '\n') - goto yy787; - if (yych <= 'B') - goto yy789; - goto yy805; - } - } else { - if (yych <= 'b') { - if (yych == 'T') - goto yy806; - goto yy789; - } else { - if (yych <= 'c') - goto yy805; - if (yych <= 's') - goto yy789; - goto yy806; - } - } - } else { - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x7F) - goto yy789; - goto yy787; - } else { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - goto yy786; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy797; - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - } - yy800: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy791; - } - if (yych <= 0xC1) { - if (yych <= 'Q') { - if (yych <= 0x00) - 
goto yy787; - if (yych == '\n') - goto yy787; - goto yy789; - } else { - if (yych <= 'q') { - if (yych >= 'S') - goto yy789; - } else { - if (yych <= 'r') - goto yy801; - if (yych <= 0x7F) - goto yy789; - goto yy787; - } - } - } else { - if (yych <= 0xED) { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - if (yych <= 0xEC) - goto yy786; - goto yy797; - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - } - yy801: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy791; - } - if (yych <= 0xC1) { - if (yych <= 'D') { - if (yych <= 0x00) - goto yy787; - if (yych == '\n') - goto yy787; - goto yy789; - } else { - if (yych <= 'd') { - if (yych >= 'F') - goto yy789; - } else { - if (yych <= 'e') - goto yy802; - if (yych <= 0x7F) - goto yy789; - goto yy787; - } - } - } else { - if (yych <= 0xED) { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - if (yych <= 0xEC) - goto yy786; - goto yy797; - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - } - yy802: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy791; - } - if (yych <= 0xDF) { - if (yych <= '=') { - if (yych <= 0x00) - goto yy787; - if (yych == '\n') - goto yy787; - goto yy789; - } else { - if (yych <= '>') - goto yy803; - if (yych <= 0x7F) - goto yy789; - if (yych <= 0xC1) - goto yy787; - goto yy788; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy793; - if (yych == 0xED) - goto yy797; - goto yy786; - } else { - if (yych <= 0xF0) - goto yy794; - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - yy803: - yyaccept = 1; - marker = ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy789; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych 
<= '\n') - goto yy804; - if (yych <= '<') - goto yy791; - } else { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - goto yy786; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy797; - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - } - } - yy804 : { return (bufsize_t)(p - start); } - yy805: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy791; - } - if (yych <= 0xC1) { - if (yych <= 'Q') { - if (yych <= 0x00) - goto yy787; - if (yych == '\n') - goto yy787; - goto yy789; - } else { - if (yych <= 'q') { - if (yych <= 'R') - goto yy809; - goto yy789; - } else { - if (yych <= 'r') - goto yy809; - if (yych <= 0x7F) - goto yy789; - goto yy787; - } - } - } else { - if (yych <= 0xED) { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - if (yych <= 0xEC) - goto yy786; - goto yy797; - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - } - yy806: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy791; - } - if (yych <= 0xC1) { - if (yych <= 'X') { - if (yych <= 0x00) - goto yy787; - if (yych == '\n') - goto yy787; - goto yy789; - } else { - if (yych <= 'x') { - if (yych >= 'Z') - goto yy789; - } else { - if (yych <= 'y') - goto yy807; - if (yych <= 0x7F) - goto yy789; - goto yy787; - } - } - } else { - if (yych <= 0xED) { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - if (yych <= 0xEC) - goto yy786; - goto yy797; - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - } - yy807: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy791; - } - if (yych <= 0xC1) { - if (yych <= 'K') { - if (yych <= 0x00) - goto yy787; - if (yych == '\n') - goto 
yy787; - goto yy789; - } else { - if (yych <= 'k') { - if (yych >= 'M') - goto yy789; - } else { - if (yych <= 'l') - goto yy808; - if (yych <= 0x7F) - goto yy789; - goto yy787; - } - } - } else { - if (yych <= 0xED) { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - if (yych <= 0xEC) - goto yy786; - goto yy797; - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - } - yy808: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy791; - } - if (yych <= 0xC1) { - if (yych <= 'D') { - if (yych <= 0x00) - goto yy787; - if (yych == '\n') - goto yy787; - goto yy789; - } else { - if (yych <= 'd') { - if (yych <= 'E') - goto yy802; - goto yy789; - } else { - if (yych <= 'e') - goto yy802; - if (yych <= 0x7F) - goto yy789; - goto yy787; - } - } - } else { - if (yych <= 0xED) { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - if (yych <= 0xEC) - goto yy786; - goto yy797; - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - } - yy809: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy791; - } - if (yych <= 0xC1) { - if (yych <= 'H') { - if (yych <= 0x00) - goto yy787; - if (yych == '\n') - goto yy787; - goto yy789; - } else { - if (yych <= 'h') { - if (yych >= 'J') - goto yy789; - } else { - if (yych <= 'i') - goto yy810; - if (yych <= 0x7F) - goto yy789; - goto yy787; - } - } - } else { - if (yych <= 0xED) { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - if (yych <= 0xEC) - goto yy786; - goto yy797; - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - } - yy810: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - 
goto yy791; - } - if (yych <= 0xC1) { - if (yych <= 'O') { - if (yych <= 0x00) - goto yy787; - if (yych == '\n') - goto yy787; - goto yy789; - } else { - if (yych <= 'o') { - if (yych >= 'Q') - goto yy789; - } else { - if (yych <= 'p') - goto yy811; - if (yych <= 0x7F) - goto yy789; - goto yy787; - } - } - } else { - if (yych <= 0xED) { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - if (yych <= 0xEC) - goto yy786; - goto yy797; - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - } - yy811: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy791; - } - if (yych <= 0xC1) { - if (yych <= 'S') { - if (yych <= 0x00) - goto yy787; - if (yych == '\n') - goto yy787; - goto yy789; - } else { - if (yych <= 's') { - if (yych <= 'T') - goto yy802; - goto yy789; - } else { - if (yych <= 't') - goto yy802; - if (yych <= 0x7F) - goto yy789; - goto yy787; - } - } - } else { - if (yych <= 0xED) { - if (yych <= 0xDF) - goto yy788; - if (yych <= 0xE0) - goto yy793; - if (yych <= 0xEC) - goto yy786; - goto yy797; - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy786; - goto yy794; - } else { - if (yych <= 0xF3) - goto yy795; - if (yych <= 0xF4) - goto yy796; - goto yy787; - } - } - } - } +{ + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 128, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= 0xDF) { + if (yych <= ';') { + if (yych <= 0x00) goto yy574; + if (yych != '\n') goto yy576; + } else { + if (yych <= '<') goto yy577; + if (yych <= 0x7F) goto yy576; + if (yych >= 0xC2) goto yy578; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) goto yy579; + if (yych == 0xED) goto yy581; + goto yy580; + } else { + if (yych <= 0xF0) goto yy582; + if (yych <= 0xF3) goto yy583; + if (yych <= 0xF4) goto yy584; + } + } +yy574: + ++p; +yy575: + { return 0; } +yy576: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych <= 0x00) goto yy575; + if (yych <= '\t') goto yy586; + goto yy575; + } else { + if (yych <= 0x7F) goto yy586; + if (yych <= 0xC1) goto yy575; + if (yych <= 0xF4) goto yy586; + goto yy575; + } +yy577: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '.') { + if (yych <= 0x00) goto yy575; + if (yych == '\n') goto yy575; + goto yy586; + } else { + if (yych <= 0x7F) { + if (yych <= '/') goto yy597; + goto yy586; + } else { + if (yych <= 0xC1) goto yy575; + if (yych <= 0xF4) goto yy586; + goto yy575; + } + } +yy578: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy575; + if (yych <= 0xBF) goto yy585; + goto yy575; +yy579: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x9F) goto yy575; + if (yych <= 0xBF) goto yy590; + goto yy575; +yy580: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy575; + if (yych <= 0xBF) goto yy590; + goto yy575; +yy581: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy575; + if (yych <= 0x9F) 
goto yy590; + goto yy575; +yy582: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x8F) goto yy575; + if (yych <= 0xBF) goto yy592; + goto yy575; +yy583: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy575; + if (yych <= 0xBF) goto yy592; + goto yy575; +yy584: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy575; + if (yych <= 0x8F) goto yy592; + goto yy575; +yy585: + yych = *++p; +yy586: + if (yybm[0+yych] & 64) { + goto yy585; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy587; + if (yych <= '<') goto yy588; + } else { + if (yych <= 0xDF) goto yy590; + if (yych <= 0xE0) goto yy591; + goto yy592; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy593; + if (yych <= 0xEF) goto yy592; + goto yy594; + } else { + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + } + } +yy587: + p = marker; + if (yyaccept == 0) { + goto yy575; + } else { + goto yy607; + } +yy588: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy588; + } + if (yych <= 0xDF) { + if (yych <= '.') { + if (yych <= 0x00) goto yy587; + if (yych == '\n') goto yy587; + goto yy585; + } else { + if (yych <= '/') goto yy597; + if (yych <= 0x7F) goto yy585; + if (yych <= 0xC1) goto yy587; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) goto yy591; + if (yych == 0xED) goto yy593; + goto yy592; + } else { + if (yych <= 0xF0) goto yy594; + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + goto yy587; + } + } +yy590: + yych = *++p; + if (yych <= 0x7F) goto yy587; + if (yych <= 0xBF) goto yy585; + goto yy587; +yy591: + yych = *++p; + if (yych <= 0x9F) goto yy587; + if (yych <= 0xBF) goto yy590; + goto yy587; +yy592: + yych = *++p; + if (yych <= 0x7F) goto yy587; + if (yych <= 0xBF) goto yy590; + goto yy587; +yy593: + yych = *++p; + if (yych <= 0x7F) goto yy587; + if (yych <= 0x9F) goto yy590; + goto yy587; +yy594: + yych = *++p; + if (yych <= 0x8F) goto yy587; + if (yych <= 0xBF) goto yy592; 
+ goto yy587; +yy595: + yych = *++p; + if (yych <= 0x7F) goto yy587; + if (yych <= 0xBF) goto yy592; + goto yy587; +yy596: + yych = *++p; + if (yych <= 0x7F) goto yy587; + if (yych <= 0x8F) goto yy592; + goto yy587; +yy597: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy588; + } + if (yych <= 's') { + if (yych <= 'P') { + if (yych <= '\t') { + if (yych <= 0x00) goto yy587; + goto yy585; + } else { + if (yych <= '\n') goto yy587; + if (yych <= 'O') goto yy585; + } + } else { + if (yych <= 'o') { + if (yych == 'S') goto yy599; + goto yy585; + } else { + if (yych <= 'p') goto yy598; + if (yych <= 'r') goto yy585; + goto yy599; + } + } + } else { + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x7F) goto yy585; + goto yy587; + } else { + if (yych <= 0xDF) goto yy590; + if (yych <= 0xE0) goto yy591; + goto yy592; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy593; + if (yych <= 0xEF) goto yy592; + goto yy594; + } else { + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + goto yy587; + } + } + } +yy598: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy588; + } + if (yych <= 0xC1) { + if (yych <= 'Q') { + if (yych <= 0x00) goto yy587; + if (yych == '\n') goto yy587; + goto yy585; + } else { + if (yych <= 'q') { + if (yych <= 'R') goto yy600; + goto yy585; + } else { + if (yych <= 'r') goto yy600; + if (yych <= 0x7F) goto yy585; + goto yy587; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) goto yy590; + if (yych <= 0xE0) goto yy591; + if (yych <= 0xEC) goto yy592; + goto yy593; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) goto yy592; + goto yy594; + } else { + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + goto yy587; + } + } + } +yy599: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy588; + } + if (yych <= 't') { + if (yych <= 'C') { + if (yych <= '\t') { + if (yych <= 0x00) goto yy587; + goto yy585; + } else { + if (yych <= '\n') goto yy587; + if (yych <= 'B') goto yy585; + goto 
yy601; + } + } else { + if (yych <= 'b') { + if (yych == 'T') goto yy602; + goto yy585; + } else { + if (yych <= 'c') goto yy601; + if (yych <= 's') goto yy585; + goto yy602; + } + } + } else { + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x7F) goto yy585; + goto yy587; + } else { + if (yych <= 0xDF) goto yy590; + if (yych <= 0xE0) goto yy591; + goto yy592; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy593; + if (yych <= 0xEF) goto yy592; + goto yy594; + } else { + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + goto yy587; + } + } + } +yy600: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy588; + } + if (yych <= 0xC1) { + if (yych <= 'D') { + if (yych <= 0x00) goto yy587; + if (yych == '\n') goto yy587; + goto yy585; + } else { + if (yych <= 'd') { + if (yych <= 'E') goto yy603; + goto yy585; + } else { + if (yych <= 'e') goto yy603; + if (yych <= 0x7F) goto yy585; + goto yy587; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) goto yy590; + if (yych <= 0xE0) goto yy591; + if (yych <= 0xEC) goto yy592; + goto yy593; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) goto yy592; + goto yy594; + } else { + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + goto yy587; + } + } + } +yy601: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy588; + } + if (yych <= 0xC1) { + if (yych <= 'Q') { + if (yych <= 0x00) goto yy587; + if (yych == '\n') goto yy587; + goto yy585; + } else { + if (yych <= 'q') { + if (yych <= 'R') goto yy604; + goto yy585; + } else { + if (yych <= 'r') goto yy604; + if (yych <= 0x7F) goto yy585; + goto yy587; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) goto yy590; + if (yych <= 0xE0) goto yy591; + if (yych <= 0xEC) goto yy592; + goto yy593; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) goto yy592; + goto yy594; + } else { + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + goto yy587; + } + } + } +yy602: + yych = *++p; + if 
(yybm[0+yych] & 128) { + goto yy588; + } + if (yych <= 0xC1) { + if (yych <= 'X') { + if (yych <= 0x00) goto yy587; + if (yych == '\n') goto yy587; + goto yy585; + } else { + if (yych <= 'x') { + if (yych <= 'Y') goto yy605; + goto yy585; + } else { + if (yych <= 'y') goto yy605; + if (yych <= 0x7F) goto yy585; + goto yy587; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) goto yy590; + if (yych <= 0xE0) goto yy591; + if (yych <= 0xEC) goto yy592; + goto yy593; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) goto yy592; + goto yy594; + } else { + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + goto yy587; + } + } + } +yy603: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy588; + } + if (yych <= 0xDF) { + if (yych <= '=') { + if (yych <= 0x00) goto yy587; + if (yych == '\n') goto yy587; + goto yy585; + } else { + if (yych <= '>') goto yy606; + if (yych <= 0x7F) goto yy585; + if (yych <= 0xC1) goto yy587; + goto yy590; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) goto yy591; + if (yych == 0xED) goto yy593; + goto yy592; + } else { + if (yych <= 0xF0) goto yy594; + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + goto yy587; + } + } +yy604: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy588; + } + if (yych <= 0xC1) { + if (yych <= 'H') { + if (yych <= 0x00) goto yy587; + if (yych == '\n') goto yy587; + goto yy585; + } else { + if (yych <= 'h') { + if (yych <= 'I') goto yy608; + goto yy585; + } else { + if (yych <= 'i') goto yy608; + if (yych <= 0x7F) goto yy585; + goto yy587; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) goto yy590; + if (yych <= 0xE0) goto yy591; + if (yych <= 0xEC) goto yy592; + goto yy593; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) goto yy592; + goto yy594; + } else { + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + goto yy587; + } + } + } +yy605: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy588; + } + if (yych <= 0xC1) { + if (yych 
<= 'K') { + if (yych <= 0x00) goto yy587; + if (yych == '\n') goto yy587; + goto yy585; + } else { + if (yych <= 'k') { + if (yych <= 'L') goto yy600; + goto yy585; + } else { + if (yych <= 'l') goto yy600; + if (yych <= 0x7F) goto yy585; + goto yy587; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) goto yy590; + if (yych <= 0xE0) goto yy591; + if (yych <= 0xEC) goto yy592; + goto yy593; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) goto yy592; + goto yy594; + } else { + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + goto yy587; + } + } + } +yy606: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0+yych] & 64) { + goto yy585; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy607; + if (yych <= '<') goto yy588; + } else { + if (yych <= 0xDF) goto yy590; + if (yych <= 0xE0) goto yy591; + goto yy592; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy593; + if (yych <= 0xEF) goto yy592; + goto yy594; + } else { + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + } + } +yy607: + { return (bufsize_t)(p - start); } +yy608: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy588; + } + if (yych <= 0xC1) { + if (yych <= 'O') { + if (yych <= 0x00) goto yy587; + if (yych == '\n') goto yy587; + goto yy585; + } else { + if (yych <= 'o') { + if (yych >= 'Q') goto yy585; + } else { + if (yych <= 'p') goto yy609; + if (yych <= 0x7F) goto yy585; + goto yy587; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) goto yy590; + if (yych <= 0xE0) goto yy591; + if (yych <= 0xEC) goto yy592; + goto yy593; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) goto yy592; + goto yy594; + } else { + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + goto yy587; + } + } + } +yy609: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy588; + } + if (yych <= 0xC1) { + if (yych <= 'S') { + if (yych <= 0x00) goto yy587; + if (yych == '\n') goto yy587; + goto yy585; + } else { + if 
(yych <= 's') { + if (yych <= 'T') goto yy603; + goto yy585; + } else { + if (yych <= 't') goto yy603; + if (yych <= 0x7F) goto yy585; + goto yy587; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) goto yy590; + if (yych <= 0xE0) goto yy591; + if (yych <= 0xEC) goto yy592; + goto yy593; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) goto yy592; + goto yy594; + } else { + if (yych <= 0xF3) goto yy595; + if (yych <= 0xF4) goto yy596; + goto yy587; + } + } + } +} + } // Try to match an HTML block end line of type 2 -bufsize_t _scan_html_block_end_2(const unsigned char *p) { +bufsize_t _scan_html_block_end_2(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - unsigned int yyaccept = 0; - static const unsigned char yybm[] = { - 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *p; - if (yych <= 0xDF) { - if (yych <= ',') { - if (yych <= 0x00) - goto yy814; - if (yych != '\n') - goto yy816; - } else { - if (yych <= '-') - goto yy817; - if (yych <= 0x7F) - goto yy816; - if (yych >= 0xC2) - goto yy818; - } - } else 
{ - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy819; - if (yych == 0xED) - goto yy821; - goto yy820; - } else { - if (yych <= 0xF0) - goto yy822; - if (yych <= 0xF3) - goto yy823; - if (yych <= 0xF4) - goto yy824; - } - } - yy814: - ++p; - yy815 : { return 0; } - yy816: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= '\n') { - if (yych <= 0x00) - goto yy815; - if (yych <= '\t') - goto yy829; - goto yy815; - } else { - if (yych <= 0x7F) - goto yy829; - if (yych <= 0xC1) - goto yy815; - if (yych <= 0xF4) - goto yy829; - goto yy815; - } - yy817: - yyaccept = 0; - yych = *(marker = ++p); - if (yybm[0 + yych] & 128) { - goto yy836; - } - if (yych <= '\n') { - if (yych <= 0x00) - goto yy815; - if (yych <= '\t') - goto yy829; - goto yy815; - } else { - if (yych <= 0x7F) - goto yy829; - if (yych <= 0xC1) - goto yy815; - if (yych <= 0xF4) - goto yy829; - goto yy815; - } - yy818: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy815; - if (yych <= 0xBF) - goto yy828; - goto yy815; - yy819: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x9F) - goto yy815; - if (yych <= 0xBF) - goto yy827; - goto yy815; - yy820: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy815; - if (yych <= 0xBF) - goto yy827; - goto yy815; - yy821: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy815; - if (yych <= 0x9F) - goto yy827; - goto yy815; - yy822: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x8F) - goto yy815; - if (yych <= 0xBF) - goto yy825; - goto yy815; - yy823: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy815; - if (yych <= 0xBF) - goto yy825; - goto yy815; - yy824: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy815; - if (yych >= 0x90) - goto yy815; - yy825: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy826; - if (yych <= 0xBF) - goto yy827; - yy826: - p = marker; - if (yyaccept == 0) { - goto yy815; - } else { - goto yy839; - } - yy827: 
- ++p; - yych = *p; - if (yych <= 0x7F) - goto yy826; - if (yych >= 0xC0) - goto yy826; - yy828: - ++p; - yych = *p; - yy829: - if (yybm[0 + yych] & 64) { - goto yy828; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\n') - goto yy826; - if (yych >= '.') - goto yy826; - } else { - if (yych <= 0xDF) - goto yy827; - if (yych <= 0xE0) - goto yy831; - goto yy825; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy835; - if (yych <= 0xEF) - goto yy825; - goto yy832; - } else { - if (yych <= 0xF3) - goto yy833; - if (yych <= 0xF4) - goto yy834; - goto yy826; - } - } - yy830: - ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy828; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\n') - goto yy826; - if (yych <= '-') - goto yy836; - goto yy826; - } else { - if (yych <= 0xDF) - goto yy827; - if (yych >= 0xE1) - goto yy825; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy835; - if (yych <= 0xEF) - goto yy825; - goto yy832; - } else { - if (yych <= 0xF3) - goto yy833; - if (yych <= 0xF4) - goto yy834; - goto yy826; - } - } - yy831: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy826; - if (yych <= 0xBF) - goto yy827; - goto yy826; - yy832: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy826; - if (yych <= 0xBF) - goto yy825; - goto yy826; - yy833: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy826; - if (yych <= 0xBF) - goto yy825; - goto yy826; - yy834: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy826; - if (yych <= 0x8F) - goto yy825; - goto yy826; - yy835: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy826; - if (yych <= 0x9F) - goto yy827; - goto yy826; - yy836: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy836; - } - if (yych <= 0xDF) { - if (yych <= '=') { - if (yych <= 0x00) - goto yy826; - if (yych == '\n') - goto yy826; - goto yy828; - } else { - if (yych <= '>') - goto yy838; - if (yych <= 0x7F) - goto yy828; - if (yych <= 0xC1) - goto yy826; - goto yy827; - } - } else { - if 
(yych <= 0xEF) { - if (yych <= 0xE0) - goto yy831; - if (yych == 0xED) - goto yy835; - goto yy825; - } else { - if (yych <= 0xF0) - goto yy832; - if (yych <= 0xF3) - goto yy833; - if (yych <= 0xF4) - goto yy834; - goto yy826; - } - } - yy838: - yyaccept = 1; - marker = ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy828; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\n') - goto yy839; - if (yych <= '-') - goto yy830; - } else { - if (yych <= 0xDF) - goto yy827; - if (yych <= 0xE0) - goto yy831; - goto yy825; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy835; - if (yych <= 0xEF) - goto yy825; - goto yy832; - } else { - if (yych <= 0xF3) - goto yy833; - if (yych <= 0xF4) - goto yy834; - } - } - yy839 : { return (bufsize_t)(p - start); } - } +{ + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 128, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= 0xDF) { + if (yych <= ',') { + if (yych <= 0x00) goto yy612; + if (yych != '\n') goto yy614; + } else { + if (yych <= 
'-') goto yy615; + if (yych <= 0x7F) goto yy614; + if (yych >= 0xC2) goto yy616; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) goto yy617; + if (yych == 0xED) goto yy619; + goto yy618; + } else { + if (yych <= 0xF0) goto yy620; + if (yych <= 0xF3) goto yy621; + if (yych <= 0xF4) goto yy622; + } + } +yy612: + ++p; +yy613: + { return 0; } +yy614: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych <= 0x00) goto yy613; + if (yych <= '\t') goto yy624; + goto yy613; + } else { + if (yych <= 0x7F) goto yy624; + if (yych <= 0xC1) goto yy613; + if (yych <= 0xF4) goto yy624; + goto yy613; + } +yy615: + yyaccept = 0; + yych = *(marker = ++p); + if (yybm[0+yych] & 128) { + goto yy634; + } + if (yych <= '\n') { + if (yych <= 0x00) goto yy613; + if (yych <= '\t') goto yy624; + goto yy613; + } else { + if (yych <= 0x7F) goto yy624; + if (yych <= 0xC1) goto yy613; + if (yych <= 0xF4) goto yy624; + goto yy613; + } +yy616: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy613; + if (yych <= 0xBF) goto yy623; + goto yy613; +yy617: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x9F) goto yy613; + if (yych <= 0xBF) goto yy627; + goto yy613; +yy618: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy613; + if (yych <= 0xBF) goto yy627; + goto yy613; +yy619: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy613; + if (yych <= 0x9F) goto yy627; + goto yy613; +yy620: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x8F) goto yy613; + if (yych <= 0xBF) goto yy629; + goto yy613; +yy621: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy613; + if (yych <= 0xBF) goto yy629; + goto yy613; +yy622: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy613; + if (yych <= 0x8F) goto yy629; + goto yy613; +yy623: + yych = *++p; +yy624: + if (yybm[0+yych] & 64) { + goto yy623; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy625; + if 
(yych <= '-') goto yy626; + } else { + if (yych <= 0xDF) goto yy627; + if (yych <= 0xE0) goto yy628; + goto yy629; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy630; + if (yych <= 0xEF) goto yy629; + goto yy631; + } else { + if (yych <= 0xF3) goto yy632; + if (yych <= 0xF4) goto yy633; + } + } +yy625: + p = marker; + if (yyaccept == 0) { + goto yy613; + } else { + goto yy637; + } +yy626: + yych = *++p; + if (yybm[0+yych] & 64) { + goto yy623; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy625; + if (yych <= '-') goto yy634; + goto yy625; + } else { + if (yych <= 0xDF) goto yy627; + if (yych <= 0xE0) goto yy628; + goto yy629; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy630; + if (yych <= 0xEF) goto yy629; + goto yy631; + } else { + if (yych <= 0xF3) goto yy632; + if (yych <= 0xF4) goto yy633; + goto yy625; + } + } +yy627: + yych = *++p; + if (yych <= 0x7F) goto yy625; + if (yych <= 0xBF) goto yy623; + goto yy625; +yy628: + yych = *++p; + if (yych <= 0x9F) goto yy625; + if (yych <= 0xBF) goto yy627; + goto yy625; +yy629: + yych = *++p; + if (yych <= 0x7F) goto yy625; + if (yych <= 0xBF) goto yy627; + goto yy625; +yy630: + yych = *++p; + if (yych <= 0x7F) goto yy625; + if (yych <= 0x9F) goto yy627; + goto yy625; +yy631: + yych = *++p; + if (yych <= 0x8F) goto yy625; + if (yych <= 0xBF) goto yy629; + goto yy625; +yy632: + yych = *++p; + if (yych <= 0x7F) goto yy625; + if (yych <= 0xBF) goto yy629; + goto yy625; +yy633: + yych = *++p; + if (yych <= 0x7F) goto yy625; + if (yych <= 0x8F) goto yy629; + goto yy625; +yy634: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy634; + } + if (yych <= 0xDF) { + if (yych <= '=') { + if (yych <= 0x00) goto yy625; + if (yych == '\n') goto yy625; + goto yy623; + } else { + if (yych <= '>') goto yy636; + if (yych <= 0x7F) goto yy623; + if (yych <= 0xC1) goto yy625; + goto yy627; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) goto yy628; + if (yych == 0xED) 
goto yy630; + goto yy629; + } else { + if (yych <= 0xF0) goto yy631; + if (yych <= 0xF3) goto yy632; + if (yych <= 0xF4) goto yy633; + goto yy625; + } + } +yy636: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0+yych] & 64) { + goto yy623; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy637; + if (yych <= '-') goto yy626; + } else { + if (yych <= 0xDF) goto yy627; + if (yych <= 0xE0) goto yy628; + goto yy629; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy630; + if (yych <= 0xEF) goto yy629; + goto yy631; + } else { + if (yych <= 0xF3) goto yy632; + if (yych <= 0xF4) goto yy633; + } + } +yy637: + { return (bufsize_t)(p - start); } +} + } // Try to match an HTML block end line of type 3 -bufsize_t _scan_html_block_end_3(const unsigned char *p) { +bufsize_t _scan_html_block_end_3(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - unsigned int yyaccept = 0; - static const unsigned char yybm[] = { - 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *p; - if (yych <= 0xDF) { - if 
(yych <= '>') { - if (yych <= 0x00) - goto yy842; - if (yych != '\n') - goto yy844; - } else { - if (yych <= '?') - goto yy845; - if (yych <= 0x7F) - goto yy844; - if (yych >= 0xC2) - goto yy846; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy847; - if (yych == 0xED) - goto yy849; - goto yy848; - } else { - if (yych <= 0xF0) - goto yy850; - if (yych <= 0xF3) - goto yy851; - if (yych <= 0xF4) - goto yy852; - } - } - yy842: - ++p; - yy843 : { return 0; } - yy844: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= '\n') { - if (yych <= 0x00) - goto yy843; - if (yych <= '\t') - goto yy857; - goto yy843; - } else { - if (yych <= 0x7F) - goto yy857; - if (yych <= 0xC1) - goto yy843; - if (yych <= 0xF4) - goto yy857; - goto yy843; - } - yy845: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= '=') { - if (yych <= 0x00) - goto yy843; - if (yych == '\n') - goto yy843; - goto yy857; - } else { - if (yych <= 0x7F) { - if (yych <= '>') - goto yy865; - goto yy857; - } else { - if (yych <= 0xC1) - goto yy843; - if (yych <= 0xF4) - goto yy857; - goto yy843; - } - } - yy846: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy843; - if (yych <= 0xBF) - goto yy856; - goto yy843; - yy847: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x9F) - goto yy843; - if (yych <= 0xBF) - goto yy855; - goto yy843; - yy848: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy843; - if (yych <= 0xBF) - goto yy855; - goto yy843; - yy849: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy843; - if (yych <= 0x9F) - goto yy855; - goto yy843; - yy850: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x8F) - goto yy843; - if (yych <= 0xBF) - goto yy853; - goto yy843; - yy851: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy843; - if (yych <= 0xBF) - goto yy853; - goto yy843; - yy852: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy843; - if (yych >= 0x90) 
- goto yy843; - yy853: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy854; - if (yych <= 0xBF) - goto yy855; - yy854: - p = marker; - if (yyaccept == 0) { - goto yy843; - } else { - goto yy866; - } - yy855: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy854; - if (yych >= 0xC0) - goto yy854; - yy856: - ++p; - yych = *p; - yy857: - if (yybm[0 + yych] & 64) { - goto yy856; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\n') - goto yy854; - if (yych >= '@') - goto yy854; - } else { - if (yych <= 0xDF) - goto yy855; - if (yych <= 0xE0) - goto yy860; - goto yy853; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy864; - if (yych <= 0xEF) - goto yy853; - goto yy861; - } else { - if (yych <= 0xF3) - goto yy862; - if (yych <= 0xF4) - goto yy863; - goto yy854; - } - } - yy858: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy858; - } - if (yych <= 0xDF) { - if (yych <= '=') { - if (yych <= 0x00) - goto yy854; - if (yych == '\n') - goto yy854; - goto yy856; - } else { - if (yych <= '>') - goto yy865; - if (yych <= 0x7F) - goto yy856; - if (yych <= 0xC1) - goto yy854; - goto yy855; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy860; - if (yych == 0xED) - goto yy864; - goto yy853; - } else { - if (yych <= 0xF0) - goto yy861; - if (yych <= 0xF3) - goto yy862; - if (yych <= 0xF4) - goto yy863; - goto yy854; - } - } - yy860: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy854; - if (yych <= 0xBF) - goto yy855; - goto yy854; - yy861: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy854; - if (yych <= 0xBF) - goto yy853; - goto yy854; - yy862: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy854; - if (yych <= 0xBF) - goto yy853; - goto yy854; - yy863: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy854; - if (yych <= 0x8F) - goto yy853; - goto yy854; - yy864: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy854; - if (yych <= 0x9F) - goto yy855; - goto yy854; - yy865: - yyaccept = 1; - marker = ++p; - yych = *p; - 
if (yybm[0 + yych] & 64) { - goto yy856; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\n') - goto yy866; - if (yych <= '?') - goto yy858; - } else { - if (yych <= 0xDF) - goto yy855; - if (yych <= 0xE0) - goto yy860; - goto yy853; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy864; - if (yych <= 0xEF) - goto yy853; - goto yy861; - } else { - if (yych <= 0xF3) - goto yy862; - if (yych <= 0xF4) - goto yy863; - } - } - yy866 : { return (bufsize_t)(p - start); } - } +{ + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 128, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= 0xDF) { + if (yych <= '>') { + if (yych <= 0x00) goto yy640; + if (yych != '\n') goto yy642; + } else { + if (yych <= '?') goto yy643; + if (yych <= 0x7F) goto yy642; + if (yych >= 0xC2) goto yy644; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) goto yy645; + if (yych == 0xED) goto yy647; + goto yy646; + } else { + if (yych <= 0xF0) goto yy648; + if (yych <= 0xF3) goto yy649; + if (yych <= 0xF4) 
goto yy650; + } + } +yy640: + ++p; +yy641: + { return 0; } +yy642: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych <= 0x00) goto yy641; + if (yych <= '\t') goto yy652; + goto yy641; + } else { + if (yych <= 0x7F) goto yy652; + if (yych <= 0xC1) goto yy641; + if (yych <= 0xF4) goto yy652; + goto yy641; + } +yy643: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '=') { + if (yych <= 0x00) goto yy641; + if (yych == '\n') goto yy641; + goto yy652; + } else { + if (yych <= 0x7F) { + if (yych <= '>') goto yy663; + goto yy652; + } else { + if (yych <= 0xC1) goto yy641; + if (yych <= 0xF4) goto yy652; + goto yy641; + } + } +yy644: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy641; + if (yych <= 0xBF) goto yy651; + goto yy641; +yy645: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x9F) goto yy641; + if (yych <= 0xBF) goto yy656; + goto yy641; +yy646: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy641; + if (yych <= 0xBF) goto yy656; + goto yy641; +yy647: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy641; + if (yych <= 0x9F) goto yy656; + goto yy641; +yy648: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x8F) goto yy641; + if (yych <= 0xBF) goto yy658; + goto yy641; +yy649: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy641; + if (yych <= 0xBF) goto yy658; + goto yy641; +yy650: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy641; + if (yych <= 0x8F) goto yy658; + goto yy641; +yy651: + yych = *++p; +yy652: + if (yybm[0+yych] & 64) { + goto yy651; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy653; + if (yych <= '?') goto yy654; + } else { + if (yych <= 0xDF) goto yy656; + if (yych <= 0xE0) goto yy657; + goto yy658; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy659; + if (yych <= 0xEF) goto yy658; + goto yy660; + } else { + if (yych <= 0xF3) goto yy661; + if (yych <= 
0xF4) goto yy662; + } + } +yy653: + p = marker; + if (yyaccept == 0) { + goto yy641; + } else { + goto yy664; + } +yy654: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy654; + } + if (yych <= 0xDF) { + if (yych <= '=') { + if (yych <= 0x00) goto yy653; + if (yych == '\n') goto yy653; + goto yy651; + } else { + if (yych <= '>') goto yy663; + if (yych <= 0x7F) goto yy651; + if (yych <= 0xC1) goto yy653; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) goto yy657; + if (yych == 0xED) goto yy659; + goto yy658; + } else { + if (yych <= 0xF0) goto yy660; + if (yych <= 0xF3) goto yy661; + if (yych <= 0xF4) goto yy662; + goto yy653; + } + } +yy656: + yych = *++p; + if (yych <= 0x7F) goto yy653; + if (yych <= 0xBF) goto yy651; + goto yy653; +yy657: + yych = *++p; + if (yych <= 0x9F) goto yy653; + if (yych <= 0xBF) goto yy656; + goto yy653; +yy658: + yych = *++p; + if (yych <= 0x7F) goto yy653; + if (yych <= 0xBF) goto yy656; + goto yy653; +yy659: + yych = *++p; + if (yych <= 0x7F) goto yy653; + if (yych <= 0x9F) goto yy656; + goto yy653; +yy660: + yych = *++p; + if (yych <= 0x8F) goto yy653; + if (yych <= 0xBF) goto yy658; + goto yy653; +yy661: + yych = *++p; + if (yych <= 0x7F) goto yy653; + if (yych <= 0xBF) goto yy658; + goto yy653; +yy662: + yych = *++p; + if (yych <= 0x7F) goto yy653; + if (yych <= 0x8F) goto yy658; + goto yy653; +yy663: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0+yych] & 64) { + goto yy651; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy664; + if (yych <= '?') goto yy654; + } else { + if (yych <= 0xDF) goto yy656; + if (yych <= 0xE0) goto yy657; + goto yy658; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy659; + if (yych <= 0xEF) goto yy658; + goto yy660; + } else { + if (yych <= 0xF3) goto yy661; + if (yych <= 0xF4) goto yy662; + } + } +yy664: + { return (bufsize_t)(p - start); } +} + } // Try to match an HTML block end line of type 4 -bufsize_t _scan_html_block_end_4(const unsigned 
char *p) { +bufsize_t _scan_html_block_end_4(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - unsigned int yyaccept = 0; - static const unsigned char yybm[] = { - 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *p; - if (yych <= 0xDF) { - if (yych <= '=') { - if (yych <= 0x00) - goto yy869; - if (yych != '\n') - goto yy871; - } else { - if (yych <= '>') - goto yy872; - if (yych <= 0x7F) - goto yy871; - if (yych >= 0xC2) - goto yy874; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy875; - if (yych == 0xED) - goto yy877; - goto yy876; - } else { - if (yych <= 0xF0) - goto yy878; - if (yych <= 0xF3) - goto yy879; - if (yych <= 0xF4) - goto yy880; - } - } - yy869: - ++p; - yy870 : { return 0; } - yy871: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= '\n') { - if (yych <= 0x00) - goto yy870; - if (yych <= '\t') - goto yy885; - goto yy870; - } else { - if (yych <= 0x7F) - goto yy885; - if (yych <= 0xC1) - goto yy870; - if (yych <= 0xF4) - goto yy885; - goto yy870; - } - yy872: - yyaccept = 1; - yych 
= *(marker = ++p); - if (yych <= '\n') { - if (yych <= 0x00) - goto yy873; - if (yych <= '\t') - goto yy885; - } else { - if (yych <= 0x7F) - goto yy885; - if (yych <= 0xC1) - goto yy873; - if (yych <= 0xF4) - goto yy885; - } - yy873 : { return (bufsize_t)(p - start); } - yy874: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy870; - if (yych <= 0xBF) - goto yy884; - goto yy870; - yy875: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x9F) - goto yy870; - if (yych <= 0xBF) - goto yy883; - goto yy870; - yy876: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy870; - if (yych <= 0xBF) - goto yy883; - goto yy870; - yy877: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy870; - if (yych <= 0x9F) - goto yy883; - goto yy870; - yy878: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x8F) - goto yy870; - if (yych <= 0xBF) - goto yy881; - goto yy870; - yy879: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy870; - if (yych <= 0xBF) - goto yy881; - goto yy870; - yy880: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy870; - if (yych >= 0x90) - goto yy870; - yy881: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy882; - if (yych <= 0xBF) - goto yy883; - yy882: - p = marker; - if (yyaccept == 0) { - goto yy870; - } else { - goto yy873; - } - yy883: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy882; - if (yych >= 0xC0) - goto yy882; - yy884: - ++p; - yych = *p; - yy885: - if (yybm[0 + yych] & 64) { - goto yy884; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\n') - goto yy882; - if (yych >= '?') - goto yy882; - } else { - if (yych <= 0xDF) - goto yy883; - if (yych <= 0xE0) - goto yy888; - goto yy881; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy892; - if (yych <= 0xEF) - goto yy881; - goto yy889; - } else { - if (yych <= 0xF3) - goto yy890; - if (yych <= 0xF4) - goto yy891; - goto yy882; - } - } - yy886: - yyaccept 
= 1; - marker = ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy884; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\n') - goto yy873; - if (yych <= '>') - goto yy886; - goto yy873; - } else { - if (yych <= 0xDF) - goto yy883; - if (yych >= 0xE1) - goto yy881; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy892; - if (yych <= 0xEF) - goto yy881; - goto yy889; - } else { - if (yych <= 0xF3) - goto yy890; - if (yych <= 0xF4) - goto yy891; - goto yy873; - } - } - yy888: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy882; - if (yych <= 0xBF) - goto yy883; - goto yy882; - yy889: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy882; - if (yych <= 0xBF) - goto yy881; - goto yy882; - yy890: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy882; - if (yych <= 0xBF) - goto yy881; - goto yy882; - yy891: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy882; - if (yych <= 0x8F) - goto yy881; - goto yy882; - yy892: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy882; - if (yych <= 0x9F) - goto yy883; - goto yy882; - } +{ + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 64, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 
0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yybm[0+yych] & 64) { + goto yy670; + } + if (yych <= 0xE0) { + if (yych <= '\n') { + if (yych <= 0x00) goto yy667; + if (yych <= '\t') goto yy669; + } else { + if (yych <= 0x7F) goto yy669; + if (yych <= 0xC1) goto yy667; + if (yych <= 0xDF) goto yy673; + goto yy674; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy676; + goto yy675; + } else { + if (yych <= 0xF0) goto yy677; + if (yych <= 0xF3) goto yy678; + if (yych <= 0xF4) goto yy679; + } + } +yy667: + ++p; +yy668: + { return 0; } +yy669: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych <= 0x00) goto yy668; + if (yych <= '\t') goto yy681; + goto yy668; + } else { + if (yych <= 0x7F) goto yy681; + if (yych <= 0xC1) goto yy668; + if (yych <= 0xF4) goto yy681; + goto yy668; + } +yy670: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0+yych] & 128) { + goto yy680; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy672; + if (yych <= '>') goto yy670; + } else { + if (yych <= 0xDF) goto yy683; + if (yych <= 0xE0) goto yy684; + goto yy685; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy686; + if (yych <= 0xEF) goto yy685; + goto yy687; + } else { + if (yych <= 0xF3) goto yy688; + if (yych <= 0xF4) goto yy689; + } + } +yy672: + { return (bufsize_t)(p - start); } +yy673: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy668; + if (yych <= 0xBF) goto yy680; + goto yy668; +yy674: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x9F) goto yy668; + if (yych <= 0xBF) goto yy683; + goto yy668; +yy675: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy668; + if (yych <= 0xBF) goto yy683; + goto yy668; +yy676: + yyaccept = 0; + yych = 
*(marker = ++p); + if (yych <= 0x7F) goto yy668; + if (yych <= 0x9F) goto yy683; + goto yy668; +yy677: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x8F) goto yy668; + if (yych <= 0xBF) goto yy685; + goto yy668; +yy678: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy668; + if (yych <= 0xBF) goto yy685; + goto yy668; +yy679: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy668; + if (yych <= 0x8F) goto yy685; + goto yy668; +yy680: + yych = *++p; +yy681: + if (yybm[0+yych] & 128) { + goto yy680; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy682; + if (yych <= '>') goto yy670; + } else { + if (yych <= 0xDF) goto yy683; + if (yych <= 0xE0) goto yy684; + goto yy685; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy686; + if (yych <= 0xEF) goto yy685; + goto yy687; + } else { + if (yych <= 0xF3) goto yy688; + if (yych <= 0xF4) goto yy689; + } + } +yy682: + p = marker; + if (yyaccept == 0) { + goto yy668; + } else { + goto yy672; + } +yy683: + yych = *++p; + if (yych <= 0x7F) goto yy682; + if (yych <= 0xBF) goto yy680; + goto yy682; +yy684: + yych = *++p; + if (yych <= 0x9F) goto yy682; + if (yych <= 0xBF) goto yy683; + goto yy682; +yy685: + yych = *++p; + if (yych <= 0x7F) goto yy682; + if (yych <= 0xBF) goto yy683; + goto yy682; +yy686: + yych = *++p; + if (yych <= 0x7F) goto yy682; + if (yych <= 0x9F) goto yy683; + goto yy682; +yy687: + yych = *++p; + if (yych <= 0x8F) goto yy682; + if (yych <= 0xBF) goto yy685; + goto yy682; +yy688: + yych = *++p; + if (yych <= 0x7F) goto yy682; + if (yych <= 0xBF) goto yy685; + goto yy682; +yy689: + yych = *++p; + if (yych <= 0x7F) goto yy682; + if (yych <= 0x8F) goto yy685; + goto yy682; +} + } // Try to match an HTML block end line of type 5 -bufsize_t _scan_html_block_end_5(const unsigned char *p) { +bufsize_t _scan_html_block_end_5(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - 
unsigned char yych; - unsigned int yyaccept = 0; - static const unsigned char yybm[] = { - 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *p; - if (yych <= 0xDF) { - if (yych <= '\\') { - if (yych <= 0x00) - goto yy895; - if (yych != '\n') - goto yy897; - } else { - if (yych <= ']') - goto yy898; - if (yych <= 0x7F) - goto yy897; - if (yych >= 0xC2) - goto yy899; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy900; - if (yych == 0xED) - goto yy902; - goto yy901; - } else { - if (yych <= 0xF0) - goto yy903; - if (yych <= 0xF3) - goto yy904; - if (yych <= 0xF4) - goto yy905; - } - } - yy895: - ++p; - yy896 : { return 0; } - yy897: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= '\n') { - if (yych <= 0x00) - goto yy896; - if (yych <= '\t') - goto yy910; - goto yy896; - } else { - if (yych <= 0x7F) - goto yy910; - if (yych <= 0xC1) - goto yy896; - if (yych <= 0xF4) - goto yy910; - goto yy896; - } - yy898: - yyaccept = 0; - yych = *(marker = ++p); - if (yybm[0 + yych] & 128) { - goto yy917; - } - if (yych <= '\n') { - if (yych <= 0x00) - goto yy896; - if (yych <= '\t') - 
goto yy910; - goto yy896; - } else { - if (yych <= 0x7F) - goto yy910; - if (yych <= 0xC1) - goto yy896; - if (yych <= 0xF4) - goto yy910; - goto yy896; - } - yy899: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy896; - if (yych <= 0xBF) - goto yy909; - goto yy896; - yy900: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x9F) - goto yy896; - if (yych <= 0xBF) - goto yy908; - goto yy896; - yy901: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy896; - if (yych <= 0xBF) - goto yy908; - goto yy896; - yy902: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy896; - if (yych <= 0x9F) - goto yy908; - goto yy896; - yy903: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x8F) - goto yy896; - if (yych <= 0xBF) - goto yy906; - goto yy896; - yy904: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy896; - if (yych <= 0xBF) - goto yy906; - goto yy896; - yy905: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x7F) - goto yy896; - if (yych >= 0x90) - goto yy896; - yy906: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy907; - if (yych <= 0xBF) - goto yy908; - yy907: - p = marker; - if (yyaccept == 0) { - goto yy896; - } else { - goto yy920; - } - yy908: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy907; - if (yych >= 0xC0) - goto yy907; - yy909: - ++p; - yych = *p; - yy910: - if (yybm[0 + yych] & 64) { - goto yy909; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\n') - goto yy907; - if (yych >= '^') - goto yy907; - } else { - if (yych <= 0xDF) - goto yy908; - if (yych <= 0xE0) - goto yy912; - goto yy906; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy916; - if (yych <= 0xEF) - goto yy906; - goto yy913; - } else { - if (yych <= 0xF3) - goto yy914; - if (yych <= 0xF4) - goto yy915; - goto yy907; - } - } - yy911: - ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy909; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 
'\n') - goto yy907; - if (yych <= ']') - goto yy917; - goto yy907; - } else { - if (yych <= 0xDF) - goto yy908; - if (yych >= 0xE1) - goto yy906; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy916; - if (yych <= 0xEF) - goto yy906; - goto yy913; - } else { - if (yych <= 0xF3) - goto yy914; - if (yych <= 0xF4) - goto yy915; - goto yy907; - } - } - yy912: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy907; - if (yych <= 0xBF) - goto yy908; - goto yy907; - yy913: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy907; - if (yych <= 0xBF) - goto yy906; - goto yy907; - yy914: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy907; - if (yych <= 0xBF) - goto yy906; - goto yy907; - yy915: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy907; - if (yych <= 0x8F) - goto yy906; - goto yy907; - yy916: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy907; - if (yych <= 0x9F) - goto yy908; - goto yy907; - yy917: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy917; - } - if (yych <= 0xDF) { - if (yych <= '=') { - if (yych <= 0x00) - goto yy907; - if (yych == '\n') - goto yy907; - goto yy909; - } else { - if (yych <= '>') - goto yy919; - if (yych <= 0x7F) - goto yy909; - if (yych <= 0xC1) - goto yy907; - goto yy908; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy912; - if (yych == 0xED) - goto yy916; - goto yy906; - } else { - if (yych <= 0xF0) - goto yy913; - if (yych <= 0xF3) - goto yy914; - if (yych <= 0xF4) - goto yy915; - goto yy907; - } - } - yy919: - yyaccept = 1; - marker = ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy909; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '\n') - goto yy920; - if (yych <= ']') - goto yy911; - } else { - if (yych <= 0xDF) - goto yy908; - if (yych <= 0xE0) - goto yy912; - goto yy906; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy916; - if (yych <= 0xEF) - goto yy906; - goto yy913; - } else { - if (yych <= 0xF3) - goto yy914; - if (yych <= 0xF4) - goto 
yy915; - } - } - yy920 : { return (bufsize_t)(p - start); } - } +{ + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 128, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= 0xDF) { + if (yych <= '\\') { + if (yych <= 0x00) goto yy692; + if (yych != '\n') goto yy694; + } else { + if (yych <= ']') goto yy695; + if (yych <= 0x7F) goto yy694; + if (yych >= 0xC2) goto yy696; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) goto yy697; + if (yych == 0xED) goto yy699; + goto yy698; + } else { + if (yych <= 0xF0) goto yy700; + if (yych <= 0xF3) goto yy701; + if (yych <= 0xF4) goto yy702; + } + } +yy692: + ++p; +yy693: + { return 0; } +yy694: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych <= 0x00) goto yy693; + if (yych <= '\t') goto yy704; + goto yy693; + } else { + if (yych <= 0x7F) goto yy704; + if (yych <= 0xC1) goto yy693; + if (yych <= 0xF4) goto yy704; + goto yy693; + } +yy695: + yyaccept = 0; + yych = *(marker = ++p); + if (yybm[0+yych] & 128) { + goto yy714; + } + if (yych 
<= '\n') { + if (yych <= 0x00) goto yy693; + if (yych <= '\t') goto yy704; + goto yy693; + } else { + if (yych <= 0x7F) goto yy704; + if (yych <= 0xC1) goto yy693; + if (yych <= 0xF4) goto yy704; + goto yy693; + } +yy696: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy693; + if (yych <= 0xBF) goto yy703; + goto yy693; +yy697: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x9F) goto yy693; + if (yych <= 0xBF) goto yy707; + goto yy693; +yy698: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy693; + if (yych <= 0xBF) goto yy707; + goto yy693; +yy699: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy693; + if (yych <= 0x9F) goto yy707; + goto yy693; +yy700: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x8F) goto yy693; + if (yych <= 0xBF) goto yy709; + goto yy693; +yy701: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy693; + if (yych <= 0xBF) goto yy709; + goto yy693; +yy702: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) goto yy693; + if (yych <= 0x8F) goto yy709; + goto yy693; +yy703: + yych = *++p; +yy704: + if (yybm[0+yych] & 64) { + goto yy703; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy705; + if (yych <= ']') goto yy706; + } else { + if (yych <= 0xDF) goto yy707; + if (yych <= 0xE0) goto yy708; + goto yy709; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy710; + if (yych <= 0xEF) goto yy709; + goto yy711; + } else { + if (yych <= 0xF3) goto yy712; + if (yych <= 0xF4) goto yy713; + } + } +yy705: + p = marker; + if (yyaccept == 0) { + goto yy693; + } else { + goto yy717; + } +yy706: + yych = *++p; + if (yybm[0+yych] & 64) { + goto yy703; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy705; + if (yych <= ']') goto yy714; + goto yy705; + } else { + if (yych <= 0xDF) goto yy707; + if (yych <= 0xE0) goto yy708; + goto yy709; + } + } else { + if (yych <= 0xF0) { + if (yych <= 
0xED) goto yy710; + if (yych <= 0xEF) goto yy709; + goto yy711; + } else { + if (yych <= 0xF3) goto yy712; + if (yych <= 0xF4) goto yy713; + goto yy705; + } + } +yy707: + yych = *++p; + if (yych <= 0x7F) goto yy705; + if (yych <= 0xBF) goto yy703; + goto yy705; +yy708: + yych = *++p; + if (yych <= 0x9F) goto yy705; + if (yych <= 0xBF) goto yy707; + goto yy705; +yy709: + yych = *++p; + if (yych <= 0x7F) goto yy705; + if (yych <= 0xBF) goto yy707; + goto yy705; +yy710: + yych = *++p; + if (yych <= 0x7F) goto yy705; + if (yych <= 0x9F) goto yy707; + goto yy705; +yy711: + yych = *++p; + if (yych <= 0x8F) goto yy705; + if (yych <= 0xBF) goto yy709; + goto yy705; +yy712: + yych = *++p; + if (yych <= 0x7F) goto yy705; + if (yych <= 0xBF) goto yy709; + goto yy705; +yy713: + yych = *++p; + if (yych <= 0x7F) goto yy705; + if (yych <= 0x8F) goto yy709; + goto yy705; +yy714: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy714; + } + if (yych <= 0xDF) { + if (yych <= '=') { + if (yych <= 0x00) goto yy705; + if (yych == '\n') goto yy705; + goto yy703; + } else { + if (yych <= '>') goto yy716; + if (yych <= 0x7F) goto yy703; + if (yych <= 0xC1) goto yy705; + goto yy707; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) goto yy708; + if (yych == 0xED) goto yy710; + goto yy709; + } else { + if (yych <= 0xF0) goto yy711; + if (yych <= 0xF3) goto yy712; + if (yych <= 0xF4) goto yy713; + goto yy705; + } + } +yy716: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0+yych] & 64) { + goto yy703; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') goto yy717; + if (yych <= ']') goto yy706; + } else { + if (yych <= 0xDF) goto yy707; + if (yych <= 0xE0) goto yy708; + goto yy709; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy710; + if (yych <= 0xEF) goto yy709; + goto yy711; + } else { + if (yych <= 0xF3) goto yy712; + if (yych <= 0xF4) goto yy713; + } + } +yy717: + { return (bufsize_t)(p - start); } +} + } // Try to match a link title (in 
single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). -bufsize_t _scan_link_title(const unsigned char *p) { +bufsize_t _scan_link_title(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - unsigned int yyaccept = 0; - static const unsigned char yybm[] = { - 0, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 96, 224, 224, 224, 224, 160, 224, 192, - 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, 16, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *p; - if (yych <= '&') { - if (yych == '"') - goto yy925; - } else { - if (yych <= '\'') - goto yy926; - if (yych <= '(') - goto yy927; - } - ++p; - yy924 : { return 0; } - yy925: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x00) - goto yy924; - if (yych <= 0x7F) - goto yy960; - if (yych <= 0xC1) - goto yy924; - if (yych <= 0xF4) - goto yy960; - goto yy924; - yy926: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x00) - goto yy924; - if (yych <= 0x7F) - goto yy946; - if (yych 
<= 0xC1) - goto yy924; - if (yych <= 0xF4) - goto yy946; - goto yy924; - yy927: - yyaccept = 0; - yych = *(marker = ++p); - if (yych <= 0x00) - goto yy924; - if (yych <= 0x7F) - goto yy932; - if (yych <= 0xC1) - goto yy924; - if (yych <= 0xF4) - goto yy932; - goto yy924; - yy928: - ++p; - yych = *p; - if (yybm[0 + yych] & 32) { - goto yy931; - } - if (yych <= 0xE0) { - if (yych <= '\\') { - if (yych <= 0x00) - goto yy930; - if (yych <= ')') - goto yy942; - goto yy928; - } else { - if (yych <= 0xC1) - goto yy930; - if (yych <= 0xDF) - goto yy933; - goto yy934; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy939; - goto yy935; - } else { - if (yych <= 0xF0) - goto yy936; - if (yych <= 0xF3) - goto yy937; - if (yych <= 0xF4) - goto yy938; - } - } - yy930: - p = marker; - if (yyaccept <= 1) { - if (yyaccept == 0) { - goto yy924; - } else { - goto yy941; - } - } else { - if (yyaccept == 2) { - goto yy955; - } else { - goto yy969; - } - } - yy931: - ++p; - yych = *p; - yy932: - if (yybm[0 + yych] & 32) { - goto yy931; - } - if (yych <= 0xE0) { - if (yych <= '\\') { - if (yych <= 0x00) - goto yy930; - if (yych <= ')') - goto yy940; - goto yy928; - } else { - if (yych <= 0xC1) - goto yy930; - if (yych >= 0xE0) - goto yy934; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy939; - goto yy935; - } else { - if (yych <= 0xF0) - goto yy936; - if (yych <= 0xF3) - goto yy937; - if (yych <= 0xF4) - goto yy938; - goto yy930; - } - } - yy933: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0xBF) - goto yy931; - goto yy930; - yy934: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy930; - if (yych <= 0xBF) - goto yy933; - goto yy930; - yy935: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0xBF) - goto yy933; - goto yy930; - yy936: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy930; - if (yych <= 0xBF) - goto yy935; - goto yy930; - yy937: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 
0xBF) - goto yy935; - goto yy930; - yy938: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0x8F) - goto yy935; - goto yy930; - yy939: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0x9F) - goto yy933; - goto yy930; - yy940: - ++p; - yy941 : { return (bufsize_t)(p - start); } - yy942: - yyaccept = 1; - marker = ++p; - yych = *p; - if (yybm[0 + yych] & 32) { - goto yy931; - } - if (yych <= 0xE0) { - if (yych <= '\\') { - if (yych <= 0x00) - goto yy941; - if (yych <= ')') - goto yy940; - goto yy928; - } else { - if (yych <= 0xC1) - goto yy941; - if (yych <= 0xDF) - goto yy933; - goto yy934; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy939; - goto yy935; - } else { - if (yych <= 0xF0) - goto yy936; - if (yych <= 0xF3) - goto yy937; - if (yych <= 0xF4) - goto yy938; - goto yy941; - } - } - yy943: - ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy945; - } - if (yych <= 0xE0) { - if (yych <= '\\') { - if (yych <= 0x00) - goto yy930; - if (yych <= '\'') - goto yy956; - goto yy943; - } else { - if (yych <= 0xC1) - goto yy930; - if (yych <= 0xDF) - goto yy947; - goto yy948; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy953; - goto yy949; - } else { - if (yych <= 0xF0) - goto yy950; - if (yych <= 0xF3) - goto yy951; - if (yych <= 0xF4) - goto yy952; - goto yy930; - } - } - yy945: - ++p; - yych = *p; - yy946: - if (yybm[0 + yych] & 64) { - goto yy945; - } - if (yych <= 0xE0) { - if (yych <= '\\') { - if (yych <= 0x00) - goto yy930; - if (yych <= '\'') - goto yy954; - goto yy943; - } else { - if (yych <= 0xC1) - goto yy930; - if (yych >= 0xE0) - goto yy948; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy953; - goto yy949; - } else { - if (yych <= 0xF0) - goto yy950; - if (yych <= 0xF3) - goto yy951; - if (yych <= 0xF4) - goto yy952; - goto yy930; - } - } - yy947: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0xBF) - goto yy945; - goto yy930; - 
yy948: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy930; - if (yych <= 0xBF) - goto yy947; - goto yy930; - yy949: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0xBF) - goto yy947; - goto yy930; - yy950: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy930; - if (yych <= 0xBF) - goto yy949; - goto yy930; - yy951: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0xBF) - goto yy949; - goto yy930; - yy952: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0x8F) - goto yy949; - goto yy930; - yy953: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0x9F) - goto yy947; - goto yy930; - yy954: - ++p; - yy955 : { return (bufsize_t)(p - start); } - yy956: - yyaccept = 2; - marker = ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy945; - } - if (yych <= 0xE0) { - if (yych <= '\\') { - if (yych <= 0x00) - goto yy955; - if (yych <= '\'') - goto yy954; - goto yy943; - } else { - if (yych <= 0xC1) - goto yy955; - if (yych <= 0xDF) - goto yy947; - goto yy948; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy953; - goto yy949; - } else { - if (yych <= 0xF0) - goto yy950; - if (yych <= 0xF3) - goto yy951; - if (yych <= 0xF4) - goto yy952; - goto yy955; - } - } - yy957: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy959; - } - if (yych <= 0xE0) { - if (yych <= '\\') { - if (yych <= 0x00) - goto yy930; - if (yych <= '"') - goto yy970; - goto yy957; - } else { - if (yych <= 0xC1) - goto yy930; - if (yych <= 0xDF) - goto yy961; - goto yy962; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy967; - goto yy963; - } else { - if (yych <= 0xF0) - goto yy964; - if (yych <= 0xF3) - goto yy965; - if (yych <= 0xF4) - goto yy966; - goto yy930; - } - } - yy959: - ++p; - yych = *p; - yy960: - if (yybm[0 + yych] & 128) { - goto yy959; - } - if (yych <= 0xE0) { - if (yych <= '\\') { - if (yych <= 0x00) - goto yy930; - if (yych <= '"') - goto yy968; - goto yy957; - } else 
{ - if (yych <= 0xC1) - goto yy930; - if (yych >= 0xE0) - goto yy962; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy967; - goto yy963; - } else { - if (yych <= 0xF0) - goto yy964; - if (yych <= 0xF3) - goto yy965; - if (yych <= 0xF4) - goto yy966; - goto yy930; - } - } - yy961: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0xBF) - goto yy959; - goto yy930; - yy962: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy930; - if (yych <= 0xBF) - goto yy961; - goto yy930; - yy963: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0xBF) - goto yy961; - goto yy930; - yy964: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy930; - if (yych <= 0xBF) - goto yy963; - goto yy930; - yy965: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0xBF) - goto yy963; - goto yy930; - yy966: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0x8F) - goto yy963; - goto yy930; - yy967: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy930; - if (yych <= 0x9F) - goto yy961; - goto yy930; - yy968: - ++p; - yy969 : { return (bufsize_t)(p - start); } - yy970: - yyaccept = 3; - marker = ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy959; - } - if (yych <= 0xE0) { - if (yych <= '\\') { - if (yych <= 0x00) - goto yy969; - if (yych <= '"') - goto yy968; - goto yy957; - } else { - if (yych <= 0xC1) - goto yy969; - if (yych <= 0xDF) - goto yy961; - goto yy962; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy967; - goto yy963; - } else { - if (yych <= 0xF0) - goto yy964; - if (yych <= 0xF3) - goto yy965; - if (yych <= 0xF4) - goto yy966; - goto yy969; - } - } - } +{ + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 192, 208, 208, 208, 208, 144, + 208, 80, 208, 208, 208, 
208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 32, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= '&') { + if (yych == '"') goto yy722; + } else { + if (yych <= '\'') goto yy723; + if (yych <= '(') goto yy724; + } + ++p; +yy721: + { return 0; } +yy722: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x00) goto yy721; + if (yych <= 0x7F) goto yy726; + if (yych <= 0xC1) goto yy721; + if (yych <= 0xF4) goto yy726; + goto yy721; +yy723: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x00) goto yy721; + if (yych <= 0x7F) goto yy740; + if (yych <= 0xC1) goto yy721; + if (yych <= 0xF4) goto yy740; + goto yy721; +yy724: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x00) goto yy721; + if (yych <= 0x7F) goto yy753; + if (yych <= 0xC1) goto yy721; + if (yych <= 0xF4) goto yy753; + goto yy721; +yy725: + yych = *++p; +yy726: + if (yybm[0+yych] & 16) { + goto yy725; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) goto yy727; + if (yych <= '"') goto yy728; + goto yy730; + } else { + if (yych <= 0xC1) goto yy727; + if (yych <= 0xDF) goto yy732; + goto yy733; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy735; + goto yy734; + } else { + if (yych <= 
0xF0) goto yy736; + if (yych <= 0xF3) goto yy737; + if (yych <= 0xF4) goto yy738; + } + } +yy727: + p = marker; + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy721; + } else { + goto yy729; + } + } else { + if (yyaccept == 2) { + goto yy742; + } else { + goto yy755; + } + } +yy728: + ++p; +yy729: + { return (bufsize_t)(p - start); } +yy730: + yych = *++p; + if (yybm[0+yych] & 16) { + goto yy725; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) goto yy727; + if (yych <= '"') goto yy765; + goto yy730; + } else { + if (yych <= 0xC1) goto yy727; + if (yych >= 0xE0) goto yy733; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy735; + goto yy734; + } else { + if (yych <= 0xF0) goto yy736; + if (yych <= 0xF3) goto yy737; + if (yych <= 0xF4) goto yy738; + goto yy727; + } + } +yy732: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0xBF) goto yy725; + goto yy727; +yy733: + yych = *++p; + if (yych <= 0x9F) goto yy727; + if (yych <= 0xBF) goto yy732; + goto yy727; +yy734: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0xBF) goto yy732; + goto yy727; +yy735: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0x9F) goto yy732; + goto yy727; +yy736: + yych = *++p; + if (yych <= 0x8F) goto yy727; + if (yych <= 0xBF) goto yy734; + goto yy727; +yy737: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0xBF) goto yy734; + goto yy727; +yy738: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0x8F) goto yy734; + goto yy727; +yy739: + yych = *++p; +yy740: + if (yybm[0+yych] & 64) { + goto yy739; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) goto yy727; + if (yych >= '(') goto yy743; + } else { + if (yych <= 0xC1) goto yy727; + if (yych <= 0xDF) goto yy745; + goto yy746; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy748; + goto yy747; + } else { + if (yych <= 0xF0) goto yy749; + if (yych <= 0xF3) goto yy750; + if (yych <= 0xF4) goto yy751; + goto 
yy727; + } + } +yy741: + ++p; +yy742: + { return (bufsize_t)(p - start); } +yy743: + yych = *++p; + if (yybm[0+yych] & 64) { + goto yy739; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) goto yy727; + if (yych <= '\'') goto yy766; + goto yy743; + } else { + if (yych <= 0xC1) goto yy727; + if (yych >= 0xE0) goto yy746; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy748; + goto yy747; + } else { + if (yych <= 0xF0) goto yy749; + if (yych <= 0xF3) goto yy750; + if (yych <= 0xF4) goto yy751; + goto yy727; + } + } +yy745: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0xBF) goto yy739; + goto yy727; +yy746: + yych = *++p; + if (yych <= 0x9F) goto yy727; + if (yych <= 0xBF) goto yy745; + goto yy727; +yy747: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0xBF) goto yy745; + goto yy727; +yy748: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0x9F) goto yy745; + goto yy727; +yy749: + yych = *++p; + if (yych <= 0x8F) goto yy727; + if (yych <= 0xBF) goto yy747; + goto yy727; +yy750: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0xBF) goto yy747; + goto yy727; +yy751: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0x8F) goto yy747; + goto yy727; +yy752: + yych = *++p; +yy753: + if (yybm[0+yych] & 128) { + goto yy752; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) goto yy727; + if (yych >= '*') goto yy756; + } else { + if (yych <= 0xC1) goto yy727; + if (yych <= 0xDF) goto yy758; + goto yy759; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy761; + goto yy760; + } else { + if (yych <= 0xF0) goto yy762; + if (yych <= 0xF3) goto yy763; + if (yych <= 0xF4) goto yy764; + goto yy727; + } + } +yy754: + ++p; +yy755: + { return (bufsize_t)(p - start); } +yy756: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy752; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) goto yy727; + if (yych <= ')') goto yy767; + goto yy756; + } 
else { + if (yych <= 0xC1) goto yy727; + if (yych >= 0xE0) goto yy759; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy761; + goto yy760; + } else { + if (yych <= 0xF0) goto yy762; + if (yych <= 0xF3) goto yy763; + if (yych <= 0xF4) goto yy764; + goto yy727; + } + } +yy758: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0xBF) goto yy752; + goto yy727; +yy759: + yych = *++p; + if (yych <= 0x9F) goto yy727; + if (yych <= 0xBF) goto yy758; + goto yy727; +yy760: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0xBF) goto yy758; + goto yy727; +yy761: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0x9F) goto yy758; + goto yy727; +yy762: + yych = *++p; + if (yych <= 0x8F) goto yy727; + if (yych <= 0xBF) goto yy760; + goto yy727; +yy763: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0xBF) goto yy760; + goto yy727; +yy764: + yych = *++p; + if (yych <= 0x7F) goto yy727; + if (yych <= 0x8F) goto yy760; + goto yy727; +yy765: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0+yych] & 16) { + goto yy725; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) goto yy729; + if (yych <= '"') goto yy728; + goto yy730; + } else { + if (yych <= 0xC1) goto yy729; + if (yych <= 0xDF) goto yy732; + goto yy733; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy735; + goto yy734; + } else { + if (yych <= 0xF0) goto yy736; + if (yych <= 0xF3) goto yy737; + if (yych <= 0xF4) goto yy738; + goto yy729; + } + } +yy766: + yyaccept = 2; + yych = *(marker = ++p); + if (yybm[0+yych] & 64) { + goto yy739; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) goto yy742; + if (yych <= '\'') goto yy741; + goto yy743; + } else { + if (yych <= 0xC1) goto yy742; + if (yych <= 0xDF) goto yy745; + goto yy746; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy748; + goto yy747; + } else { + if (yych <= 0xF0) goto yy749; + if (yych <= 0xF3) goto yy750; + if (yych <= 0xF4) goto 
yy751; + goto yy742; + } + } +yy767: + yyaccept = 3; + yych = *(marker = ++p); + if (yybm[0+yych] & 128) { + goto yy752; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) goto yy755; + if (yych <= ')') goto yy754; + goto yy756; + } else { + if (yych <= 0xC1) goto yy755; + if (yych <= 0xDF) goto yy758; + goto yy759; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) goto yy761; + goto yy760; + } else { + if (yych <= 0xF0) goto yy762; + if (yych <= 0xF3) goto yy763; + if (yych <= 0xF4) goto yy764; + goto yy755; + } + } +} + } // Match space characters, including newlines. -bufsize_t _scan_spacechars(const unsigned char *p) { - const unsigned char *start = p; +bufsize_t _scan_spacechars(const unsigned char *p) +{ + const unsigned char *start = p; \ + +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 128, 128, 128, 128, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yybm[0+yych] & 128) { + goto yy772; + } + ++p; + { return 0; } +yy772: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy772; + } + { return (bufsize_t)(p - start); } +} - { - unsigned char yych; - static const unsigned char yybm[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 128, 
128, 128, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - yych = *p; - if (yych <= 0x08) - goto yy973; - if (yych <= '\r') - goto yy975; - if (yych == ' ') - goto yy975; - yy973: - ++p; - { return 0; } - yy975: - ++p; - yych = *p; - goto yy978; - yy976 : { return (bufsize_t)(p - start); } - yy977: - ++p; - yych = *p; - yy978: - if (yybm[0 + yych] & 128) { - goto yy977; - } - goto yy976; - } } // Match ATX heading start. 
-bufsize_t _scan_atx_heading_start(const unsigned char *p) { +bufsize_t _scan_atx_heading_start(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - static const unsigned char yybm[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - yych = *p; - if (yych == '#') - goto yy983; - ++p; - yy982 : { return 0; } - yy983: - yych = *(marker = ++p); - if (yybm[0 + yych] & 128) { - goto yy986; - } - if (yych <= '\f') { - if (yych <= 0x08) - goto yy982; - if (yych >= '\v') - goto yy982; - } else { - if (yych <= '\r') - goto yy984; - if (yych == '#') - goto yy988; - goto yy982; - } - yy984: - ++p; - yy985 : { return (bufsize_t)(p - start); } - yy986: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy986; - } - goto yy985; - yy988: - yych = *++p; - if (yybm[0 + yych] & 128) { - goto yy986; - } - if (yych <= '\f') { - if (yych <= 0x08) - goto yy989; - if (yych <= '\n') - goto yy984; - } else { - if (yych <= '\r') - goto yy984; - if (yych == '#') - goto yy990; - } - yy989: - p = marker; - goto yy982; - yy990: - yych = *++p; - if (yybm[0 + yych] & 128) { - goto yy986; - } - if (yych <= '\f') { - if (yych <= 0x08) - goto yy989; - if (yych <= '\n') - goto yy984; 
- goto yy989; - } else { - if (yych <= '\r') - goto yy984; - if (yych != '#') - goto yy989; - } - yych = *++p; - if (yybm[0 + yych] & 128) { - goto yy986; - } - if (yych <= '\f') { - if (yych <= 0x08) - goto yy989; - if (yych <= '\n') - goto yy984; - goto yy989; - } else { - if (yych <= '\r') - goto yy984; - if (yych != '#') - goto yy989; - } - yych = *++p; - if (yybm[0 + yych] & 128) { - goto yy986; - } - if (yych <= '\f') { - if (yych <= 0x08) - goto yy989; - if (yych <= '\n') - goto yy984; - goto yy989; - } else { - if (yych <= '\r') - goto yy984; - if (yych != '#') - goto yy989; - } - ++p; - if (yybm[0 + (yych = *p)] & 128) { - goto yy986; - } - if (yych <= 0x08) - goto yy989; - if (yych <= '\n') - goto yy984; - if (yych == '\r') - goto yy984; - goto yy989; - } +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych == '#') goto yy779; + ++p; +yy778: + { return 0; } +yy779: + yych = *(marker = ++p); + if (yybm[0+yych] & 128) { + goto yy780; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy778; + if (yych <= '\n') goto yy783; + goto yy778; + } else { + if (yych <= '\r') goto yy783; + if (yych == '#') 
goto yy784; + goto yy778; + } +yy780: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy780; + } +yy782: + { return (bufsize_t)(p - start); } +yy783: + ++p; + goto yy782; +yy784: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy780; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy785; + if (yych <= '\n') goto yy783; + } else { + if (yych <= '\r') goto yy783; + if (yych == '#') goto yy786; + } +yy785: + p = marker; + goto yy778; +yy786: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy780; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy785; + if (yych <= '\n') goto yy783; + goto yy785; + } else { + if (yych <= '\r') goto yy783; + if (yych != '#') goto yy785; + } + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy780; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy785; + if (yych <= '\n') goto yy783; + goto yy785; + } else { + if (yych <= '\r') goto yy783; + if (yych != '#') goto yy785; + } + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy780; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy785; + if (yych <= '\n') goto yy783; + goto yy785; + } else { + if (yych <= '\r') goto yy783; + if (yych != '#') goto yy785; + } + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy780; + } + if (yych <= 0x08) goto yy785; + if (yych <= '\n') goto yy783; + if (yych == '\r') goto yy783; + goto yy785; +} + } // Match setext heading line. Return 1 for level-1 heading, // 2 for level-2, 0 for no match. 
-bufsize_t _scan_setext_heading_line(const unsigned char *p) { +bufsize_t _scan_setext_heading_line(const unsigned char *p) +{ const unsigned char *marker = NULL; - { - unsigned char yych; - static const unsigned char yybm[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - yych = *p; - if (yych == '-') - goto yy998; - if (yych == '=') - goto yy999; - ++p; - yy997 : { return 0; } - yy998: - yych = *(marker = ++p); - if (yybm[0 + yych] & 128) { - goto yy1011; - } - if (yych <= '\f') { - if (yych <= 0x08) - goto yy997; - if (yych <= '\n') - goto yy1008; - goto yy997; - } else { - if (yych <= '\r') - goto yy1008; - if (yych == ' ') - goto yy1008; - goto yy997; - } - yy999: - yych = *(marker = ++p); - if (yybm[0 + yych] & 64) { - goto yy1005; - } - if (yych <= '\f') { - if (yych <= 0x08) - goto yy997; - if (yych <= '\n') - goto yy1001; - goto yy997; - } else { - if (yych <= '\r') - goto yy1001; - if (yych == ' ') - goto yy1001; - goto yy997; - } - yy1000: - ++p; - yych = *p; - yy1001: - if (yybm[0 + yych] & 32) { - goto yy1000; - } - if (yych <= 0x08) - goto yy1002; - if (yych <= '\n') - goto yy1003; - if (yych == '\r') - goto yy1003; - yy1002: - p = marker; - goto yy997; - yy1003: - ++p; - { return 1; } - yy1005: - ++p; - 
yych = *p; - if (yybm[0 + yych] & 32) { - goto yy1000; - } - if (yych <= '\f') { - if (yych <= 0x08) - goto yy1002; - if (yych <= '\n') - goto yy1003; - goto yy1002; - } else { - if (yych <= '\r') - goto yy1003; - if (yych == '=') - goto yy1005; - goto yy1002; - } - yy1007: - ++p; - yych = *p; - yy1008: - if (yych <= '\f') { - if (yych <= 0x08) - goto yy1002; - if (yych <= '\t') - goto yy1007; - if (yych >= '\v') - goto yy1002; - } else { - if (yych <= '\r') - goto yy1009; - if (yych == ' ') - goto yy1007; - goto yy1002; - } - yy1009: - ++p; - { return 2; } - yy1011: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy1011; - } - if (yych <= '\f') { - if (yych <= 0x08) - goto yy1002; - if (yych <= '\t') - goto yy1007; - if (yych <= '\n') - goto yy1009; - goto yy1002; - } else { - if (yych <= '\r') - goto yy1009; - if (yych == ' ') - goto yy1007; - goto yy1002; - } - } +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 32, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 32, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 64, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 128, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych == '-') goto yy794; + if (yych == '=') goto yy795; + ++p; +yy793: + { return 0; } +yy794: + yych = *(marker = ++p); + if (yybm[0+yych] & 64) { + goto yy801; + } + if (yych <= '\f') 
{ + if (yych <= 0x08) goto yy793; + if (yych <= '\n') goto yy797; + goto yy793; + } else { + if (yych <= '\r') goto yy797; + if (yych == ' ') goto yy797; + goto yy793; + } +yy795: + yych = *(marker = ++p); + if (yybm[0+yych] & 128) { + goto yy807; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy793; + if (yych <= '\n') goto yy804; + goto yy793; + } else { + if (yych <= '\r') goto yy804; + if (yych == ' ') goto yy804; + goto yy793; + } +yy796: + yych = *++p; +yy797: + if (yybm[0+yych] & 32) { + goto yy796; + } + if (yych <= 0x08) goto yy798; + if (yych <= '\n') goto yy799; + if (yych == '\r') goto yy799; +yy798: + p = marker; + goto yy793; +yy799: + ++p; + { return 2; } +yy801: + yych = *++p; + if (yybm[0+yych] & 32) { + goto yy796; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy798; + if (yych <= '\n') goto yy799; + goto yy798; + } else { + if (yych <= '\r') goto yy799; + if (yych == '-') goto yy801; + goto yy798; + } +yy803: + yych = *++p; +yy804: + if (yych <= '\f') { + if (yych <= 0x08) goto yy798; + if (yych <= '\t') goto yy803; + if (yych >= '\v') goto yy798; + } else { + if (yych <= '\r') goto yy805; + if (yych == ' ') goto yy803; + goto yy798; + } +yy805: + ++p; + { return 1; } +yy807: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy807; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy798; + if (yych <= '\t') goto yy803; + if (yych <= '\n') goto yy805; + goto yy798; + } else { + if (yych <= '\r') goto yy805; + if (yych == ' ') goto yy803; + goto yy798; + } +} + } // Scan a thematic break line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." 
-bufsize_t _scan_thematic_break(const unsigned char *p) { +bufsize_t _scan_thematic_break(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - static const unsigned char yybm[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 240, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 128, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - yych = *p; - if (yych <= ',') { - if (yych == '*') - goto yy1017; - } else { - if (yych <= '-') - goto yy1018; - if (yych == '_') - goto yy1019; - } - ++p; - yy1016 : { return 0; } - yy1017: - yych = *(marker = ++p); - if (yych <= 0x1F) { - if (yych == '\t') - goto yy1041; - goto yy1016; - } else { - if (yych <= ' ') - goto yy1041; - if (yych == '*') - goto yy1043; - goto yy1016; - } - yy1018: - yych = *(marker = ++p); - if (yych <= 0x1F) { - if (yych == '\t') - goto yy1031; - goto yy1016; - } else { - if (yych <= ' ') - goto yy1031; - if (yych == '-') - goto yy1033; - goto yy1016; - } - yy1019: - yych = *(marker = ++p); - if (yybm[0 + yych] & 16) { - goto yy1020; - } - if (yych == '_') - goto yy1023; - goto yy1016; - yy1020: - ++p; - yych = *p; - if (yybm[0 + yych] & 16) { - goto yy1020; - } - if (yych == '_') - goto yy1023; - yy1022: - p = marker; - goto yy1016; - yy1023: - ++p; - yych = *p; - if (yych <= 0x1F) { - if (yych 
== '\t') - goto yy1023; - goto yy1022; - } else { - if (yych <= ' ') - goto yy1023; - if (yych != '_') - goto yy1022; - } - yy1025: - ++p; - yych = *p; - if (yych <= '\r') { - if (yych <= '\t') { - if (yych <= 0x08) - goto yy1022; - goto yy1025; - } else { - if (yych <= '\n') - goto yy1027; - if (yych <= '\f') - goto yy1022; - } - } else { - if (yych <= ' ') { - if (yych <= 0x1F) - goto yy1022; - goto yy1025; - } else { - if (yych == '_') - goto yy1029; - goto yy1022; - } - } - yy1027: - ++p; - { return (bufsize_t)(p - start); } - yy1029: - ++p; - yych = *p; - if (yybm[0 + yych] & 32) { - goto yy1029; - } - if (yych <= 0x08) - goto yy1022; - if (yych <= '\n') - goto yy1027; - if (yych == '\r') - goto yy1027; - goto yy1022; - yy1031: - ++p; - yych = *p; - if (yych <= 0x1F) { - if (yych == '\t') - goto yy1031; - goto yy1022; - } else { - if (yych <= ' ') - goto yy1031; - if (yych != '-') - goto yy1022; - } - yy1033: - ++p; - yych = *p; - if (yych <= 0x1F) { - if (yych == '\t') - goto yy1033; - goto yy1022; - } else { - if (yych <= ' ') - goto yy1033; - if (yych != '-') - goto yy1022; - } - yy1035: - ++p; - yych = *p; - if (yych <= '\r') { - if (yych <= '\t') { - if (yych <= 0x08) - goto yy1022; - goto yy1035; - } else { - if (yych <= '\n') - goto yy1037; - if (yych <= '\f') - goto yy1022; - } - } else { - if (yych <= ' ') { - if (yych <= 0x1F) - goto yy1022; - goto yy1035; - } else { - if (yych == '-') - goto yy1039; - goto yy1022; - } - } - yy1037: - ++p; - { return (bufsize_t)(p - start); } - yy1039: - ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy1039; - } - if (yych <= 0x08) - goto yy1022; - if (yych <= '\n') - goto yy1037; - if (yych == '\r') - goto yy1037; - goto yy1022; - yy1041: - ++p; - yych = *p; - if (yych <= 0x1F) { - if (yych == '\t') - goto yy1041; - goto yy1022; - } else { - if (yych <= ' ') - goto yy1041; - if (yych != '*') - goto yy1022; - } - yy1043: - ++p; - yych = *p; - if (yych <= 0x1F) { - if (yych == '\t') - goto yy1043; - goto 
yy1022; - } else { - if (yych <= ' ') - goto yy1043; - if (yych != '*') - goto yy1022; - } - yy1045: - ++p; - yych = *p; - if (yych <= '\r') { - if (yych <= '\t') { - if (yych <= 0x08) - goto yy1022; - goto yy1045; - } else { - if (yych <= '\n') - goto yy1047; - if (yych <= '\f') - goto yy1022; - } - } else { - if (yych <= ' ') { - if (yych <= 0x1F) - goto yy1022; - goto yy1045; - } else { - if (yych == '*') - goto yy1049; - goto yy1022; - } - } - yy1047: - ++p; - { return (bufsize_t)(p - start); } - yy1049: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy1049; - } - if (yych <= 0x08) - goto yy1022; - if (yych <= '\n') - goto yy1047; - if (yych == '\r') - goto yy1047; - goto yy1022; - } +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 240, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 240, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 32, 0, 0, 64, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 128, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= ',') { + if (yych == '*') goto yy813; + } else { + if (yych <= '-') goto yy814; + if (yych == '_') goto yy815; + } + ++p; +yy812: + { return 0; } +yy813: + yych = *(marker = ++p); + if (yybm[0+yych] & 16) { + goto yy816; + } + if (yych == '*') goto yy819; + goto yy812; +yy814: + yych = *(marker = ++p); + if (yych <= 0x1F) { + if (yych == '\t') goto 
yy821; + goto yy812; + } else { + if (yych <= ' ') goto yy821; + if (yych == '-') goto yy823; + goto yy812; + } +yy815: + yych = *(marker = ++p); + if (yych <= 0x1F) { + if (yych == '\t') goto yy825; + goto yy812; + } else { + if (yych <= ' ') goto yy825; + if (yych == '_') goto yy827; + goto yy812; + } +yy816: + yych = *++p; + if (yybm[0+yych] & 16) { + goto yy816; + } + if (yych == '*') goto yy819; +yy818: + p = marker; + goto yy812; +yy819: + yych = *++p; + if (yych <= 0x1F) { + if (yych == '\t') goto yy819; + goto yy818; + } else { + if (yych <= ' ') goto yy819; + if (yych == '*') goto yy829; + goto yy818; + } +yy821: + yych = *++p; + if (yych <= 0x1F) { + if (yych == '\t') goto yy821; + goto yy818; + } else { + if (yych <= ' ') goto yy821; + if (yych != '-') goto yy818; + } +yy823: + yych = *++p; + if (yych <= 0x1F) { + if (yych == '\t') goto yy823; + goto yy818; + } else { + if (yych <= ' ') goto yy823; + if (yych == '-') goto yy831; + goto yy818; + } +yy825: + yych = *++p; + if (yych <= 0x1F) { + if (yych == '\t') goto yy825; + goto yy818; + } else { + if (yych <= ' ') goto yy825; + if (yych != '_') goto yy818; + } +yy827: + yych = *++p; + if (yych <= 0x1F) { + if (yych == '\t') goto yy827; + goto yy818; + } else { + if (yych <= ' ') goto yy827; + if (yych == '_') goto yy833; + goto yy818; + } +yy829: + yych = *++p; + if (yybm[0+yych] & 32) { + goto yy829; + } + if (yych <= 0x08) goto yy818; + if (yych <= '\n') goto yy835; + if (yych == '\r') goto yy835; + goto yy818; +yy831: + yych = *++p; + if (yybm[0+yych] & 64) { + goto yy831; + } + if (yych <= 0x08) goto yy818; + if (yych <= '\n') goto yy837; + if (yych == '\r') goto yy837; + goto yy818; +yy833: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy833; + } + if (yych <= 0x08) goto yy818; + if (yych <= '\n') goto yy839; + if (yych == '\r') goto yy839; + goto yy818; +yy835: + ++p; + { return (bufsize_t)(p - start); } +yy837: + ++p; + { return (bufsize_t)(p - start); } +yy839: + ++p; + { return 
(bufsize_t)(p - start); } +} + } // Scan an opening code fence. -bufsize_t _scan_open_code_fence(const unsigned char *p) { +bufsize_t _scan_open_code_fence(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - static const unsigned char yybm[] = { - 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 0, 160, 160, 0, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 96, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 144, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }; - yych = *p; - if (yych == '`') - goto yy1055; - if (yych == '~') - goto yy1056; - ++p; - yy1054 : { return 0; } - yy1055: - yych = *(marker = ++p); - if (yych == '`') - goto yy1072; - goto yy1054; - yy1056: - yych = *(marker = ++p); - if (yych != '~') - goto yy1054; - yych = *++p; - if (yybm[0 + yych] & 16) { - goto yy1059; - } - yy1058: - p = marker; - goto yy1054; - yy1059: - ++p; - yych = *p; - marker = p; - if (yybm[0 + yych] & 32) { - goto yy1061; - } - if (yych <= 0xE0) { - if (yych <= '~') { - if (yych <= 0x00) - goto yy1058; - if (yych <= '\r') - goto yy1070; - goto yy1059; - } else { - if (yych <= 0xC1) - 
goto yy1058; - if (yych <= 0xDF) - goto yy1063; - goto yy1064; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy1069; - goto yy1065; - } else { - if (yych <= 0xF0) - goto yy1066; - if (yych <= 0xF3) - goto yy1067; - if (yych <= 0xF4) - goto yy1068; - goto yy1058; - } - } - yy1061: - ++p; - yych = *p; - if (yybm[0 + yych] & 32) { - goto yy1061; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x00) - goto yy1058; - if (yych <= '\r') - goto yy1070; - goto yy1058; - } else { - if (yych <= 0xDF) - goto yy1063; - if (yych <= 0xE0) - goto yy1064; - goto yy1065; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy1069; - if (yych <= 0xEF) - goto yy1065; - goto yy1066; - } else { - if (yych <= 0xF3) - goto yy1067; - if (yych <= 0xF4) - goto yy1068; - goto yy1058; - } - } - yy1063: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1058; - if (yych <= 0xBF) - goto yy1061; - goto yy1058; - yy1064: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy1058; - if (yych <= 0xBF) - goto yy1063; - goto yy1058; - yy1065: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1058; - if (yych <= 0xBF) - goto yy1063; - goto yy1058; - yy1066: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy1058; - if (yych <= 0xBF) - goto yy1065; - goto yy1058; - yy1067: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1058; - if (yych <= 0xBF) - goto yy1065; - goto yy1058; - yy1068: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1058; - if (yych <= 0x8F) - goto yy1065; - goto yy1058; - yy1069: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1058; - if (yych <= 0x9F) - goto yy1063; - goto yy1058; - yy1070: - ++p; - p = marker; - { return (bufsize_t)(p - start); } - yy1072: - yych = *++p; - if (yybm[0 + yych] & 64) { - goto yy1073; - } - goto yy1058; - yy1073: - ++p; - yych = *p; - marker = p; - if (yybm[0 + yych] & 128) { - goto yy1075; - } - if (yych <= 0xE0) { - if (yych <= '`') { - if (yych <= 0x00) - goto yy1058; - if (yych <= '\r') - goto yy1084; - goto yy1073; 
- } else { - if (yych <= 0xC1) - goto yy1058; - if (yych <= 0xDF) - goto yy1077; - goto yy1078; - } - } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy1083; - goto yy1079; - } else { - if (yych <= 0xF0) - goto yy1080; - if (yych <= 0xF3) - goto yy1081; - if (yych <= 0xF4) - goto yy1082; - goto yy1058; - } - } - yy1075: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy1075; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= 0x00) - goto yy1058; - if (yych <= '\r') - goto yy1084; - goto yy1058; - } else { - if (yych <= 0xDF) - goto yy1077; - if (yych <= 0xE0) - goto yy1078; - goto yy1079; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy1083; - if (yych <= 0xEF) - goto yy1079; - goto yy1080; - } else { - if (yych <= 0xF3) - goto yy1081; - if (yych <= 0xF4) - goto yy1082; - goto yy1058; - } - } - yy1077: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1058; - if (yych <= 0xBF) - goto yy1075; - goto yy1058; - yy1078: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy1058; - if (yych <= 0xBF) - goto yy1077; - goto yy1058; - yy1079: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1058; - if (yych <= 0xBF) - goto yy1077; - goto yy1058; - yy1080: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy1058; - if (yych <= 0xBF) - goto yy1079; - goto yy1058; - yy1081: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1058; - if (yych <= 0xBF) - goto yy1079; - goto yy1058; - yy1082: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1058; - if (yych <= 0x8F) - goto yy1079; - goto yy1058; - yy1083: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1058; - if (yych <= 0x9F) - goto yy1077; - goto yy1058; - yy1084: - ++p; - p = marker; - { return (bufsize_t)(p - start); } - } +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 0, 192, 192, 0, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 
192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 144, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 96, 192, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych == '`') goto yy845; + if (yych == '~') goto yy846; + ++p; +yy844: + { return 0; } +yy845: + yych = *(marker = ++p); + if (yych == '`') goto yy847; + goto yy844; +yy846: + yych = *(marker = ++p); + if (yych == '~') goto yy849; + goto yy844; +yy847: + yych = *++p; + if (yybm[0+yych] & 16) { + goto yy850; + } +yy848: + p = marker; + goto yy844; +yy849: + yych = *++p; + if (yybm[0+yych] & 32) { + goto yy852; + } + goto yy848; +yy850: + yych = *++p; + if (yybm[0+yych] & 16) { + goto yy850; + } + if (yych <= 0xDF) { + if (yych <= '\f') { + if (yych <= 0x00) goto yy848; + if (yych == '\n') { + marker = p; + goto yy856; + } + marker = p; + goto yy854; + } else { + if (yych <= '\r') { + marker = p; + goto yy856; + } + if (yych <= 0x7F) { + marker = p; + goto yy854; + } + if (yych <= 0xC1) goto yy848; + marker = p; + goto yy858; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) { + marker = p; + goto yy859; + } + if (yych == 0xED) { + marker = p; + goto yy861; + } + marker = p; + goto yy860; + } else { + if (yych <= 0xF0) { + marker = p; + goto yy862; + } + if (yych <= 0xF3) { + 
marker = p; + goto yy863; + } + if (yych <= 0xF4) { + marker = p; + goto yy864; + } + goto yy848; + } + } +yy852: + yych = *++p; + if (yybm[0+yych] & 32) { + goto yy852; + } + if (yych <= 0xDF) { + if (yych <= '\f') { + if (yych <= 0x00) goto yy848; + if (yych == '\n') { + marker = p; + goto yy867; + } + marker = p; + goto yy865; + } else { + if (yych <= '\r') { + marker = p; + goto yy867; + } + if (yych <= 0x7F) { + marker = p; + goto yy865; + } + if (yych <= 0xC1) goto yy848; + marker = p; + goto yy869; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) { + marker = p; + goto yy870; + } + if (yych == 0xED) { + marker = p; + goto yy872; + } + marker = p; + goto yy871; + } else { + if (yych <= 0xF0) { + marker = p; + goto yy873; + } + if (yych <= 0xF3) { + marker = p; + goto yy874; + } + if (yych <= 0xF4) { + marker = p; + goto yy875; + } + goto yy848; + } + } +yy854: + yych = *++p; + if (yybm[0+yych] & 64) { + goto yy854; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) goto yy848; + if (yych >= 0x0E) goto yy848; + } else { + if (yych <= 0xDF) goto yy858; + if (yych <= 0xE0) goto yy859; + goto yy860; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy861; + if (yych <= 0xEF) goto yy860; + goto yy862; + } else { + if (yych <= 0xF3) goto yy863; + if (yych <= 0xF4) goto yy864; + goto yy848; + } + } +yy856: + ++p; + p = marker; + { return (bufsize_t)(p - start); } +yy858: + yych = *++p; + if (yych <= 0x7F) goto yy848; + if (yych <= 0xBF) goto yy854; + goto yy848; +yy859: + yych = *++p; + if (yych <= 0x9F) goto yy848; + if (yych <= 0xBF) goto yy858; + goto yy848; +yy860: + yych = *++p; + if (yych <= 0x7F) goto yy848; + if (yych <= 0xBF) goto yy858; + goto yy848; +yy861: + yych = *++p; + if (yych <= 0x7F) goto yy848; + if (yych <= 0x9F) goto yy858; + goto yy848; +yy862: + yych = *++p; + if (yych <= 0x8F) goto yy848; + if (yych <= 0xBF) goto yy860; + goto yy848; +yy863: + yych = *++p; + if (yych <= 0x7F) goto yy848; + if (yych <= 
0xBF) goto yy860; + goto yy848; +yy864: + yych = *++p; + if (yych <= 0x7F) goto yy848; + if (yych <= 0x8F) goto yy860; + goto yy848; +yy865: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy865; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) goto yy848; + if (yych >= 0x0E) goto yy848; + } else { + if (yych <= 0xDF) goto yy869; + if (yych <= 0xE0) goto yy870; + goto yy871; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy872; + if (yych <= 0xEF) goto yy871; + goto yy873; + } else { + if (yych <= 0xF3) goto yy874; + if (yych <= 0xF4) goto yy875; + goto yy848; + } + } +yy867: + ++p; + p = marker; + { return (bufsize_t)(p - start); } +yy869: + yych = *++p; + if (yych <= 0x7F) goto yy848; + if (yych <= 0xBF) goto yy865; + goto yy848; +yy870: + yych = *++p; + if (yych <= 0x9F) goto yy848; + if (yych <= 0xBF) goto yy869; + goto yy848; +yy871: + yych = *++p; + if (yych <= 0x7F) goto yy848; + if (yych <= 0xBF) goto yy869; + goto yy848; +yy872: + yych = *++p; + if (yych <= 0x7F) goto yy848; + if (yych <= 0x9F) goto yy869; + goto yy848; +yy873: + yych = *++p; + if (yych <= 0x8F) goto yy848; + if (yych <= 0xBF) goto yy871; + goto yy848; +yy874: + yych = *++p; + if (yych <= 0x7F) goto yy848; + if (yych <= 0xBF) goto yy871; + goto yy848; +yy875: + yych = *++p; + if (yych <= 0x7F) goto yy848; + if (yych <= 0x8F) goto yy871; + goto yy848; +} + } // Scan a closing code fence with length at least len. 
-bufsize_t _scan_close_code_fence(const unsigned char *p) { +bufsize_t _scan_close_code_fence(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - static const unsigned char yybm[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - yych = *p; - if (yych == '`') - goto yy1090; - if (yych == '~') - goto yy1091; - ++p; - yy1089 : { return 0; } - yy1090: - yych = *(marker = ++p); - if (yych == '`') - goto yy1100; - goto yy1089; - yy1091: - yych = *(marker = ++p); - if (yych != '~') - goto yy1089; - yych = *++p; - if (yybm[0 + yych] & 32) { - goto yy1094; - } - yy1093: - p = marker; - goto yy1089; - yy1094: - ++p; - yych = *p; - marker = p; - if (yybm[0 + yych] & 64) { - goto yy1096; - } - if (yych <= '\f') { - if (yych <= 0x08) - goto yy1093; - if (yych <= '\n') - goto yy1098; - goto yy1093; - } else { - if (yych <= '\r') - goto yy1098; - if (yych == '~') - goto yy1094; - goto yy1093; - } - yy1096: - ++p; - yych = *p; - if (yybm[0 + yych] & 64) { - goto yy1096; - } - if (yych <= 0x08) - goto yy1093; - if (yych <= '\n') - goto yy1098; - if (yych != '\r') - goto yy1093; - yy1098: - ++p; - p = marker; - { return (bufsize_t)(p - start); } - yy1100: - yych = 
*++p; - if (yybm[0 + yych] & 128) { - goto yy1101; - } - goto yy1093; - yy1101: - ++p; - yych = *p; - marker = p; - if (yybm[0 + yych] & 128) { - goto yy1101; - } - if (yych <= '\f') { - if (yych <= 0x08) - goto yy1093; - if (yych <= '\t') - goto yy1103; - if (yych <= '\n') - goto yy1105; - goto yy1093; - } else { - if (yych <= '\r') - goto yy1105; - if (yych != ' ') - goto yy1093; - } - yy1103: - ++p; - yych = *p; - if (yych <= '\f') { - if (yych <= 0x08) - goto yy1093; - if (yych <= '\t') - goto yy1103; - if (yych >= '\v') - goto yy1093; - } else { - if (yych <= '\r') - goto yy1105; - if (yych == ' ') - goto yy1103; - goto yy1093; - } - yy1105: - ++p; - p = marker; - { return (bufsize_t)(p - start); } - } +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 32, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 64, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych == '`') goto yy880; + if (yych == '~') goto yy881; + ++p; +yy879: + { return 0; } +yy880: + yych = *(marker = ++p); + if (yych == '`') goto yy882; + goto yy879; +yy881: + yych = *(marker = ++p); + if (yych == '~') goto yy884; + goto yy879; +yy882: + yych = *++p; + if (yybm[0+yych] & 32) { + goto yy885; + } +yy883: + p = marker; + goto yy879; 
+yy884: + yych = *++p; + if (yybm[0+yych] & 64) { + goto yy887; + } + goto yy883; +yy885: + yych = *++p; + if (yybm[0+yych] & 32) { + goto yy885; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy883; + if (yych <= '\t') { + marker = p; + goto yy889; + } + if (yych <= '\n') { + marker = p; + goto yy891; + } + goto yy883; + } else { + if (yych <= '\r') { + marker = p; + goto yy891; + } + if (yych == ' ') { + marker = p; + goto yy889; + } + goto yy883; + } +yy887: + yych = *++p; + if (yybm[0+yych] & 64) { + goto yy887; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy883; + if (yych <= '\t') { + marker = p; + goto yy893; + } + if (yych <= '\n') { + marker = p; + goto yy895; + } + goto yy883; + } else { + if (yych <= '\r') { + marker = p; + goto yy895; + } + if (yych == ' ') { + marker = p; + goto yy893; + } + goto yy883; + } +yy889: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy889; + } + if (yych <= 0x08) goto yy883; + if (yych <= '\n') goto yy891; + if (yych != '\r') goto yy883; +yy891: + ++p; + p = marker; + { return (bufsize_t)(p - start); } +yy893: + yych = *++p; + if (yych <= '\f') { + if (yych <= 0x08) goto yy883; + if (yych <= '\t') goto yy893; + if (yych >= '\v') goto yy883; + } else { + if (yych <= '\r') goto yy895; + if (yych == ' ') goto yy893; + goto yy883; + } +yy895: + ++p; + p = marker; + { return (bufsize_t)(p - start); } +} + } // Scans an entity. // Returns number of chars matched. 
-bufsize_t _scan_entity(const unsigned char *p) { +bufsize_t _scan_entity(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - yych = *p; - if (yych == '&') - goto yy1111; - ++p; - yy1110 : { return 0; } - yy1111: - yych = *(marker = ++p); - if (yych <= '@') { - if (yych != '#') - goto yy1110; - } else { - if (yych <= 'Z') - goto yy1114; - if (yych <= '`') - goto yy1110; - if (yych <= 'z') - goto yy1114; - goto yy1110; - } - yych = *++p; - if (yych <= 'W') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1149; - } else { - if (yych <= 'X') - goto yy1148; - if (yych == 'x') - goto yy1148; - } - yy1113: - p = marker; - goto yy1110; - yy1114: - yych = *++p; - if (yych <= '@') { - if (yych <= '/') - goto yy1113; - if (yych >= ':') - goto yy1113; - } else { - if (yych <= 'Z') - goto yy1115; - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - yy1115: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1118; - if (yych <= ':') - goto yy1113; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - goto yy1118; - } else { - if (yych <= '`') - goto yy1113; - if (yych <= 'z') - goto yy1118; - goto yy1113; - } - } - yy1116: - ++p; - { return (bufsize_t)(p - start); } - yy1118: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1119; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1119: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1120; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1120: - yych = *++p; - 
if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1121; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1121: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1122; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1122: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1123; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1123: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1124; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1124: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1125; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1125: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1126; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1126: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1127; - if (yych <= 
':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1127: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1128; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1128: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1129; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1129: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1130; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1130: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1131; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1131: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1132; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1132: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1133; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - 
} else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1133: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1134; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1134: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1135; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1135: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1136; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1136: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1137; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1137: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1138; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1138: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1139; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1139: - yych = 
*++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1140; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1140: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1141; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1141: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1142; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1142: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1143; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1143: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1144; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1144: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1145; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1145: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1146; - if 
(yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1146: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1147; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= '{') - goto yy1113; - } - } - yy1147: - yych = *++p; - if (yych == ';') - goto yy1116; - goto yy1113; - yy1148: - yych = *++p; - if (yych <= '@') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1155; - goto yy1113; - } else { - if (yych <= 'F') - goto yy1155; - if (yych <= '`') - goto yy1113; - if (yych <= 'f') - goto yy1155; - goto yy1113; - } - yy1149: - yych = *++p; - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1150; - if (yych == ';') - goto yy1116; - goto yy1113; - yy1150: - yych = *++p; - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1151; - if (yych == ';') - goto yy1116; - goto yy1113; - yy1151: - yych = *++p; - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1152; - if (yych == ';') - goto yy1116; - goto yy1113; - yy1152: - yych = *++p; - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1153; - if (yych == ';') - goto yy1116; - goto yy1113; - yy1153: - yych = *++p; - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1154; - if (yych == ';') - goto yy1116; - goto yy1113; - yy1154: - yych = *++p; - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1147; - if (yych == ';') - goto yy1116; - goto yy1113; - yy1155: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1156; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'F') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= 'g') - goto 
yy1113; - } - } - yy1156: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1157; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'F') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= 'g') - goto yy1113; - } - } - yy1157: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1158; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'F') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= 'g') - goto yy1113; - } - } - yy1158: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1159; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'F') { - if (yych <= '@') - goto yy1113; - } else { - if (yych <= '`') - goto yy1113; - if (yych >= 'g') - goto yy1113; - } - } - yy1159: - ++p; - if ((yych = *p) <= ';') { - if (yych <= '/') - goto yy1113; - if (yych <= '9') - goto yy1147; - if (yych <= ':') - goto yy1113; - goto yy1116; - } else { - if (yych <= 'F') { - if (yych <= '@') - goto yy1113; - goto yy1147; - } else { - if (yych <= '`') - goto yy1113; - if (yych <= 'f') - goto yy1147; - goto yy1113; - } - } - } +{ + unsigned char yych; + yych = *p; + if (yych == '&') goto yy901; + ++p; +yy900: + { return 0; } +yy901: + yych = *(marker = ++p); + if (yych <= '@') { + if (yych != '#') goto yy900; + } else { + if (yych <= 'Z') goto yy904; + if (yych <= '`') goto yy900; + if (yych <= 'z') goto yy904; + goto yy900; + } + yych = *++p; + if (yych <= 'W') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy905; + } else { + if (yych <= 'X') goto yy906; + if (yych == 'x') goto yy906; + } +yy903: + p = marker; + goto yy900; +yy904: + yych = *++p; + if (yych <= '@') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy907; + goto yy903; + } else { + if (yych <= 'Z') goto yy907; + if 
(yych <= '`') goto yy903; + if (yych <= 'z') goto yy907; + goto yy903; + } +yy905: + yych = *++p; + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy908; + if (yych == ';') goto yy909; + goto yy903; +yy906: + yych = *++p; + if (yych <= '@') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy911; + goto yy903; + } else { + if (yych <= 'F') goto yy911; + if (yych <= '`') goto yy903; + if (yych <= 'f') goto yy911; + goto yy903; + } +yy907: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy912; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + goto yy912; + } else { + if (yych <= '`') goto yy903; + if (yych <= 'z') goto yy912; + goto yy903; + } + } +yy908: + yych = *++p; + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy913; + if (yych != ';') goto yy903; +yy909: + ++p; + { return (bufsize_t)(p - start); } +yy911: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy914; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'F') { + if (yych <= '@') goto yy903; + goto yy914; + } else { + if (yych <= '`') goto yy903; + if (yych <= 'f') goto yy914; + goto yy903; + } + } +yy912: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy915; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + goto yy915; + } else { + if (yych <= '`') goto yy903; + if (yych <= 'z') goto yy915; + goto yy903; + } + } +yy913: + yych = *++p; + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy916; + if (yych == ';') goto yy909; + goto yy903; +yy914: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy917; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'F') { + if (yych <= '@') goto yy903; + goto yy917; + } else { + if (yych <= '`') goto yy903; + if (yych <= 
'f') goto yy917; + goto yy903; + } + } +yy915: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy918; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + goto yy918; + } else { + if (yych <= '`') goto yy903; + if (yych <= 'z') goto yy918; + goto yy903; + } + } +yy916: + yych = *++p; + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy919; + if (yych == ';') goto yy909; + goto yy903; +yy917: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy920; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'F') { + if (yych <= '@') goto yy903; + goto yy920; + } else { + if (yych <= '`') goto yy903; + if (yych <= 'f') goto yy920; + goto yy903; + } + } +yy918: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy921; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + goto yy921; + } else { + if (yych <= '`') goto yy903; + if (yych <= 'z') goto yy921; + goto yy903; + } + } +yy919: + yych = *++p; + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy922; + if (yych == ';') goto yy909; + goto yy903; +yy920: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy923; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'F') { + if (yych <= '@') goto yy903; + goto yy923; + } else { + if (yych <= '`') goto yy903; + if (yych <= 'f') goto yy923; + goto yy903; + } + } +yy921: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy924; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + goto yy924; + } else { + if (yych <= '`') goto yy903; + if (yych <= 'z') goto yy924; + goto yy903; + } + } +yy922: + yych = *++p; + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy925; 
+ if (yych == ';') goto yy909; + goto yy903; +yy923: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy925; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'F') { + if (yych <= '@') goto yy903; + goto yy925; + } else { + if (yych <= '`') goto yy903; + if (yych <= 'f') goto yy925; + goto yy903; + } + } +yy924: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy926; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + goto yy926; + } else { + if (yych <= '`') goto yy903; + if (yych <= 'z') goto yy926; + goto yy903; + } + } +yy925: + yych = *++p; + if (yych == ';') goto yy909; + goto yy903; +yy926: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy927; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy927: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy928; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy928: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy929; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy929: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy930; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy930: + yych = *++p; + if (yych <= ';') { + if (yych <= 
'/') goto yy903; + if (yych <= '9') goto yy931; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy931: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy932; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy932: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy933; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy933: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy934; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy934: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy935; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy935: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy936; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy936: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy937; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; 
+ } + } +yy937: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy938; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy938: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy939; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy939: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy940; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy940: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy941; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy941: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy942; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy942: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy943; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy943: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy944; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } 
else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy944: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy945; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy945: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy946; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy946: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy947; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy947: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy948; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy948: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy949; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + } else { + if (yych <= '`') goto yy903; + if (yych >= '{') goto yy903; + } + } +yy949: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') goto yy903; + if (yych <= '9') goto yy925; + if (yych <= ':') goto yy903; + goto yy909; + } else { + if (yych <= 'Z') { + if (yych <= '@') goto yy903; + goto yy925; + } else { + if (yych <= '`') goto yy903; + if (yych <= 'z') goto yy925; + goto yy903; + } + } +} + } // Returns positive value if a URL begins in a way that is potentially // dangerous, with javascript:, vbscript:, 
file:, or data:, otherwise 0. -bufsize_t _scan_dangerous_url(const unsigned char *p) { +bufsize_t _scan_dangerous_url(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - unsigned int yyaccept = 0; - yych = *p; - if (yych <= 'V') { - if (yych <= 'F') { - if (yych == 'D') - goto yy1164; - if (yych >= 'F') - goto yy1165; - } else { - if (yych == 'J') - goto yy1166; - if (yych >= 'V') - goto yy1167; - } - } else { - if (yych <= 'f') { - if (yych == 'd') - goto yy1164; - if (yych >= 'f') - goto yy1165; - } else { - if (yych <= 'j') { - if (yych >= 'j') - goto yy1166; - } else { - if (yych == 'v') - goto yy1167; - } - } - } - ++p; - yy1163 : { return 0; } - yy1164: - yyaccept = 0; - yych = *(marker = ++p); - if (yych == 'A') - goto yy1190; - if (yych == 'a') - goto yy1190; - goto yy1163; - yy1165: - yyaccept = 0; - yych = *(marker = ++p); - if (yych == 'I') - goto yy1187; - if (yych == 'i') - goto yy1187; - goto yy1163; - yy1166: - yyaccept = 0; - yych = *(marker = ++p); - if (yych == 'A') - goto yy1178; - if (yych == 'a') - goto yy1178; - goto yy1163; - yy1167: - yyaccept = 0; - yych = *(marker = ++p); - if (yych == 'B') - goto yy1168; - if (yych != 'b') - goto yy1163; - yy1168: - yych = *++p; - if (yych == 'S') - goto yy1170; - if (yych == 's') - goto yy1170; - yy1169: - p = marker; - if (yyaccept == 0) { - goto yy1163; - } else { - goto yy1177; - } - yy1170: - yych = *++p; - if (yych == 'C') - goto yy1171; - if (yych != 'c') - goto yy1169; - yy1171: - yych = *++p; - if (yych == 'R') - goto yy1172; - if (yych != 'r') - goto yy1169; - yy1172: - yych = *++p; - if (yych == 'I') - goto yy1173; - if (yych != 'i') - goto yy1169; - yy1173: - yych = *++p; - if (yych == 'P') - goto yy1174; - if (yych != 'p') - goto yy1169; - yy1174: - yych = *++p; - if (yych == 'T') - goto yy1175; - if (yych != 't') - goto yy1169; - yy1175: - yych = *++p; - if (yych != ':') - goto yy1169; - yy1176: - ++p; - yy1177 : { return 
(bufsize_t)(p - start); } - yy1178: - yych = *++p; - if (yych == 'V') - goto yy1179; - if (yych != 'v') - goto yy1169; - yy1179: - yych = *++p; - if (yych == 'A') - goto yy1180; - if (yych != 'a') - goto yy1169; - yy1180: - yych = *++p; - if (yych == 'S') - goto yy1181; - if (yych != 's') - goto yy1169; - yy1181: - yych = *++p; - if (yych == 'C') - goto yy1182; - if (yych != 'c') - goto yy1169; - yy1182: - yych = *++p; - if (yych == 'R') - goto yy1183; - if (yych != 'r') - goto yy1169; - yy1183: - yych = *++p; - if (yych == 'I') - goto yy1184; - if (yych != 'i') - goto yy1169; - yy1184: - yych = *++p; - if (yych == 'P') - goto yy1185; - if (yych != 'p') - goto yy1169; - yy1185: - yych = *++p; - if (yych == 'T') - goto yy1186; - if (yych != 't') - goto yy1169; - yy1186: - yych = *++p; - if (yych == ':') - goto yy1176; - goto yy1169; - yy1187: - yych = *++p; - if (yych == 'L') - goto yy1188; - if (yych != 'l') - goto yy1169; - yy1188: - yych = *++p; - if (yych == 'E') - goto yy1189; - if (yych != 'e') - goto yy1169; - yy1189: - yych = *++p; - if (yych == ':') - goto yy1176; - goto yy1169; - yy1190: - yych = *++p; - if (yych == 'T') - goto yy1191; - if (yych != 't') - goto yy1169; - yy1191: - yych = *++p; - if (yych == 'A') - goto yy1192; - if (yych != 'a') - goto yy1169; - yy1192: - yych = *++p; - if (yych != ':') - goto yy1169; - yyaccept = 1; - yych = *(marker = ++p); - if (yych == 'I') - goto yy1194; - if (yych != 'i') - goto yy1177; - yy1194: - yych = *++p; - if (yych == 'M') - goto yy1195; - if (yych != 'm') - goto yy1169; - yy1195: - yych = *++p; - if (yych == 'A') - goto yy1196; - if (yych != 'a') - goto yy1169; - yy1196: - yych = *++p; - if (yych == 'G') - goto yy1197; - if (yych != 'g') - goto yy1169; - yy1197: - yych = *++p; - if (yych == 'E') - goto yy1198; - if (yych != 'e') - goto yy1169; - yy1198: - yych = *++p; - if (yych != '/') - goto yy1169; - yych = *++p; - if (yych <= 'W') { - if (yych <= 'J') { - if (yych == 'G') - goto yy1201; - if (yych <= 'I') 
- goto yy1169; - goto yy1202; - } else { - if (yych == 'P') - goto yy1200; - if (yych <= 'V') - goto yy1169; - goto yy1203; - } - } else { - if (yych <= 'j') { - if (yych == 'g') - goto yy1201; - if (yych <= 'i') - goto yy1169; - goto yy1202; - } else { - if (yych <= 'p') { - if (yych <= 'o') - goto yy1169; - } else { - if (yych == 'w') - goto yy1203; - goto yy1169; - } - } - } - yy1200: - yych = *++p; - if (yych == 'N') - goto yy1211; - if (yych == 'n') - goto yy1211; - goto yy1169; - yy1201: - yych = *++p; - if (yych == 'I') - goto yy1210; - if (yych == 'i') - goto yy1210; - goto yy1169; - yy1202: - yych = *++p; - if (yych == 'P') - goto yy1208; - if (yych == 'p') - goto yy1208; - goto yy1169; - yy1203: - yych = *++p; - if (yych == 'E') - goto yy1204; - if (yych != 'e') - goto yy1169; - yy1204: - yych = *++p; - if (yych == 'B') - goto yy1205; - if (yych != 'b') - goto yy1169; - yy1205: - yych = *++p; - if (yych == 'P') - goto yy1206; - if (yych != 'p') - goto yy1169; - yy1206: - ++p; - { return 0; } - yy1208: - yych = *++p; - if (yych == 'E') - goto yy1209; - if (yych != 'e') - goto yy1169; - yy1209: - yych = *++p; - if (yych == 'G') - goto yy1206; - if (yych == 'g') - goto yy1206; - goto yy1169; - yy1210: - yych = *++p; - if (yych == 'F') - goto yy1206; - if (yych == 'f') - goto yy1206; - goto yy1169; - yy1211: - ++p; - if ((yych = *p) == 'G') - goto yy1206; - if (yych == 'g') - goto yy1206; - goto yy1169; - } +{ + unsigned char yych; + unsigned int yyaccept = 0; + yych = *p; + if (yych <= 'V') { + if (yych <= 'F') { + if (yych == 'D') goto yy954; + if (yych >= 'F') goto yy955; + } else { + if (yych == 'J') goto yy956; + if (yych >= 'V') goto yy957; + } + } else { + if (yych <= 'f') { + if (yych == 'd') goto yy954; + if (yych >= 'f') goto yy955; + } else { + if (yych <= 'j') { + if (yych >= 'j') goto yy956; + } else { + if (yych == 'v') goto yy957; + } + } + } + ++p; +yy953: + { return 0; } +yy954: + yyaccept = 0; + yych = *(marker = ++p); + if (yych == 'A') 
goto yy958; + if (yych == 'a') goto yy958; + goto yy953; +yy955: + yyaccept = 0; + yych = *(marker = ++p); + if (yych == 'I') goto yy960; + if (yych == 'i') goto yy960; + goto yy953; +yy956: + yyaccept = 0; + yych = *(marker = ++p); + if (yych == 'A') goto yy961; + if (yych == 'a') goto yy961; + goto yy953; +yy957: + yyaccept = 0; + yych = *(marker = ++p); + if (yych == 'B') goto yy962; + if (yych == 'b') goto yy962; + goto yy953; +yy958: + yych = *++p; + if (yych == 'T') goto yy963; + if (yych == 't') goto yy963; +yy959: + p = marker; + if (yyaccept == 0) { + goto yy953; + } else { + goto yy971; + } +yy960: + yych = *++p; + if (yych == 'L') goto yy964; + if (yych == 'l') goto yy964; + goto yy959; +yy961: + yych = *++p; + if (yych == 'V') goto yy965; + if (yych == 'v') goto yy965; + goto yy959; +yy962: + yych = *++p; + if (yych == 'S') goto yy966; + if (yych == 's') goto yy966; + goto yy959; +yy963: + yych = *++p; + if (yych == 'A') goto yy967; + if (yych == 'a') goto yy967; + goto yy959; +yy964: + yych = *++p; + if (yych == 'E') goto yy968; + if (yych == 'e') goto yy968; + goto yy959; +yy965: + yych = *++p; + if (yych == 'A') goto yy962; + if (yych == 'a') goto yy962; + goto yy959; +yy966: + yych = *++p; + if (yych == 'C') goto yy969; + if (yych == 'c') goto yy969; + goto yy959; +yy967: + yych = *++p; + if (yych == ':') goto yy970; + goto yy959; +yy968: + yych = *++p; + if (yych == ':') goto yy972; + goto yy959; +yy969: + yych = *++p; + if (yych == 'R') goto yy973; + if (yych == 'r') goto yy973; + goto yy959; +yy970: + yyaccept = 1; + yych = *(marker = ++p); + if (yych == 'I') goto yy974; + if (yych == 'i') goto yy974; +yy971: + { return (bufsize_t)(p - start); } +yy972: + ++p; + goto yy971; +yy973: + yych = *++p; + if (yych == 'I') goto yy975; + if (yych == 'i') goto yy975; + goto yy959; +yy974: + yych = *++p; + if (yych == 'M') goto yy976; + if (yych == 'm') goto yy976; + goto yy959; +yy975: + yych = *++p; + if (yych == 'P') goto yy977; + if (yych == 'p') goto 
yy977; + goto yy959; +yy976: + yych = *++p; + if (yych == 'A') goto yy978; + if (yych == 'a') goto yy978; + goto yy959; +yy977: + yych = *++p; + if (yych == 'T') goto yy968; + if (yych == 't') goto yy968; + goto yy959; +yy978: + yych = *++p; + if (yych == 'G') goto yy979; + if (yych != 'g') goto yy959; +yy979: + yych = *++p; + if (yych == 'E') goto yy980; + if (yych != 'e') goto yy959; +yy980: + yych = *++p; + if (yych != '/') goto yy959; + yych = *++p; + if (yych <= 'W') { + if (yych <= 'J') { + if (yych == 'G') goto yy982; + if (yych <= 'I') goto yy959; + goto yy983; + } else { + if (yych == 'P') goto yy984; + if (yych <= 'V') goto yy959; + goto yy985; + } + } else { + if (yych <= 'j') { + if (yych == 'g') goto yy982; + if (yych <= 'i') goto yy959; + goto yy983; + } else { + if (yych <= 'p') { + if (yych <= 'o') goto yy959; + goto yy984; + } else { + if (yych == 'w') goto yy985; + goto yy959; + } + } + } +yy982: + yych = *++p; + if (yych == 'I') goto yy986; + if (yych == 'i') goto yy986; + goto yy959; +yy983: + yych = *++p; + if (yych == 'P') goto yy987; + if (yych == 'p') goto yy987; + goto yy959; +yy984: + yych = *++p; + if (yych == 'N') goto yy988; + if (yych == 'n') goto yy988; + goto yy959; +yy985: + yych = *++p; + if (yych == 'E') goto yy989; + if (yych == 'e') goto yy989; + goto yy959; +yy986: + yych = *++p; + if (yych == 'F') goto yy990; + if (yych == 'f') goto yy990; + goto yy959; +yy987: + yych = *++p; + if (yych == 'E') goto yy988; + if (yych != 'e') goto yy959; +yy988: + yych = *++p; + if (yych == 'G') goto yy990; + if (yych == 'g') goto yy990; + goto yy959; +yy989: + yych = *++p; + if (yych == 'B') goto yy992; + if (yych == 'b') goto yy992; + goto yy959; +yy990: + ++p; + { return 0; } +yy992: + yych = *++p; + if (yych == 'P') goto yy990; + if (yych == 'p') goto yy990; + goto yy959; +} + } // Scans a footnote definition opening. 
-bufsize_t _scan_footnote_definition(const unsigned char *p) { +bufsize_t _scan_footnote_definition(const unsigned char *p) +{ const unsigned char *marker = NULL; const unsigned char *start = p; - { - unsigned char yych; - static const unsigned char yybm[] = { - 0, 64, 64, 64, 64, 64, 64, 64, 64, 128, 0, 64, 64, 0, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - yych = *p; - if (yych == '[') - goto yy1216; - ++p; - yy1215 : { return 0; } - yy1216: - yych = *(marker = ++p); - if (yych != '^') - goto yy1215; - yych = *++p; - if (yych != ']') - goto yy1220; - yy1218: - p = marker; - goto yy1215; - yy1219: - ++p; - yych = *p; - yy1220: - if (yybm[0 + yych] & 64) { - goto yy1219; - } - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= ' ') - goto yy1218; - if (yych <= ']') - goto yy1228; - goto yy1218; - } else { - if (yych <= 0xDF) - goto yy1221; - if (yych <= 0xE0) - goto yy1222; - goto yy1223; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy1227; - if (yych <= 0xEF) - goto yy1223; - goto yy1224; - } else { - if (yych <= 0xF3) - goto yy1225; - if (yych <= 0xF4) - goto yy1226; - goto yy1218; - } - } - yy1221: - ++p; - yych = *p; - 
if (yych <= 0x7F) - goto yy1218; - if (yych <= 0xBF) - goto yy1219; - goto yy1218; - yy1222: - ++p; - yych = *p; - if (yych <= 0x9F) - goto yy1218; - if (yych <= 0xBF) - goto yy1221; - goto yy1218; - yy1223: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1218; - if (yych <= 0xBF) - goto yy1221; - goto yy1218; - yy1224: - ++p; - yych = *p; - if (yych <= 0x8F) - goto yy1218; - if (yych <= 0xBF) - goto yy1223; - goto yy1218; - yy1225: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1218; - if (yych <= 0xBF) - goto yy1223; - goto yy1218; - yy1226: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1218; - if (yych <= 0x8F) - goto yy1223; - goto yy1218; - yy1227: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy1218; - if (yych <= 0x9F) - goto yy1221; - goto yy1218; - yy1228: - yych = *++p; - if (yych != ':') - goto yy1218; - yy1229: - ++p; - yych = *p; - if (yybm[0 + yych] & 128) { - goto yy1229; - } - { return (bufsize_t)(p - start); } - } +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 64, 64, 64, 64, 64, 64, 64, + 64, 128, 0, 64, 64, 0, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 128, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 0, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if 
(yych == '[') goto yy997; + ++p; +yy996: + { return 0; } +yy997: + yych = *(marker = ++p); + if (yych != '^') goto yy996; + yych = *++p; + if (yych != ']') goto yy1001; +yy999: + p = marker; + goto yy996; +yy1000: + yych = *++p; +yy1001: + if (yybm[0+yych] & 64) { + goto yy1000; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= ' ') goto yy999; + if (yych <= ']') goto yy1009; + goto yy999; + } else { + if (yych <= 0xDF) goto yy1002; + if (yych <= 0xE0) goto yy1003; + goto yy1004; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) goto yy1005; + if (yych <= 0xEF) goto yy1004; + goto yy1006; + } else { + if (yych <= 0xF3) goto yy1007; + if (yych <= 0xF4) goto yy1008; + goto yy999; + } + } +yy1002: + yych = *++p; + if (yych <= 0x7F) goto yy999; + if (yych <= 0xBF) goto yy1000; + goto yy999; +yy1003: + yych = *++p; + if (yych <= 0x9F) goto yy999; + if (yych <= 0xBF) goto yy1002; + goto yy999; +yy1004: + yych = *++p; + if (yych <= 0x7F) goto yy999; + if (yych <= 0xBF) goto yy1002; + goto yy999; +yy1005: + yych = *++p; + if (yych <= 0x7F) goto yy999; + if (yych <= 0x9F) goto yy1002; + goto yy999; +yy1006: + yych = *++p; + if (yych <= 0x8F) goto yy999; + if (yych <= 0xBF) goto yy1004; + goto yy999; +yy1007: + yych = *++p; + if (yych <= 0x7F) goto yy999; + if (yych <= 0xBF) goto yy1004; + goto yy999; +yy1008: + yych = *++p; + if (yych <= 0x7F) goto yy999; + if (yych <= 0x8F) goto yy1004; + goto yy999; +yy1009: + yych = *++p; + if (yych != ':') goto yy999; +yy1010: + yych = *++p; + if (yybm[0+yych] & 128) { + goto yy1010; + } + { return (bufsize_t)(p - start); } +} + } diff --git a/src/scanners.re b/src/scanners.re index 3b1266b1e..0b2178681 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -37,7 +37,7 @@ bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, tagname = [A-Za-z][A-Za-z0-9-]*; - blocktagname = 
'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'meta'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul'; + blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul'; attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*; diff --git a/test/regression.txt b/test/regression.txt index 069fa90d7..af56c0c86 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -92,7 +92,6 @@ Issue #192 - escaped spaces in link destination . <p>[a](te\ st)</p> ```````````````````````````````` - Issue github/github#76615: multiple delimiter combinations gets sketchy @@ -126,3 +125,17 @@ ___this___ <em><strong>this</strong></em><br /> <em><strong>this</strong></em></p> ```````````````````````````````` + +Issue #527 - meta tags in inline contexts + +```````````````````````````````` example +City: +<span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> + <meta itemprop="name" content="Springfield"> +</span> +. 
+<p>City: +<span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> + <meta itemprop="name" content="Springfield"> +</span></p> +```````````````````````````````` From 61f1f053957d6f97cceeffdd9a54326b6c746bcf Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 11 Jun 2018 14:47:04 -0700 Subject: [PATCH 114/218] Fix spaces on regression test. --- test/regression.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/regression.txt b/test/regression.txt index af56c0c86..629da0780 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -136,6 +136,6 @@ City: . <p>City: <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City"> - <meta itemprop="name" content="Springfield"> +<meta itemprop="name" content="Springfield"> </span></p> ```````````````````````````````` From 38cc7a439d592c7ff3e2e15b9e30a25679e151c4 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Thu, 12 Jul 2018 11:17:39 +1000 Subject: [PATCH 115/218] add regression test from comrak --- test/regression.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/regression.txt b/test/regression.txt index 629da0780..012da8418 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -139,3 +139,18 @@ City: <meta itemprop="name" content="Springfield"> </span></p> ```````````````````````````````` + +Issue kivikakk/comrak#80 - link parsing cases + +```````````````````````````````` example +[a](\ b) + +[a](<<b) + +[a](<b +) +. 
+<p>[a](\ b)</p> +<p><a href="%3C%3Cb">a</a></p> +<p><a href="%3Cb">a</a></p> +```````````````````````````````` From e9a231f3a03d432d59ba209028f170d0eac423a5 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Thu, 12 Jul 2018 15:26:19 +1000 Subject: [PATCH 116/218] latest spec --- test/spec.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/spec.txt b/test/spec.txt index ec1749aef..257cde800 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -2045,7 +2045,7 @@ followed by one of the strings (case-insensitive) `address`, `footer`, `form`, `frame`, `frameset`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`, `html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`, -`meta`, `nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`, +`nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`, `section`, `source`, `summary`, `table`, `tbody`, `td`, `tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed by [whitespace], the end of the line, the string `>`, or From 343c5d2eff3888d3468276e87fadcca17e24db8c Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 16 Jul 2018 09:49:03 +1000 Subject: [PATCH 117/218] latest spec --- test/spec.txt | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index 257cde800..ded685d1a 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -7495,25 +7495,16 @@ __a<http://foo.bar/?q=__> GFM enables the `strikethrough` extension, where an additional emphasis type is available. -Strikethrough text is any text wrapped in tildes (`~`). +Strikethrough text is any text wrapped in two tildes (`~`). ```````````````````````````````` example strikethrough -~Hi~ Hello, world! +~~Hi~~ Hello, world! . <p><del>Hi</del> Hello, world!</p> ```````````````````````````````` -Any number of tildes may be used on either side of the text; they do not need -to match, and they cannot be nested. 
- -```````````````````````````````` example strikethrough -This ~text~~~~ is ~~~~curious~. -. -<p>This <del>text</del> is <del>curious</del>.</p> -```````````````````````````````` - -As with regular emphasis delimiters, a new paragraph will cause the cessation -of parsing a strikethrough: +As with regular emphasis delimiters, a new paragraph will cause strikethrough +parsing to cease: ```````````````````````````````` example strikethrough This ~~has a From 648b388d257357386198ee3fb0e925a554968b72 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 16 Jul 2018 09:50:10 +1000 Subject: [PATCH 118/218] regressions.txt has non-specified strikethrough --- test/regression.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/regression.txt b/test/regression.txt index 012da8418..0569b3617 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -92,6 +92,7 @@ Issue #192 - escaped spaces in link destination . <p>[a](te\ st)</p> ```````````````````````````````` + Issue github/github#76615: multiple delimiter combinations gets sketchy @@ -154,3 +155,17 @@ Issue kivikakk/comrak#80 - link parsing cases <p><a href="%3C%3Cb">a</a></p> <p><a href="%3Cb">a</a></p> ```````````````````````````````` + +cmark-gfm strikethrough rules + +```````````````````````````````` example strikethrough +~Hi~ Hello, world! +. +<p><del>Hi</del> Hello, world!</p> +```````````````````````````````` + +```````````````````````````````` example strikethrough +This ~text~~~~ is ~~~~curious~. +. +<p>This <del>text</del> is <del>curious</del>.</p> +```````````````````````````````` From 8aa0868afc1281bff39de038051dc6eda73120ec Mon Sep 17 00:00:00 2001 From: Greg Stein <gstein@gmail.com> Date: Thu, 19 Jul 2018 00:28:36 -0500 Subject: [PATCH 119/218] Add example of a Python wrapper which uses libcmark-gfmextensions. 
(#102) --- wrappers/wrapper_ext.py | 109 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100755 wrappers/wrapper_ext.py diff --git a/wrappers/wrapper_ext.py b/wrappers/wrapper_ext.py new file mode 100755 index 000000000..1d6f9785e --- /dev/null +++ b/wrappers/wrapper_ext.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python + +# +# Example for using the shared library from python. +# Will work with either python 2 or python 3. +# Requires cmark-gfm and cmark-gfmextensions libraries to be installed. +# +# This particular example uses the GitHub extensions from the gfmextensions +# library. EXTENSIONS specifies which to use, and the sample shows how to +# connect them into a parser. +# + +import sys +import ctypes + +if sys.platform == 'darwin': + libname = 'libcmark-gfm.dylib' + extname = 'libcmark-gfmextensions.dylib' +elif sys.platform == 'win32': + libname = 'cmark-gfm.dll' + extname = 'cmark-gfmextensions.dll' +else: + libname = 'libcmark-gfm.so' + extname = 'libcmark-gfmextensions.so' +cmark = ctypes.CDLL(libname) +cmark_ext = ctypes.CDLL(extname) + +# Options for the GFM rendering call +OPTS = 0 # defaults + +# The GFM extensions that we want to use +EXTENSIONS = ( + 'autolink', + 'table', + 'strikethrough', + 'tagfilter', + ) + +# Use ctypes to access the functions in libcmark-gfm + +F_cmark_parser_new = cmark.cmark_parser_new +F_cmark_parser_new.restype = ctypes.c_void_p +F_cmark_parser_new.argtypes = (ctypes.c_int,) + +F_cmark_parser_feed = cmark.cmark_parser_feed +F_cmark_parser_feed.restype = None +F_cmark_parser_feed.argtypes = (ctypes.c_void_p, ctypes.c_char_p, ctypes.c_size_t) + +F_cmark_parser_finish = cmark.cmark_parser_finish +F_cmark_parser_finish.restype = ctypes.c_void_p +F_cmark_parser_finish.argtypes = (ctypes.c_void_p,) + +F_cmark_parser_attach_syntax_extension = cmark.cmark_parser_attach_syntax_extension +F_cmark_parser_attach_syntax_extension.restype = ctypes.c_int +F_cmark_parser_attach_syntax_extension.argtypes 
= (ctypes.c_void_p, ctypes.c_void_p) + +F_cmark_parser_get_syntax_extensions = cmark.cmark_parser_get_syntax_extensions +F_cmark_parser_get_syntax_extensions.restype = ctypes.c_void_p +F_cmark_parser_get_syntax_extensions.argtypes = (ctypes.c_void_p,) + +F_cmark_parser_free = cmark.cmark_parser_free +F_cmark_parser_free.restype = None +F_cmark_parser_free.argtypes = (ctypes.c_void_p,) + +F_cmark_node_free = cmark.cmark_node_free +F_cmark_node_free.restype = None +F_cmark_node_free.argtypes = (ctypes.c_void_p,) + +F_cmark_find_syntax_extension = cmark.cmark_find_syntax_extension +F_cmark_find_syntax_extension.restype = ctypes.c_void_p +F_cmark_find_syntax_extension.argtypes = (ctypes.c_char_p,) + +F_cmark_render_html = cmark.cmark_render_html +F_cmark_render_html.restype = ctypes.c_char_p +F_cmark_render_html.argtypes = (ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p) + + +# Set up the libcmark-gfm library and its extensions +F_register = cmark_ext.core_extensions_ensure_registered +F_register.restype = None +F_register.argtypes = ( ) +F_register() + + +def md2html(text): + "Use cmark-gfm to render the Markdown into an HTML fragment." 
+ + parser = F_cmark_parser_new(OPTS) + assert parser + for name in EXTENSIONS: + ext = F_cmark_find_syntax_extension(name) + assert ext + rv = F_cmark_parser_attach_syntax_extension(parser, ext) + assert rv + exts = F_cmark_parser_get_syntax_extensions(parser) + + F_cmark_parser_feed(parser, text, len(text)) + doc = F_cmark_parser_finish(parser) + assert doc + + output = F_cmark_render_html(doc, OPTS, exts) + + F_cmark_parser_free(parser) + F_cmark_node_free(doc) + + return output + + +sys.stdout.write(md2html(sys.stdin.read())) From 4503875c651417ee88b4c4e0ef2936510f9e5029 Mon Sep 17 00:00:00 2001 From: Mike Kavouras <mikekavouras@users.noreply.github.com> Date: Tue, 7 Aug 2018 19:49:38 -0700 Subject: [PATCH 120/218] Parse rest of info string as meta (#103) * parse rest of info string as meta * put info string remainder behind option * add tests --- man/man3/cmark-gfm.3 | 15 +++++++++++++- src/cmark.h | 5 +++++ src/html.c | 8 +++++++ src/main.c | 30 +++++++++++++++------------ test/CMakeLists.txt | 7 +++++++ test/extensions-full-info-string.txt | Bin 0 -> 1527 bytes 6 files changed, 51 insertions(+), 14 deletions(-) create mode 100644 test/extensions-full-info-string.txt diff --git a/man/man3/cmark-gfm.3 b/man/man3/cmark-gfm.3 index 772c26544..797d16734 100644 --- a/man/man3/cmark-gfm.3 +++ b/man/man3/cmark-gfm.3 @@ -1,4 +1,4 @@ -.TH cmark-gfm 3 "February 20, 2018" "LOCAL" "Library Functions Manual" +.TH cmark-gfm 3 "August 08, 2018" "LOCAL" "Library Functions Manual" .SH NAME .PP @@ -982,6 +982,19 @@ compatibility with redcarpet. .PP Use style attributes to align table cells instead of align attributes. +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_FULL_INFO_STRING (1 << 16) +.RE +\f[] +.fi + +.PP +Include the remainder of the info string in code blocks in a separate +attribute. 
+ .SS Version information diff --git a/src/cmark.h b/src/cmark.h index 6526c60e6..ef9e4a150 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -733,6 +733,11 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar */ #define CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES (1 << 15) +/** Include the remainder of the info string in code blocks in + * a separate attribute. + */ +#define CMARK_OPT_FULL_INFO_STRING (1 << 16) + /** * ## Version information */ diff --git a/src/html.c b/src/html.c index e2718c895..c9ef17f74 100644 --- a/src/html.c +++ b/src/html.c @@ -203,12 +203,20 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, cmark_html_render_sourcepos(node, html, options); cmark_strbuf_puts(html, " lang=\""); escape_html(html, node->as.code.info.data, first_tag); + if (first_tag < node->as.code.info.len && (options & CMARK_OPT_FULL_INFO_STRING)) { + cmark_strbuf_puts(html, "\" data-meta=\""); + escape_html(html, node->as.code.info.data + first_tag + 1, node->as.code.info.len - first_tag - 1); + } cmark_strbuf_puts(html, "\"><code>"); } else { cmark_strbuf_puts(html, "<pre"); cmark_html_render_sourcepos(node, html, options); cmark_strbuf_puts(html, "><code class=\"language-"); escape_html(html, node->as.code.info.data, first_tag); + if (first_tag < node->as.code.info.len && (options & CMARK_OPT_FULL_INFO_STRING)) { + cmark_strbuf_puts(html, "\" data-meta=\""); + escape_html(html, node->as.code.info.data + first_tag + 1, node->as.code.info.len - first_tag - 1); + } cmark_strbuf_puts(html, "\">"); } } diff --git a/src/main.c b/src/main.c index cedeca6a8..9c79598c7 100644 --- a/src/main.c +++ b/src/main.c @@ -31,23 +31,25 @@ typedef enum { void print_usage() { printf("Usage: cmark-gfm [FILE*]\n"); printf("Options:\n"); - printf(" --to, -t FORMAT Specify output format (html, xml, man, " + printf(" --to, -t FORMAT Specify output format (html, xml, man, " "commonmark, plaintext, latex)\n"); - printf(" --width WIDTH Specify wrap 
width (default 0 = nowrap)\n"); - printf(" --sourcepos Include source position attribute\n"); - printf(" --hardbreaks Treat newlines as hard line breaks\n"); - printf(" --nobreaks Render soft line breaks as spaces\n"); - printf(" --safe Suppress raw HTML and dangerous URLs\n"); - printf(" --smart Use smart punctuation\n"); - printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n"); + printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n"); + printf(" --sourcepos Include source position attribute\n"); + printf(" --hardbreaks Treat newlines as hard line breaks\n"); + printf(" --nobreaks Render soft line breaks as spaces\n"); + printf(" --safe Suppress raw HTML and dangerous URLs\n"); + printf(" --smart Use smart punctuation\n"); + printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n"); printf(" --github-pre-lang Use GitHub-style <pre lang> for code blocks\n"); - printf(" --footnotes Parse footnotes\n"); - printf(" --extension, -e EXTENSION_NAME Specify an extension name to use\n"); - printf(" --list-extensions List available extensions and quit\n"); - printf(" --strikethrough-double-tilde Only parse strikethrough (if enabled)\n"); - printf(" with two tildes\n"); + printf(" --footnotes Parse footnotes\n"); + printf(" --extension, -e EXTENSION_NAME Specify an extension name to use\n"); + printf(" --list-extensions List available extensions and quit\n"); + printf(" --strikethrough-double-tilde Only parse strikethrough (if enabled)\n"); + printf(" with two tildes\n"); printf(" --table-prefer-style-attributes Use style attributes to align table cells\n" " instead of align attributes.\n"); + printf(" --full-info-string Include remainder of code block info\n" + " string in a separate attribute.\n"); printf(" --help, -h Print usage information\n"); printf(" --version Print version\n"); } @@ -133,6 +135,8 @@ int main(int argc, char *argv[]) { } else if (strcmp(argv[i], "--list-extensions") == 0) { print_extensions(); goto success; 
+ } else if (strcmp(argv[i], "--full-info-string") == 0) { + options |= CMARK_OPT_FULL_INFO_STRING; } else if (strcmp(argv[i], "--table-prefer-style-attributes") == 0) { options |= CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES; } else if (strcmp(argv[i], "--strikethrough-double-tilde") == 0) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c3cac145c..9ddc7327a 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -89,6 +89,13 @@ IF (PYTHONINTERP_FOUND) "--extensions" "table strikethrough autolink tagfilter" ) + add_test(option_full_info_string + ${PYTHON_EXECUTABLE} + "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" + "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions-full-info-string.txt" + "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --full-info-string" + ) + add_test(regressiontest_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" diff --git a/test/extensions-full-info-string.txt b/test/extensions-full-info-string.txt new file mode 100644 index 0000000000000000000000000000000000000000..aeb4f445dafd0f28101dc1220033c885eb1662bb GIT binary patch literal 1527 zcmd^7!Ab)$6!e_0cx<87LpEFSAlvOiEqD?yA|fI*-F@9IWRse_E$zpNpX4u?v>pTx z#jdx8B!tJz%S<Mmb4E2OOo8)7s}!&G!f;2n))f#sVzQ;Bu_;X45@BkzHkPf?y4vUz zl6c5UNuEBqOorDy33I?yU7&WM`ZAsJn`izofvY%qJzo;7QXvY9lIk@UrfH0Jv9Xo- z<~T9mawmm{i;Dq^<Cw8t4`#pR%@}*Cy|$JBH-x&x5`y}avDty)39?x!3_#n?x2!Q` zs}OFD2_+V@Y-KUe3R5B!N;)@=l+;x#E98L?<v$pi2)H~GfphTgz3R@A!I=gj^Ukfz zA@U)}=<hZl@OXnH$I-4~U(smSvtQ8&`fshFQNZj^-EB(^*vzpEVckF89g!92R-O6P g&8%<m`SI@ef3O6~&@o0fCqB9*c;b(@>GE;E0E>@F$^ZZW literal 0 HcmV?d00001 From 4057e9ff6415f058800ec39deb0968b690369114 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Wed, 8 Aug 2018 12:55:42 +1000 Subject: [PATCH 121/218] 0.28.3.gfm.13 --- CMakeLists.txt | 2 +- changelog.txt | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5813243f1..3fc7a3d32 100755 --- 
a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 3) -set(PROJECT_VERSION_GFM 12) +set(PROJECT_VERSION_GFM 13) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 3d62a9bed..4c2ad7e10 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,14 @@ +[0.28.3.gfm.13] + + * Footnote rendering bugfix (Michael Camilleri, #90). + * Debian packaging (Joachim Nilsson, #97). + * Add CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE for redcarpet compatibility. + * Add CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES (FUJI Goro, #86, #87). + * Fix pathological nested list parsing (Phil Turnbull, #95). + * Expose more of the extension APIs (Minghao Liu, #96). + * Add python example which uses extensions (Greg Stein, #102). + * Add CMARK_OPT_FULL_INFO_STRING (Mike Kavouras, #103). + [0.28.3.gfm.12] * Various security and bug fixes. 
From 0accc9499ca76177b733f4a20e261689ceb8454f Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Fri, 10 Aug 2018 09:58:54 +1000 Subject: [PATCH 122/218] add plaintext render func for strikethru --- extensions/strikethrough.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/extensions/strikethrough.c b/extensions/strikethrough.c index 0d9418090..b13566932 100644 --- a/extensions/strikethrough.c +++ b/extensions/strikethrough.c @@ -133,6 +133,12 @@ static void html_render(cmark_syntax_extension *extension, } } +static void plaintext_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + renderer->out(renderer, node, "~", false, LITERAL); +} + cmark_syntax_extension *create_strikethrough_extension(void) { cmark_syntax_extension *ext = cmark_syntax_extension_new("strikethrough"); cmark_llist *special_chars = NULL; @@ -143,6 +149,7 @@ cmark_syntax_extension *create_strikethrough_extension(void) { cmark_syntax_extension_set_latex_render_func(ext, latex_render); cmark_syntax_extension_set_man_render_func(ext, man_render); cmark_syntax_extension_set_html_render_func(ext, html_render); + cmark_syntax_extension_set_plaintext_render_func(ext, plaintext_render); CMARK_NODE_STRIKETHROUGH = cmark_syntax_extension_add_node(1); cmark_syntax_extension_set_match_inline_func(ext, match); From 1b1f90db662af5d7c4b4bca8f0c1aa9b4c3b2d18 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Fri, 10 Aug 2018 10:03:51 +1000 Subject: [PATCH 123/218] 0.28.3.gfm.14 --- CMakeLists.txt | 2 +- changelog.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3fc7a3d32..8a151c985 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 3) -set(PROJECT_VERSION_GFM 13) +set(PROJECT_VERSION_GFM 14) 
set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 4c2ad7e10..e7a7bc01a 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,7 @@ +[0.28.3.gfm.14] + + * Added a plaintext renderer for strikethrough nodes. + [0.28.3.gfm.13] * Footnote rendering bugfix (Michael Camilleri, #90). From 17b339e6eb58aefce53847a1ad7b930895fe4743 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 19 Aug 2018 16:40:30 -0700 Subject: [PATCH 124/218] commonmark writer: escape tilde (~). (#106) This is a special character, both for code blocks and for the optional strikeout extension. Eventually we should have a way for the extensions to modify what characters get escaped. --- src/commonmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/commonmark.c b/src/commonmark.c index c0ba7ae5d..ab2692c0e 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -35,7 +35,7 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, c < 0x80 && escape != LITERAL && ((escape == NORMAL && (c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || - c == '>' || c == '\\' || c == '`' || c == '!' || + c == '>' || c == '\\' || c == '`' || c == '~' || c == '!' || (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') || (renderer->begin_content && (c == '-' || c == '+' || c == '=') && // begin_content doesn't get set to false til we've passed digits From 1470c309bd6a03274e300fba4d1409bf05d4a556 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 19 Aug 2018 16:42:41 -0700 Subject: [PATCH 125/218] table extension: cosmetic fix for uniformity of output. (#105) Previously we'd get </tr></tbody></table> and </thead></table> on one line, which isn't consistent with the general formatting of the tables. 
This changes the output to insert line breaks, and updates tests and spec. --- extensions/table.c | 9 ++- ...tensions-table-prefer-style-attributes.txt | 4 +- test/extensions.txt | 64 ++++++++++++++----- test/spec.txt | 27 ++++++-- 4 files changed, 77 insertions(+), 27 deletions(-) diff --git a/extensions/table.c b/extensions/table.c index add277166..e955b0d6d 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -578,10 +578,15 @@ static void html_render(cmark_syntax_extension *extension, cmark_strbuf_putc(html, '>'); table_state->need_closing_table_body = false; } else { - if (table_state->need_closing_table_body) + if (table_state->need_closing_table_body) { + cmark_html_render_cr(html); cmark_strbuf_puts(html, "</tbody>"); + cmark_html_render_cr(html); + } table_state->need_closing_table_body = false; - cmark_strbuf_puts(html, "</table>\n"); + cmark_html_render_cr(html); + cmark_strbuf_puts(html, "</table>"); + cmark_html_render_cr(html); } } else if (node->type == CMARK_NODE_TABLE_ROW) { if (entering) { diff --git a/test/extensions-table-prefer-style-attributes.txt b/test/extensions-table-prefer-style-attributes.txt index 09cb7299a..0379def1b 100644 --- a/test/extensions-table-prefer-style-attributes.txt +++ b/test/extensions-table-prefer-style-attributes.txt @@ -32,5 +32,7 @@ fff | ggg | hhh | iii | jjj <td style="text-align: center">hhh</td> <td>iii</td> <td style="text-align: right">jjj</td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` diff --git a/test/extensions.txt b/test/extensions.txt index 6c8baefa1..5a1c3d990 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -31,7 +31,9 @@ Here's a well-formed table, doing everything it should. <tr> <td>mno</td> <td>pqr</td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` We're going to mix up the table now; we'll demonstrate that inline formatting @@ -64,7 +66,9 @@ Hi! 
<tr> <td>But <em><strong>inline elements do</strong></em>.</td> <td>x</td> -</tr></tbody></table> +</tr> +</tbody> +</table> <p>Hi!</p> ```````````````````````````````` @@ -96,20 +100,23 @@ Here we demonstrate some edge cases about what is and isn't a table. <th>Just enough table</th> <th>to be considered table</th> </tr> -</thead></table> +</thead> +</table> <p>| ---- | --- |</p> <table> <thead> <tr> <th>x</th> </tr> -</thead></table> +</thead> +</table> <table> <thead> <tr> <th>xyz</th> </tr> -</thead></table> +</thead> +</table> ```````````````````````````````` A "simpler" table, GFM style: @@ -130,7 +137,9 @@ xyz | ghi <tr> <td>xyz</td> <td>ghi</td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` We are making the parser slighly more lax here. Here is a table with spaces at @@ -162,7 +171,9 @@ Hi! <tr> <td>But <em><strong>inline elements do</strong></em>.</td> <td>x</td> -</tr></tbody></table> +</tr> +</tbody> +</table> <p>Hi!</p> ```````````````````````````````` @@ -190,7 +201,9 @@ fff | ggg | hhh | iii | jjj <td align="center">hhh</td> <td>iii</td> <td align="right">jjj</td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` ### Table cell count mismatches @@ -240,7 +253,9 @@ than the header are truncated. <td>1</td> <td>2</td> <td>3</td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` ### Embedded pipes @@ -278,7 +293,9 @@ Tables with embedded pipes could be tricky. <tr> <td>don't <strong>_reparse_</strong></td> <td></td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` ### Oddly-formatted markers @@ -294,7 +311,8 @@ This shouldn't assert. <tr> <th>a</th> </tr> -</thead></table> +</thead> +</table> ```````````````````````````````` ### Escaping @@ -345,7 +363,9 @@ This shouldn't assert. 
<tr> <td>\a</td> <td><code>\a</code></td> -</tr></tbody></table> +</tr> +</tbody> +</table> <p>\ <code>\\</code></p> <p>\\ <code>\\\\</code></p> <p>_ <code>\_</code></p> @@ -373,7 +393,9 @@ This shouldn't assert. </tr> <tr> <td>ok <br> sure</td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` ### Reference-style links @@ -397,7 +419,9 @@ Here's a link to [Freedom Planet 2][]. <tbody> <tr> <td>Here's a link to <a href="http://www.freedomplanet2.com/">Freedom Planet 2</a> in a table row.</td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` ### Sequential cells @@ -420,7 +444,9 @@ Here's a link to [Freedom Planet 2][]. <td>d</td> <td></td> <td>e</td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` ### Interaction with emphasis @@ -441,7 +467,9 @@ Here's a link to [Freedom Planet 2][]. <tr> <td><em><strong>(a)</strong></em></td> <td></td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` @@ -670,5 +698,7 @@ Autolink and tables. <tr> <td><a href="https://github.com">https://github.com</a> <a href="http://www.github.com">www.github.com</a></td> <td><a href="http://pokemon.com">http://pokemon.com</a></td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` diff --git a/test/spec.txt b/test/spec.txt index ded685d1a..01df54bee 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -3257,7 +3257,9 @@ right, or center alignment respectively. 
<tr> <td>baz</td> <td>bim</td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` Cells in one column don't need to match length, though it's easier to read if @@ -3279,7 +3281,9 @@ bar | baz <tr> <td align="center">bar</td> <td align="right">baz</td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` Include a pipe in a cell's content by escaping it, including inside other @@ -3303,7 +3307,9 @@ inline spans: </tr> <tr> <td>b <strong>|</strong> im</td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` The table is broken at the first empty line, or beginning of another @@ -3326,7 +3332,9 @@ block-level structure: <tr> <td>bar</td> <td>baz</td> -</tr></tbody></table> +</tr> +</tbody> +</table> <blockquote> <p>bar</p> </blockquote> @@ -3355,7 +3363,9 @@ bar <tr> <td>bar</td> <td></td> -</tr></tbody></table> +</tr> +</tbody> +</table> <p>bar</p> ```````````````````````````````` @@ -3397,7 +3407,9 @@ cells are inserted. If there are greater, the excess is ignored: <tr> <td>bar</td> <td>baz</td> -</tr></tbody></table> +</tr> +</tbody> +</table> ```````````````````````````````` If there are no rows in the body, no `<tbody>` is generated in HTML output: @@ -3412,7 +3424,8 @@ If there are no rows in the body, no `<tbody>` is generated in HTML output: <th>abc</th> <th>def</th> </tr> -</thead></table> +</thead> +</table> ```````````````````````````````` </div> From 9ceb8bd71ea167cf3631649986ba855491d9cb8b Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 19 Aug 2018 16:42:56 -0700 Subject: [PATCH 126/218] commonmark writer/strikethrough: use two tildes for delimiters. (#104) Previously only one was used. 
--- extensions/strikethrough.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/extensions/strikethrough.c b/extensions/strikethrough.c index b13566932..87a2ac7c3 100644 --- a/extensions/strikethrough.c +++ b/extensions/strikethrough.c @@ -51,7 +51,6 @@ static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser, cmark_node_set_syntax_extension(strikethrough, self); - cmark_node_set_string_content(strikethrough, "~"); tmp = cmark_node_next(opener->inl_text); while (tmp) { @@ -94,7 +93,7 @@ static int can_contain(cmark_syntax_extension *extension, cmark_node *node, static void commonmark_render(cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { - renderer->out(renderer, node, cmark_node_get_string_content(node), false, LITERAL); + renderer->out(renderer, node, "~~", false, LITERAL); } static void latex_render(cmark_syntax_extension *extension, From 322140091c5d67ffdee545e098a64bb9ac23660a Mon Sep 17 00:00:00 2001 From: Ashe Connor <kivikakk@github.com> Date: Tue, 21 Aug 2018 11:58:41 +1000 Subject: [PATCH 127/218] Normalise header and define names (#109) * cmark -> cmark-gfm in most places * normalize filenames, exports * ensure some more #defines don't conflict --- CMakeLists.txt | 6 +- Makefile | 6 +- Makefile.nmake | 2 +- api_test/CMakeLists.txt | 4 +- api_test/cplusplus.cpp | 2 +- api_test/main.c | 8 +- extensions/CMakeLists.txt | 24 +-- extensions/autolink.h | 6 +- extensions/cmark-gfm-core-extensions.h | 25 +++ extensions/core-extensions.c | 4 +- extensions/core-extensions.h | 25 --- extensions/ext_scanners.h | 2 +- extensions/strikethrough.h | 6 +- extensions/table.c | 6 +- extensions/table.h | 6 +- extensions/tagfilter.h | 6 +- man/make_man_page.py | 4 +- man/man3/cmark-gfm.3 | 2 +- src/CMakeLists.txt | 20 +- src/arena.c | 4 +- src/blocks.c | 2 +- src/buffer.h | 46 ++--- src/chunk.h | 2 +- ...ension_api.h => cmark-gfm-extension_api.h} | 134 ++++++------- 
src/{cmark.h => cmark-gfm.h} | 176 +++++++++--------- src/cmark-gfm_version.h.in | 7 + src/cmark.c | 6 +- src/cmark_ctype.h | 12 +- src/cmark_version.h.in | 8 - src/commonmark.c | 2 +- src/footnotes.c | 2 +- src/houdini.h | 12 +- src/html.c | 2 +- src/inlines.c | 2 +- src/inlines.h | 2 +- src/iterator.c | 2 +- src/iterator.h | 2 +- src/latex.c | 2 +- src/libcmark-gfm.pc.in | 2 +- src/linked_list.c | 2 +- src/main.c | 12 +- src/man.c | 2 +- src/node.h | 8 +- src/plugin.h | 4 +- src/references.c | 2 +- src/registry.c | 2 +- src/registry.h | 8 +- src/render.c | 2 +- src/scanners.h | 2 +- src/syntax_extension.c | 2 +- src/syntax_extension.h | 4 +- src/utf8.h | 12 +- src/xml.c | 2 +- test/cmark-fuzz.c | 2 +- test/cmark.py | 6 +- wrappers/wrapper_ext.py | 12 +- 56 files changed, 337 insertions(+), 338 deletions(-) create mode 100644 extensions/cmark-gfm-core-extensions.h delete mode 100644 extensions/core-extensions.h rename src/{cmark_extension_api.h => cmark-gfm-extension_api.h} (94%) rename src/{cmark.h => cmark-gfm.h} (83%) create mode 100644 src/cmark-gfm_version.h.in delete mode 100644 src/cmark_version.h.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a151c985..3224b29a9 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ if(CMAKE_MAJOR_VERSION GREATER 2) cmake_policy(SET CMP0048 OLD) endif() -project(cmark) +project(cmark-gfm) include("FindAsan.cmake") @@ -14,7 +14,7 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") message(FATAL_ERROR "Do not build in-source.\nPlease remove CMakeCache.txt and the CMakeFiles/ directory.\nThen: mkdir build ; cd build ; cmake .. 
; make") endif() -set(PROJECT_NAME "cmark") +set(PROJECT_NAME "cmark-gfm") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) @@ -22,7 +22,7 @@ set(PROJECT_VERSION_PATCH 3) set(PROJECT_VERSION_GFM 14) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) -option(CMARK_TESTS "Build cmark tests and enable testing" ON) +option(CMARK_TESTS "Build cmark-gfm tests and enable testing" ON) option(CMARK_STATIC "Build static libcmark-gfm library" ON) option(CMARK_SHARED "Build shared libcmark-gfm library" ON) option(CMARK_LIB_FUZZER "Build libFuzzer fuzzing harness" OFF) diff --git a/Makefile b/Makefile index 51a2c1967..85cfd9fb7 100644 --- a/Makefile +++ b/Makefile @@ -101,7 +101,7 @@ mingw: cmake .. -DCMAKE_TOOLCHAIN_FILE=../toolchain-mingw32.cmake -DCMAKE_INSTALL_PREFIX=$(MINGW_INSTALLDIR) ;\ $(MAKE) && $(MAKE) install -man/man3/cmark-gfm.3: src/cmark.h | $(CMARK) +man/man3/cmark-gfm.3: src/cmark-gfm.h | $(CMARK) python man/make_man_page.py $< > $@ \ archive: @@ -222,5 +222,5 @@ distclean: clean -rm -rf $(BENCHFILE) $(ALLTESTS) progit docker: - docker build -t cmark $(CURDIR)/tools - docker run --privileged -t -i -v $(CURDIR):/src/cmark -w /src/cmark cmark /bin/bash + docker build -t cmark-gfm $(CURDIR)/tools + docker run --privileged -t -i -v $(CURDIR):/src/cmark-gfm -w /src/cmark-gfm cmark-gfm /bin/bash diff --git a/Makefile.nmake b/Makefile.nmake index cb35b3df7..80ba4f16d 100644 --- a/Makefile.nmake +++ b/Makefile.nmake @@ -3,7 +3,7 @@ DATADIR=data BUILDDIR=build INSTALLDIR=windows SPEC=test/spec.txt -PROG=$(BUILDDIR)\src\cmark.exe +PROG=$(BUILDDIR)\src\cmark-gfm.exe GENERATOR=NMake Makefiles all: $(BUILDDIR)/CMakeFiles diff --git a/api_test/CMakeLists.txt b/api_test/CMakeLists.txt index da1149849..c5e660996 100644 --- a/api_test/CMakeLists.txt +++ b/api_test/CMakeLists.txt @@ -10,9 +10,9 @@ include_directories( ${PROJECT_BINARY_DIR}/extensions ) if(CMARK_SHARED) - 
target_link_libraries(api_test libcmark-gfmextensions libcmark-gfm) + target_link_libraries(api_test libcmark-gfm-extensions libcmark-gfm) else() - target_link_libraries(api_test libcmark-gfmextensions_static libcmark-gfm_static) + target_link_libraries(api_test libcmark-gfm-extensions_static libcmark-gfm_static) endif() # Compiler flags diff --git a/api_test/cplusplus.cpp b/api_test/cplusplus.cpp index 5e8f722a3..480c75708 100644 --- a/api_test/cplusplus.cpp +++ b/api_test/cplusplus.cpp @@ -1,6 +1,6 @@ #include <cstdlib> -#include "cmark.h" +#include "cmark-gfm.h" #include "cplusplus.h" #include "harness.h" diff --git a/api_test/main.c b/api_test/main.c index d27e7cfad..4151000df 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -3,9 +3,9 @@ #include <string.h> #define CMARK_NO_SHORT_NAMES -#include "cmark.h" +#include "cmark-gfm.h" #include "node.h" -#include "../extensions/core-extensions.h" +#include "../extensions/cmark-gfm-core-extensions.h" #include "harness.h" #include "cplusplus.h" @@ -36,8 +36,8 @@ static void test_incomplete_char(test_batch_runner *runner, const char *utf8, static void test_continuation_byte(test_batch_runner *runner, const char *utf8); static void version(test_batch_runner *runner) { - INT_EQ(runner, cmark_version(), CMARK_VERSION, "cmark_version"); - STR_EQ(runner, cmark_version_string(), CMARK_VERSION_STRING, + INT_EQ(runner, cmark_version(), CMARK_GFM_VERSION, "cmark_version"); + STR_EQ(runner, cmark_version_string(), CMARK_GFM_VERSION_STRING, "cmark_version_string"); } diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 4c2a57a5e..87d70acc4 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 2.8) -set(LIBRARY "libcmark-gfmextensions") -set(STATICLIBRARY "libcmark-gfmextensions_static") +set(LIBRARY "libcmark-gfm-extensions") +set(STATICLIBRARY "libcmark-gfm-extensions_static") set(LIBRARY_SOURCES core-extensions.c table.c @@ -29,7 +29,7 @@ if 
(CMARK_SHARED) add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) set_target_properties(${LIBRARY} PROPERTIES - OUTPUT_NAME "cmark-gfmextensions" + OUTPUT_NAME "cmark-gfm-extensions" SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} VERSION ${PROJECT_VERSION}) @@ -37,10 +37,10 @@ if (CMARK_SHARED) APPEND PROPERTY MACOSX_RPATH true) # Avoid name clash between PROGRAM and LIBRARY pdb files. - set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark-gfmextensions_dll) + set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark-gfm-extensions_dll) generate_export_header(${LIBRARY} - BASE_NAME cmarkextensions) + BASE_NAME cmark-gfm-extensions) list(APPEND CMARK_INSTALL ${LIBRARY}) target_link_libraries(${LIBRARY} libcmark-gfm) @@ -56,17 +56,17 @@ if (CMARK_STATIC) if (MSVC) set_target_properties(${STATICLIBRARY} PROPERTIES - OUTPUT_NAME "cmark-gfmextensions_static" + OUTPUT_NAME "cmark-gfm-extensions_static" VERSION ${PROJECT_VERSION}) else() set_target_properties(${STATICLIBRARY} PROPERTIES - OUTPUT_NAME "cmark-gfmextensions" + OUTPUT_NAME "cmark-gfm-extensions" VERSION ${PROJECT_VERSION}) endif(MSVC) if (NOT CMARK_SHARED) generate_export_header(${STATICLIBRARY} - BASE_NAME cmarkextensions) + BASE_NAME cmark-gfm-extensions) endif() list(APPEND CMARK_INSTALL ${STATICLIBRARY}) @@ -76,7 +76,7 @@ set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON) include (InstallRequiredSystemLibraries) install(TARGETS ${CMARK_INSTALL} - EXPORT cmark-gfmextensions + EXPORT cmark-gfm-extensions RUNTIME DESTINATION bin LIBRARY DESTINATION lib${LIB_SUFFIX} ARCHIVE DESTINATION lib${LIB_SUFFIX} @@ -84,12 +84,12 @@ install(TARGETS ${CMARK_INSTALL} if (CMARK_SHARED OR CMARK_STATIC) install(FILES - core-extensions.h - ${CMAKE_CURRENT_BINARY_DIR}/cmarkextensions_export.h + cmark-gfm-core-extensions.h + ${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm-extensions_export.h DESTINATION include ) - install(EXPORT cmark-gfmextensions DESTINATION 
lib${LIB_SUFFIX}/cmake-gfmextensions) + install(EXPORT cmark-gfm-extensions DESTINATION lib${LIB_SUFFIX}/cmake-gfm-extensions) endif() # Feature tests diff --git a/extensions/autolink.h b/extensions/autolink.h index ee2ea2ffc..4e179379d 100644 --- a/extensions/autolink.h +++ b/extensions/autolink.h @@ -1,7 +1,7 @@ -#ifndef AUTOLINK_H -#define AUTOLINK_H +#ifndef CMARK_GFM_AUTOLINK_H +#define CMARK_GFM_AUTOLINK_H -#include "core-extensions.h" +#include "cmark-gfm-core-extensions.h" cmark_syntax_extension *create_autolink_extension(void); diff --git a/extensions/cmark-gfm-core-extensions.h b/extensions/cmark-gfm-core-extensions.h new file mode 100644 index 000000000..075905e82 --- /dev/null +++ b/extensions/cmark-gfm-core-extensions.h @@ -0,0 +1,25 @@ +#ifndef CMARK_GFM_CORE_EXTENSIONS_H +#define CMARK_GFM_CORE_EXTENSIONS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "cmark-gfm-extension_api.h" +#include "cmark-gfm-extensions_export.h" +#include <stdint.h> + +CMARK_GFM_EXTENSIONS_EXPORT +void cmark_gfm_core_extensions_ensure_registered(void); + +CMARK_GFM_EXTENSIONS_EXPORT +uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node); + +CMARK_GFM_EXTENSIONS_EXPORT +uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 4659ab177..e436a5d15 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -1,4 +1,4 @@ -#include "core-extensions.h" +#include "cmark-gfm-core-extensions.h" #include "autolink.h" #include "strikethrough.h" #include "table.h" @@ -15,7 +15,7 @@ static int core_extensions_registration(cmark_plugin *plugin) { return 1; } -void core_extensions_ensure_registered(void) { +void cmark_gfm_core_extensions_ensure_registered(void) { static int registered = 0; if (!registered) { diff --git a/extensions/core-extensions.h b/extensions/core-extensions.h deleted file mode 100644 index 
a9fd0cb28..000000000 --- a/extensions/core-extensions.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef CORE_EXTENSIONS_H -#define CORE_EXTENSIONS_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "cmark_extension_api.h" -#include "cmarkextensions_export.h" -#include <stdint.h> - -CMARKEXTENSIONS_EXPORT -void core_extensions_ensure_registered(void); - -CMARKEXTENSIONS_EXPORT -uint16_t cmarkextensions_get_table_columns(cmark_node *node); - -CMARKEXTENSIONS_EXPORT -uint8_t *cmarkextensions_get_table_alignments(cmark_node *node); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/extensions/ext_scanners.h b/extensions/ext_scanners.h index 53584d891..3bfe586c1 100644 --- a/extensions/ext_scanners.h +++ b/extensions/ext_scanners.h @@ -1,5 +1,5 @@ #include "chunk.h" -#include "cmark.h" +#include "cmark-gfm.h" #ifdef __cplusplus extern "C" { diff --git a/extensions/strikethrough.h b/extensions/strikethrough.h index 1c43f5792..a52a2b4ac 100644 --- a/extensions/strikethrough.h +++ b/extensions/strikethrough.h @@ -1,7 +1,7 @@ -#ifndef STRIKETHROUGH_H -#define STRIKETHROUGH_H +#ifndef CMARK_GFM_STRIKETHROUGH_H +#define CMARK_GFM_STRIKETHROUGH_H -#include "core-extensions.h" +#include "cmark-gfm-core-extensions.h" extern cmark_node_type CMARK_NODE_STRIKETHROUGH; cmark_syntax_extension *create_strikethrough_extension(void); diff --git a/extensions/table.c b/extensions/table.c index e955b0d6d..1d675e04d 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -1,4 +1,4 @@ -#include <cmark_extension_api.h> +#include <cmark-gfm-extension_api.h> #include <html.h> #include <inlines.h> #include <parser.h> @@ -685,14 +685,14 @@ cmark_syntax_extension *create_table_extension(void) { return self; } -uint16_t cmarkextensions_get_table_columns(cmark_node *node) { +uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node) { if (node->type != CMARK_NODE_TABLE) return 0; return ((node_table *)node->as.opaque)->n_columns; } -uint8_t 
*cmarkextensions_get_table_alignments(cmark_node *node) { +uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node) { if (node->type != CMARK_NODE_TABLE) return 0; diff --git a/extensions/table.h b/extensions/table.h index 85bb1a4c7..f6a0634f0 100644 --- a/extensions/table.h +++ b/extensions/table.h @@ -1,7 +1,7 @@ -#ifndef TABLE_H -#define TABLE_H +#ifndef CMARK_GFM_TABLE_H +#define CMARK_GFM_TABLE_H -#include "core-extensions.h" +#include "cmark-gfm-core-extensions.h" extern cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, diff --git a/extensions/tagfilter.h b/extensions/tagfilter.h index 4068b5071..9a5f388d4 100644 --- a/extensions/tagfilter.h +++ b/extensions/tagfilter.h @@ -1,7 +1,7 @@ -#ifndef TAGFILTER_H -#define TAGFILTER_H +#ifndef CMARK_GFM_TAGFILTER_H +#define CMARK_GFM_TAGFILTER_H -#include "core-extensions.h" +#include "cmark-gfm-core-extensions.h" cmark_syntax_extension *create_tagfilter_extension(void); diff --git a/man/make_man_page.py b/man/make_man_page.py index a37818be1..d95620252 100644 --- a/man/make_man_page.py +++ b/man/make_man_page.py @@ -46,9 +46,9 @@ def md2man(text): comment_start_re = re.compile('^\/\*\* ?') comment_delim_re = re.compile('^[/ ]\** ?') comment_end_re = re.compile('^ \**\/') -function_re = re.compile('^ *(?:CMARK_EXPORT\s+)?(?P<type>(?:const\s+)?\w+(?:\s*[*])?)\s*(?P<name>\w+)\s*\((?P<args>[^)]*)\)') +function_re = re.compile('^ *(?:CMARK_GFM_EXPORT\s+)?(?P<type>(?:const\s+)?\w+(?:\s*[*])?)\s*(?P<name>\w+)\s*\((?P<args>[^)]*)\)') blank_re = re.compile('^\s*$') -macro_re = re.compile('CMARK_EXPORT *') +macro_re = re.compile('CMARK_GFM_EXPORT *') typedef_start_re = re.compile('typedef.*{$') typedef_end_re = re.compile('}') single_quote_re = re.compile("(?<!\w)'([^']+)'(?!\w)") diff --git a/man/man3/cmark-gfm.3 b/man/man3/cmark-gfm.3 index 797d16734..15294fd9f 100644 --- a/man/man3/cmark-gfm.3 +++ b/man/man3/cmark-gfm.3 @@ -1,4 +1,4 @@ -.TH cmark-gfm 3 "August 08, 2018" "LOCAL" "Library Functions Manual" 
+.TH cmark-gfm 3 "August 21, 2018" "LOCAL" "Library Functions Manual" .SH NAME .PP diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3ebb3a684..0fc398551 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,8 +7,8 @@ include(GNUInstallDirs) set(LIBRARY "libcmark-gfm") set(STATICLIBRARY "libcmark-gfm_static") set(HEADERS - cmark.h - cmark_extension_api.h + cmark-gfm.h + cmark-gfm-extension_api.h parser.h buffer.h node.h @@ -69,8 +69,8 @@ include_directories( ${PROJECT_BINARY_DIR}/extensions ) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmark_version.h.in - ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmark-gfm_version.h.in + ${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm_version.h) include (GenerateExportHeader) @@ -78,9 +78,9 @@ add_executable(${PROGRAM} ${PROGRAM_SOURCES}) add_compiler_export_flags() if(CMARK_SHARED) - target_link_libraries(${PROGRAM} libcmark-gfmextensions libcmark-gfm) + target_link_libraries(${PROGRAM} libcmark-gfm-extensions libcmark-gfm) elseif(CMARK_STATIC) - target_link_libraries(${PROGRAM} libcmark-gfmextensions_static libcmark-gfm_static) + target_link_libraries(${PROGRAM} libcmark-gfm-extensions_static libcmark-gfm_static) endif() # Disable the PUBLIC declarations when compiling the executable: @@ -171,10 +171,10 @@ if(CMARK_SHARED OR CMARK_STATIC) DESTINATION ${libdir}/pkgconfig) install(FILES - cmark.h - cmark_extension_api.h - ${CMAKE_CURRENT_BINARY_DIR}/cmark_export.h - ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h + cmark-gfm.h + cmark-gfm-extension_api.h + ${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm_export.h + ${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm_version.h DESTINATION include ) diff --git a/src/arena.c b/src/arena.c index 801fb3c1b..83a15255f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,8 +1,8 @@ #include <stdlib.h> #include <string.h> #include <stdint.h> -#include "cmark.h" -#include "cmark_extension_api.h" +#include "cmark-gfm.h" +#include "cmark-gfm-extension_api.h" static 
struct arena_chunk { size_t sz, used; diff --git a/src/blocks.c b/src/blocks.c index 377cc0e64..97661f3e6 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -13,7 +13,7 @@ #include "syntax_extension.h" #include "config.h" #include "parser.h" -#include "cmark.h" +#include "cmark-gfm.h" #include "node.h" #include "references.h" #include "utf8.h" diff --git a/src/buffer.h b/src/buffer.h index 35af9c390..b85bb4406 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -7,7 +7,7 @@ #include <limits.h> #include <stdint.h> #include "config.h" -#include "cmark.h" +#include "cmark-gfm.h" #ifdef __cplusplus extern "C" { @@ -30,32 +30,32 @@ extern unsigned char cmark_strbuf__initbuf[]; * For the cases where CMARK_BUF_INIT cannot be used to do static * initialization. */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, bufsize_t initial_size); /** * Grow the buffer to hold at least `target_size` bytes. */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_free(cmark_strbuf *buf); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b); -CMARK_EXPORT +CMARK_GFM_EXPORT bufsize_t cmark_strbuf_len(const cmark_strbuf *buf); -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b); -CMARK_EXPORT +CMARK_GFM_EXPORT unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf); @@ -65,48 +65,48 @@ static CMARK_INLINE const char *cmark_strbuf_cstr(const cmark_strbuf *buf) { #define cmark_strbuf_at(buf, n) ((buf)->ptr[n]) -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_sets(cmark_strbuf *buf, const char *string); -CMARK_EXPORT +CMARK_GFM_EXPORT void 
cmark_strbuf_putc(cmark_strbuf *buf, int c); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_puts(cmark_strbuf *buf, const char *string); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_clear(cmark_strbuf *buf); -CMARK_EXPORT +CMARK_GFM_EXPORT bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos); -CMARK_EXPORT +CMARK_GFM_EXPORT bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_rtrim(cmark_strbuf *buf); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_trim(cmark_strbuf *buf); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_strbuf_unescape(cmark_strbuf *s); #ifdef __cplusplus diff --git a/src/chunk.h b/src/chunk.h index c6a68687c..dfed13f19 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -4,7 +4,7 @@ #include <string.h> #include <stdlib.h> #include <assert.h> -#include "cmark.h" +#include "cmark-gfm.h" #include "buffer.h" #include "memory.h" #include "cmark_ctype.h" diff --git a/src/cmark_extension_api.h b/src/cmark-gfm-extension_api.h similarity index 94% rename from src/cmark_extension_api.h rename to src/cmark-gfm-extension_api.h index 57ff229b4..853948c43 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark-gfm-extension_api.h @@ -1,11 +1,11 @@ -#ifndef CMARK_CMARK_EXTENSION_API_H -#define CMARK_CMARK_EXTENSION_API_H +#ifndef CMARK_GFM_EXTENSION_API_H +#define CMARK_GFM_EXTENSION_API_H #ifdef __cplusplus extern "C" { #endif -#include "cmark.h" +#include "cmark-gfm.h" struct cmark_renderer; struct cmark_html_renderer; @@ -170,7 +170,7 @@ typedef int 
(*cmark_plugin_init_func)(cmark_plugin *plugin); * This takes ownership of 'extension', one should not call * 'cmark_syntax_extension_free' on a registered extension. */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_plugin_register_syntax_extension(cmark_plugin *plugin, cmark_syntax_extension *extension); @@ -180,7 +180,7 @@ int cmark_plugin_register_syntax_extension(cmark_plugin *plugin, * It can then be attached to a cmark_parser * with the cmark_parser_attach_syntax_extension method. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_syntax_extension *cmark_find_syntax_extension(const char *name); /** Should create and add a new open block to 'parent_container' if @@ -260,143 +260,143 @@ typedef void (*cmark_opaque_free_func) (cmark_syntax_extension *extension, /** Free a cmark_syntax_extension. */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_free (cmark_mem *mem, cmark_syntax_extension *extension); /** Return a newly-constructed cmark_syntax_extension, named 'name'. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_syntax_extension *cmark_syntax_extension_new (const char *name); -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_node_type cmark_syntax_extension_add_node(int is_inline); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension, int emphasis); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension, cmark_open_block_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension, cmark_match_block_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension, cmark_match_inline_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT 
+CMARK_GFM_EXPORT void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension, cmark_inline_from_delim_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension, cmark_llist *special_chars); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_get_type_string_func(cmark_syntax_extension *extension, cmark_get_type_string_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_can_contain_func(cmark_syntax_extension *extension, cmark_can_contain_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_contains_inlines_func(cmark_syntax_extension *extension, cmark_contains_inlines_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_plaintext_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_html_render_func(cmark_syntax_extension *extension, 
cmark_html_render_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension, cmark_html_filter_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_commonmark_escape_func(cmark_syntax_extension *extension, cmark_commonmark_escape_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, void *priv, cmark_free_func free_func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension, cmark_postprocess_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension, cmark_opaque_free_func func); /** See the documentation for 'cmark_syntax_extension' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser, cmark_ispunct_func func); /** Return the index of the line currently being parsed, starting with 1. */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_parser_get_line_number(cmark_parser *parser); /** Return the offset in bytes in the line being processed. @@ -407,7 +407,7 @@ int cmark_parser_get_line_number(cmark_parser *parser); * * Here, offset will first be 0, then 5 (the index of the 'f' character). 
*/ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_parser_get_offset(cmark_parser *parser); /** @@ -441,7 +441,7 @@ int cmark_parser_get_offset(cmark_parser *parser); * cmark_parser_has_partially_consumed_tab() will now return * 'true'. */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_parser_get_column(cmark_parser *parser); /** Return the absolute index in bytes of the first nonspace @@ -456,7 +456,7 @@ int cmark_parser_get_column(cmark_parser *parser); * 0 offset (16) first_nonspace (28) * ``` */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_parser_get_first_nonspace(cmark_parser *parser); /** Return the absolute index of the first nonspace column coming after 'offset' @@ -466,7 +466,7 @@ int cmark_parser_get_first_nonspace(cmark_parser *parser); * See the documentation for cmark_parser_get_first_nonspace() and * cmark_parser_get_column() for more information. */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_parser_get_first_nonspace_column(cmark_parser *parser); /** Return the difference between the values returned by @@ -476,7 +476,7 @@ int cmark_parser_get_first_nonspace_column(cmark_parser *parser); * This is not a byte offset, as it can count one tab as multiple * characters. */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_parser_get_indent(cmark_parser *parser); /** Return 'true' if the line currently being processed has been entirely @@ -507,7 +507,7 @@ int cmark_parser_get_indent(cmark_parser *parser); * * At this point, this function will now return 'true'. */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_parser_is_blank(cmark_parser *parser); /** Return 'true' if the value returned by cmark_parser_get_offset() @@ -516,13 +516,13 @@ int cmark_parser_is_blank(cmark_parser *parser); * See the documentation for cmark_parser_get_column() for more * information. 
*/ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_parser_has_partially_consumed_tab(cmark_parser *parser); /** Return the length in bytes of the previously processed line, excluding potential * newline (\n) and carriage return (\r) trailing characters. */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_parser_get_last_line_length(cmark_parser *parser); /** Add a child to 'parent' during the parsing process. @@ -531,7 +531,7 @@ int cmark_parser_get_last_line_length(cmark_parser *parser); * this function will back up till it hits a node that can, closing * blocks as appropriate. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_node*cmark_parser_add_child(cmark_parser *parser, cmark_node *parent, cmark_node_type block_type, @@ -542,14 +542,14 @@ cmark_node*cmark_parser_add_child(cmark_parser *parser, * See the documentation of cmark_parser_get_offset() and * cmark_parser_get_column() for more information. */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_parser_advance_offset(cmark_parser *parser, const char *input, int count, int columns); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len); /** Attach the syntax 'extension' to the 'parser', to provide extra syntax @@ -559,33 +559,33 @@ void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_ * Returns 'true' if the 'extension' was successfully attached, * 'false' otherwise. */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_parser_attach_syntax_extension(cmark_parser *parser, cmark_syntax_extension *extension); /** Change the type of 'node'. * * Return 0 if the type could be changed, 1 otherwise. */ -CMARK_EXPORT int cmark_node_set_type(cmark_node *node, cmark_node_type type); +CMARK_GFM_EXPORT int cmark_node_set_type(cmark_node *node, cmark_node_type type); /** Return the string content for all types of 'node'. * The pointer stays valid as long as 'node' isn't freed. 
*/ -CMARK_EXPORT const char *cmark_node_get_string_content(cmark_node *node); +CMARK_GFM_EXPORT const char *cmark_node_get_string_content(cmark_node *node); /** Set the string 'content' for all types of 'node'. * Copies 'content'. */ -CMARK_EXPORT int cmark_node_set_string_content(cmark_node *node, const char *content); +CMARK_GFM_EXPORT int cmark_node_set_string_content(cmark_node *node, const char *content); /** Get the syntax extension responsible for the creation of 'node'. * Return NULL if 'node' was created because it matched standard syntax rules. */ -CMARK_EXPORT cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node); +CMARK_GFM_EXPORT cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node); /** Set the syntax extension responsible for creating 'node'. */ -CMARK_EXPORT int cmark_node_set_syntax_extension(cmark_node *node, +CMARK_GFM_EXPORT int cmark_node_set_syntax_extension(cmark_node *node, cmark_syntax_extension *extension); /** @@ -600,63 +600,63 @@ CMARK_EXPORT int cmark_node_set_syntax_extension(cmark_node *node, typedef int (*cmark_inline_predicate)(int c); /** Advance the current inline parsing offset */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_inline_parser_advance_offset(cmark_inline_parser *parser); /** Get the current inline parsing offset */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_inline_parser_get_offset(cmark_inline_parser *parser); /** Set the offset in bytes in the chunk being processed by the given inline parser. */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset); /** Gets the cmark_chunk being operated on by the given inline parser. * Use cmark_inline_parser_get_offset to get our current position in the chunk. 
*/ -CMARK_EXPORT +CMARK_GFM_EXPORT struct cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser); /** Returns 1 if the inline parser is currently in a bracket; pass 1 for 'image' * if you want to know about an image-type bracket, 0 for link-type. */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image); /** Remove the last n characters from the last child of the given node. * This only works where all n characters are in the single last child, and the last * child is CMARK_NODE_TEXT. */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_node_unput(cmark_node *node, int n); /** Get the character located at the current inline parsing offset */ -CMARK_EXPORT +CMARK_GFM_EXPORT unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser); /** Get the character located 'pos' bytes in the current line. */ -CMARK_EXPORT +CMARK_GFM_EXPORT unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, int pos); /** Whether the inline parser has reached the end of the current line */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_inline_parser_is_eof(cmark_inline_parser *parser); /** Get the characters located after the current inline parsing offset * while 'pred' matches. Free after usage. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred); /** Push a delimiter on the delimiter stack. 
* See <<http://spec.commonmark.org/0.24/#phase-2-inline-structure> for * more information on the parameters */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser, unsigned char c, int can_open, @@ -665,16 +665,16 @@ void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser, /** Remove 'delim' from the delimiter stack */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim); -CMARK_EXPORT +CMARK_GFM_EXPORT delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser); -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_inline_parser_get_line(cmark_inline_parser *parser); -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_inline_parser_get_column(cmark_inline_parser *parser); /** Convenience function to scan a given delimiter. @@ -691,7 +691,7 @@ int cmark_inline_parser_get_column(cmark_inline_parser *parser); * Returns the number of delimiters encountered, in the limit * of 'max_delims', and advances the inline parsing offset. 
*/ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, int max_delims, unsigned char c, @@ -700,16 +700,16 @@ int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, int *punct_before, int *punct_after); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_manage_extensions_special_characters(cmark_parser *parser, int add); -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_arena_push(void); -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_arena_pop(void); #ifdef __cplusplus diff --git a/src/cmark.h b/src/cmark-gfm.h similarity index 83% rename from src/cmark.h rename to src/cmark-gfm.h index ef9e4a150..a26f640ef 100644 --- a/src/cmark.h +++ b/src/cmark-gfm.h @@ -1,10 +1,10 @@ -#ifndef CMARK_CMARK_H -#define CMARK_CMARK_H +#ifndef CMARK_GFM_H +#define CMARK_GFM_H #include <stdio.h> #include <stdint.h> -#include "cmark_export.h" -#include "cmark_version.h" +#include "cmark-gfm_export.h" +#include "cmark-gfm_version.h" #ifdef __cplusplus extern "C" { @@ -25,7 +25,7 @@ extern "C" { * UTF-8-encoded string. It is the caller's responsibility * to free the returned buffer. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_markdown_to_html(const char *text, size_t len, int options); /** ## Node Structure @@ -109,19 +109,19 @@ typedef struct cmark_mem { /** The default memory allocator; uses the system's calloc, * realloc and free. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_mem *cmark_get_default_mem_allocator(); /** An arena allocator; uses system calloc to allocate large * slabs of memory. Memory in these slabs is not reused at all. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_mem *cmark_get_arena_mem_allocator(); /** Resets the arena allocator, quickly returning all used memory * to the operating system. */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_arena_reset(void); /** Callback for freeing user data with a 'cmark_mem' context. 
@@ -151,7 +151,7 @@ typedef struct _cmark_llist /** Append an element to the linked list, return the possibly modified * head of the list. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_llist * cmark_llist_append (cmark_mem * mem, cmark_llist * head, void * data); @@ -159,14 +159,14 @@ cmark_llist * cmark_llist_append (cmark_mem * mem, /** Free the list starting with 'head', calling 'free_func' with the * data pointer of each of its elements */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_llist_free_full (cmark_mem * mem, cmark_llist * head, cmark_free_func free_func); /** Free the list starting with 'head' */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_llist_free (cmark_mem * mem, cmark_llist * head); @@ -178,18 +178,18 @@ void cmark_llist_free (cmark_mem * mem, * other required properties, which it is the caller's responsibility * to assign. */ -CMARK_EXPORT cmark_node *cmark_node_new(cmark_node_type type); +CMARK_GFM_EXPORT cmark_node *cmark_node_new(cmark_node_type type); /** Same as `cmark_node_new`, but explicitly listing the memory * allocator used to allocate the node. Note: be sure to use the same * allocator for every node in a tree, or bad things can happen. */ -CMARK_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type, +CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem); /** Frees the memory allocated for a node and any children. */ -CMARK_EXPORT void cmark_node_free(cmark_node *node); +CMARK_GFM_EXPORT void cmark_node_free(cmark_node *node); /** * ## Tree Traversal @@ -198,24 +198,24 @@ CMARK_EXPORT void cmark_node_free(cmark_node *node); /** Returns the next node in the sequence after 'node', or NULL if * there is none. */ -CMARK_EXPORT cmark_node *cmark_node_next(cmark_node *node); +CMARK_GFM_EXPORT cmark_node *cmark_node_next(cmark_node *node); /** Returns the previous node in the sequence after 'node', or NULL if * there is none. 
*/ -CMARK_EXPORT cmark_node *cmark_node_previous(cmark_node *node); +CMARK_GFM_EXPORT cmark_node *cmark_node_previous(cmark_node *node); /** Returns the parent of 'node', or NULL if there is none. */ -CMARK_EXPORT cmark_node *cmark_node_parent(cmark_node *node); +CMARK_GFM_EXPORT cmark_node *cmark_node_parent(cmark_node *node); /** Returns the first child of 'node', or NULL if 'node' has no children. */ -CMARK_EXPORT cmark_node *cmark_node_first_child(cmark_node *node); +CMARK_GFM_EXPORT cmark_node *cmark_node_first_child(cmark_node *node); /** Returns the last child of 'node', or NULL if 'node' has no children. */ -CMARK_EXPORT cmark_node *cmark_node_last_child(cmark_node *node); +CMARK_GFM_EXPORT cmark_node *cmark_node_last_child(cmark_node *node); /** * ## Iterator @@ -274,40 +274,40 @@ typedef enum { * The memory allocated for the iterator should be released using * 'cmark_iter_free' when it is no longer needed. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_iter *cmark_iter_new(cmark_node *root); /** Frees the memory allocated for an iterator. */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_iter_free(cmark_iter *iter); /** Advances to the next node and returns the event type (`CMARK_EVENT_ENTER`, * `CMARK_EVENT_EXIT` or `CMARK_EVENT_DONE`). */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_event_type cmark_iter_next(cmark_iter *iter); /** Returns the current node. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_node *cmark_iter_get_node(cmark_iter *iter); /** Returns the current event type. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_event_type cmark_iter_get_event_type(cmark_iter *iter); /** Returns the root node. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_node *cmark_iter_get_root(cmark_iter *iter); /** Resets the iterator so that the current node is 'current' and * the event type is 'event_type'. The new current node must be a * descendant of the root node or the root node itself. 
*/ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_iter_reset(cmark_iter *iter, cmark_node *current, cmark_event_type event_type); @@ -317,42 +317,42 @@ void cmark_iter_reset(cmark_iter *iter, cmark_node *current, /** Returns the user data of 'node'. */ -CMARK_EXPORT void *cmark_node_get_user_data(cmark_node *node); +CMARK_GFM_EXPORT void *cmark_node_get_user_data(cmark_node *node); /** Sets arbitrary user data for 'node'. Returns 1 on success, * 0 on failure. */ -CMARK_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data); +CMARK_GFM_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data); /** Set free function for user data */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_node_set_user_data_free_func(cmark_node *node, cmark_free_func free_func); /** Returns the type of 'node', or `CMARK_NODE_NONE` on error. */ -CMARK_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node); +CMARK_GFM_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node); /** Like 'cmark_node_get_type', but returns a string representation of the type, or `"<unknown>"`. */ -CMARK_EXPORT +CMARK_GFM_EXPORT const char *cmark_node_get_type_string(cmark_node *node); /** Returns the string contents of 'node', or an empty string if none is set. Returns NULL if called on a node that does not have string content. */ -CMARK_EXPORT const char *cmark_node_get_literal(cmark_node *node); +CMARK_GFM_EXPORT const char *cmark_node_get_literal(cmark_node *node); /** Sets the string contents of 'node'. Returns 1 on success, * 0 on failure. */ -CMARK_EXPORT int cmark_node_set_literal(cmark_node *node, const char *content); +CMARK_GFM_EXPORT int cmark_node_set_literal(cmark_node *node, const char *content); /** Returns the heading level of 'node', or 0 if 'node' is not a heading. 
*/ -CMARK_EXPORT int cmark_node_get_heading_level(cmark_node *node); +CMARK_GFM_EXPORT int cmark_node_get_heading_level(cmark_node *node); /* For backwards compatibility */ #define cmark_node_get_header_level cmark_node_get_heading_level @@ -360,126 +360,126 @@ CMARK_EXPORT int cmark_node_get_heading_level(cmark_node *node); /** Sets the heading level of 'node', returning 1 on success and 0 on error. */ -CMARK_EXPORT int cmark_node_set_heading_level(cmark_node *node, int level); +CMARK_GFM_EXPORT int cmark_node_set_heading_level(cmark_node *node, int level); /** Returns the list type of 'node', or `CMARK_NO_LIST` if 'node' * is not a list. */ -CMARK_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node); +CMARK_GFM_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node); /** Sets the list type of 'node', returning 1 on success and 0 on error. */ -CMARK_EXPORT int cmark_node_set_list_type(cmark_node *node, +CMARK_GFM_EXPORT int cmark_node_set_list_type(cmark_node *node, cmark_list_type type); /** Returns the list delimiter type of 'node', or `CMARK_NO_DELIM` if 'node' * is not a list. */ -CMARK_EXPORT cmark_delim_type cmark_node_get_list_delim(cmark_node *node); +CMARK_GFM_EXPORT cmark_delim_type cmark_node_get_list_delim(cmark_node *node); /** Sets the list delimiter type of 'node', returning 1 on success and 0 * on error. */ -CMARK_EXPORT int cmark_node_set_list_delim(cmark_node *node, +CMARK_GFM_EXPORT int cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim); /** Returns starting number of 'node', if it is an ordered list, otherwise 0. */ -CMARK_EXPORT int cmark_node_get_list_start(cmark_node *node); +CMARK_GFM_EXPORT int cmark_node_get_list_start(cmark_node *node); /** Sets starting number of 'node', if it is an ordered list. Returns 1 * on success, 0 on failure. 
*/ -CMARK_EXPORT int cmark_node_set_list_start(cmark_node *node, int start); +CMARK_GFM_EXPORT int cmark_node_set_list_start(cmark_node *node, int start); /** Returns 1 if 'node' is a tight list, 0 otherwise. */ -CMARK_EXPORT int cmark_node_get_list_tight(cmark_node *node); +CMARK_GFM_EXPORT int cmark_node_get_list_tight(cmark_node *node); /** Sets the "tightness" of a list. Returns 1 on success, 0 on failure. */ -CMARK_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight); +CMARK_GFM_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight); /** Returns the info string from a fenced code block. */ -CMARK_EXPORT const char *cmark_node_get_fence_info(cmark_node *node); +CMARK_GFM_EXPORT const char *cmark_node_get_fence_info(cmark_node *node); /** Sets the info string in a fenced code block, returning 1 on * success and 0 on failure. */ -CMARK_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info); +CMARK_GFM_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info); /** Sets code blocks fencing details */ -CMARK_EXPORT int cmark_node_set_fenced(cmark_node * node, int fenced, +CMARK_GFM_EXPORT int cmark_node_set_fenced(cmark_node * node, int fenced, int length, int offset, char character); /** Returns code blocks fencing details */ -CMARK_EXPORT int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character); +CMARK_GFM_EXPORT int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character); /** Returns the URL of a link or image 'node', or an empty string if no URL is set. Returns NULL if called on a node that is not a link or image. */ -CMARK_EXPORT const char *cmark_node_get_url(cmark_node *node); +CMARK_GFM_EXPORT const char *cmark_node_get_url(cmark_node *node); /** Sets the URL of a link or image 'node'. Returns 1 on success, * 0 on failure. 
*/ -CMARK_EXPORT int cmark_node_set_url(cmark_node *node, const char *url); +CMARK_GFM_EXPORT int cmark_node_set_url(cmark_node *node, const char *url); /** Returns the title of a link or image 'node', or an empty string if no title is set. Returns NULL if called on a node that is not a link or image. */ -CMARK_EXPORT const char *cmark_node_get_title(cmark_node *node); +CMARK_GFM_EXPORT const char *cmark_node_get_title(cmark_node *node); /** Sets the title of a link or image 'node'. Returns 1 on success, * 0 on failure. */ -CMARK_EXPORT int cmark_node_set_title(cmark_node *node, const char *title); +CMARK_GFM_EXPORT int cmark_node_set_title(cmark_node *node, const char *title); /** Returns the literal "on enter" text for a custom 'node', or an empty string if no on_enter is set. Returns NULL if called on a non-custom node. */ -CMARK_EXPORT const char *cmark_node_get_on_enter(cmark_node *node); +CMARK_GFM_EXPORT const char *cmark_node_get_on_enter(cmark_node *node); /** Sets the literal text to render "on enter" for a custom 'node'. Any children of the node will be rendered after this text. Returns 1 on success 0 on failure. */ -CMARK_EXPORT int cmark_node_set_on_enter(cmark_node *node, +CMARK_GFM_EXPORT int cmark_node_set_on_enter(cmark_node *node, const char *on_enter); /** Returns the literal "on exit" text for a custom 'node', or an empty string if no on_exit is set. Returns NULL if called on a non-custom node. */ -CMARK_EXPORT const char *cmark_node_get_on_exit(cmark_node *node); +CMARK_GFM_EXPORT const char *cmark_node_get_on_exit(cmark_node *node); /** Sets the literal text to render "on exit" for a custom 'node'. Any children of the node will be rendered before this text. Returns 1 on success 0 on failure. */ -CMARK_EXPORT int cmark_node_set_on_exit(cmark_node *node, const char *on_exit); +CMARK_GFM_EXPORT int cmark_node_set_on_exit(cmark_node *node, const char *on_exit); /** Returns the line on which 'node' begins. 
*/ -CMARK_EXPORT int cmark_node_get_start_line(cmark_node *node); +CMARK_GFM_EXPORT int cmark_node_get_start_line(cmark_node *node); /** Returns the column at which 'node' begins. */ -CMARK_EXPORT int cmark_node_get_start_column(cmark_node *node); +CMARK_GFM_EXPORT int cmark_node_get_start_column(cmark_node *node); /** Returns the line on which 'node' ends. */ -CMARK_EXPORT int cmark_node_get_end_line(cmark_node *node); +CMARK_GFM_EXPORT int cmark_node_get_end_line(cmark_node *node); /** Returns the column at which 'node' ends. */ -CMARK_EXPORT int cmark_node_get_end_column(cmark_node *node); +CMARK_GFM_EXPORT int cmark_node_get_end_column(cmark_node *node); /** * ## Tree Manipulation @@ -488,40 +488,40 @@ CMARK_EXPORT int cmark_node_get_end_column(cmark_node *node); /** Unlinks a 'node', removing it from the tree, but not freeing its * memory. (Use 'cmark_node_free' for that.) */ -CMARK_EXPORT void cmark_node_unlink(cmark_node *node); +CMARK_GFM_EXPORT void cmark_node_unlink(cmark_node *node); /** Inserts 'sibling' before 'node'. Returns 1 on success, 0 on failure. */ -CMARK_EXPORT int cmark_node_insert_before(cmark_node *node, +CMARK_GFM_EXPORT int cmark_node_insert_before(cmark_node *node, cmark_node *sibling); /** Inserts 'sibling' after 'node'. Returns 1 on success, 0 on failure. */ -CMARK_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling); +CMARK_GFM_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling); /** Replaces 'oldnode' with 'newnode' and unlinks 'oldnode' (but does * not free its memory). * Returns 1 on success, 0 on failure. */ -CMARK_EXPORT int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode); +CMARK_GFM_EXPORT int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode); /** Adds 'child' to the beginning of the children of 'node'. * Returns 1 on success, 0 on failure. 
*/ -CMARK_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child); +CMARK_GFM_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child); /** Adds 'child' to the end of the children of 'node'. * Returns 1 on success, 0 on failure. */ -CMARK_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child); +CMARK_GFM_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child); /** Consolidates adjacent text nodes. */ -CMARK_EXPORT void cmark_consolidate_text_nodes(cmark_node *root); +CMARK_GFM_EXPORT void cmark_consolidate_text_nodes(cmark_node *root); /** Ensures a node and all its children own their own chunk memory. */ -CMARK_EXPORT void cmark_node_own(cmark_node *root); +CMARK_GFM_EXPORT void cmark_node_own(cmark_node *root); /** * ## Parsing @@ -547,27 +547,27 @@ CMARK_EXPORT void cmark_node_own(cmark_node *root); /** Creates a new parser object. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_parser *cmark_parser_new(int options); /** Creates a new parser object with the given memory allocator */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem); /** Frees memory allocated for a parser object. */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_parser_free(cmark_parser *parser); /** Feeds a string of length 'len' to 'parser'. */ -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len); /** Finish parsing and return a pointer to a tree of nodes. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_node *cmark_parser_finish(cmark_parser *parser); /** Parse a CommonMark document in 'buffer' of length 'len'. @@ -575,14 +575,14 @@ cmark_node *cmark_parser_finish(cmark_parser *parser); * the node tree should be released using 'cmark_node_free' * when it is no longer needed. 
*/ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_node *cmark_parse_document(const char *buffer, size_t len, int options); /** Parse a CommonMark document in file 'f', returning a pointer to * a tree of nodes. The memory allocated for the node tree should be * released using 'cmark_node_free' when it is no longer needed. */ -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_node *cmark_parse_file(FILE *f, int options); /** @@ -592,74 +592,74 @@ cmark_node *cmark_parse_file(FILE *f, int options); /** Render a 'node' tree as XML. It is the caller's responsibility * to free the returned buffer. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_render_xml(cmark_node *root, int options); /** As for 'cmark_render_xml', but specifying the allocator to use for * the resulting string. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem); /** Render a 'node' tree as an HTML fragment. It is up to the user * to add an appropriate header and footer. It is the caller's * responsibility to free the returned buffer. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions); /** As for 'cmark_render_html', but specifying the allocator to use for * the resulting string. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem); /** Render a 'node' tree as a groff man page, without the header. * It is the caller's responsibility to free the returned buffer. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_render_man(cmark_node *root, int options, int width); /** As for 'cmark_render_man', but specifying the allocator to use for * the resulting string. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); /** Render a 'node' tree as a commonmark document. * It is the caller's responsibility to free the returned buffer. 
*/ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_render_commonmark(cmark_node *root, int options, int width); /** As for 'cmark_render_commonmark', but specifying the allocator to use for * the resulting string. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); /** Render a 'node' tree as a plain text document. * It is the caller's responsibility to free the returned buffer. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_render_plaintext(cmark_node *root, int options, int width); /** As for 'cmark_render_plaintext', but specifying the allocator to use for * the resulting string. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_render_plaintext_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); /** Render a 'node' tree as a LaTeX document. * It is the caller's responsibility to free the returned buffer. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_render_latex(cmark_node *root, int options, int width); /** As for 'cmark_render_latex', but specifying the allocator to use for * the resulting string. */ -CMARK_EXPORT +CMARK_GFM_EXPORT char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); /** @@ -751,13 +751,13 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar * * In hexadecimal format, the number 0x010203 represents version 1.2.3. */ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_version(void); /** The library version string for runtime checks. Also available as * macro CMARK_VERSION_STRING for compile time checks. 
*/ -CMARK_EXPORT +CMARK_GFM_EXPORT const char *cmark_version_string(void); /** # AUTHORS diff --git a/src/cmark-gfm_version.h.in b/src/cmark-gfm_version.h.in new file mode 100644 index 000000000..0847d0095 --- /dev/null +++ b/src/cmark-gfm_version.h.in @@ -0,0 +1,7 @@ +#ifndef CMARK_GFM_VERSION_H +#define CMARK_GFM_VERSION_H + +#define CMARK_GFM_VERSION ((@PROJECT_VERSION_MAJOR@ << 24) | (@PROJECT_VERSION_MINOR@ << 16) | (@PROJECT_VERSION_PATCH@ << 8) | @PROJECT_VERSION_GFM@) +#define CMARK_GFM_VERSION_STRING "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@.gfm.@PROJECT_VERSION_GFM@" + +#endif diff --git a/src/cmark.c b/src/cmark.c index f6b4cdf77..b3fad4b08 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -4,15 +4,15 @@ #include "registry.h" #include "node.h" #include "houdini.h" -#include "cmark.h" +#include "cmark-gfm.h" #include "buffer.h" cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_FOOTNOTE_DEFINITION; cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_FOOTNOTE_REFERENCE; -int cmark_version() { return CMARK_VERSION; } +int cmark_version() { return CMARK_GFM_VERSION; } -const char *cmark_version_string() { return CMARK_VERSION_STRING; } +const char *cmark_version_string() { return CMARK_GFM_VERSION_STRING; } static void *xcalloc(size_t nmem, size_t size) { void *ptr = calloc(nmem, size); diff --git a/src/cmark_ctype.h b/src/cmark_ctype.h index 4b90940a0..67c1cb037 100644 --- a/src/cmark_ctype.h +++ b/src/cmark_ctype.h @@ -5,25 +5,25 @@ extern "C" { #endif -#include "cmark_export.h" +#include "cmark-gfm_export.h" /** Locale-independent versions of functions from ctype.h. * We want cmark to behave the same no matter what the system locale. 
*/ -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_isspace(char c); -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_ispunct(char c); -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_isalnum(char c); -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_isdigit(char c); -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_isalpha(char c); #ifdef __cplusplus diff --git a/src/cmark_version.h.in b/src/cmark_version.h.in deleted file mode 100644 index f1ae0990a..000000000 --- a/src/cmark_version.h.in +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef CMARK_VERSION_H -#define CMARK_VERSION_H - -#define CMARK_VERSION ((@PROJECT_VERSION_MAJOR@ << 24) | (@PROJECT_VERSION_MINOR@ << 16) | (@PROJECT_VERSION_PATCH@ << 8) | @PROJECT_VERSION_GFM@) -#define CMARK_VERSION_STRING "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@.gfm.@PROJECT_VERSION_GFM@" -#define CMARK_GFM_VERSION @PROJECT_VERSION_GFM@ - -#endif diff --git a/src/commonmark.c b/src/commonmark.c index ab2692c0e..9943e4c53 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -5,7 +5,7 @@ #include <assert.h> #include "config.h" -#include "cmark.h" +#include "cmark-gfm.h" #include "node.h" #include "buffer.h" #include "utf8.h" diff --git a/src/footnotes.c b/src/footnotes.c index ccd452cc1..f2d2765f4 100644 --- a/src/footnotes.c +++ b/src/footnotes.c @@ -1,4 +1,4 @@ -#include "cmark.h" +#include "cmark-gfm.h" #include "parser.h" #include "footnotes.h" #include "inlines.h" diff --git a/src/houdini.h b/src/houdini.h index 7852c3a83..7625b045b 100644 --- a/src/houdini.h +++ b/src/houdini.h @@ -31,22 +31,22 @@ extern "C" { #define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10) #define HOUDINI_UNESCAPED_SIZE(x) (x) -CMARK_EXPORT +CMARK_GFM_EXPORT bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); -CMARK_EXPORT +CMARK_GFM_EXPORT int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); -CMARK_EXPORT +CMARK_GFM_EXPORT int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int 
secure); -CMARK_EXPORT +CMARK_GFM_EXPORT int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); -CMARK_EXPORT +CMARK_GFM_EXPORT void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); -CMARK_EXPORT +CMARK_GFM_EXPORT int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); diff --git a/src/html.c b/src/html.c index c9ef17f74..7a34f7e91 100644 --- a/src/html.c +++ b/src/html.c @@ -4,7 +4,7 @@ #include <assert.h> #include "cmark_ctype.h" #include "config.h" -#include "cmark.h" +#include "cmark-gfm.h" #include "houdini.h" #include "scanners.h" #include "syntax_extension.h" diff --git a/src/inlines.c b/src/inlines.c index 31c3b8333..45a634877 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -7,7 +7,7 @@ #include "node.h" #include "parser.h" #include "references.h" -#include "cmark.h" +#include "cmark-gfm.h" #include "houdini.h" #include "utf8.h" #include "scanners.h" diff --git a/src/inlines.h b/src/inlines.h index 8c8174c64..7dd91bf52 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -10,7 +10,7 @@ extern "C" { cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url); cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_parse_inlines(cmark_parser *parser, cmark_node *parent, cmark_map *refmap, diff --git a/src/iterator.c b/src/iterator.c index 5557dff31..13fdb7616 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -3,7 +3,7 @@ #include "config.h" #include "node.h" -#include "cmark.h" +#include "cmark-gfm.h" #include "iterator.h" cmark_iter *cmark_iter_new(cmark_node *root) { diff --git a/src/iterator.h b/src/iterator.h index fc745df5b..1155593df 100644 --- a/src/iterator.h +++ b/src/iterator.h @@ -5,7 +5,7 @@ extern "C" { #endif -#include "cmark.h" +#include "cmark-gfm.h" #include "memory.h" typedef struct { diff --git a/src/latex.c b/src/latex.c index 1767cee39..8be15b0d5 100644 --- a/src/latex.c +++ b/src/latex.c @@ -4,7 +4,7 @@ 
#include <assert.h> #include "config.h" -#include "cmark.h" +#include "cmark-gfm.h" #include "node.h" #include "buffer.h" #include "utf8.h" diff --git a/src/libcmark-gfm.pc.in b/src/libcmark-gfm.pc.in index 4aeca7c53..46fff31a2 100644 --- a/src/libcmark-gfm.pc.in +++ b/src/libcmark-gfm.pc.in @@ -6,5 +6,5 @@ includedir=@CMAKE_INSTALL_PREFIX@/include Name: libcmark-gfm Description: CommonMark parsing, rendering, and manipulation with GitHub Flavored Markdown extensions Version: @PROJECT_VERSION@ -Libs: -L${libdir} -lcmark-gfm -lcmark-gfmextensions +Libs: -L${libdir} -lcmark-gfm -lcmark-gfm-extensions Cflags: -I${includedir} diff --git a/src/linked_list.c b/src/linked_list.c index f8bc60422..8c26dc557 100644 --- a/src/linked_list.c +++ b/src/linked_list.c @@ -1,6 +1,6 @@ #include <stdlib.h> -#include "cmark.h" +#include "cmark-gfm.h" cmark_llist *cmark_llist_append(cmark_mem *mem, cmark_llist *head, void *data) { cmark_llist *tmp; diff --git a/src/main.c b/src/main.c index 9c79598c7..8fd612b7b 100644 --- a/src/main.c +++ b/src/main.c @@ -4,14 +4,14 @@ #include <errno.h> #include "config.h" #include "memory.h" -#include "cmark.h" +#include "cmark-gfm.h" #include "node.h" -#include "cmark_extension_api.h" +#include "cmark-gfm-extension_api.h" #include "syntax_extension.h" #include "parser.h" #include "registry.h" -#include "../extensions/core-extensions.h" +#include "../extensions/cmark-gfm-core-extensions.h" #if defined(_WIN32) && !defined(__CYGWIN__) #include <io.h> @@ -118,7 +118,7 @@ int main(int argc, char *argv[]) { int options = CMARK_OPT_DEFAULT; int res = 1; - core_extensions_ensure_registered(); + cmark_gfm_core_extensions_ensure_registered(); #if defined(_WIN32) && !defined(__CYGWIN__) _setmode(_fileno(stdin), _O_BINARY); @@ -129,8 +129,8 @@ int main(int argc, char *argv[]) { for (i = 1; i < argc; i++) { if (strcmp(argv[i], "--version") == 0) { - printf("cmark %s", CMARK_VERSION_STRING); - printf(" - CommonMark converter\n(C) 2014-2016 John MacFarlane\n"); + 
printf("cmark-gfm %s", CMARK_GFM_VERSION_STRING); + printf(" - CommonMark with GitHub Flavored Markdown converter\n(C) 2014-2016 John MacFarlane\n"); goto success; } else if (strcmp(argv[i], "--list-extensions") == 0) { print_extensions(); diff --git a/src/man.c b/src/man.c index 1e1a69d26..441a96e49 100644 --- a/src/man.c +++ b/src/man.c @@ -4,7 +4,7 @@ #include <assert.h> #include "config.h" -#include "cmark.h" +#include "cmark-gfm.h" #include "node.h" #include "buffer.h" #include "utf8.h" diff --git a/src/node.h b/src/node.h index 1d8aa50df..0a392555b 100644 --- a/src/node.h +++ b/src/node.h @@ -8,8 +8,8 @@ extern "C" { #include <stdio.h> #include <stdint.h> -#include "cmark.h" -#include "cmark_extension_api.h" +#include "cmark-gfm.h" +#include "cmark-gfm-extension_api.h" #include "buffer.h" #include "chunk.h" @@ -89,7 +89,7 @@ struct cmark_node { static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) { return node->content.mem; } -CMARK_EXPORT int cmark_node_check(cmark_node *node, FILE *out); +CMARK_GFM_EXPORT int cmark_node_check(cmark_node *node, FILE *out); static CMARK_INLINE bool CMARK_NODE_TYPE_BLOCK_P(cmark_node_type node_type) { return (node_type & CMARK_NODE_TYPE_MASK) == CMARK_NODE_TYPE_BLOCK; @@ -107,7 +107,7 @@ static CMARK_INLINE bool CMARK_NODE_INLINE_P(cmark_node *node) { return node != NULL && CMARK_NODE_TYPE_INLINE_P((cmark_node_type) node->type); } -CMARK_EXPORT bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type); +CMARK_GFM_EXPORT bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type); #ifdef __cplusplus } diff --git a/src/plugin.h b/src/plugin.h index b9e9d2994..7bcbd19a2 100644 --- a/src/plugin.h +++ b/src/plugin.h @@ -5,8 +5,8 @@ extern "C" { #endif -#include "cmark.h" -#include "cmark_extension_api.h" +#include "cmark-gfm.h" +#include "cmark-gfm-extension_api.h" /** * cmark_plugin: diff --git a/src/references.c b/src/references.c index 2cd4b4435..7e7f34b38 100644 --- 
a/src/references.c +++ b/src/references.c @@ -1,4 +1,4 @@ -#include "cmark.h" +#include "cmark-gfm.h" #include "parser.h" #include "references.h" #include "inlines.h" diff --git a/src/registry.c b/src/registry.c index 3ff01f2ad..f4f2040d6 100644 --- a/src/registry.c +++ b/src/registry.c @@ -3,7 +3,7 @@ #include <string.h> #include "config.h" -#include "cmark.h" +#include "cmark-gfm.h" #include "syntax_extension.h" #include "registry.h" #include "plugin.h" diff --git a/src/registry.h b/src/registry.h index 0f0fbae26..fece2b63f 100644 --- a/src/registry.h +++ b/src/registry.h @@ -5,16 +5,16 @@ extern "C" { #endif -#include "cmark.h" +#include "cmark-gfm.h" #include "plugin.h" -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_register_plugin(cmark_plugin_init_func reg_fn); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_release_plugins(void); -CMARK_EXPORT +CMARK_GFM_EXPORT cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem); #ifdef __cplusplus diff --git a/src/render.c b/src/render.c index 7f0902462..feb207544 100644 --- a/src/render.c +++ b/src/render.c @@ -1,7 +1,7 @@ #include <stdlib.h> #include "buffer.h" #include "chunk.h" -#include "cmark.h" +#include "cmark-gfm.h" #include "utf8.h" #include "render.h" #include "node.h" diff --git a/src/scanners.h b/src/scanners.h index ffdbaeb06..1e5c8ccd8 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -1,7 +1,7 @@ #ifndef CMARK_SCANNERS_H #define CMARK_SCANNERS_H -#include "cmark.h" +#include "cmark-gfm.h" #include "chunk.h" #ifdef __cplusplus diff --git a/src/syntax_extension.c b/src/syntax_extension.c index d079efade..f5c00bb4a 100644 --- a/src/syntax_extension.c +++ b/src/syntax_extension.c @@ -1,7 +1,7 @@ #include <stdlib.h> #include <assert.h> -#include "cmark.h" +#include "cmark-gfm.h" #include "syntax_extension.h" #include "buffer.h" diff --git a/src/syntax_extension.h b/src/syntax_extension.h index f51f1f4ca..e0390b2a2 100644 --- a/src/syntax_extension.h +++ b/src/syntax_extension.h @@ -1,8 +1,8 @@ #ifndef 
CMARK_SYNTAX_EXTENSION_H #define CMARK_SYNTAX_EXTENSION_H -#include "cmark.h" -#include "cmark_extension_api.h" +#include "cmark-gfm.h" +#include "cmark-gfm-extension_api.h" #include "config.h" struct cmark_syntax_extension { diff --git a/src/utf8.h b/src/utf8.h index 5e6434483..04ec1611b 100644 --- a/src/utf8.h +++ b/src/utf8.h @@ -8,24 +8,24 @@ extern "C" { #endif -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf); -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); -CMARK_EXPORT +CMARK_GFM_EXPORT void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line, bufsize_t size); -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_utf8proc_is_space(int32_t uc); -CMARK_EXPORT +CMARK_GFM_EXPORT int cmark_utf8proc_is_punctuation(int32_t uc); #ifdef __cplusplus diff --git a/src/xml.c b/src/xml.c index ea53b99c6..920fe7171 100644 --- a/src/xml.c +++ b/src/xml.c @@ -4,7 +4,7 @@ #include <assert.h> #include "config.h" -#include "cmark.h" +#include "cmark-gfm.h" #include "node.h" #include "buffer.h" #include "houdini.h" diff --git a/test/cmark-fuzz.c b/test/cmark-fuzz.c index 6425c1492..3a263efc9 100644 --- a/test/cmark-fuzz.c +++ b/test/cmark-fuzz.c @@ -1,6 +1,6 @@ #include <stdint.h> #include <stdlib.h> -#include "cmark.h" +#include "cmark-gfm.h" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { int options = 0; diff --git a/test/cmark.py b/test/cmark.py index 44d0dc8bf..c9f484b06 100644 --- a/test/cmark.py +++ b/test/cmark.py @@ -12,7 +12,7 @@ def pipe_through_prog(prog, text): return [p1.returncode, result.decode('utf-8'), err] def parse(lib, extlib, text, extensions): - core_extensions_ensure_registered = extlib.core_extensions_ensure_registered + cmark_gfm_core_extensions_ensure_registered = extlib.cmark_gfm_core_extensions_ensure_registered 
find_syntax_extension = lib.cmark_find_syntax_extension find_syntax_extension.restype = c_void_p @@ -32,7 +32,7 @@ def parse(lib, extlib, text, extensions): parser_finish.restype = c_void_p parser_finish.argtypes = [c_void_p] - core_extensions_ensure_registered() + cmark_gfm_core_extensions_ensure_registered() parser = parser_new(0) for e in set(extensions): @@ -97,7 +97,7 @@ def __init__(self, prog=None, library_dir=None, extensions=None): break cmark = CDLL(libpath) extlib = CDLL(os.path.join( - library_dir, "..", "extensions", prefix + "cmark-gfmextensions" + suffix)) + library_dir, "..", "extensions", prefix + "cmark-gfm-extensions" + suffix)) self.to_html = lambda x, exts=[]: to_html(cmark, extlib, x, exts + self.extensions) self.to_commonmark = lambda x, exts=[]: to_commonmark(cmark, extlib, x, exts + self.extensions) diff --git a/wrappers/wrapper_ext.py b/wrappers/wrapper_ext.py index 1d6f9785e..f08cc2076 100755 --- a/wrappers/wrapper_ext.py +++ b/wrappers/wrapper_ext.py @@ -3,9 +3,9 @@ # # Example for using the shared library from python. # Will work with either python 2 or python 3. -# Requires cmark-gfm and cmark-gfmextensions libraries to be installed. +# Requires cmark-gfm and cmark-gfm-extensions libraries to be installed. # -# This particular example uses the GitHub extensions from the gfmextensions +# This particular example uses the GitHub extensions from the gfm-extensions # library. EXTENSIONS specifies which to use, and the sample shows how to # connect them into a parser. 
# @@ -15,13 +15,13 @@ if sys.platform == 'darwin': libname = 'libcmark-gfm.dylib' - extname = 'libcmark-gfmextensions.dylib' + extname = 'libcmark-gfm-extensions.dylib' elif sys.platform == 'win32': libname = 'cmark-gfm.dll' - extname = 'cmark-gfmextensions.dll' + extname = 'cmark-gfm-extensions.dll' else: libname = 'libcmark-gfm.so' - extname = 'libcmark-gfmextensions.so' + extname = 'libcmark-gfm-extensions.so' cmark = ctypes.CDLL(libname) cmark_ext = ctypes.CDLL(extname) @@ -76,7 +76,7 @@ # Set up the libcmark-gfm library and its extensions -F_register = cmark_ext.core_extensions_ensure_registered +F_register = cmark_ext.cmark_gfm_core_extensions_ensure_registered F_register.restype = None F_register.argtypes = ( ) F_register() From f649003ec8db28a6dbaf1a92443365649adcb04f Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Tue, 21 Aug 2018 12:01:31 +1000 Subject: [PATCH 128/218] 0.28.3.gfm.15 --- CMakeLists.txt | 2 +- changelog.txt | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3224b29a9..c5f184fe5 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark-gfm") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 3) -set(PROJECT_VERSION_GFM 14) +set(PROJECT_VERSION_GFM 15) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark-gfm tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index e7a7bc01a..dc92205bb 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,10 @@ +[0.28.3.gfm.15] + + * Escape non-strikethrough tildes (~) in commonmark output (John MacFarlane, #106). + * Cosmetic fix to table HTML output (John MacFarlane, #105). + * Use two tildes for strikethrough CommonMark output (John MacFarlane, #104). + * Normalised header and define names (#109). 
+ [0.28.3.gfm.14] * Added a plaintext renderer for strikethrough nodes. From ea2a07d050329d211af437ea5f9b5c45804a40d8 Mon Sep 17 00:00:00 2001 From: Ashe Connor <kivikakk@github.com> Date: Wed, 5 Sep 2018 12:16:37 +1000 Subject: [PATCH 129/218] `~` should not be escaped in href (#110) * test * fix: do not percent-encode ~ --- src/houdini_href_e.c | 4 ++-- test/regression.txt | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/houdini_href_e.c b/src/houdini_href_e.c index 9152803df..8c38d2fb9 100644 --- a/src/houdini_href_e.c +++ b/src/houdini_href_e.c @@ -7,7 +7,7 @@ /* * The following characters will not be escaped: * - * -_.+!*'(),%#@?=;:/,+&$ alphanum + * -_.+!*'(),%#@?=;:/,+&$~ alphanum * * Note that this character set is the addition of: * @@ -35,7 +35,7 @@ static const char HREF_SAFE[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, diff --git a/test/regression.txt b/test/regression.txt index 0569b3617..87c0107ef 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -169,3 +169,11 @@ This ~text~~~~ is ~~~~curious~. . <p>This <del>text</del> is <del>curious</del>.</p> ```````````````````````````````` + +`~` should not be escaped in href — https://github.com/github/markup/issues/311 + +```````````````````````````````` example +[x](http://members.aon.at/~nkehrer/ibm_5110/emu5110.html) +. 
+<p><a href="http://members.aon.at/~nkehrer/ibm_5110/emu5110.html">x</a></p> +```````````````````````````````` From a9dbc9c8f885e9fb36d990e312e9de1f2d51b13f Mon Sep 17 00:00:00 2001 From: Ashe Connor <kivikakk@github.com> Date: Mon, 10 Sep 2018 10:20:31 +1000 Subject: [PATCH 130/218] Footnotes in tables (#112) * add failing regression test * footnotes to appear as an extension * allow TABLE_CELL to contain FOOTNOTE_REFERENCE --- extensions/table.c | 3 ++- src/main.c | 12 ++++++++---- test/CMakeLists.txt | 12 ++++++------ test/regression.txt | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 51 insertions(+), 11 deletions(-) diff --git a/extensions/table.c b/extensions/table.c index 1d675e04d..ef08a3ae8 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -380,7 +380,8 @@ static int can_contain(cmark_syntax_extension *extension, cmark_node *node, child_type == CMARK_NODE_EMPH || child_type == CMARK_NODE_STRONG || child_type == CMARK_NODE_LINK || child_type == CMARK_NODE_IMAGE || child_type == CMARK_NODE_STRIKETHROUGH || - child_type == CMARK_NODE_HTML_INLINE; + child_type == CMARK_NODE_HTML_INLINE || + child_type == CMARK_NODE_FOOTNOTE_REFERENCE; } return false; } diff --git a/src/main.c b/src/main.c index 8fd612b7b..877735f6b 100644 --- a/src/main.c +++ b/src/main.c @@ -41,7 +41,6 @@ void print_usage() { printf(" --smart Use smart punctuation\n"); printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n"); printf(" --github-pre-lang Use GitHub-style <pre lang> for code blocks\n"); - printf(" --footnotes Parse footnotes\n"); printf(" --extension, -e EXTENSION_NAME Specify an extension name to use\n"); printf(" --list-extensions List available extensions and quit\n"); printf(" --strikethrough-double-tilde Only parse strikethrough (if enabled)\n"); @@ -93,7 +92,7 @@ static void print_extensions(void) { cmark_llist *syntax_extensions; cmark_llist *tmp; - printf ("Available extensions:\n"); + printf ("Available extensions:\nfootnotes\n"); 
cmark_mem *mem = cmark_get_default_mem_allocator(); syntax_extensions = cmark_list_syntax_extensions(mem); @@ -151,8 +150,6 @@ int main(int argc, char *argv[]) { options |= CMARK_OPT_SMART; } else if (strcmp(argv[i], "--github-pre-lang") == 0) { options |= CMARK_OPT_GITHUB_PRE_LANG; - } else if (strcmp(argv[i], "--footnotes") == 0) { - options |= CMARK_OPT_FOOTNOTES; } else if (strcmp(argv[i], "--safe") == 0) { options |= CMARK_OPT_SAFE; } else if (strcmp(argv[i], "--validate-utf8") == 0) { @@ -202,6 +199,10 @@ int main(int argc, char *argv[]) { } else if ((strcmp(argv[i], "-e") == 0) || (strcmp(argv[i], "--extension") == 0)) { i += 1; // Simpler to handle extensions in a second pass, as we can directly register // them with the parser. + + if (i < argc && strcmp(argv[i], "footnotes") == 0) { + options |= CMARK_OPT_FOOTNOTES; + } } else if (*argv[i] == '-') { print_usage(); goto failure; @@ -220,6 +221,9 @@ int main(int argc, char *argv[]) { if ((strcmp(argv[i], "-e") == 0) || (strcmp(argv[i], "--extension") == 0)) { i += 1; if (i < argc) { + if (strcmp(argv[i], "footnotes") == 0) { + continue; + } cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(argv[i]); if (!syntax_extension) { fprintf(stderr, "Unknown extension %s\n", argv[i]); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9ddc7327a..148f29ce1 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -69,24 +69,24 @@ IF (PYTHONINTERP_FOUND) "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" - "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --footnotes" - "--extensions" "table strikethrough autolink tagfilter" + "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" + "--extensions" "table strikethrough autolink tagfilter footnotes" ) add_test(roundtrip_extensions_executable ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" 
"${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" - "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --footnotes" - "--extensions" "table strikethrough autolink tagfilter" + "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" + "--extensions" "table strikethrough autolink tagfilter footnotes" ) add_test(option_table_prefer_style_attributes ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions-table-prefer-style-attributes.txt" - "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --footnotes --table-prefer-style-attributes" - "--extensions" "table strikethrough autolink tagfilter" + "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --table-prefer-style-attributes" + "--extensions" "table strikethrough autolink tagfilter footnotes" ) add_test(option_full_info_string diff --git a/test/regression.txt b/test/regression.txt index 87c0107ef..dac3c3835 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -177,3 +177,38 @@ This ~text~~~~ is ~~~~curious~. . <p><a href="http://members.aon.at/~nkehrer/ibm_5110/emu5110.html">x</a></p> ```````````````````````````````` + +Footnotes in tables + +```````````````````````````````` example table footnotes +A footnote in a paragraph[^1] + +| Column1 | Column2 | +| --------- | ------- | +| foot [^1] | note | + +[^1]: a footnote +. 
+<p>A footnote in a paragraph<sup class="footnote-ref"><a href="#fn1" id="fnref1">1</a></sup></p> +<table> +<thead> +<tr> +<th>Column1</th> +<th>Column2</th> +</tr> +</thead> +<tbody> +<tr> +<td>foot <sup class="footnote-ref"><a href="#fn1" id="fnref1">1</a></sup></td> +<td>note</td> +</tr> +</tbody> +</table> +<section class="footnotes"> +<ol> +<li id="fn1"> +<p>a footnote <a href="#fnref1" class="footnote-backref">↩</a></p> +</li> +</ol> +</section> +```````````````````````````````` From 446767da506d080ea12f197c19ff6ea9eae9032b Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 10 Sep 2018 14:24:10 +1000 Subject: [PATCH 131/218] 0.28.3.gfm.16 --- CMakeLists.txt | 2 +- changelog.txt | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c5f184fe5..0d8aa11f4 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark-gfm") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 3) -set(PROJECT_VERSION_GFM 15) +set(PROJECT_VERSION_GFM 16) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark-gfm tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index dc92205bb..81ef42cd9 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,8 @@ +[0.28.3.gfm.16] + + * Do not percent-encode tildes (~) in HTML attribute values (#110). + * Fix footnote references in tables (#112). + [0.28.3.gfm.15] * Escape non-strikethrough tildes (~) in commonmark output (John MacFarlane, #106). 
From 22d114957f076955860820319ba942d054873250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Ojeda=20B=C3=A4r?= <n.oje.bar@gmail.com> Date: Fri, 14 Sep 2018 01:14:19 +0200 Subject: [PATCH 132/218] Allow extension to provide "opaque" alloc function (#89) --- extensions/table.c | 38 +++++++++++++++++++++++++++++++++++ src/cmark-gfm-extension_api.h | 12 +++++++++-- src/cmark-gfm.h | 8 ++++++++ src/node.c | 20 +++++++++++++++--- src/syntax_extension.c | 5 +++++ src/syntax_extension.h | 1 + 6 files changed, 79 insertions(+), 5 deletions(-) diff --git a/extensions/table.c b/extensions/table.c index ef08a3ae8..f93d82d39 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -648,6 +648,16 @@ static void html_render(cmark_syntax_extension *extension, } } +static void opaque_alloc(cmark_syntax_extension *self, cmark_mem *mem, cmark_node *node) { + if (node->type == CMARK_NODE_TABLE) { + node->as.opaque = mem->calloc(1, sizeof(node_table)); + } else if (node->type == CMARK_NODE_TABLE_ROW) { + node->as.opaque = mem->calloc(1, sizeof(node_table_row)); + } else if (node->type == CMARK_NODE_TABLE_CELL) { + node->as.opaque = mem->calloc(1, sizeof(node_cell)); + } +} + static void opaque_free(cmark_syntax_extension *self, cmark_mem *mem, cmark_node *node) { if (node->type == CMARK_NODE_TABLE) { free_node_table(mem, node->as.opaque); @@ -677,6 +687,7 @@ cmark_syntax_extension *create_table_extension(void) { cmark_syntax_extension_set_latex_render_func(self, latex_render); cmark_syntax_extension_set_man_render_func(self, man_render); cmark_syntax_extension_set_html_render_func(self, html_render); + cmark_syntax_extension_set_opaque_alloc_func(self, opaque_alloc); cmark_syntax_extension_set_opaque_free_func(self, opaque_free); cmark_syntax_extension_set_commonmark_escape_func(self, escape); CMARK_NODE_TABLE = cmark_syntax_extension_add_node(0); @@ -699,3 +710,30 @@ uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node) { return ((node_table 
*)node->as.opaque)->alignments; } + +int cmarkextensions_set_table_columns(cmark_node *node, uint16_t n_columns) { + return set_n_table_columns(node, n_columns); +} + +int cmarkextensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments) { + uint8_t *a = (uint8_t *)cmark_node_mem(node)->calloc(1, ncols); + memcpy(a, alignments, ncols); + return set_table_alignments(node, a); +} + +int cmarkextensions_get_table_row_is_header(cmark_node *node) +{ + if (!node || node->type != CMARK_NODE_TABLE_ROW) + return 0; + + return ((node_table_row *)node->as.opaque)->is_header; +} + +int cmarkextensions_set_table_row_is_header(cmark_node *node, int is_header) +{ + if (!node || node->type != CMARK_NODE_TABLE_ROW) + return 0; + + ((node_table_row *)node->as.opaque)->is_header = (is_header != 0); + return 1; +} diff --git a/src/cmark-gfm-extension_api.h b/src/cmark-gfm-extension_api.h index 853948c43..d531ced60 100644 --- a/src/cmark-gfm-extension_api.h +++ b/src/cmark-gfm-extension_api.h @@ -106,8 +106,6 @@ typedef struct cmark_plugin cmark_plugin; * with 'cmark_syntax_extension_set_private', * and optionally define a free function for this data. 
*/ -typedef struct cmark_syntax_extension cmark_syntax_extension; - typedef struct subject cmark_inline_parser; /** Exposed raw for now */ @@ -254,6 +252,10 @@ typedef cmark_node *(*cmark_postprocess_func) (cmark_syntax_extension *extension typedef int (*cmark_ispunct_func) (char c); +typedef void (*cmark_opaque_alloc_func) (cmark_syntax_extension *extension, + cmark_mem *mem, + cmark_node *node); + typedef void (*cmark_opaque_free_func) (cmark_syntax_extension *extension, cmark_mem *mem, cmark_node *node); @@ -382,6 +384,12 @@ CMARK_GFM_EXPORT void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension, cmark_postprocess_func func); +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_GFM_EXPORT +void cmark_syntax_extension_set_opaque_alloc_func(cmark_syntax_extension *extension, + cmark_opaque_alloc_func func); + /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT diff --git a/src/cmark-gfm.h b/src/cmark-gfm.h index a26f640ef..02a1e71c0 100644 --- a/src/cmark-gfm.h +++ b/src/cmark-gfm.h @@ -92,6 +92,7 @@ typedef enum { typedef struct cmark_node cmark_node; typedef struct cmark_parser cmark_parser; typedef struct cmark_iter cmark_iter; +typedef struct cmark_syntax_extension cmark_syntax_extension; /** * ## Custom memory allocator support @@ -187,6 +188,13 @@ CMARK_GFM_EXPORT cmark_node *cmark_node_new(cmark_node_type type); CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem); +CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_ext(cmark_node_type type, + cmark_syntax_extension *extension); + +CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_mem_and_ext(cmark_node_type type, + cmark_mem *mem, + cmark_syntax_extension *extension); + /** Frees the memory allocated for a node and any children. 
*/ CMARK_GFM_EXPORT void cmark_node_free(cmark_node *node); diff --git a/src/node.c b/src/node.c index 3f94834cf..0118d6511 100644 --- a/src/node.c +++ b/src/node.c @@ -69,10 +69,11 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) { return cmark_node_can_contain_type(node, (cmark_node_type) child->type); } -cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) { +cmark_node *cmark_node_new_with_mem_and_ext(cmark_node_type type, cmark_mem *mem, cmark_syntax_extension *extension) { cmark_node *node = (cmark_node *)mem->calloc(1, sizeof(*node)); cmark_strbuf_init(mem, &node->content, 0); node->type = (uint16_t)type; + node->extension = extension; switch (node->type) { case CMARK_NODE_HEADING: @@ -91,12 +92,25 @@ cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) { break; } + if (node->extension && node->extension->opaque_alloc_func) { + node->extension->opaque_alloc_func(node->extension, mem, node); + } + return node; } -cmark_node *cmark_node_new(cmark_node_type type) { +cmark_node *cmark_node_new_with_ext(cmark_node_type type, cmark_syntax_extension *extension) { extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; - return cmark_node_new_with_mem(type, &CMARK_DEFAULT_MEM_ALLOCATOR); + return cmark_node_new_with_mem_and_ext(type, &CMARK_DEFAULT_MEM_ALLOCATOR, extension); +} + +cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) +{ + return cmark_node_new_with_mem_and_ext(type, mem, NULL); +} + +cmark_node *cmark_node_new(cmark_node_type type) { + return cmark_node_new_with_ext(type, NULL); } static void free_node_as(cmark_node *node) { diff --git a/src/syntax_extension.c b/src/syntax_extension.c index f5c00bb4a..c042479e7 100644 --- a/src/syntax_extension.c +++ b/src/syntax_extension.c @@ -128,6 +128,11 @@ void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension) { return extension->priv; } +void cmark_syntax_extension_set_opaque_alloc_func(cmark_syntax_extension *extension, 
+ cmark_opaque_alloc_func func) { + extension->opaque_alloc_func = func; +} + void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension, cmark_opaque_free_func func) { extension->opaque_free_func = func; diff --git a/src/syntax_extension.h b/src/syntax_extension.h index e0390b2a2..fdbcd3fac 100644 --- a/src/syntax_extension.h +++ b/src/syntax_extension.h @@ -25,6 +25,7 @@ struct cmark_syntax_extension { cmark_html_render_func html_render_func; cmark_html_filter_func html_filter_func; cmark_postprocess_func postprocess_func; + cmark_opaque_alloc_func opaque_alloc_func; cmark_opaque_free_func opaque_free_func; cmark_commonmark_escape_func commonmark_escape_func; }; From 258d2a46c6bb240e2aa1024aba6bd9d39cad73d0 Mon Sep 17 00:00:00 2001 From: Ashe Connor <kivikakk@github.com> Date: Mon, 24 Sep 2018 17:25:22 +1000 Subject: [PATCH 133/218] XML attribute formatters (#116) * add custom xml attr formatter * don't allocate Premature design optimisation. --- extensions/cmark-gfm-core-extensions.h | 3 +++ extensions/table.c | 31 ++++++++++++++++++++++---- src/cmark-gfm-extension_api.h | 9 ++++++++ src/syntax_extension.c | 5 +++++ src/syntax_extension.h | 1 + src/xml.c | 7 ++++++ 6 files changed, 52 insertions(+), 4 deletions(-) diff --git a/extensions/cmark-gfm-core-extensions.h b/extensions/cmark-gfm-core-extensions.h index 075905e82..dce86484d 100644 --- a/extensions/cmark-gfm-core-extensions.h +++ b/extensions/cmark-gfm-core-extensions.h @@ -18,6 +18,9 @@ uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node); CMARK_GFM_EXTENSIONS_EXPORT uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node); +CMARK_GFM_EXTENSIONS_EXPORT +int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node); + #ifdef __cplusplus } #endif diff --git a/extensions/table.c b/extensions/table.c index f93d82d39..c2ddd2a24 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -9,6 +9,7 @@ #include "ext_scanners.h" #include "strikethrough.h" 
#include "table.h" +#include "cmark-gfm-core-extensions.h" cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, CMARK_NODE_TABLE_CELL; @@ -488,6 +489,27 @@ static void latex_render(cmark_syntax_extension *extension, } } +static const char *xml_attr(cmark_syntax_extension *extension, + cmark_node *node) { + if (node->type == CMARK_NODE_TABLE_CELL) { + if (cmark_gfm_extensions_get_table_row_is_header(node->parent)) { + uint8_t *alignments = get_table_alignments(node->parent->parent); + int i = 0; + cmark_node *n; + for (n = node->parent->first_child; n; n = n->next, ++i) + if (n == node) + break; + switch (alignments[i]) { + case 'l': return " align=\"left\""; + case 'c': return " align=\"center\""; + case 'r': return " align=\"right\""; + } + } + } + + return NULL; +} + static void man_render(cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { @@ -685,6 +707,7 @@ cmark_syntax_extension *create_table_extension(void) { cmark_syntax_extension_set_commonmark_render_func(self, commonmark_render); cmark_syntax_extension_set_plaintext_render_func(self, commonmark_render); cmark_syntax_extension_set_latex_render_func(self, latex_render); + cmark_syntax_extension_set_xml_attr_func(self, xml_attr); cmark_syntax_extension_set_man_render_func(self, man_render); cmark_syntax_extension_set_html_render_func(self, html_render); cmark_syntax_extension_set_opaque_alloc_func(self, opaque_alloc); @@ -711,17 +734,17 @@ uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node) { return ((node_table *)node->as.opaque)->alignments; } -int cmarkextensions_set_table_columns(cmark_node *node, uint16_t n_columns) { +int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns) { return set_n_table_columns(node, n_columns); } -int cmarkextensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments) { +int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t 
ncols, uint8_t *alignments) { uint8_t *a = (uint8_t *)cmark_node_mem(node)->calloc(1, ncols); memcpy(a, alignments, ncols); return set_table_alignments(node, a); } -int cmarkextensions_get_table_row_is_header(cmark_node *node) +int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node) { if (!node || node->type != CMARK_NODE_TABLE_ROW) return 0; @@ -729,7 +752,7 @@ int cmarkextensions_get_table_row_is_header(cmark_node *node) return ((node_table_row *)node->as.opaque)->is_header; } -int cmarkextensions_set_table_row_is_header(cmark_node *node, int is_header) +int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header) { if (!node || node->type != CMARK_NODE_TABLE_ROW) return 0; diff --git a/src/cmark-gfm-extension_api.h b/src/cmark-gfm-extension_api.h index d531ced60..9403c4f00 100644 --- a/src/cmark-gfm-extension_api.h +++ b/src/cmark-gfm-extension_api.h @@ -236,6 +236,9 @@ typedef int (*cmark_commonmark_escape_func) (cmark_syntax_extension *extension, cmark_node *node, int c); +typedef const char* (*cmark_xml_attr_func) (cmark_syntax_extension *extension, + cmark_node *node); + typedef void (*cmark_html_render_func) (cmark_syntax_extension *extension, struct cmark_html_renderer *renderer, cmark_node *node, @@ -345,6 +348,12 @@ void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extens /** See the documentation for 'cmark_syntax_extension' */ CMARK_GFM_EXPORT +void cmark_syntax_extension_set_xml_attr_func(cmark_syntax_extension *extension, + cmark_xml_attr_func func); + + /** See the documentation for 'cmark_syntax_extension' + */ +CMARK_GFM_EXPORT void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); diff --git a/src/syntax_extension.c b/src/syntax_extension.c index c042479e7..d24fe43e6 100644 --- a/src/syntax_extension.c +++ b/src/syntax_extension.c @@ -97,6 +97,11 @@ void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extens 
extension->latex_render_func = func; } +void cmark_syntax_extension_set_xml_attr_func(cmark_syntax_extension *extension, + cmark_xml_attr_func func) { + extension->xml_attr_func = func; +} + void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension, cmark_common_render_func func) { extension->man_render_func = func; diff --git a/src/syntax_extension.h b/src/syntax_extension.h index fdbcd3fac..a5fe11e57 100644 --- a/src/syntax_extension.h +++ b/src/syntax_extension.h @@ -21,6 +21,7 @@ struct cmark_syntax_extension { cmark_common_render_func commonmark_render_func; cmark_common_render_func plaintext_render_func; cmark_common_render_func latex_render_func; + cmark_xml_attr_func xml_attr_func; cmark_common_render_func man_render_func; cmark_html_render_func html_render_func; cmark_html_filter_func html_filter_func; diff --git a/src/xml.c b/src/xml.c index 951759bef..2975bf96c 100644 --- a/src/xml.c +++ b/src/xml.c @@ -8,6 +8,7 @@ #include "node.h" #include "buffer.h" #include "houdini.h" +#include "syntax_extension.h" #define BUFFER_SIZE 100 @@ -50,6 +51,12 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, cmark_strbuf_puts(xml, buffer); } + if (node->extension && node->extension->xml_attr_func) { + const char* r = node->extension->xml_attr_func(node->extension, node); + if (r != NULL) + cmark_strbuf_puts(xml, r); + } + literal = false; switch (node->type) { From e7dc6ae40fc4a538ce6257e06847a38d9075215a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= <maelle.salmon@yahoo.se> Date: Tue, 25 Sep 2018 09:35:45 +0200 Subject: [PATCH 134/218] Add support for tables and strike-through text in the XSLT (#117) * Add support for tables and strike-through text in the XSLT * Update xml2md.xsl * Add specific stylesheet for tables and strike-through * Add comment --- tools/xml2md_gfm.xsl | 80 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 tools/xml2md_gfm.xsl diff --git 
a/tools/xml2md_gfm.xsl b/tools/xml2md_gfm.xsl new file mode 100644 index 000000000..d564b5c13 --- /dev/null +++ b/tools/xml2md_gfm.xsl @@ -0,0 +1,80 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- + +xml2md_gfm.xsl +============== + +This XSLT stylesheet is a complement to xml2md.xsl with templates supporting GitHub-flavored Markdown extensions (tables, strike-through). + +--> + +<xsl:stylesheet + version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:md="http://commonmark.org/xml/1.0"> + + +<!-- Import commonmark XSL --> + +<xsl:import href="xml2md.xsl"/> +<xsl:template match="/"> + <xsl:apply-imports/> +</xsl:template> + +<!-- params --> + +<xsl:output method="text" encoding="utf-8"/> + + +<!-- Table --> + +<xsl:template match="md:table"> + <xsl:apply-templates select="." mode="indent-block"/> + <xsl:apply-templates select="md:*"/> +</xsl:template> + +<xsl:template match="md:table_header"> + <xsl:text>| </xsl:text> + <xsl:apply-templates select="md:*"/> + <xsl:text>&#xa; | </xsl:text> + <xsl:for-each select="md:table_cell"> + <xsl:choose> + <xsl:when test="@align = 'right'"> + <xsl:text> ---: |</xsl:text> + </xsl:when> + <xsl:when test="@align = 'left'"> + <xsl:text> :--- |</xsl:text> + </xsl:when> + <xsl:when test="@align = 'center'"> + <xsl:text> :---: |</xsl:text> + </xsl:when> + <xsl:otherwise> + <xsl:text> --- |</xsl:text> + </xsl:otherwise> + </xsl:choose> + </xsl:for-each> + <xsl:text>&#xa;</xsl:text> +</xsl:template> + +<xsl:template match="md:table_cell"> + <xsl:apply-templates select="md:*"/> + <xsl:text>| </xsl:text> +</xsl:template> + +<xsl:template match="md:table_row"> + <xsl:text>| </xsl:text> + <xsl:apply-templates select="md:*"/> + <xsl:text>&#xa;</xsl:text> +</xsl:template> + + +<!-- Striked-through --> + +<xsl:template match="md:strikethrough"> + <xsl:text>~~</xsl:text> + <xsl:apply-templates select="md:*"/> + <xsl:text>~~</xsl:text> +</xsl:template> + +</xsl:stylesheet> From 1512c9cf5671583d21d0c0f7e936e830b770e803 Mon Sep 
17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Wed, 26 Sep 2018 09:57:59 +1000 Subject: [PATCH 135/218] 0.28.3.gfm.17 --- CMakeLists.txt | 2 +- changelog.txt | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d8aa11f4..b332874d8 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark-gfm") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 3) -set(PROJECT_VERSION_GFM 16) +set(PROJECT_VERSION_GFM 17) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark-gfm tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 81ef42cd9..08a8f60bc 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,12 @@ +[0.28.3.gfm.17] + + * Allow extension to provide opaque allocation function (Nicolás Ojeda + Bär, #89). + * Upstream optimisations and fixes. + * Extensions can add custom XML attributes (#116). + * Support for GFM extensions in cmark XML to CommonMark XSLT converter + (Maëlle Salmon, #117). + [0.28.3.gfm.16] * Do not percent-encode tildes (~) in HTML attribute values (#110). 
From 4ec9b35421fa7aed9b67a4d1ed5c1894e6498f1e Mon Sep 17 00:00:00 2001 From: Ashe Connor <kivikakk@github.com> Date: Mon, 8 Oct 2018 10:16:50 +1100 Subject: [PATCH 136/218] Be more strict on matching strikethrough (#120) --- extensions/strikethrough.c | 5 ++++- test/extensions.txt | 10 ++++++++-- test/regression.txt | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/extensions/strikethrough.c b/extensions/strikethrough.c index 87a2ac7c3..8145d23b0 100644 --- a/extensions/strikethrough.c +++ b/extensions/strikethrough.c @@ -28,7 +28,7 @@ static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser, res->start_column = cmark_inline_parser_get_column(inline_parser) - delims; if ((left_flanking || right_flanking) && - (!(parser->options & CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE) || delims == 2)) { + (delims == 2 || (!(parser->options & CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE) && delims == 1))) { cmark_inline_parser_push_delimiter(inline_parser, character, left_flanking, right_flanking, res); } @@ -46,6 +46,9 @@ static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser, strikethrough = opener->inl_text; + if (opener->inl_text->as.literal.len != closer->inl_text->as.literal.len) + goto done; + if (!cmark_node_set_type(strikethrough, CMARK_NODE_STRIKETHROUGH)) goto done; diff --git a/test/extensions.txt b/test/extensions.txt index 5a1c3d990..0b578079a 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -492,12 +492,18 @@ No, they are not~ This ~is ~ legit~ isn't ~ legit. -This is just ~~~~~one~~~~~ huge strikethrough. +This is not ~~~~~one~~~~~ huge strikethrough. + +~one~ ~~two~~ ~~~three~~~ + +No ~mismatch~~ . 
<p>These are ~not strikethroughs.</p> <p>No, they are not~</p> <p>This <del>is ~ legit</del> isn't ~ legit.</p> -<p>This is just <del>one</del> huge strikethrough.</p> +<p>This is not ~~~~~one~~~~~ huge strikethrough.</p> +<p><del>one</del> <del>two</del> ~~~three~~~</p> +<p>No ~mismatch~~</p> ```````````````````````````````` diff --git a/test/regression.txt b/test/regression.txt index b62689a6a..5aa1b3449 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -167,7 +167,7 @@ cmark-gfm strikethrough rules ```````````````````````````````` example strikethrough This ~text~~~~ is ~~~~curious~. . -<p>This <del>text</del> is <del>curious</del>.</p> +<p>This <del>text~~~~ is ~~~~curious</del>.</p> ```````````````````````````````` `~` should not be escaped in href — https://github.com/github/markup/issues/311 From 975a9d1e4b8306dd3472f337754d5c5144c40d09 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Thu, 11 Oct 2018 09:21:40 +1100 Subject: [PATCH 137/218] Remove /debian by suggestion in #122 --- debian/changelog | 11 ---- debian/cmark-gfm.install | 2 - debian/compat | 1 - debian/control | 19 ------ debian/copyright | 128 --------------------------------------- debian/rules | 14 ----- debian/source/format | 1 - debian/source/options | 1 - debian/watch | 4 -- 9 files changed, 181 deletions(-) delete mode 100644 debian/changelog delete mode 100644 debian/cmark-gfm.install delete mode 100644 debian/compat delete mode 100644 debian/control delete mode 100644 debian/copyright delete mode 100755 debian/rules delete mode 100644 debian/source/format delete mode 100644 debian/source/options delete mode 100644 debian/watch diff --git a/debian/changelog b/debian/changelog deleted file mode 100644 index de68ad759..000000000 --- a/debian/changelog +++ /dev/null @@ -1,11 +0,0 @@ -cmark-gfm (0.28.3.gfm.12-wmo1) unstable; urgency=medium - - * Repackage for GitHub flavor, native GIT package - - -- Joachim Nilsson <joachim.nilsson@westermo.se> Mon, 04 Jun 2018 
11:02:12 +0200 - -cmark (0.26.1-1) unstable; urgency=low - - * Initial release (closes: #833682) - - -- Peter Eisentraut <petere@debian.org> Mon, 05 Sep 2016 01:53:18 +0000 diff --git a/debian/cmark-gfm.install b/debian/cmark-gfm.install deleted file mode 100644 index 63c303b03..000000000 --- a/debian/cmark-gfm.install +++ /dev/null @@ -1,2 +0,0 @@ -usr/bin/ -usr/share/man/man1/ diff --git a/debian/compat b/debian/compat deleted file mode 100644 index ec635144f..000000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/debian/control b/debian/control deleted file mode 100644 index f81b2feaa..000000000 --- a/debian/control +++ /dev/null @@ -1,19 +0,0 @@ -Source: cmark-gfm -Build-Depends: cmake, debhelper (>= 9) -Homepage: https://github.com/github/cmark -Maintainer: Joachim Nilsson <joachim.nilsson@westermo.se> -Priority: optional -Section: text -Standards-Version: 3.9.8 - -Package: cmark-gfm -Architecture: any -Depends: ${misc:Depends}, ${shlibs:Depends} -Description: CommonMark parsing and rendering program, GitHub flavor - cmark is the C reference implementation of CommonMark, a rationalized - version of Markdown syntax with a spec. This package provides a - command-line program (cmark) for parsing and rendering CommonMark - documents. - . - This is the GitHub flavored Markdown with extensions for org-mode style - tables, auto-linking, and more. diff --git a/debian/copyright b/debian/copyright deleted file mode 100644 index e2cdefb47..000000000 --- a/debian/copyright +++ /dev/null @@ -1,128 +0,0 @@ -Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ -Upstream-Contact: https://github.com/commonmark/cmark -Source: https://github.com/commonmark/cmark/releases - -Files: * -Copyright: 2014, John MacFarlane -License: BSD-2-clause - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - . 
- * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - . - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - . - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Files: FindAsan.cmake -Copyright: 2013, Matthew Arsenault -License: Expat - -Files: bench/statistics.py -Copyright: 2013, Steven D'Aprano <steve+python@pearwood.info> -License: Apache - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - . - http://www.apache.org/licenses/LICENSE-2.0 - . - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - -Files: data/CaseFolding-3.2.0.txt -Copyright: 1991-2016 Unicode, Inc. All rights reserved. 
-License: Unicode - Distributed under the Terms of Use in http://www.unicode.org/copyright.html. - . - Permission is hereby granted, free of charge, to any person obtaining - a copy of the Unicode data files and any associated documentation - (the "Data Files") or Unicode software and any associated documentation - (the "Software") to deal in the Data Files or Software - without restriction, including without limitation the rights to use, - copy, modify, merge, publish, distribute, and/or sell copies of - the Data Files or Software, and to permit persons to whom the Data Files - or Software are furnished to do so, provided that either - (a) this copyright and permission notice appear with all copies - of the Data Files or Software, or - (b) this copyright and permission notice appear in associated - Documentation. - . - THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF - ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE - WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS - NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL - DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - PERFORMANCE OF THE DATA FILES OR SOFTWARE. - . - Except as contained in this notice, the name of a copyright holder - shall not be used in advertising or otherwise to promote the sale, - use or other dealings in these Data Files or Software without prior - written authorization of the copyright holder. -Comment: - The original file is at - <http://www.unicode.org/Public/3.2-Update/CaseFolding-3.2.0.txt>. - The license applies per - <http://www.unicode.org/copyright.html#License>. 
- -Files: src/houdini.h src/houdini_href_e.c src/houdini_html_e.c src/houdini_html_u.c -Copyright: 2012, Vicent Martí -License: Expat - -Files: src/buffer.h src/buffer.c src/chunk.h -Copyright: 2012, Github, Inc. -License: Expat - -Files: src/utf8.c src/utf8.c -Copyright: 2009, Public Software Group e. V., Berlin, Germany -License: Expat - -Files: test/normalize.py -Copyright: 2013, Karl Dubost -License: Expat - -Files: test/spec.txt -Copyright: 2014-15, John MacFarlane -License: CC-BY-SA - Released under the Creative Commons CC-BY-SA 4.0 license: - <http://creativecommons.org/licenses/by-sa/4.0/>. - -License: Expat - Permission is hereby granted, free of charge, to any person obtaining a copy of - this software and associated documentation files (the "Software"), to deal in - the Software without restriction, including without limitation the rights to - use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies - of the Software, and to permit persons to whom the Software is furnished to do - so, subject to the following conditions: - . - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - . - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
diff --git a/debian/rules b/debian/rules deleted file mode 100755 index ee0a94eaf..000000000 --- a/debian/rules +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/make -f -#export DH_VERBOSE=1 - -%: - dh $@ --buildsystem=cmake --parallel - -override_dh_auto_configure: - dh_auto_configure -- -DCMARK_SHARED=OFF -DCMAKE_INSTALL_PREFIX=/usr - -override_dh_auto_install: - dh_auto_install --destdir=debian/tmp - -override_dh_strip: - dh_strip --no-automatic-dbgsym diff --git a/debian/source/format b/debian/source/format deleted file mode 100644 index 89ae9db8f..000000000 --- a/debian/source/format +++ /dev/null @@ -1 +0,0 @@ -3.0 (native) diff --git a/debian/source/options b/debian/source/options deleted file mode 100644 index ec7126d16..000000000 --- a/debian/source/options +++ /dev/null @@ -1 +0,0 @@ -extend-diff-ignore = __pycache__ diff --git a/debian/watch b/debian/watch deleted file mode 100644 index db876cbfb..000000000 --- a/debian/watch +++ /dev/null @@ -1,4 +0,0 @@ -version=4 -opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%cmark-$1.tar.gz%" \ - https://github.com/github/cmark/releases \ - (?:.*?/)?v?(\d[\d.]*)\.tar\.gz debian uupdate From 6e73dea2d8ed5c896065a561bda6cfa1db8c5b6e Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Tue, 16 Oct 2018 10:34:12 +1100 Subject: [PATCH 138/218] update travis-ci link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 14d7abd67..5e7b76f75 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ cmark-gfm ========= -[![Build Status]](https://travis-ci.org/github/cmark) +[![Build Status]](https://travis-ci.org/github/cmark-gfm) [![Windows Build Status]](https://ci.appveyor.com/project/github/cmark) `cmark-gfm` is an extended version of the C reference implementation of From a9ed0e24e52b5baf4c901bd0ddc0db8294438349 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Tue, 16 Oct 2018 10:36:10 +1100 Subject: [PATCH 139/218] fix image target --- 
README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e7b76f75..354c09a94 100644 --- a/README.md +++ b/README.md @@ -200,7 +200,7 @@ most of the C library's API and its test harness. [cmake]: http://www.cmake.org/download/ [re2c]: http://re2c.org [commonmark.js]: https://github.com/commonmark/commonmark.js -[Build Status]: https://img.shields.io/travis/github/cmark/master.svg?style=flat +[Build Status]: https://img.shields.io/travis/github/cmark-gfm/master.svg?style=flat [Windows Build Status]: https://ci.appveyor.com/api/projects/status/wv7ifhqhv5itm3d5?svg=true [american fuzzy lop]: http://lcamtuf.coredump.cx/afl/ [libFuzzer]: http://llvm.org/docs/LibFuzzer.html From f64691b85dff279a4bec5a0694ac3793e64946ab Mon Sep 17 00:00:00 2001 From: Ashe Connor <kivikakk@github.com> Date: Wed, 17 Oct 2018 14:42:51 +1100 Subject: [PATCH 140/218] Default to safe operation (#123) * default to safe * fix setter test --- README.md | 17 +++++++++-------- api_test/main.c | 4 ++-- man/man3/cmark-gfm.3 | 35 ++++++++++++++++++----------------- src/cmark-gfm.h | 16 ++++++++-------- src/html.c | 8 ++++---- src/main.c | 6 +++--- test/cmark-fuzz.c | 2 +- test/cmark.py | 3 ++- 8 files changed, 47 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 354c09a94..0b2975742 100644 --- a/README.md +++ b/README.md @@ -163,14 +163,15 @@ be found in the man pages in the `man` subdirectory. Security -------- -By default, the library will pass through raw HTML and potentially -dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`). - -It is recommended that users either disable this potentially unsafe -feature by using the option `CMARK_OPT_SAFE` (or `--safe` with the -command-line program), or run the output through an HTML sanitizer -to protect against -[XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting). 
+By default, the library will scrub raw HTML and potentially dangerous links +(`javascript:`, `vbscript:`, `data:`, `file:`). Please note this is the +_opposite_ of the upstream [`cmark`](https://github.com/CommonMark/cmark) +library, a change introduced in `cmark-gfm` in version `0.28.3.gfm.18`. + +To allow these, use the option `CMARK_OPT_UNSAFE` (or `--unsafe`) with the +command line program. If doing so, we recommend you use a HTML sanitizer +specific to your needs to protect against [XSS +attacks](http://en.wikipedia.org/wiki/Cross-site_scripting). Contributing ------------ diff --git a/api_test/main.c b/api_test/main.c index 92f78c877..246192954 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -178,7 +178,7 @@ static void accessors(test_batch_runner *runner) { OK(runner, cmark_node_set_literal(string, literal + sizeof("prefix")), "set_literal suffix"); - char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL); + char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT | CMARK_OPT_UNSAFE, NULL); static const char expected_html[] = "<h3>Header</h3>\n" "<ol start=\"3\">\n" @@ -910,7 +910,7 @@ static void test_safe(test_batch_runner *runner) { "a>\n[link](JAVAscript:alert('hi'))\n![image](" "file:my.js)\n"; char *html = cmark_markdown_to_html(raw_html, sizeof(raw_html) - 1, - CMARK_OPT_DEFAULT | CMARK_OPT_SAFE); + CMARK_OPT_DEFAULT); STR_EQ(runner, html, "<!-- raw HTML omitted -->\n<p><!-- raw HTML omitted " "-->hi<!-- raw HTML omitted -->\n<a " "href=\"\">link</a>\n<img src=\"\" alt=\"image\" " diff --git a/man/man3/cmark-gfm.3 b/man/man3/cmark-gfm.3 index 75749121f..f218abb1c 100644 --- a/man/man3/cmark-gfm.3 +++ b/man/man3/cmark-gfm.3 @@ -1,4 +1,4 @@ -.TH cmark-gfm 3 "September 17, 2018" "LOCAL" "Library Functions Manual" +.TH cmark-gfm 3 "October 17, 2018" "LOCAL" "Library Functions Manual" .SH NAME .PP @@ -852,22 +852,6 @@ Include a \f[C]data\-sourcepos\f[] attribute on all block elements. 
.PP Render \f[C]softbreak\f[] elements as hard line breaks. -.PP -.nf -\fC -.RS 0n -#define CMARK_OPT_SAFE (1 << 3) -.RE -\f[] -.fi - -.PP -Suppress raw HTML and unsafe links (\f[C]javascript:\f[], -\f[C]vbscript:\f[], \f[C]file:\f[], and \f[C]data:\f[], except for -\f[C]image/png\f[], \f[C]image/gif\f[], \f[C]image/jpeg\f[], or -\f[C]image/webp\f[] mime types). Raw HTML is replaced by a placeholder -HTML comment. Unsafe links are replaced by empty strings. - .PP .nf \fC @@ -995,6 +979,23 @@ Use style attributes to align table cells instead of align attributes. Include the remainder of the info string in code blocks in a separate attribute. +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_UNSAFE (1 << 17) +.RE +\f[] +.fi + +.PP +Allow raw HTML and unsafe links, \f[C]javascript:\f[], +\f[C]vbscript:\f[], \f[C]file:\f[], and all \f[C]data:\f[] URLs \-\- by +default, only \f[C]image/png\f[], \f[C]image/gif\f[], +\f[C]image/jpeg\f[], or \f[C]image/webp\f[] mime types are allowed. +Without this option, raw HTML is replaced by a placeholder HTML comment, +and unsafe links are replaced by empty strings. + .SS Version information diff --git a/src/cmark-gfm.h b/src/cmark-gfm.h index 02a1e71c0..6baac9a8c 100644 --- a/src/cmark-gfm.h +++ b/src/cmark-gfm.h @@ -690,14 +690,6 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar */ #define CMARK_OPT_HARDBREAKS (1 << 2) -/** Suppress raw HTML and unsafe links (`javascript:`, `vbscript:`, - * `file:`, and `data:`, except for `image/png`, `image/gif`, - * `image/jpeg`, or `image/webp` mime types). Raw HTML is replaced - * by a placeholder HTML comment. Unsafe links are replaced by - * empty strings. - */ -#define CMARK_OPT_SAFE (1 << 3) - /** Render `softbreak` elements as spaces. 
*/ #define CMARK_OPT_NOBREAKS (1 << 4) @@ -746,6 +738,14 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar */ #define CMARK_OPT_FULL_INFO_STRING (1 << 16) +/** Allow raw HTML and unsafe links, `javascript:`, `vbscript:`, `file:`, and + * all `data:` URLs -- by default, only `image/png`, `image/gif`, `image/jpeg`, + * or `image/webp` mime types are allowed. Without this option, raw HTML is + * replaced by a placeholder HTML comment, and unsafe links are replaced by + * empty strings. + */ +#define CMARK_OPT_UNSAFE (1 << 17) + /** * ## Version information */ diff --git a/src/html.c b/src/html.c index 7a34f7e91..d4d6cf13e 100644 --- a/src/html.c +++ b/src/html.c @@ -227,7 +227,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, case CMARK_NODE_HTML_BLOCK: cmark_html_render_cr(html); - if (options & CMARK_OPT_SAFE) { + if (!(options & CMARK_OPT_UNSAFE)) { cmark_strbuf_puts(html, "<!-- raw HTML omitted -->"); } else if (renderer->filter_extensions) { filter_html_block(renderer, node->as.literal.data, node->as.literal.len); @@ -305,7 +305,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, break; case CMARK_NODE_HTML_INLINE: - if (options & CMARK_OPT_SAFE) { + if (!(options & CMARK_OPT_UNSAFE)) { cmark_strbuf_puts(html, "<!-- raw HTML omitted -->"); } else { filtered = false; @@ -354,7 +354,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, case CMARK_NODE_LINK: if (entering) { cmark_strbuf_puts(html, "<a href=\""); - if (!((options & CMARK_OPT_SAFE) && + if (!(!(options & CMARK_OPT_UNSAFE) && scan_dangerous_url(&node->as.link.url, 0))) { houdini_escape_href(html, node->as.link.url.data, node->as.link.url.len); @@ -372,7 +372,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, case CMARK_NODE_IMAGE: if (entering) { cmark_strbuf_puts(html, "<img src=\""); - if (!((options & CMARK_OPT_SAFE) && + if (!(!(options & CMARK_OPT_UNSAFE) 
&& scan_dangerous_url(&node->as.link.url, 0))) { houdini_escape_href(html, node->as.link.url.data, node->as.link.url.len); diff --git a/src/main.c b/src/main.c index 877735f6b..cb9610e1d 100644 --- a/src/main.c +++ b/src/main.c @@ -37,7 +37,7 @@ void print_usage() { printf(" --sourcepos Include source position attribute\n"); printf(" --hardbreaks Treat newlines as hard line breaks\n"); printf(" --nobreaks Render soft line breaks as spaces\n"); - printf(" --safe Suppress raw HTML and dangerous URLs\n"); + printf(" --unsafe Allow raw HTML and dangerous URLs\n"); printf(" --smart Use smart punctuation\n"); printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n"); printf(" --github-pre-lang Use GitHub-style <pre lang> for code blocks\n"); @@ -150,8 +150,8 @@ int main(int argc, char *argv[]) { options |= CMARK_OPT_SMART; } else if (strcmp(argv[i], "--github-pre-lang") == 0) { options |= CMARK_OPT_GITHUB_PRE_LANG; - } else if (strcmp(argv[i], "--safe") == 0) { - options |= CMARK_OPT_SAFE; + } else if (strcmp(argv[i], "--unsafe") == 0) { + options |= CMARK_OPT_UNSAFE; } else if (strcmp(argv[i], "--validate-utf8") == 0) { options |= CMARK_OPT_VALIDATE_UTF8; } else if (strcmp(argv[i], "--liberal-html-tag") == 0) { diff --git a/test/cmark-fuzz.c b/test/cmark-fuzz.c index 3a263efc9..be571d842 100644 --- a/test/cmark-fuzz.c +++ b/test/cmark-fuzz.c @@ -9,7 +9,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { int options = *(const int *)data; /* Mask off valid option bits */ - options = options & (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_SAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART); + options = options & (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART); /* Remainder of input is the markdown */ const char *markdown = (const char *)(data + sizeof(options)); diff --git a/test/cmark.py 
b/test/cmark.py index c9f484b06..f9a2909a3 100644 --- a/test/cmark.py +++ b/test/cmark.py @@ -57,7 +57,7 @@ def to_html(lib, extlib, text, extensions): render_html = lib.cmark_render_html render_html.restype = c_char_p render_html.argtypes = [c_void_p, c_int, c_void_p] - result = render_html(document, 0, syntax_extensions).decode('utf-8') + result = render_html(document, 1 << 17, syntax_extensions).decode('utf-8') return [0, result, ''] def to_commonmark(lib, extlib, text, extensions): @@ -77,6 +77,7 @@ def __init__(self, prog=None, library_dir=None, extensions=None): self.extensions = extensions.split() if prog: + prog += ' --unsafe' extsfun = lambda exts: ''.join([' -e ' + e for e in set(exts)]) self.to_html = lambda x, exts=[]: pipe_through_prog(prog + extsfun(exts + self.extensions), x) self.to_commonmark = lambda x, exts=[]: pipe_through_prog(prog + ' -t commonmark' + extsfun(exts + self.extensions), x) From 304bce082bb73ebb6f68fd9b2ea66d1dbe7f96ba Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Wed, 17 Oct 2018 14:45:50 +1100 Subject: [PATCH 141/218] 0.28.3.gfm.18 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b332874d8..9f298aa0d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark-gfm") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 3) -set(PROJECT_VERSION_GFM 17) +set(PROJECT_VERSION_GFM 18) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark-gfm tests and enable testing" ON) From 198669d953e01aeb976e16b21fceafcbb5a37e09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20D=C3=A9coret?= <475285+Xadeck@users.noreply.github.com> Date: Thu, 18 Oct 2018 01:32:04 +0200 Subject: [PATCH 142/218] Prevent out-of-bound memory access. (#124) * Prevent out-of-bound memory access. 
Before the change, the scan_delimiters function could return max_delims+1. The scan_delimiters function is called by extensions/strikethrough with: 18: inline_parser, sizeof(buffer)-1, '~' where buffer is: 12: char buffer[101]; So scan_delimiters can return delims = 101. A few lines later, it is used to 0-terminate the buffer with: 23: buffer[delims] = 0; so it effectively does buffer[101] = 0, which is an out-of-bound access. A test was added. That's the test that I used inside Google to reproduce the bug, thanks to internal infrastructure (sanitizers) to detect bad memory access. Unfortunately, I couldn't reproduce the bug with the cmark-gfm build suite. I tried `make asan test` but it gave nothing. I'm still including the test for reference. * Limit the recursion in autolink extension. This is not really a bug, but it's possible to send an input markdown consisting of lots of @ signs, and the recursion will cause memory explosion. The limit depends on the running environment, but there is no reason to accept arbitrarily long sequence of @, so let's just cut off at 1000. 
* Reverted change --- src/inlines.c | 2 +- test/extensions.txt | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/inlines.c b/src/inlines.c index 3ed19588e..c8dd1e9fb 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1551,7 +1551,7 @@ int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, } } - while (peek_char(parser) == c && numdelims <= max_delims) { + while (peek_char(parser) == c && numdelims < max_delims) { numdelims++; advance(parser); } diff --git a/test/extensions.txt b/test/extensions.txt index 0b578079a..3894ec575 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -506,6 +506,9 @@ No ~mismatch~~ <p>No ~mismatch~~</p> ```````````````````````````````` +Using 200 tilde since it overflows the internal buffer +size (100) for parsing delimiters in inlines.c +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~striked~ ## Autolinks From 21f7420f42cd970732c65155befccb68e5b0144a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20D=C3=A9coret?= <475285+Xadeck@users.noreply.github.com> Date: Thu, 18 Oct 2018 01:33:40 +0200 Subject: [PATCH 143/218] Limit the recursion in autolink extension. (#125) This is not really a bug, but it's possible to send an input markdown consisting of lots of @ signs, and the recursion will cause memory explosion. The limit depends on the running environment, but there is no reason to accept arbitrarily long sequence of @, so let's just cut off at 1000. --- extensions/autolink.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/extensions/autolink.c b/extensions/autolink.c index 03a135f17..41564ee4c 100644 --- a/extensions/autolink.c +++ b/extensions/autolink.c @@ -269,7 +269,11 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser, // inline was finished in inlines.c. 
} -static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset) { +static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset, int depth) { + // postprocess_text can recurse very deeply if there is a very long line of + // '@' only. Stop at a reasonable depth to ensure it cannot crash. + if (depth > 1000) return; + size_t link_end; uint8_t *data = text->as.literal.data, *at; @@ -307,7 +311,7 @@ static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset) } if (rewind == 0 || ns > 0) { - postprocess_text(parser, text, max_rewind + 1 + offset); + postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1); return; } @@ -327,14 +331,14 @@ static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset) if (link_end < 2 || nb != 1 || np == 0 || (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) { - postprocess_text(parser, text, max_rewind + 1 + offset); + postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1); return; } link_end = autolink_delim(data, link_end); if (link_end == 0) { - postprocess_text(parser, text, max_rewind + 1 + offset); + postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1); return; } @@ -369,7 +373,7 @@ static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset) text->as.literal.len = offset + max_rewind - rewind; text->as.literal.data[text->as.literal.len] = 0; - postprocess_text(parser, post, 0); + postprocess_text(parser, post, 0, depth + 1); } static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) { @@ -396,7 +400,7 @@ static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser } if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) { - postprocess_text(parser, node, 0); + postprocess_text(parser, node, 0, /*depth*/0); } } From bbb9dd1d3d91762561648e699156a3e696657585 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Xavier=20D=C3=A9coret?= <475285+Xadeck@users.noreply.github.com> Date: Thu, 18 Oct 2018 01:34:07 +0200 Subject: [PATCH 144/218] Add plaintext rendering for footnotes. Otherwise, it crashes in debug (#126) mode. Reproduction: ``` make debug build/src/cmark-gfm --to plaintext --extension footnotes <<EOF Paragraph with [^1] footnote. [^1]:this is the footnote EOF aborted ``` After the change, it is: ``` make debug build/src/cmark-gfm --to plaintext --extension footnotes <<EOF Paragraph with [^1] footnote. [^1]:this is the footnote EOF Paragraph with [^1] footnote. [^1]: this is the footnote ``` --- src/plaintext.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/plaintext.c b/src/plaintext.c index 910adf466..b25e4a396 100644 --- a/src/plaintext.c +++ b/src/plaintext.c @@ -191,6 +191,28 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, case CMARK_NODE_IMAGE: break; + case CMARK_NODE_FOOTNOTE_REFERENCE: + if (entering) { + LIT("[^"); + OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL); + LIT("]"); + } + break; + + case CMARK_NODE_FOOTNOTE_DEFINITION: + if (entering) { + renderer->footnote_ix += 1; + LIT("[^"); + char n[32]; + snprintf(n, sizeof(n), "%d", renderer->footnote_ix); + OUT(n, false, LITERAL); + LIT("]: "); + + cmark_strbuf_puts(renderer->prefix, " "); + } else { + cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4); + } + break; default: assert(false); break; From bc48e4fc160cf55d5014284bd231e2b2e79ba65f Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Thu, 18 Oct 2018 10:50:01 +1100 Subject: [PATCH 145/218] 0.28.3.gfm.19 --- CMakeLists.txt | 2 +- changelog.txt | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f298aa0d..c47e26941 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark-gfm") set(PROJECT_VERSION_MAJOR 0) 
set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 3) -set(PROJECT_VERSION_GFM 18) +set(PROJECT_VERSION_GFM 19) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark-gfm tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index 08a8f60bc..13f620c57 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,14 @@ +[0.28.3.gfm.19] + + * Prevent out-of-bound memory access in strikethrough matcher (Xavier Décoret, #124). + * Limit recursion in autolink extension (Xavier Décoret, #125). + * Add plaintext rendering for footnotes (Xavier Décoret, #126). + +[0.28.3.gfm.18] + + * Match strikethrough more strictly (#120). + * Default to safe operation (#123). + [0.28.3.gfm.17] * Allow extension to provide opaque allocation function (Nicolás Ojeda From 3c4da5bcbdb0d07c09248d9f729585fb15cb6902 Mon Sep 17 00:00:00 2001 From: Phil Turnbull <philipturnbull@github.com> Date: Thu, 18 Oct 2018 18:24:27 -0400 Subject: [PATCH 146/218] Add GFM extensions to fuzzing harness (#127) * Check for empty buffer when rendering For empty documents, `->size` is zero so `renderer.buffer->ptr[renderer.buffer->size - 1]` will cause an out-of-bounds read. Empty buffers always point to the global `cmark_strbuf__initbuf` buffer so we read `cmark_strbuf__initbuf[-1]`. * Don't discard empty fuzz test-cases We currently discard fuzz test-cases that are empty but empty inputs are valid markdown. This improves the fuzzing coverage slightly. * Fuzz width parameter too Allow the `width` parameter to be generated too so we get better fuzz-coverage. * Allow GFM-specific options * Load GFM extensions in fuzz harness * Avoid out-of-bounds read in _ext_scan_at An off-by-one error in _ext_scan_at can trigger an out-of-bounds read with `offset == len`. 
This can be triggered when scanning the end of a table header: ``` ==14==WARNING: MemorySanitizer: use-of-uninitialized-value 0 0x574410 in _scan_table_cell /src/octofuzz/extensions/ext_scanners.c:557:9 1 0x57093f in _ext_scan_at /src/octofuzz/extensions/ext_scanners.c:15:11 2 0x565fb8 in row_from_string /src/octofuzz/extensions/table.c:126:20 3 0x55b360 in try_opening_table_header /src/octofuzz/extensions/table.c:182:16 4 0x55b360 in try_opening_table_block /src/octofuzz/extensions/table.c:330 5 0x59377e in open_new_blocks /src/octofuzz/src/blocks.c:1216:27 6 0x59377e in S_process_line /src/octofuzz/src/blocks.c:1405 7 0x58ba2e in cmark_parser_finish /src/octofuzz/src/blocks.c:1432:5 8 0x55a1ea in LLVMFuzzerTestOneInput /src/octofuzz/test/cmark-fuzz.c:64:23 9 0x4d256f in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /src/libfuzzer/FuzzerLoop.cpp:463:13 10 0x49a14c in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) /src/libfuzzer/FuzzerDriver.cpp:273:6 11 0x4adcbe in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /src/libfuzzer/FuzzerDriver.cpp:689:9 12 0x4992c1 in main /src/libfuzzer/FuzzerMain.cpp:20:10 13 0x7fcdd8b4c82f in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2082f) 14 0x41ea18 in _start (/work/work891511962/cmark_fuzzer+0x41ea18) Unnitialized value was created by a heap allocation 0 0x44d6d5 in realloc /src/llvm/projects/compiler-rt/lib/msan/msan_interceptors.cc:847 1 0x576cfc in xrealloc /src/octofuzz/src/cmark.c:27:19 2 0x65ae70 in cmark_strbuf_grow /src/octofuzz/src/buffer.c:58:31 3 0x65ae70 in cmark_strbuf_init /src/octofuzz/src/buffer.c:32 4 0x59da38 in make_block /src/octofuzz/src/blocks.c:69:3 5 0x59da38 in add_child /src/octofuzz/src/blocks.c:379 6 0x59da38 in add_text_to_container /src/octofuzz/src/blocks.c:1343 7 0x59da38 in S_process_line /src/octofuzz/src/blocks.c:1409 8 0x58b307 in S_parser_feed /src/octofuzz/src/blocks.c:702:9 9 0x55a1ca in 
LLVMFuzzerTestOneInput /src/octofuzz/test/cmark-fuzz.c:63:5 10 0x4d256f in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /src/libfuzzer/FuzzerLoop.cpp:463:13 11 0x49a14c in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) /src/libfuzzer/FuzzerDriver.cpp:273:6 12 0x4adcbe in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /src/libfuzzer/FuzzerDriver.cpp:689:9 13 0x4992c1 in main /src/libfuzzer/FuzzerMain.cpp:20:10 14 0x7fcdd8b4c82f in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2082f) ``` --- extensions/ext_scanners.c | 881 +++++++++++++++---------------------- extensions/ext_scanners.re | 2 +- src/render.c | 2 +- test/cmark-fuzz.c | 74 +++- test/fuzzing_dictionary | 18 + 5 files changed, 439 insertions(+), 538 deletions(-) diff --git a/extensions/ext_scanners.c b/extensions/ext_scanners.c index 7e0f5f23b..2590cea6b 100644 --- a/extensions/ext_scanners.c +++ b/extensions/ext_scanners.c @@ -1,12 +1,12 @@ -/* Generated by re2c 0.15.3 */ -#include <stdlib.h> +/* Generated by re2c 1.1.1 */ #include "ext_scanners.h" +#include <stdlib.h> bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, int len, bufsize_t offset) { bufsize_t res; - if (ptr == NULL || offset > len) { + if (ptr == NULL || offset >= len) { return 0; } else { unsigned char lim = ptr[len]; @@ -44,35 +44,35 @@ bufsize_t _scan_table_start(const unsigned char *p) { if (yych <= 0x1F) { if (yych <= '\t') { if (yych <= 0x08) - goto yy6; - goto yy3; + goto yy3; + goto yy4; } else { if (yych <= '\n') goto yy2; if (yych <= '\f') - goto yy3; - goto yy6; + goto yy4; + goto yy3; } } else { if (yych <= '-') { if (yych <= ' ') - goto yy3; + goto yy4; if (yych <= ',') - goto yy6; + goto yy3; goto yy5; } else { if (yych == ':') - goto yy4; - goto yy6; + goto yy6; + goto yy3; } } } else { if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '|') - goto yy3; + goto yy4; if (yych <= 0x7F) - goto yy6; + goto yy3; 
} else { if (yych <= 0xDF) goto yy7; @@ -83,80 +83,68 @@ bufsize_t _scan_table_start(const unsigned char *p) { } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy14; + goto yy11; if (yych <= 0xEF) goto yy10; - goto yy11; + goto yy12; } else { if (yych <= 0xF3) - goto yy12; - if (yych <= 0xF4) goto yy13; + if (yych <= 0xF4) + goto yy14; } } } yy2 : { return 0; } yy3: - yych = *(marker = ++p); - if (yybm[0 + yych] & 128) { - goto yy22; - } - if (yych <= '\f') { - if (yych == '\t') - goto yy29; - if (yych <= '\n') - goto yy2; - goto yy29; - } else { - if (yych <= ' ') { - if (yych <= 0x1F) - goto yy2; - goto yy29; - } else { - if (yych == ':') - goto yy31; - goto yy2; - } - } + ++p; + goto yy2; yy4: yych = *(marker = ++p); - if (yybm[0 + yych] & 128) { - goto yy22; + if (yybm[0 + yych] & 64) { + goto yy15; } + if (yych == '-') + goto yy17; + if (yych == ':') + goto yy19; goto yy2; yy5: yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy22; + goto yy17; } if (yych <= ' ') { if (yych <= 0x08) goto yy2; if (yych <= '\r') - goto yy16; + goto yy21; if (yych <= 0x1F) goto yy2; - goto yy16; + goto yy21; } else { if (yych <= ':') { if (yych <= '9') goto yy2; - goto yy15; + goto yy20; } else { if (yych == '|') - goto yy16; + goto yy21; goto yy2; } } yy6: - yych = *++p; + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy17; + } goto yy2; yy7: yych = *++p; if (yych <= 0x7F) goto yy8; if (yych <= 0xBF) - goto yy6; + goto yy3; yy8: p = marker; goto yy2; @@ -176,14 +164,14 @@ bufsize_t _scan_table_start(const unsigned char *p) { goto yy8; yy11: yych = *++p; - if (yych <= 0x8F) + if (yych <= 0x7F) goto yy8; - if (yych <= 0xBF) - goto yy10; + if (yych <= 0x9F) + goto yy7; goto yy8; yy12: yych = *++p; - if (yych <= 0x7F) + if (yych <= 0x8F) goto yy8; if (yych <= 0xBF) goto yy10; @@ -192,191 +180,126 @@ bufsize_t _scan_table_start(const unsigned char *p) { yych = *++p; if (yych <= 0x7F) goto yy8; - if (yych <= 0x8F) + if (yych <= 0xBF) goto yy10; goto yy8; 
yy14: yych = *++p; if (yych <= 0x7F) goto yy8; - if (yych <= 0x9F) - goto yy7; + if (yych <= 0x8F) + goto yy10; goto yy8; yy15: - ++p; - yych = *p; - yy16: + yych = *++p; if (yybm[0 + yych] & 64) { goto yy15; } - if (yych <= '\r') { - if (yych <= 0x08) - goto yy8; - if (yych <= '\n') - goto yy20; + if (yych == '-') + goto yy17; + if (yych == ':') goto yy19; - } else { - if (yych != '|') - goto yy8; - } + goto yy8; yy17: - ++p; - yych = *p; - if (yych <= 0x1F) { - if (yych <= '\n') { - if (yych <= 0x08) - goto yy8; - if (yych <= '\t') - goto yy17; - goto yy20; - } else { - if (yych <= '\f') - goto yy17; - if (yych >= 0x0E) - goto yy8; - } - } else { - if (yych <= '-') { - if (yych <= ' ') - goto yy17; - if (yych <= ',') - goto yy8; - goto yy25; - } else { - if (yych == ':') - goto yy24; - goto yy8; - } - } - yy19: yych = *++p; - if (yych != '\n') - goto yy8; - yy20: - ++p; - { return (bufsize_t)(p - start); } - yy22: - ++p; - yych = *p; if (yybm[0 + yych] & 128) { - goto yy22; + goto yy17; } if (yych <= 0x1F) { if (yych <= '\n') { if (yych <= 0x08) goto yy8; if (yych <= '\t') - goto yy15; - goto yy20; + goto yy20; + goto yy22; } else { if (yych <= '\f') - goto yy15; + goto yy20; if (yych <= '\r') - goto yy19; + goto yy24; goto yy8; } } else { if (yych <= ':') { if (yych <= ' ') - goto yy15; + goto yy20; if (yych <= '9') goto yy8; - goto yy15; + goto yy20; } else { if (yych == '|') - goto yy17; - goto yy8; - } - } - yy24: - ++p; - yych = *p; - if (yych != '-') - goto yy8; - yy25: - ++p; - yych = *p; - if (yych <= ' ') { - if (yych <= '\n') { - if (yych <= 0x08) - goto yy8; - if (yych >= '\n') - goto yy20; - } else { - if (yych <= '\f') - goto yy27; - if (yych <= '\r') - goto yy19; - if (yych <= 0x1F) - goto yy8; - } - } else { - if (yych <= '9') { - if (yych == '-') goto yy25; goto yy8; - } else { - if (yych <= ':') - goto yy27; - if (yych == '|') - goto yy17; - goto yy8; } } - yy27: - ++p; - yych = *p; + yy19: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto 
yy17; + } + goto yy8; + yy20: + yych = *++p; + yy21: if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) goto yy8; - goto yy27; + goto yy20; } else { if (yych <= '\n') - goto yy20; + goto yy22; if (yych <= '\f') - goto yy27; - goto yy19; + goto yy20; + goto yy24; } } else { if (yych <= ' ') { if (yych <= 0x1F) goto yy8; - goto yy27; + goto yy20; } else { if (yych == '|') - goto yy17; + goto yy25; goto yy8; } } - yy29: + yy22: ++p; - yych = *p; - if (yybm[0 + yych] & 128) { + { return (bufsize_t)(p - start); } + yy24: + yych = *++p; + if (yych == '\n') goto yy22; + goto yy8; + yy25: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy17; } - if (yych <= '\f') { - if (yych == '\t') - goto yy29; - if (yych <= '\n') - goto yy8; - goto yy29; + if (yych <= '\r') { + if (yych <= '\t') { + if (yych <= 0x08) + goto yy8; + goto yy25; + } else { + if (yych <= '\n') + goto yy22; + if (yych <= '\f') + goto yy25; + goto yy24; + } } else { if (yych <= ' ') { if (yych <= 0x1F) goto yy8; - goto yy29; + goto yy25; } else { - if (yych != ':') - goto yy8; + if (yych == ':') + goto yy19; + goto yy8; } } - yy31: - ++p; - if (yybm[0 + (yych = *p)] & 128) { - goto yy22; - } - goto yy8; } } @@ -387,261 +310,180 @@ bufsize_t _scan_table_cell(const unsigned char *p) { { unsigned char yych; static const unsigned char yybm[] = { - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 128, 128, 0, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 64, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 128, - 128, 128, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 0, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, + 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, }; yych = *(marker = p); - if (yych <= 0x7F) { - if (yych <= '\r') { - if (yych == '\n') - goto yy34; - if (yych <= '\f') - goto yy36; - goto yy45; - } else { - if (yych <= '\\') { - if (yych <= '[') - goto yy36; - goto yy35; - } else { - if (yych == '|') - goto yy45; - goto yy36; - } - } - } else { - if (yych <= 0xED) { - if (yych <= 0xDF) { - if (yych >= 0xC2) - goto yy37; - } else { - if (yych <= 0xE0) - goto yy39; - if (yych <= 0xEC) - goto yy40; - goto yy44; - } - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy40; - goto yy41; - } else { - if (yych <= 0xF3) - goto yy42; - if (yych <= 0xF4) - goto yy43; - } - } - } - yy34 : { return (bufsize_t)(p - start); } - 
yy35: - yych = *(marker = ++p); - if (yych == '|') - goto yy49; - goto yy50; - yy36: - yych = *(marker = ++p); - goto yy50; - yy37: - yych = *++p; - if (yych <= 0x7F) - goto yy38; - if (yych <= 0xBF) - goto yy36; - yy38: - p = marker; - goto yy34; - yy39: - yych = *++p; - if (yych <= 0x9F) - goto yy38; - if (yych <= 0xBF) - goto yy37; - goto yy38; - yy40: - yych = *++p; - if (yych <= 0x7F) - goto yy38; - if (yych <= 0xBF) - goto yy37; - goto yy38; - yy41: - yych = *++p; - if (yych <= 0x8F) - goto yy38; - if (yych <= 0xBF) - goto yy40; - goto yy38; - yy42: - yych = *++p; - if (yych <= 0x7F) - goto yy38; - if (yych <= 0xBF) - goto yy40; - goto yy38; - yy43: - yych = *++p; - if (yych <= 0x7F) - goto yy38; - if (yych <= 0x8F) - goto yy40; - goto yy38; - yy44: - yych = *++p; - if (yych <= 0x7F) - goto yy38; - if (yych <= 0x9F) - goto yy37; - goto yy38; - yy45: - ++p; - { return 0; } - yy47: - marker = ++p; - yych = *p; if (yybm[0 + yych] & 64) { - goto yy47; + goto yy30; } - if (yych <= 0xDF) { - if (yych <= '\f') { - if (yych == '\n') - goto yy34; - } else { + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= '\n') + goto yy29; if (yych <= '\r') - goto yy34; - if (yych <= 0x7F) - goto yy49; + goto yy32; + goto yy34; + } else { + if (yych <= '|') + goto yy32; if (yych <= 0xC1) - goto yy34; - goto yy51; + goto yy29; + if (yych <= 0xDF) + goto yy36; + goto yy38; } } else { if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy52; if (yych == 0xED) - goto yy57; - goto yy53; + goto yy40; + goto yy39; } else { if (yych <= 0xF0) - goto yy54; + goto yy41; if (yych <= 0xF3) - goto yy55; + goto yy42; if (yych <= 0xF4) - goto yy56; - goto yy34; + goto yy43; } } - yy49: - marker = ++p; - yych = *p; - yy50: - if (yybm[0 + yych] & 128) { - goto yy49; + yy29 : { return (bufsize_t)(p - start); } + yy30: + yych = *(marker = ++p); + if (yybm[0 + yych] & 64) { + goto yy30; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\r') - goto yy34; + goto yy29; if (yych <= '\\') - goto 
yy47; - goto yy34; + goto yy34; + goto yy29; } else { if (yych <= 0xDF) - goto yy51; + goto yy36; if (yych <= 0xE0) - goto yy52; - goto yy53; + goto yy38; + goto yy39; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy57; + goto yy40; if (yych <= 0xEF) - goto yy53; - goto yy54; + goto yy39; + goto yy41; } else { if (yych <= 0xF3) - goto yy55; + goto yy42; if (yych <= 0xF4) - goto yy56; - goto yy34; + goto yy43; + goto yy29; } } - yy51: + yy32: ++p; - yych = *p; + { return 0; } + yy34: + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy34; + } + if (yych <= 0xDF) { + if (yych <= '\f') { + if (yych == '\n') + goto yy29; + goto yy30; + } else { + if (yych <= '\r') + goto yy29; + if (yych <= 0x7F) + goto yy30; + if (yych <= 0xC1) + goto yy29; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy38; + if (yych == 0xED) + goto yy40; + goto yy39; + } else { + if (yych <= 0xF0) + goto yy41; + if (yych <= 0xF3) + goto yy42; + if (yych <= 0xF4) + goto yy43; + goto yy29; + } + } + yy36: + yych = *++p; if (yych <= 0x7F) - goto yy38; + goto yy37; if (yych <= 0xBF) - goto yy49; - goto yy38; - yy52: - ++p; - yych = *p; + goto yy30; + yy37: + p = marker; + goto yy29; + yy38: + yych = *++p; if (yych <= 0x9F) - goto yy38; + goto yy37; if (yych <= 0xBF) - goto yy51; - goto yy38; - yy53: - ++p; - yych = *p; + goto yy36; + goto yy37; + yy39: + yych = *++p; if (yych <= 0x7F) - goto yy38; + goto yy37; if (yych <= 0xBF) - goto yy51; - goto yy38; - yy54: - ++p; - yych = *p; + goto yy36; + goto yy37; + yy40: + yych = *++p; + if (yych <= 0x7F) + goto yy37; + if (yych <= 0x9F) + goto yy36; + goto yy37; + yy41: + yych = *++p; if (yych <= 0x8F) - goto yy38; + goto yy37; if (yych <= 0xBF) - goto yy53; - goto yy38; - yy55: - ++p; - yych = *p; + goto yy39; + goto yy37; + yy42: + yych = *++p; if (yych <= 0x7F) - goto yy38; + goto yy37; if (yych <= 0xBF) - goto yy53; - goto yy38; - yy56: - ++p; - yych = *p; + goto yy39; + goto yy37; + yy43: + yych = *++p; if (yych 
<= 0x7F) - goto yy38; + goto yy37; if (yych <= 0x8F) - goto yy53; - goto yy38; - yy57: - ++p; - yych = *p; - if (yych <= 0x7F) - goto yy38; - if (yych <= 0x9F) - goto yy51; - goto yy38; + goto yy39; + goto yy37; } } @@ -671,116 +513,111 @@ bufsize_t _scan_table_cell_end(const unsigned char *p) { if (yych <= 0xDF) { if (yych <= '{') { if (yych != '\n') - goto yy63; + goto yy47; } else { if (yych <= '|') - goto yy61; + goto yy48; if (yych <= 0x7F) - goto yy63; + goto yy47; if (yych >= 0xC2) - goto yy64; + goto yy51; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy66; + goto yy53; if (yych == 0xED) - goto yy71; - goto yy67; + goto yy55; + goto yy54; } else { if (yych <= 0xF0) - goto yy68; + goto yy56; if (yych <= 0xF3) - goto yy69; + goto yy57; if (yych <= 0xF4) - goto yy70; + goto yy58; } } - yy60 : { return 0; } - yy61: + yy46 : { return 0; } + yy47: + ++p; + goto yy46; + yy48: yyaccept = 1; yych = *(marker = ++p); - goto yy73; - yy62 : { return (bufsize_t)(p - start); } - yy63: - yych = *++p; - goto yy60; - yy64: + if (yybm[0 + yych] & 128) { + goto yy48; + } + if (yych <= 0x08) + goto yy50; + if (yych <= '\n') + goto yy59; + if (yych <= '\r') + goto yy60; + yy50 : { return (bufsize_t)(p - start); } + yy51: yych = *++p; if (yych <= 0x7F) - goto yy65; + goto yy52; if (yych <= 0xBF) - goto yy63; - yy65: + goto yy47; + yy52: p = marker; if (yyaccept == 0) { - goto yy60; + goto yy46; } else { - goto yy62; + goto yy50; } - yy66: + yy53: yych = *++p; if (yych <= 0x9F) - goto yy65; + goto yy52; if (yych <= 0xBF) - goto yy64; - goto yy65; - yy67: + goto yy51; + goto yy52; + yy54: yych = *++p; if (yych <= 0x7F) - goto yy65; + goto yy52; if (yych <= 0xBF) - goto yy64; - goto yy65; - yy68: + goto yy51; + goto yy52; + yy55: + yych = *++p; + if (yych <= 0x7F) + goto yy52; + if (yych <= 0x9F) + goto yy51; + goto yy52; + yy56: yych = *++p; if (yych <= 0x8F) - goto yy65; + goto yy52; if (yych <= 0xBF) - goto yy67; - goto yy65; - yy69: + goto yy54; + goto yy52; + yy57: 
yych = *++p; if (yych <= 0x7F) - goto yy65; + goto yy52; if (yych <= 0xBF) - goto yy67; - goto yy65; - yy70: + goto yy54; + goto yy52; + yy58: yych = *++p; if (yych <= 0x7F) - goto yy65; + goto yy52; if (yych <= 0x8F) - goto yy67; - goto yy65; - yy71: - yych = *++p; - if (yych <= 0x7F) - goto yy65; - if (yych <= 0x9F) - goto yy64; - goto yy65; - yy72: - yyaccept = 1; - marker = ++p; - yych = *p; - yy73: - if (yybm[0 + yych] & 128) { - goto yy72; - } - if (yych <= 0x08) - goto yy62; - if (yych <= '\n') - goto yy75; - if (yych >= 0x0E) - goto yy62; - yych = *++p; - if (yych != '\n') - goto yy65; - yy75: + goto yy54; + goto yy52; + yy59: ++p; - yych = *p; - goto yy62; + goto yy50; + yy60: + yych = *++p; + if (yych == '\n') + goto yy59; + goto yy52; } } @@ -809,133 +646,131 @@ bufsize_t _scan_table_row_end(const unsigned char *p) { if (yych <= 0xC1) { if (yych <= '\f') { if (yych <= 0x08) - goto yy83; + goto yy64; if (yych == '\n') - goto yy81; - goto yy79; + goto yy66; + goto yy65; } else { if (yych <= 0x1F) { if (yych <= '\r') - goto yy80; - goto yy83; + goto yy68; + goto yy64; } else { if (yych <= ' ') - goto yy79; + goto yy65; if (yych <= 0x7F) - goto yy83; + goto yy64; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy84; + goto yy69; if (yych <= 0xE0) - goto yy86; + goto yy71; if (yych <= 0xEC) - goto yy87; - goto yy91; + goto yy72; + goto yy73; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy87; - goto yy88; + goto yy72; + goto yy74; } else { if (yych <= 0xF3) - goto yy89; + goto yy75; if (yych <= 0xF4) - goto yy90; + goto yy76; } } } - yy78 : { return 0; } - yy79: + yy63 : { return 0; } + yy64: + ++p; + goto yy63; + yy65: yych = *(marker = ++p); if (yych <= 0x08) - goto yy78; + goto yy63; if (yych <= '\r') - goto yy94; + goto yy78; if (yych == ' ') - goto yy94; - goto yy78; - yy80: - yych = *++p; - if (yych != '\n') goto yy78; - yy81: + goto yy63; + yy66: ++p; { return (bufsize_t)(p - start); } - yy83: + yy68: yych = *++p; - goto yy78; - yy84: 
+ if (yych == '\n') + goto yy66; + goto yy63; + yy69: yych = *++p; if (yych <= 0x7F) - goto yy85; + goto yy70; if (yych <= 0xBF) - goto yy83; - yy85: + goto yy64; + yy70: p = marker; - goto yy78; - yy86: + goto yy63; + yy71: yych = *++p; if (yych <= 0x9F) - goto yy85; + goto yy70; if (yych <= 0xBF) - goto yy84; - goto yy85; - yy87: + goto yy69; + goto yy70; + yy72: yych = *++p; if (yych <= 0x7F) - goto yy85; + goto yy70; if (yych <= 0xBF) - goto yy84; - goto yy85; - yy88: + goto yy69; + goto yy70; + yy73: + yych = *++p; + if (yych <= 0x7F) + goto yy70; + if (yych <= 0x9F) + goto yy69; + goto yy70; + yy74: yych = *++p; if (yych <= 0x8F) - goto yy85; + goto yy70; if (yych <= 0xBF) - goto yy87; - goto yy85; - yy89: + goto yy72; + goto yy70; + yy75: yych = *++p; if (yych <= 0x7F) - goto yy85; + goto yy70; if (yych <= 0xBF) - goto yy87; - goto yy85; - yy90: + goto yy72; + goto yy70; + yy76: yych = *++p; if (yych <= 0x7F) - goto yy85; + goto yy70; if (yych <= 0x8F) - goto yy87; - goto yy85; - yy91: - yych = *++p; - if (yych <= 0x7F) - goto yy85; - if (yych <= 0x9F) - goto yy84; - goto yy85; - yy92: + goto yy72; + goto yy70; + yy77: yych = *++p; - if (yych == '\n') - goto yy81; - goto yy85; - yy93: - ++p; - yych = *p; - yy94: + yy78: if (yybm[0 + yych] & 128) { - goto yy93; + goto yy77; } if (yych <= 0x08) - goto yy85; + goto yy70; if (yych <= '\n') - goto yy81; - if (yych <= '\r') - goto yy92; - goto yy85; + goto yy66; + if (yych >= 0x0E) + goto yy70; + yych = *++p; + if (yych == '\n') + goto yy66; + goto yy70; } } diff --git a/extensions/ext_scanners.re b/extensions/ext_scanners.re index b7a649f63..25bdc0807 100644 --- a/extensions/ext_scanners.re +++ b/extensions/ext_scanners.re @@ -5,7 +5,7 @@ bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned cha { bufsize_t res; - if (ptr == NULL || offset > len) { + if (ptr == NULL || offset >= len) { return 0; } else { unsigned char lim = ptr[len]; diff --git a/src/render.c b/src/render.c index 
feb207544..958c046f7 100644 --- a/src/render.c +++ b/src/render.c @@ -188,7 +188,7 @@ char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, } // ensure final newline - if (renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') { + if (renderer.buffer->size == 0 || renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') { cmark_strbuf_putc(renderer.buffer, '\n'); } diff --git a/test/cmark-fuzz.c b/test/cmark-fuzz.c index be571d842..e83d1ae1e 100644 --- a/test/cmark-fuzz.c +++ b/test/cmark-fuzz.c @@ -1,28 +1,76 @@ #include <stdint.h> #include <stdlib.h> +#include <string.h> #include "cmark-gfm.h" +#include "cmark-gfm-core-extensions.h" + +const char *extension_names[] = { + "autolink", + "strikethrough", + "table", + "tagfilter", + NULL, +}; + +int LLVMFuzzerInitialize(int *argc, char ***argv) { + cmark_gfm_core_extensions_ensure_registered(); + return 0; +} int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - int options = 0; - if (size > sizeof(options)) { - /* First 4 bytes of input are treated as options */ - int options = *(const int *)data; + struct __attribute__((packed)) { + int options; + int width; + } fuzz_config; + + if (size >= sizeof(fuzz_config)) { + /* The beginning of `data` is treated as fuzzer configuration */ + memcpy(&fuzz_config, data, sizeof(fuzz_config)); /* Mask off valid option bits */ - options = options & (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART); + fuzz_config.options &= ( + CMARK_OPT_SOURCEPOS | + CMARK_OPT_HARDBREAKS | + CMARK_OPT_NOBREAKS | + CMARK_OPT_NORMALIZE | + CMARK_OPT_VALIDATE_UTF8 | + CMARK_OPT_SMART | + /* GFM specific options */ + CMARK_OPT_GITHUB_PRE_LANG | + CMARK_OPT_LIBERAL_HTML_TAG | + CMARK_OPT_FOOTNOTES | + CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE | + CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES | + CMARK_OPT_FULL_INFO_STRING | + CMARK_OPT_UNSAFE + ); /* Remainder of input is the 
markdown */ - const char *markdown = (const char *)(data + sizeof(options)); - const size_t markdown_size = size - sizeof(options); - cmark_node *doc = cmark_parse_document(markdown, markdown_size, options); + const char *markdown = (const char *)(data + sizeof(fuzz_config)); + const size_t markdown_size = size - sizeof(fuzz_config); + cmark_parser *parser = cmark_parser_new(fuzz_config.options); + + for (const char **it = extension_names; *it; ++it) { + const char *extension_name = *it; + cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(extension_name); + if (!syntax_extension) { + fprintf(stderr, "%s is not a valid syntax extension\n", extension_name); + abort(); + } + cmark_parser_attach_syntax_extension(parser, syntax_extension); + } + + cmark_parser_feed(parser, markdown, markdown_size); + cmark_node *doc = cmark_parser_finish(parser); - free(cmark_render_commonmark(doc, options, 80)); - free(cmark_render_html(doc, options, NULL)); - free(cmark_render_latex(doc, options, 80)); - free(cmark_render_man(doc, options, 80)); - free(cmark_render_xml(doc, options)); + free(cmark_render_commonmark(doc, fuzz_config.options, fuzz_config.width)); + free(cmark_render_html(doc, fuzz_config.options, NULL)); + free(cmark_render_latex(doc, fuzz_config.options, fuzz_config.width)); + free(cmark_render_man(doc, fuzz_config.options, fuzz_config.width)); + free(cmark_render_xml(doc, fuzz_config.options)); cmark_node_free(doc); + cmark_parser_free(parser); } return 0; } diff --git a/test/fuzzing_dictionary b/test/fuzzing_dictionary index b06783c94..448c448f1 100644 --- a/test/fuzzing_dictionary +++ b/test/fuzzing_dictionary @@ -47,3 +47,21 @@ tag_open_q="<?" 
tag_sq2_close="]]>" tag_xml_q="<?xml?>" underscore="_" + +# GFM specific + +strikethrough="~~~strike~~~" +user_mention="@octocat" +email_mention="octocat@github.com" +http="http://" +https="https://" +ftp="ftp://" +title_tag="title" +textarea_tag="textarea" +style_tag="style" +xmp_tag="xmp" +iframe_tag="iframe" +noembed_tag="noembed" +noframes_tag="noframes" +script_tag="script" +plaintext_tag="plaintext" From 36e36cde4cbd8e22177eb62205aa931bf839f508 Mon Sep 17 00:00:00 2001 From: Jonas Wagner <Sjlver@users.noreply.github.com> Date: Tue, 23 Oct 2018 01:12:30 +0200 Subject: [PATCH 147/218] Fix a buffer overread in the CMark tables extension. (#128) * Fix a buffer overread in the CMark tables extension. The following Markdown string is an example that causes an overread: "|\n-|" This was discovered by the Google Autofuzz project. * Add regression test for table buffer overread --- extensions/table.c | 8 +++++--- test/regression.txt | 10 ++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/extensions/table.c b/extensions/table.c index c2ddd2a24..9829c3d1a 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -114,7 +114,7 @@ static table_row *row_from_string(cmark_syntax_extension *self, cmark_parser *parser, unsigned char *string, int len) { table_row *row = NULL; - bufsize_t cell_matched, pipe_matched, offset; + bufsize_t cell_matched = 1, pipe_matched = 1, offset; row = (table_row *)parser->mem->calloc(1, sizeof(table_row)); row->n_columns = 0; @@ -122,7 +122,9 @@ static table_row *row_from_string(cmark_syntax_extension *self, offset = scan_table_cell_end(string, len, 0); - do { + // Parse the cells of the row. Stop if we reach the end of the input, or if we + // cannot detect any more cells. 
+ while (offset < len && (cell_matched || pipe_matched)) { cell_matched = scan_table_cell(string, len, offset); pipe_matched = scan_table_cell_end(string, len, offset + cell_matched); @@ -149,7 +151,7 @@ static table_row *row_from_string(cmark_syntax_extension *self, pipe_matched = scan_table_row_end(string, len, offset); offset += pipe_matched; } - } while ((cell_matched || pipe_matched) && offset < len); + } if (offset != len || !row->n_columns) { free_table_row(parser->mem, row); diff --git a/test/regression.txt b/test/regression.txt index 5aa1b3449..45819a4b9 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -241,3 +241,13 @@ Issue #530 - link parsing corner cases <p><a href="%3C%3Cb">a</a></p> <p><a href="%3Cb">a</a></p> ```````````````````````````````` + +Pull request #128 - Buffer overread in tables extension + +```````````````````````````````` example table +| +-| +. +<p>| +-|</p> +```````````````````````````````` From d53d533a3f2c5f30a87ff111ab82cdb0537a159e Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 12 Nov 2018 15:56:34 +1100 Subject: [PATCH 148/218] don't crash on test failure on macos --- api_test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api_test/CMakeLists.txt b/api_test/CMakeLists.txt index c5e660996..55f33e087 100644 --- a/api_test/CMakeLists.txt +++ b/api_test/CMakeLists.txt @@ -26,5 +26,5 @@ if(MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4706 /D_CRT_SECURE_NO_WARNINGS") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP") elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -std=c99 -pedantic -D_BSD_SOURCE -D_POSIX_SOURCE") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -std=c99 -pedantic") endif() From 44eafe05efbca62b54ca7e9e844be7f4de51427d Mon Sep 17 00:00:00 2001 From: Ashe Connor <kivikakk@github.com> Date: Mon, 19 Nov 2018 13:21:46 +1100 Subject: [PATCH 149/218] use pledge(2) (#132) --- 
src/main.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/main.c b/src/main.c index cb9610e1d..deb5c01ac 100644 --- a/src/main.c +++ b/src/main.c @@ -13,6 +13,10 @@ #include "../extensions/cmark-gfm-core-extensions.h" +#if defined(__OpenBSD__) +#include <unistd.h> +#endif + #if defined(_WIN32) && !defined(__CYGWIN__) #include <io.h> #include <fcntl.h> @@ -117,6 +121,13 @@ int main(int argc, char *argv[]) { int options = CMARK_OPT_DEFAULT; int res = 1; +#if defined(__OpenBSD__) + if (pledge("stdio rpath", NULL) != 0) { + perror("pledge"); + return 1; + } +#endif + cmark_gfm_core_extensions_ensure_registered(); #if defined(_WIN32) && !defined(__CYGWIN__) @@ -264,6 +275,13 @@ int main(int argc, char *argv[]) { } } +#if defined(__OpenBSD__) + if (pledge("stdio", NULL) != 0) { + perror("pledge"); + return 1; + } +#endif + document = cmark_parser_finish(parser); if (!document || !print_document(document, writer, options, width, parser)) From 42f5a52dc345fe2df4808caa36d10e1557e2a1ce Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Wed, 21 Nov 2018 13:34:07 +1100 Subject: [PATCH 150/218] check for OpenBSD 5.9+ --- src/main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/main.c b/src/main.c index deb5c01ac..6ef6f9ea3 100644 --- a/src/main.c +++ b/src/main.c @@ -14,7 +14,11 @@ #include "../extensions/cmark-gfm-core-extensions.h" #if defined(__OpenBSD__) -#include <unistd.h> +# include <sys/param.h> +# if OpenBSD >= 201605 +# define USE_PLEDGE +# include <unistd.h> +# endif #endif #if defined(_WIN32) && !defined(__CYGWIN__) @@ -121,7 +125,7 @@ int main(int argc, char *argv[]) { int options = CMARK_OPT_DEFAULT; int res = 1; -#if defined(__OpenBSD__) +#ifdef USE_PLEDGE if (pledge("stdio rpath", NULL) != 0) { perror("pledge"); return 1; @@ -275,7 +279,7 @@ int main(int argc, char *argv[]) { } } -#if defined(__OpenBSD__) +#ifdef USE_PLEDGE if (pledge("stdio", NULL) != 0) { perror("pledge"); return 1; 
From 9bdf783a11550cd5109d42f0a9b36e16d586e7c2 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Tue, 27 Nov 2018 15:25:11 +1100 Subject: [PATCH 151/218] be more liberal in strikethru regression --- test/regression.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/regression.txt b/test/regression.txt index 45819a4b9..e1bde80da 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -165,9 +165,9 @@ cmark-gfm strikethrough rules ```````````````````````````````` ```````````````````````````````` example strikethrough -This ~text~~~~ is ~~~~curious~. +This ~text~ ~~is~~ ~~~curious~~~. . -<p>This <del>text~~~~ is ~~~~curious</del>.</p> +<p>This <del>text</del> <del>is</del> ~~~curious~~~.</p> ```````````````````````````````` `~` should not be escaped in href — https://github.com/github/markup/issues/311 From 02b19e0f4a7553731c9a7580323954dbcadce581 Mon Sep 17 00:00:00 2001 From: Ashe Connor <kivikakk@github.com> Date: Tue, 27 Nov 2018 16:09:06 +1100 Subject: [PATCH 152/218] fix misplaced parenthesis --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0b2975742..3a65c3255 100644 --- a/README.md +++ b/README.md @@ -168,8 +168,8 @@ By default, the library will scrub raw HTML and potentially dangerous links _opposite_ of the upstream [`cmark`](https://github.com/CommonMark/cmark) library, a change introduced in `cmark-gfm` in version `0.28.3.gfm.18`. -To allow these, use the option `CMARK_OPT_UNSAFE` (or `--unsafe`) with the -command line program. If doing so, we recommend you use a HTML sanitizer +To allow these, use the option `CMARK_OPT_UNSAFE` (or `--unsafe` with the +command line program). If doing so, we recommend you use a HTML sanitizer specific to your needs to protect against [XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting). 
From 6d60bfa2734b0ac4eee08e2e7acf68e3f1224c0e Mon Sep 17 00:00:00 2001 From: Watson <watson1978@gmail.com> Date: Mon, 3 Dec 2018 08:01:19 +0900 Subject: [PATCH 153/218] add tasklist extension (#94) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add tasklist extension * recognize “+” as list marker * use long type to avoid warning in MSVC environment * recognize ordered list > Two list items are of the same type if they begin with a list marker of the same type. Two list markers are of the same type if (a) they are bullet list markers using the same character (-, +, or *) or (b) they are ordered list numbers with the same delimiter (either . or )). ( https://github.github.com/gfm/#task-list-items-extension- ) * add preliminary tests * fix regular extension tests * fix tasklist nesting --- extensions/CMakeLists.txt | 1 + extensions/core-extensions.c | 2 + extensions/ext_scanners.c | 383 +++++++++++++++++++++++++++++++++++ extensions/ext_scanners.h | 2 + extensions/ext_scanners.re | 11 + extensions/tasklist.c | 127 ++++++++++++ extensions/tasklist.h | 8 + test/CMakeLists.txt | 6 +- test/extensions.txt | 29 +++ 9 files changed, 566 insertions(+), 3 deletions(-) create mode 100644 extensions/tasklist.c create mode 100644 extensions/tasklist.h diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 87d70acc4..4881153eb 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -10,6 +10,7 @@ set(LIBRARY_SOURCES ext_scanners.c ext_scanners.re ext_scanners.h + tasklist.c ) include_directories( diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index e436a5d15..846e2bc2b 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -3,6 +3,7 @@ #include "strikethrough.h" #include "table.h" #include "tagfilter.h" +#include "tasklist.h" #include "registry.h" #include "plugin.h" @@ -12,6 +13,7 @@ static int core_extensions_registration(cmark_plugin *plugin) { 
create_strikethrough_extension()); cmark_plugin_register_syntax_extension(plugin, create_autolink_extension()); cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension()); + cmark_plugin_register_syntax_extension(plugin, create_tasklist_extension()); return 1; } diff --git a/extensions/ext_scanners.c b/extensions/ext_scanners.c index 2590cea6b..c3de227ae 100644 --- a/extensions/ext_scanners.c +++ b/extensions/ext_scanners.c @@ -774,3 +774,386 @@ bufsize_t _scan_table_row_end(const unsigned char *p) { goto yy70; } } +bufsize_t _scan_tasklist(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 64, 64, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *(marker = p); + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= '\t') { + if (yych <= 0x08) + goto yy83; + goto yy84; + } else { + if (yych <= '\n') + goto yy82; + if (yych <= '\f') + goto yy84; + goto yy83; + } + } else { + if (yych <= '+') { + if (yych <= ' ') + goto yy84; + if (yych <= ')') + goto yy83; + goto yy85; + } else { + if (yych == '-') + goto yy85; + goto yy83; + } + } + } else { + if (yych <= 0xEC) { + if 
(yych <= 0xC1) { + if (yych <= '9') + goto yy86; + if (yych <= 0x7F) + goto yy83; + } else { + if (yych <= 0xDF) + goto yy87; + if (yych <= 0xE0) + goto yy89; + goto yy90; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy91; + if (yych <= 0xEF) + goto yy90; + goto yy92; + } else { + if (yych <= 0xF3) + goto yy93; + if (yych <= 0xF4) + goto yy94; + } + } + } + yy82 : { return 0; } + yy83: + ++p; + goto yy82; + yy84: + yych = *(marker = ++p); + if (yybm[0 + yych] & 64) { + goto yy95; + } + if (yych <= ',') { + if (yych <= ')') + goto yy82; + if (yych <= '+') + goto yy97; + goto yy82; + } else { + if (yych <= '-') + goto yy97; + if (yych <= '/') + goto yy82; + if (yych <= '9') + goto yy98; + goto yy82; + } + yy85: + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych == '\t') + goto yy99; + goto yy82; + } else { + if (yych <= '\f') + goto yy99; + if (yych == ' ') + goto yy99; + goto yy82; + } + yy86: + yych = *(marker = ++p); + if (yych <= 0x1F) { + if (yych <= '\t') { + if (yych <= 0x08) + goto yy102; + goto yy97; + } else { + if (yych <= '\n') + goto yy82; + if (yych <= '\f') + goto yy97; + goto yy102; + } + } else { + if (yych <= 0x7F) { + if (yych <= ' ') + goto yy97; + goto yy102; + } else { + if (yych <= 0xC1) + goto yy82; + if (yych <= 0xF4) + goto yy102; + goto yy82; + } + } + yy87: + yych = *++p; + if (yych <= 0x7F) + goto yy88; + if (yych <= 0xBF) + goto yy83; + yy88: + p = marker; + goto yy82; + yy89: + yych = *++p; + if (yych <= 0x9F) + goto yy88; + if (yych <= 0xBF) + goto yy87; + goto yy88; + yy90: + yych = *++p; + if (yych <= 0x7F) + goto yy88; + if (yych <= 0xBF) + goto yy87; + goto yy88; + yy91: + yych = *++p; + if (yych <= 0x7F) + goto yy88; + if (yych <= 0x9F) + goto yy87; + goto yy88; + yy92: + yych = *++p; + if (yych <= 0x8F) + goto yy88; + if (yych <= 0xBF) + goto yy90; + goto yy88; + yy93: + yych = *++p; + if (yych <= 0x7F) + goto yy88; + if (yych <= 0xBF) + goto yy90; + goto yy88; + yy94: + yych = *++p; + if (yych <= 0x7F) 
+ goto yy88; + if (yych <= 0x8F) + goto yy90; + goto yy88; + yy95: + yych = *++p; + if (yybm[0 + yych] & 64) { + goto yy95; + } + if (yych <= ',') { + if (yych <= ')') + goto yy88; + if (yych >= ',') + goto yy88; + } else { + if (yych <= '-') + goto yy97; + if (yych <= '/') + goto yy88; + if (yych <= '9') + goto yy98; + goto yy88; + } + yy97: + yych = *++p; + if (yych == '[') + goto yy88; + goto yy100; + yy98: + yych = *++p; + if (yych <= '\n') { + if (yych == '\t') + goto yy97; + goto yy102; + } else { + if (yych <= '\f') + goto yy97; + if (yych == ' ') + goto yy97; + goto yy102; + } + yy99: + yych = *++p; + yy100: + if (yych <= '\f') { + if (yych == '\t') + goto yy99; + if (yych <= '\n') + goto yy88; + goto yy99; + } else { + if (yych <= ' ') { + if (yych <= 0x1F) + goto yy88; + goto yy99; + } else { + if (yych == '[') + goto yy110; + goto yy88; + } + } + yy101: + yych = *++p; + yy102: + if (yybm[0 + yych] & 128) { + goto yy101; + } + if (yych <= 0xC1) { + if (yych <= '\f') { + if (yych <= 0x08) + goto yy97; + if (yych == '\n') + goto yy88; + goto yy99; + } else { + if (yych == ' ') + goto yy99; + if (yych <= 0x7F) + goto yy97; + goto yy88; + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy103; + if (yych <= 0xE0) + goto yy104; + if (yych <= 0xEC) + goto yy105; + goto yy106; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy105; + goto yy107; + } else { + if (yych <= 0xF3) + goto yy108; + if (yych <= 0xF4) + goto yy109; + goto yy88; + } + } + } + yy103: + yych = *++p; + if (yych <= 0x7F) + goto yy88; + if (yych <= 0xBF) + goto yy97; + goto yy88; + yy104: + yych = *++p; + if (yych <= 0x9F) + goto yy88; + if (yych <= 0xBF) + goto yy103; + goto yy88; + yy105: + yych = *++p; + if (yych <= 0x7F) + goto yy88; + if (yych <= 0xBF) + goto yy103; + goto yy88; + yy106: + yych = *++p; + if (yych <= 0x7F) + goto yy88; + if (yych <= 0x9F) + goto yy103; + goto yy88; + yy107: + yych = *++p; + if (yych <= 0x8F) + goto yy88; + if (yych <= 0xBF) + goto 
yy105; + goto yy88; + yy108: + yych = *++p; + if (yych <= 0x7F) + goto yy88; + if (yych <= 0xBF) + goto yy105; + goto yy88; + yy109: + yych = *++p; + if (yych <= 0x7F) + goto yy88; + if (yych <= 0x8F) + goto yy105; + goto yy88; + yy110: + yych = *++p; + if (yych <= 'W') { + if (yych != ' ') + goto yy88; + } else { + if (yych <= 'X') + goto yy111; + if (yych != 'x') + goto yy88; + } + yy111: + yych = *++p; + if (yych != ']') + goto yy88; + yych = *++p; + if (yych <= '\n') { + if (yych != '\t') + goto yy88; + } else { + if (yych <= '\f') + goto yy113; + if (yych != ' ') + goto yy88; + } + yy113: + yych = *++p; + if (yych <= '\n') { + if (yych == '\t') + goto yy113; + } else { + if (yych <= '\f') + goto yy113; + if (yych == ' ') + goto yy113; + } + { return (bufsize_t)(p - start); } + } +} diff --git a/extensions/ext_scanners.h b/extensions/ext_scanners.h index 3bfe586c1..6dd4a725d 100644 --- a/extensions/ext_scanners.h +++ b/extensions/ext_scanners.h @@ -11,11 +11,13 @@ bufsize_t _scan_table_start(const unsigned char *p); bufsize_t _scan_table_cell(const unsigned char *p); bufsize_t _scan_table_cell_end(const unsigned char *p); bufsize_t _scan_table_row_end(const unsigned char *p); +bufsize_t _scan_tasklist(const unsigned char *p); #define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n) #define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n) #define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n) #define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n) +#define scan_tasklist(c, l, n) _ext_scan_at(&_scan_tasklist, c, l, n) #ifdef __cplusplus } diff --git a/extensions/ext_scanners.re b/extensions/ext_scanners.re index 25bdc0807..94a4c6732 100644 --- a/extensions/ext_scanners.re +++ b/extensions/ext_scanners.re @@ -31,6 +31,8 @@ bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned cha table_marker = (spacechar*[:]?[-]+[:]?spacechar*); table_cell = 
(escaped_char|[^|\r\n])*; + + tasklist = spacechar*("-"|"+"|"*"|[0-9]+.)spacechar+("[ ]"|"[x]")spacechar+; */ bufsize_t _scan_table_start(const unsigned char *p) @@ -72,3 +74,12 @@ bufsize_t _scan_table_row_end(const unsigned char *p) .? { return 0; } */ } +bufsize_t _scan_tasklist(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + tasklist { return (bufsize_t)(p - start); } + .? { return 0; } +*/ +} diff --git a/extensions/tasklist.c b/extensions/tasklist.c new file mode 100644 index 000000000..7351a4481 --- /dev/null +++ b/extensions/tasklist.c @@ -0,0 +1,127 @@ +#include "tasklist.h" +#include <parser.h> +#include <render.h> +#include <html.h> +#include "ext_scanners.h" + +typedef enum { + CMARK_TASKLIST_NOCHECKED, + CMARK_TASKLIST_CHECKED, +} cmark_tasklist_type; + +static const char *get_type_string(cmark_syntax_extension *extension, cmark_node *node) { + return "tasklist"; +} + +static bool parse_node_item_prefix(cmark_parser *parser, const char *input, + cmark_node *container) { + bool res = false; + + if (parser->indent >= + container->as.list.marker_offset + container->as.list.padding) { + cmark_parser_advance_offset(parser, input, container->as.list.marker_offset + + container->as.list.padding, + true); + res = true; + } else if (parser->blank && container->first_child != NULL) { + // if container->first_child is NULL, then the opening line + // of the list item was blank after the list marker; in this + // case, we are done with the list item. 
+ cmark_parser_advance_offset(parser, input, parser->first_nonspace - parser->offset, + false); + res = true; + } + return res; +} + +static int matches(cmark_syntax_extension *self, cmark_parser *parser, + unsigned char *input, int len, + cmark_node *parent_container) { + return parse_node_item_prefix(parser, (const char*)input, parent_container); +} + +static int can_contain(cmark_syntax_extension *extension, cmark_node *node, + cmark_node_type child_type) { + return (node->type == CMARK_NODE_ITEM) ? 1 : 0; +} + +static cmark_node *open_tasklist_item(cmark_syntax_extension *self, + int indented, cmark_parser *parser, + cmark_node *parent_container, + unsigned char *input, int len) { + cmark_node_type node_type = cmark_node_get_type(parent_container); + if (node_type != CMARK_NODE_ITEM) { + return NULL; + } + + bufsize_t matched = scan_tasklist(input, len, 0); + if (!matched) { + return NULL; + } + + cmark_node_set_syntax_extension(parent_container, self); + cmark_parser_advance_offset(parser, (char *)input, 3, false); + + long userdata; + if (strstr((char*)input, "[x]")) { + userdata = CMARK_TASKLIST_CHECKED; + } else { + userdata = CMARK_TASKLIST_NOCHECKED; + } + cmark_node_set_user_data(parent_container, (void*)userdata); + + return NULL; +} + +static void commonmark_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + renderer->cr(renderer); + long userdata = (long)cmark_node_get_user_data(node); + if (userdata == CMARK_TASKLIST_CHECKED) { + renderer->out(renderer, node, " - [x] ", false, LITERAL); + } else { + renderer->out(renderer, node, " - [ ] ", false, LITERAL); + } + cmark_strbuf_puts(renderer->prefix, " "); + } else { + cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4); + renderer->cr(renderer); + } +} + +static void html_render(cmark_syntax_extension *extension, + cmark_html_renderer 
*renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + cmark_html_render_cr(renderer->html); + cmark_strbuf_puts(renderer->html, "<li class=\"task-list-item\""); + cmark_html_render_sourcepos(node, renderer->html, options); + cmark_strbuf_putc(renderer->html, '>'); + long userdata = (long)cmark_node_get_user_data(node); + if (userdata == CMARK_TASKLIST_CHECKED) { + cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" checked=\"\" disabled=\"\" /> "); + } else { + cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" disabled=\"\" /> "); + } + } else { + cmark_strbuf_puts(renderer->html, "</li>\n"); + } +} + +cmark_syntax_extension *create_tasklist_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("tasklist"); + + cmark_syntax_extension_set_match_block_func(ext, matches); + cmark_syntax_extension_set_get_type_string_func(ext, get_type_string); + cmark_syntax_extension_set_open_block_func(ext, open_tasklist_item); + cmark_syntax_extension_set_can_contain_func(ext, can_contain); + cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render); + cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render); + cmark_syntax_extension_set_html_render_func(ext, html_render); + + return ext; +} diff --git a/extensions/tasklist.h b/extensions/tasklist.h new file mode 100644 index 000000000..26e9d96d2 --- /dev/null +++ b/extensions/tasklist.h @@ -0,0 +1,8 @@ +#ifndef TASKLIST_H +#define TASKLIST_H + +#include "cmark-gfm-core-extensions.h" + +cmark_syntax_extension *create_tasklist_extension(void); + +#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 148f29ce1..2b5c99bc5 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -70,7 +70,7 @@ IF (PYTHONINTERP_FOUND) "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" - 
"--extensions" "table strikethrough autolink tagfilter footnotes" + "--extensions" "table strikethrough autolink tagfilter footnotes tasklist" ) add_test(roundtrip_extensions_executable @@ -78,7 +78,7 @@ IF (PYTHONINTERP_FOUND) "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm" - "--extensions" "table strikethrough autolink tagfilter footnotes" + "--extensions" "table strikethrough autolink tagfilter footnotes tasklist" ) add_test(option_table_prefer_style_attributes @@ -86,7 +86,7 @@ IF (PYTHONINTERP_FOUND) "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions-table-prefer-style-attributes.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --table-prefer-style-attributes" - "--extensions" "table strikethrough autolink tagfilter footnotes" + "--extensions" "table strikethrough autolink tagfilter footnotes tasklist" ) add_test(option_full_info_string diff --git a/test/extensions.txt b/test/extensions.txt index 3894ec575..b2d06165d 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -711,3 +711,32 @@ Autolink and tables. </tbody> </table> ```````````````````````````````` + +## Task lists + +```````````````````````````````` example +- [ ] foo +- [x] bar +. +<ul> +<li class="task-list-item"><input type="checkbox" disabled="" /> foo</li> +<li class="task-list-item"><input type="checkbox" checked="" disabled="" /> bar</li> +</ul> +```````````````````````````````` + +```````````````````````````````` example +- [x] foo + - [ ] bar + - [x] baz +- [ ] bim +. 
+<ul> +<li class="task-list-item"><input type="checkbox" checked="" disabled="" /> foo +<ul> +<li class="task-list-item"><input type="checkbox" disabled="" /> bar</li> +<li class="task-list-item"><input type="checkbox" checked="" disabled="" /> baz</li> +</ul> +</li> +<li class="task-list-item"><input type="checkbox" disabled="" /> bim</li> +</ul> +```````````````````````````````` From b153c5a125e81f117c8f3b4cf9f71ccad1ac5b65 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 3 Dec 2018 10:03:05 +1100 Subject: [PATCH 154/218] add changelog entry --- changelog.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/changelog.txt b/changelog.txt index 13f620c57..7d9c1d33d 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,7 @@ +[pre] + + * Add tasklist extension implementation (Watson, #94). + [0.28.3.gfm.19] * Prevent out-of-bound memory access in strikethrough matcher (Xavier Décoret, #124). From 301eefc4863f19e45f08ed841083f7b0d217f440 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 3 Dec 2018 10:22:27 +1100 Subject: [PATCH 155/218] fix attribution --- changelog.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.txt b/changelog.txt index 7d9c1d33d..d55c3cd35 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,6 +1,6 @@ [pre] - * Add tasklist extension implementation (Watson, #94). + * Add tasklist extension implementation (Watson1978, #94). 
[0.28.3.gfm.19] From 3785191c3afb6cabd56776f6d6c73c90f3131d5d Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Fri, 1 Feb 2019 09:50:00 +1100 Subject: [PATCH 156/218] 0.28.3.gfm.20 --- CMakeLists.txt | 2 +- changelog.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c47e26941..8ce839326 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJECT_NAME "cmark-gfm") set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 28) set(PROJECT_VERSION_PATCH 3) -set(PROJECT_VERSION_GFM 19) +set(PROJECT_VERSION_GFM 20) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} ) option(CMARK_TESTS "Build cmark-gfm tests and enable testing" ON) diff --git a/changelog.txt b/changelog.txt index d55c3cd35..13732dd88 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,4 +1,4 @@ -[pre] +[0.28.3.gfm.20] * Add tasklist extension implementation (Watson1978, #94). From 90048f92149657d572f63013ea099a0d7fef8f71 Mon Sep 17 00:00:00 2001 From: Phil Turnbull <philipturnbull@github.com> Date: Mon, 4 Feb 2019 21:48:42 -0500 Subject: [PATCH 157/218] Remove options mask from fuzzing harness (#129) The option mask needs to be kept up to date when new options are added, which is error-prone. Just remove the masking logic as `cmark_parser_new` does not validate the options bitmask anyway. 
--- test/cmark-fuzz.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/test/cmark-fuzz.c b/test/cmark-fuzz.c index e83d1ae1e..0a6dc3da9 100644 --- a/test/cmark-fuzz.c +++ b/test/cmark-fuzz.c @@ -27,24 +27,6 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { /* The beginning of `data` is treated as fuzzer configuration */ memcpy(&fuzz_config, data, sizeof(fuzz_config)); - /* Mask off valid option bits */ - fuzz_config.options &= ( - CMARK_OPT_SOURCEPOS | - CMARK_OPT_HARDBREAKS | - CMARK_OPT_NOBREAKS | - CMARK_OPT_NORMALIZE | - CMARK_OPT_VALIDATE_UTF8 | - CMARK_OPT_SMART | - /* GFM specific options */ - CMARK_OPT_GITHUB_PRE_LANG | - CMARK_OPT_LIBERAL_HTML_TAG | - CMARK_OPT_FOOTNOTES | - CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE | - CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES | - CMARK_OPT_FULL_INFO_STRING | - CMARK_OPT_UNSAFE - ); - /* Remainder of input is the markdown */ const char *markdown = (const char *)(data + sizeof(fuzz_config)); const size_t markdown_size = size - sizeof(fuzz_config); From 4b9523d302aad37e201ead6bbcc1c48a7fcd3016 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Tue, 12 Feb 2019 11:19:44 +1100 Subject: [PATCH 158/218] remove the class here We shouldn't be prescriptive with CSS class names. 
--- extensions/tasklist.c | 2 +- test/extensions.txt | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/extensions/tasklist.c b/extensions/tasklist.c index 7351a4481..1253474c8 100644 --- a/extensions/tasklist.c +++ b/extensions/tasklist.c @@ -98,7 +98,7 @@ static void html_render(cmark_syntax_extension *extension, bool entering = (ev_type == CMARK_EVENT_ENTER); if (entering) { cmark_html_render_cr(renderer->html); - cmark_strbuf_puts(renderer->html, "<li class=\"task-list-item\""); + cmark_strbuf_puts(renderer->html, "<li"); cmark_html_render_sourcepos(node, renderer->html, options); cmark_strbuf_putc(renderer->html, '>'); long userdata = (long)cmark_node_get_user_data(node); diff --git a/test/extensions.txt b/test/extensions.txt index b2d06165d..4aa1c67c0 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -719,8 +719,8 @@ Autolink and tables. - [x] bar . <ul> -<li class="task-list-item"><input type="checkbox" disabled="" /> foo</li> -<li class="task-list-item"><input type="checkbox" checked="" disabled="" /> bar</li> +<li><input type="checkbox" disabled="" /> foo</li> +<li><input type="checkbox" checked="" disabled="" /> bar</li> </ul> ```````````````````````````````` @@ -731,12 +731,12 @@ Autolink and tables. - [ ] bim . 
<ul> -<li class="task-list-item"><input type="checkbox" checked="" disabled="" /> foo +<li><input type="checkbox" checked="" disabled="" /> foo <ul> -<li class="task-list-item"><input type="checkbox" disabled="" /> bar</li> -<li class="task-list-item"><input type="checkbox" checked="" disabled="" /> baz</li> +<li><input type="checkbox" disabled="" /> bar</li> +<li><input type="checkbox" checked="" disabled="" /> baz</li> </ul> </li> -<li class="task-list-item"><input type="checkbox" disabled="" /> bim</li> +<li><input type="checkbox" disabled="" /> bim</li> </ul> ```````````````````````````````` From 9f7ca0252da7c1bdc81b5b14c8dbb922d8c5f1da Mon Sep 17 00:00:00 2001 From: Garen Torikian <gjtorikian@users.noreply.github.com> Date: Sun, 3 Mar 2019 20:09:39 -0500 Subject: [PATCH 159/218] Adjustments to how the tasklist generation occurs (#136) * Adjustments to how the tasklist generation occurs * hit it until it passes spec * clean up compiler warnings --- extensions/cmark-gfm-core-extensions.h | 3 +++ extensions/tasklist.c | 32 ++++++++++++++++---------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/extensions/cmark-gfm-core-extensions.h b/extensions/cmark-gfm-core-extensions.h index dce86484d..8ab049dba 100644 --- a/extensions/cmark-gfm-core-extensions.h +++ b/extensions/cmark-gfm-core-extensions.h @@ -21,6 +21,9 @@ uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node); CMARK_GFM_EXTENSIONS_EXPORT int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node); +CMARK_GFM_EXTENSIONS_EXPORT +char *cmark_gfm_extensions_get_tasklist_state(cmark_node *node); + #ifdef __cplusplus } #endif diff --git a/extensions/tasklist.c b/extensions/tasklist.c index 1253474c8..3c273fcba 100644 --- a/extensions/tasklist.c +++ b/extensions/tasklist.c @@ -13,6 +13,18 @@ static const char *get_type_string(cmark_syntax_extension *extension, cmark_node return "tasklist"; } +char *cmark_gfm_extensions_get_tasklist_state(cmark_node *node) { + if (!node || 
((int)node->as.opaque != CMARK_TASKLIST_CHECKED && (int)node->as.opaque != CMARK_TASKLIST_NOCHECKED)) + return 0; + + if ((int)node->as.opaque != CMARK_TASKLIST_CHECKED) { + return "checked"; + } + else { + return "unchecked"; + } +} + static bool parse_node_item_prefix(cmark_parser *parser, const char *input, cmark_node *container) { bool res = false; @@ -62,13 +74,11 @@ static cmark_node *open_tasklist_item(cmark_syntax_extension *self, cmark_node_set_syntax_extension(parent_container, self); cmark_parser_advance_offset(parser, (char *)input, 3, false); - long userdata; if (strstr((char*)input, "[x]")) { - userdata = CMARK_TASKLIST_CHECKED; + parent_container->as.opaque = (void *)CMARK_TASKLIST_CHECKED; } else { - userdata = CMARK_TASKLIST_NOCHECKED; + parent_container->as.opaque = (void *)CMARK_TASKLIST_NOCHECKED; } - cmark_node_set_user_data(parent_container, (void*)userdata); return NULL; } @@ -79,15 +89,14 @@ static void commonmark_render(cmark_syntax_extension *extension, bool entering = (ev_type == CMARK_EVENT_ENTER); if (entering) { renderer->cr(renderer); - long userdata = (long)cmark_node_get_user_data(node); - if (userdata == CMARK_TASKLIST_CHECKED) { - renderer->out(renderer, node, " - [x] ", false, LITERAL); + if ((int)node->as.opaque == CMARK_TASKLIST_CHECKED) { + renderer->out(renderer, node, "- [x] ", false, LITERAL); } else { - renderer->out(renderer, node, " - [ ] ", false, LITERAL); + renderer->out(renderer, node, "- [ ] ", false, LITERAL); } - cmark_strbuf_puts(renderer->prefix, " "); + cmark_strbuf_puts(renderer->prefix, " "); } else { - cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4); + cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2); renderer->cr(renderer); } } @@ -101,8 +110,7 @@ static void html_render(cmark_syntax_extension *extension, cmark_strbuf_puts(renderer->html, "<li"); cmark_html_render_sourcepos(node, renderer->html, options); cmark_strbuf_putc(renderer->html, '>'); - long userdata = 
(long)cmark_node_get_user_data(node); - if (userdata == CMARK_TASKLIST_CHECKED) { + if ((int)node->as.opaque == CMARK_TASKLIST_CHECKED) { cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" checked=\"\" disabled=\"\" /> "); } else { cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" disabled=\"\" /> "); From 2a9996f6718f2f6e63b4506b4d62e9027dce5e5c Mon Sep 17 00:00:00 2001 From: Keith Packard <keithp@keithp.com> Date: Tue, 12 Mar 2019 16:50:27 -0700 Subject: [PATCH 160/218] Define _DEFAULT_SOURCE to get various posix/gnu glibc functions declared (#137) fdopen, strdup and others are not declared by glibc header files unless _DEFAULT_SOURCE is defined. Signed-off-by: Keith Packard <keithp@keithp.com> --- api_test/harness.c | 1 + 1 file changed, 1 insertion(+) diff --git a/api_test/harness.c b/api_test/harness.c index 702dc9098..6b7336f97 100644 --- a/api_test/harness.c +++ b/api_test/harness.c @@ -1,3 +1,4 @@ +#define _DEFAULT_SOURCE #include <stdarg.h> #include <stdio.h> #include <stdlib.h> From 365dabb681b2e784eaf59cced3d28088b465b257 Mon Sep 17 00:00:00 2001 From: Keith Packard <keithp@keithp.com> Date: Tue, 12 Mar 2019 16:53:08 -0700 Subject: [PATCH 161/218] Add automatic configuration of compiler to get large file support (#138) Use CheckFileOffsetBits.cmake to get correct defines for LFS. This is necessary on 32-bit machines to avoid having trouble with file whose size requires more than 32-bits to represent. 
Signed-off-by: Keith Packard <keithp@keithp.com> --- CMakeLists.txt | 1 + CheckFileOffsetBits.c | 14 +++++++++++++ CheckFileOffsetBits.cmake | 43 +++++++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 3 +++ 4 files changed, 61 insertions(+) create mode 100644 CheckFileOffsetBits.c create mode 100644 CheckFileOffsetBits.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ce839326..af96c8ed0 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,7 @@ endif() project(cmark-gfm) include("FindAsan.cmake") +include("CheckFileOffsetBits.cmake") if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") message(FATAL_ERROR "Do not build in-source.\nPlease remove CMakeCache.txt and the CMakeFiles/ directory.\nThen: mkdir build ; cd build ; cmake .. ; make") diff --git a/CheckFileOffsetBits.c b/CheckFileOffsetBits.c new file mode 100644 index 000000000..d948fecf2 --- /dev/null +++ b/CheckFileOffsetBits.c @@ -0,0 +1,14 @@ +#include <sys/types.h> + +#define KB ((off_t)1024) +#define MB ((off_t)1024 * KB) +#define GB ((off_t)1024 * MB) +#define TB ((off_t)1024 * GB) +int t2[(((64 * GB -1) % 671088649) == 268434537) + && (((TB - (64 * GB -1) + 255) % 1792151290) == 305159546)? 1: -1]; + +int main() +{ + ; + return 0; +} diff --git a/CheckFileOffsetBits.cmake b/CheckFileOffsetBits.cmake new file mode 100644 index 000000000..8a74b9e11 --- /dev/null +++ b/CheckFileOffsetBits.cmake @@ -0,0 +1,43 @@ +# - Check if _FILE_OFFSET_BITS macro needed for large files +# CHECK_FILE_OFFSET_BITS () +# +# The following variables may be set before calling this macro to +# modify the way the check is run: +# +# CMAKE_REQUIRED_FLAGS = string of compile command line flags +# CMAKE_REQUIRED_DEFINITIONS = list of macros to define (-DFOO=bar) +# CMAKE_REQUIRED_INCLUDES = list of include directories +# Copyright (c) 2009, Michihiro NAKAJIMA +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
+ +#INCLUDE(CheckCSourceCompiles) + +GET_FILENAME_COMPONENT(_selfdir_CheckFileOffsetBits + "${CMAKE_CURRENT_LIST_FILE}" PATH) + +MACRO (CHECK_FILE_OFFSET_BITS) + IF(NOT DEFINED _FILE_OFFSET_BITS) + MESSAGE(STATUS "Checking _FILE_OFFSET_BITS for large files") + TRY_COMPILE(__WITHOUT_FILE_OFFSET_BITS_64 + ${CMAKE_CURRENT_BINARY_DIR} + ${_selfdir_CheckFileOffsetBits}/CheckFileOffsetBits.c + COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS}) + IF(NOT __WITHOUT_FILE_OFFSET_BITS_64) + TRY_COMPILE(__WITH_FILE_OFFSET_BITS_64 + ${CMAKE_CURRENT_BINARY_DIR} + ${_selfdir_CheckFileOffsetBits}/CheckFileOffsetBits.c + COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS} -D_FILE_OFFSET_BITS=64) + ENDIF(NOT __WITHOUT_FILE_OFFSET_BITS_64) + + IF(NOT __WITHOUT_FILE_OFFSET_BITS_64 AND __WITH_FILE_OFFSET_BITS_64) + SET(_FILE_OFFSET_BITS 64 CACHE INTERNAL "_FILE_OFFSET_BITS macro needed for large files") + MESSAGE(STATUS "Checking _FILE_OFFSET_BITS for large files - needed") + ELSE(NOT __WITHOUT_FILE_OFFSET_BITS_64 AND __WITH_FILE_OFFSET_BITS_64) + SET(_FILE_OFFSET_BITS "" CACHE INTERNAL "_FILE_OFFSET_BITS macro needed for large files") + MESSAGE(STATUS "Checking _FILE_OFFSET_BITS for large files - not needed") + ENDIF(NOT __WITHOUT_FILE_OFFSET_BITS_64 AND __WITH_FILE_OFFSET_BITS_64) + ENDIF(NOT DEFINED _FILE_OFFSET_BITS) + +ENDMACRO (CHECK_FILE_OFFSET_BITS) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f5812af2d..fd835b57e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -74,6 +74,9 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmark-gfm_version.h.in include (GenerateExportHeader) +include("../CheckFileOffsetBits.cmake") +CHECK_FILE_OFFSET_BITS() + add_executable(${PROGRAM} ${PROGRAM_SOURCES}) add_compiler_export_flags() From 87c0139ee1213528f4f24e08e6844d261b8d0f1b Mon Sep 17 00:00:00 2001 From: Tim <NightFlyer@users.noreply.github.com> Date: Sun, 28 Apr 2019 21:00:37 -0700 Subject: [PATCH 162/218] Fix bug with determining if task is complete & adjust to spec. 
(#142) * Had incorrect check for whether opaque data meant that the task was complete * The spec says that either an upper or lower case X means the task is complete. --- extensions/tasklist.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/extensions/tasklist.c b/extensions/tasklist.c index 3c273fcba..509aca7ab 100644 --- a/extensions/tasklist.c +++ b/extensions/tasklist.c @@ -17,7 +17,7 @@ char *cmark_gfm_extensions_get_tasklist_state(cmark_node *node) { if (!node || ((int)node->as.opaque != CMARK_TASKLIST_CHECKED && (int)node->as.opaque != CMARK_TASKLIST_NOCHECKED)) return 0; - if ((int)node->as.opaque != CMARK_TASKLIST_CHECKED) { + if ((int)node->as.opaque == CMARK_TASKLIST_CHECKED) { return "checked"; } else { @@ -74,7 +74,8 @@ static cmark_node *open_tasklist_item(cmark_syntax_extension *self, cmark_node_set_syntax_extension(parent_container, self); cmark_parser_advance_offset(parser, (char *)input, 3, false); - if (strstr((char*)input, "[x]")) { + // Either an upper or lower case X means the task is completed. + if (strstr((char*)input, "[x]") || strstr((char*)input, "[X]")) { parent_container->as.opaque = (void *)CMARK_TASKLIST_CHECKED; } else { parent_container->as.opaque = (void *)CMARK_TASKLIST_NOCHECKED; From 4a7985e26cab780c6c001c5d1ec891125e592573 Mon Sep 17 00:00:00 2001 From: Tim <NightFlyer@users.noreply.github.com> Date: Tue, 30 Apr 2019 17:41:08 -0700 Subject: [PATCH 163/218] Add XML attribute to tasklist (#145) I'm not positive this is the best attribute to use, but at least it doesn't lose the information as to whether the task was completed. 
--- extensions/tasklist.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/extensions/tasklist.c b/extensions/tasklist.c index 509aca7ab..9caaadb4c 100644 --- a/extensions/tasklist.c +++ b/extensions/tasklist.c @@ -121,6 +121,15 @@ static void html_render(cmark_syntax_extension *extension, } } +static const char *xml_attr(cmark_syntax_extension *extension, + cmark_node *node) { + if ((int)node->as.opaque == CMARK_TASKLIST_CHECKED) { + return " completed=\"true\""; + } else { + return " completed=\"false\""; + } +} + cmark_syntax_extension *create_tasklist_extension(void) { cmark_syntax_extension *ext = cmark_syntax_extension_new("tasklist"); @@ -131,6 +140,7 @@ cmark_syntax_extension *create_tasklist_extension(void) { cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render); cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render); cmark_syntax_extension_set_html_render_func(ext, html_render); + cmark_syntax_extension_set_xml_attr_func(ext, xml_attr); return ext; } From 17169ab61509d04dcec7aa8ae3d40a2a0419ec53 Mon Sep 17 00:00:00 2001 From: Waldir Pimenta <waldyrious@gmail.com> Date: Thu, 2 May 2019 00:45:23 +0100 Subject: [PATCH 164/218] Specify parenthesis matching in autolink extension (#148) Also add two more examples to illustrate the behavior --- test/spec.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index 380fe5880..e737c6ac0 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -9242,16 +9242,22 @@ Visit www.commonmark.org/a.b. When an autolink ends in `)`, we scan the entire autolink for the total number of parentheses. 
If there is a greater number of closing parentheses than -opening ones, we don't consider the last character part of the autolink, in -order to facilitate including an autolink inside a parenthesis: +opening ones, we don't consider the unmatched trailing parentheses part of the +autolink, in order to facilitate including an autolink inside a parenthesis: ```````````````````````````````` example autolink www.google.com/search?q=Markup+(business) +www.google.com/search?q=Markup+(business))) + (www.google.com/search?q=Markup+(business)) + +(www.google.com/search?q=Markup+(business) . <p><a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a></p> +<p><a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a>))</p> <p>(<a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a>)</p> +<p>(<a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a></p> ```````````````````````````````` This check is only done when the link ends in a closing parentheses `)`, so if From 78d8268435bc3e440281bbe9bbc612b64ecbf648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johel=20Ernesto=20Guerrero=20Pe=C3=B1a?= <johelegp@gmail.com> Date: Fri, 3 May 2019 01:54:54 -0400 Subject: [PATCH 165/218] Fix valid domain ambiguity (#151) --- test/spec.txt | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index e737c6ac0..b02d26b9e 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -9203,11 +9203,13 @@ circumstances. All such recognized autolinks can only come at the beginning of a line, after whitespace, or any of the delimiting characters `*`, `_`, `~`, and `(`. -An [extended www autolink](@) will be recognized when the text `www.` is found -followed by a [valid domain]. 
A [valid domain](@) consists of alphanumeric -characters, underscores (`_`), hyphens (`-`) and periods (`.`). There must be -at least one period, and no underscores may be present in the last two segments -of the domain. +An [extended www autolink](@) will be recognized +when the text `www.` is found followed by a [valid domain]. +A [valid domain](@) consists of segments +of alphanumeric characters, underscores (`_`) and hyphens (`-`) +separated by periods (`.`). +There must be at least one period, +and no underscores may be present in the last two segments of the domain. The scheme `http` will be inserted automatically: From 10730e1f51f841c37e9af0e0ef2c196808a1eb1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johel=20Ernesto=20Guerrero=20Pe=C3=B1a?= <johelegp@gmail.com> Date: Fri, 3 May 2019 02:00:11 -0400 Subject: [PATCH 166/218] Fix extended email autolink ambiguity (#152) --- test/spec.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index b02d26b9e..7c09560ec 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -9318,10 +9318,10 @@ the following rules: * One ore more characters which are alphanumeric, or `.`, `-`, `_`, or `+`. * An `@` symbol. -* One or more characters which are alphanumeric, or `.`, `-`, or `_`. At least - one of the characters here must be a period (`.`). The last character must - not be one of `-` or `_`. If the last character is a period (`.`), it will - be excluded from the autolink. +* One or more characters which are alphanumeric, or `-` or `_`, + separated by periods (`.`). + There must be at least one period. + The last character must not be one of `-` or `_`. 
The scheme `mailto:` will automatically be added to the generated link: From ef13dc52e19058950bfa5f76d30eda6d355e0c8d Mon Sep 17 00:00:00 2001 From: jim <hi.jinhu.zhang@gmail.com> Date: Wed, 8 May 2019 09:21:12 +0800 Subject: [PATCH 167/218] Fix table cannot be recognised without empty line (#154) * fix(table): recognise-without-empty-line (#141) * fix(table): fix bufsize_t not convert to uint16_t * fix(table): fix uint16_t not convert to int * fix(table): fix uint16_t not convert to int * fix(table): clear unused type conversion * restore whitespace * Always free `paragraph_content` `cmark_node_set_string_content` allocates and copies the data in `paragraph_content` so it is not needed afterwards. ``` ================================================================= ==14==ERROR: LeakSanitizer: detected memory leaks Direct leak of 24 byte(s) in 1 object(s) allocated from: 0 0x4dd330 in calloc /src/llvm/projects/compiler-rt/lib/asan/asan_malloc_linux.cc:97 1 0x59e243 in xcalloc /src/octofuzz/src/cmark.c:18:15 2 0x58fd75 in unescape_pipes /src/octofuzz/extensions/table.c:95:39 3 0x58fd75 in try_inserting_table_header_paragraph /src/octofuzz/extensions/table.c:187 4 0x58fd75 in try_opening_table_header /src/octofuzz/extensions/table.c:254 5 0x58fd75 in try_opening_table_block /src/octofuzz/extensions/table.c:370 6 0x5b22d5 in open_new_blocks /src/octofuzz/src/blocks.c:1275:27 7 0x5b22d5 in S_process_line /src/octofuzz/src/blocks.c:1465 8 0x5aa7f0 in cmark_parser_finish /src/octofuzz/src/blocks.c:1492:5 9 0x58f2fc in LLVMFuzzerTestOneInput /src/octofuzz/test/cmark-fuzz.c:46:23 Indirect leak of 8 byte(s) in 1 object(s) allocated from: 0 0x4dd580 in realloc /src/llvm/projects/compiler-rt/lib/asan/asan_malloc_linux.cc:107 1 0x59e2d3 in xrealloc /src/octofuzz/src/cmark.c:27:19 2 0x640364 in cmark_strbuf_grow /src/octofuzz/src/buffer.c:57:31 3 0x640364 in cmark_strbuf_init /src/octofuzz/src/buffer.c:31 4 0x58fd8b in unescape_pipes /src/octofuzz/extensions/table.c:98:3 5
0x58fd8b in try_inserting_table_header_paragraph /src/octofuzz/extensions/table.c:187 6 0x58fd8b in try_opening_table_header /src/octofuzz/extensions/table.c:254 7 0x58fd8b in try_opening_table_block /src/octofuzz/extensions/table.c:370 8 0x5b22d5 in open_new_blocks /src/octofuzz/src/blocks.c:1275:27 9 0x5b22d5 in S_process_line /src/octofuzz/src/blocks.c:1465 10 0x5aa7f0 in cmark_parser_finish /src/octofuzz/src/blocks.c:1492:5 11 0x58f2fc in LLVMFuzzerTestOneInput /src/octofuzz/test/cmark-fuzz.c:46:23 SUMMARY: AddressSanitizer: 32 byte(s) leaked in 2 allocation(s). ``` --- .gitignore | 1 + extensions/table.c | 65 ++++++++++++++++++++++++++++++++++++--------- test/extensions.txt | 26 ++++++++++++++++++ 3 files changed, 79 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index c2ee13f57..d503137bb 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ bstrlib.txt build cmark.dSYM/* cmark +.vscode # Testing and benchmark alltests.md diff --git a/extensions/table.c b/extensions/table.c index 9829c3d1a..0ea31cbc3 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -16,6 +16,7 @@ cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, typedef struct { uint16_t n_columns; + int paragraph_offset; cmark_llist *cells; } table_row; @@ -115,6 +116,7 @@ static table_row *row_from_string(cmark_syntax_extension *self, int len) { table_row *row = NULL; bufsize_t cell_matched = 1, pipe_matched = 1, offset; + int cell_end_offset; row = (table_row *)parser->mem->calloc(1, sizeof(table_row)); row->n_columns = 0; @@ -129,20 +131,32 @@ static table_row *row_from_string(cmark_syntax_extension *self, pipe_matched = scan_table_cell_end(string, len, offset + cell_matched); if (cell_matched || pipe_matched) { - cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset, - cell_matched); - cmark_strbuf_trim(cell_buf); - - node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell)); - cell->buf = cell_buf; - cell->start_offset = offset; - 
cell->end_offset = offset + cell_matched - 1; - while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') { - --cell->start_offset; - ++cell->internal_offset; + cell_end_offset = offset + cell_matched - 1; + + if (string[cell_end_offset] == '\n' || string[cell_end_offset] == '\r') { + row->paragraph_offset = cell_end_offset; + + cmark_llist_free_full(parser->mem, row->cells, (cmark_free_func)free_table_cell); + row->cells = NULL; + row->n_columns = 0; + } else { + cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset, + cell_matched); + cmark_strbuf_trim(cell_buf); + + node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell)); + cell->buf = cell_buf; + cell->start_offset = offset; + cell->end_offset = cell_end_offset; + + while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') { + --cell->start_offset; + ++cell->internal_offset; + } + + row->n_columns += 1; + row->cells = cmark_llist_append(parser->mem, row->cells, cell); } - row->n_columns += 1; - row->cells = cmark_llist_append(parser->mem, row->cells, cell); } offset += cell_matched + pipe_matched; @@ -161,6 +175,26 @@ static table_row *row_from_string(cmark_syntax_extension *self, return row; } +static void try_inserting_table_header_paragraph(cmark_parser *parser, + cmark_node *parent_container, + unsigned char *parent_string, + int paragraph_offset) { + cmark_node *paragraph; + cmark_strbuf *paragraph_content; + + paragraph = cmark_node_new_with_mem(CMARK_NODE_PARAGRAPH, parser->mem); + + paragraph_content = unescape_pipes(parser->mem, parent_string, paragraph_offset); + cmark_strbuf_trim(paragraph_content); + cmark_node_set_string_content(paragraph, (char *) paragraph_content->ptr); + cmark_strbuf_free(paragraph_content); + parser->mem->free(paragraph_content); + + if (!cmark_node_insert_before(parent_container, paragraph)) { + parser->mem->free(paragraph); + } +} + static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_parser 
*parser, cmark_node *parent_container, @@ -217,6 +251,11 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, return parent_container; } + if (header_row->paragraph_offset) { + try_inserting_table_header_paragraph(parser, parent_container, (unsigned char *)parent_string, + header_row->paragraph_offset); + } + cmark_node_set_syntax_extension(parent_container, self); parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table)); diff --git a/test/extensions.txt b/test/extensions.txt index 4aa1c67c0..66fec2121 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -472,6 +472,32 @@ Here's a link to [Freedom Planet 2][]. </table> ```````````````````````````````` +### a table can be recognised when separated from a paragraph of text without an empty line + +```````````````````````````````` example +123 +456 +| a | b | +| ---| --- | +d | e +. +<p>123 +456</p> +<table> +<thead> +<tr> +<th>a</th> +<th>b</th> +</tr> +</thead> +<tbody> +<tr> +<td>d</td> +<td>e</td> +</tr> +</tbody> +</table> +```````````````````````````````` ## Strikethroughs From 4ee490174d0b188aa601601f4592d4b21ab4f49f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johel=20Ernesto=20Guerrero=20Pe=C3=B1a?= <johelegp@gmail.com> Date: Tue, 7 May 2019 22:09:53 -0400 Subject: [PATCH 168/218] Fix hard line break example (#155) * Add another space so that is seems like a hard line break * Fix example rendering --- test/spec.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index 7c09560ec..582131d70 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -9756,10 +9756,10 @@ bar</em></p> Line breaks do not occur inside code spans ```````````````````````````````` example -`code +`code span` . 
-<p><code>code span</code></p> +<p><code>code span</code></p> ```````````````````````````````` From 6a90249cae54d5a149aad15f53b98cb20f607ed7 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Mon, 13 May 2019 10:17:18 +1000 Subject: [PATCH 169/218] correct _STATIC_DEFINE flag names Fixes #156. --- extensions/CMakeLists.txt | 2 +- src/CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 4881153eb..0c007c706 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -52,7 +52,7 @@ if (CMARK_STATIC) add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES}) set_target_properties(${STATICLIBRARY} PROPERTIES - COMPILE_FLAGS "-DCMARK_STATIC_DEFINE -DCMARKEXTENSIONS_STATIC_DEFINE" + COMPILE_FLAGS "-DCMARK_GFM_STATIC_DEFINE -DCMARK_GFM_EXTENSIONS_STATIC_DEFINE" POSITION_INDEPENDENT_CODE ON) if (MSVC) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4b14f3680..a01c76078 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -87,7 +87,7 @@ endif() # Disable the PUBLIC declarations when compiling the executable: set_target_properties(${PROGRAM} PROPERTIES - COMPILE_FLAGS "-DCMARK_STATIC_DEFINE -DCMARKEXTENSIONS_STATIC_DEFINE") + COMPILE_FLAGS "-DCMARK_GFM_STATIC_DEFINE -DCMARK_GFM_EXTENSIONS_STATIC_DEFINE") # Check integrity of node structure when compiled as debug: set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES -DDEBUG") @@ -123,7 +123,7 @@ endif() if (CMARK_STATIC) add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES}) set_target_properties(${STATICLIBRARY} PROPERTIES - COMPILE_FLAGS -DCMARK_STATIC_DEFINE + COMPILE_FLAGS -DCMARK_GFM_STATIC_DEFINE POSITION_INDEPENDENT_CODE ON) if (MSVC) From 4b6ceb68bc544684fbdf9ce220483162a2dd2ea1 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Wed, 15 May 2019 15:15:54 +1000 Subject: [PATCH 170/218] import spec changes --- test/spec.txt | 19 +++++++++---------- 1 
file changed, 9 insertions(+), 10 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index 582131d70..6244c8d73 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -5995,12 +5995,12 @@ Here are some nonentities: ```````````````````````````````` example &nbsp &x; &#; &#x; -&#987654321; +&#87654321; &#abcdef0; &ThisIsNotDefined; &hi?; . <p>&amp;nbsp &amp;x; &amp;#; &amp;#x; -&amp;#987654321; +&amp;#87654321; &amp;#abcdef0; &amp;ThisIsNotDefined; &amp;hi?;</p> ```````````````````````````````` @@ -8232,9 +8232,8 @@ perform the *Unicode case fold*, strip leading and trailing matching reference link definitions, the one that comes first in the document is used. (It is desirable in such cases to emit a warning.) -The contents of the first link label are parsed as inlines, which are -used as the link's text. The link's URI and title are provided by the -matching [link reference definition]. +The link's URI and title are provided by the matching [link +reference definition]. Here is a simple example: @@ -8327,11 +8326,11 @@ emphasis grouping: ```````````````````````````````` example -[foo *bar][ref] +[foo *bar][ref]* [ref]: /uri . -<p><a href="/uri">foo *bar</a></p> +<p><a href="/uri">foo *bar</a>*</p> ```````````````````````````````` @@ -8379,11 +8378,11 @@ Matching is case-insensitive: Unicode case fold is used: ```````````````````````````````` example -[Толпой][Толпой] is a Russian word. +[ẞ] -[ТОЛПОЙ]: /url +[SS]: /url . -<p><a href="/url">Толпой</a> is a Russian word.</p> +<p><a href="/url">ẞ</a></p> ```````````````````````````````` From 438f3b2881a9d8562a1d02e9e4c7a7a3214f674c Mon Sep 17 00:00:00 2001 From: Tim <NightFlyer@users.noreply.github.com> Date: Thu, 30 May 2019 18:25:15 -0700 Subject: [PATCH 171/218] Change cmark_gfm_extensions_get_tasklist_state to cmark_gfm_extensions_tasklist_state_is_checked (#161) This is in response to issue #160. This removes cmark_gfm_extensions_get_tasklist_state and adds the new cmark_gfm_extensions_tasklist_state_is_checked. 
--- extensions/cmark-gfm-core-extensions.h | 3 ++- extensions/tasklist.c | 15 +++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/extensions/cmark-gfm-core-extensions.h b/extensions/cmark-gfm-core-extensions.h index 8ab049dba..126f2c7fe 100644 --- a/extensions/cmark-gfm-core-extensions.h +++ b/extensions/cmark-gfm-core-extensions.h @@ -7,6 +7,7 @@ extern "C" { #include "cmark-gfm-extension_api.h" #include "cmark-gfm-extensions_export.h" +#include "config.h" #include <stdint.h> CMARK_GFM_EXTENSIONS_EXPORT @@ -22,7 +23,7 @@ CMARK_GFM_EXTENSIONS_EXPORT int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node); CMARK_GFM_EXTENSIONS_EXPORT -char *cmark_gfm_extensions_get_tasklist_state(cmark_node *node); +bool cmark_gfm_extensions_tasklist_state_is_checked(cmark_node *node); #ifdef __cplusplus } diff --git a/extensions/tasklist.c b/extensions/tasklist.c index 9caaadb4c..4af7637a7 100644 --- a/extensions/tasklist.c +++ b/extensions/tasklist.c @@ -9,19 +9,22 @@ typedef enum { CMARK_TASKLIST_CHECKED, } cmark_tasklist_type; +// Local constants +static const char *TYPE_STRING = "tasklist"; + static const char *get_type_string(cmark_syntax_extension *extension, cmark_node *node) { - return "tasklist"; + return TYPE_STRING; } -char *cmark_gfm_extensions_get_tasklist_state(cmark_node *node) { - if (!node || ((int)node->as.opaque != CMARK_TASKLIST_CHECKED && (int)node->as.opaque != CMARK_TASKLIST_NOCHECKED)) - return 0; +bool cmark_gfm_extensions_tasklist_state_is_checked(cmark_node *node) { + if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING)) + return false; if ((int)node->as.opaque == CMARK_TASKLIST_CHECKED) { - return "checked"; + return true; } else { - return "unchecked"; + return false; } } From f5c77c6fcab516c424ca96057c0f89e1cca65819 Mon Sep 17 00:00:00 2001 From: Tim <NightFlyer@users.noreply.github.com> Date: Tue, 18 Jun 2019 20:43:43 -0700 Subject: [PATCH 172/218] Make "set" methods public, add 
"set" method for tasklist (#162) Also rename "get" method for tasklist to better match others. --- extensions/cmark-gfm-core-extensions.h | 26 ++++++++++++++++++++++++-- extensions/tasklist.c | 19 ++++++++++++++++++- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/extensions/cmark-gfm-core-extensions.h b/extensions/cmark-gfm-core-extensions.h index 126f2c7fe..0645915f9 100644 --- a/extensions/cmark-gfm-core-extensions.h +++ b/extensions/cmark-gfm-core-extensions.h @@ -7,7 +7,7 @@ extern "C" { #include "cmark-gfm-extension_api.h" #include "cmark-gfm-extensions_export.h" -#include "config.h" +#include "config.h" // for bool #include <stdint.h> CMARK_GFM_EXTENSIONS_EXPORT @@ -16,14 +16,36 @@ void cmark_gfm_core_extensions_ensure_registered(void); CMARK_GFM_EXTENSIONS_EXPORT uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node); +/** Sets the number of columns for the table, returning 1 on success and 0 on error. + */ +CMARK_GFM_EXTENSIONS_EXPORT +int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns); + CMARK_GFM_EXTENSIONS_EXPORT uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node); +/** Sets the alignments for the table, returning 1 on success and 0 on error. + */ +CMARK_GFM_EXTENSIONS_EXPORT +int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments); + CMARK_GFM_EXTENSIONS_EXPORT int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node); +/** Sets whether the node is a table header row, returning 1 on success and 0 on error. 
+ */ +CMARK_GFM_EXTENSIONS_EXPORT +int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header); + +CMARK_GFM_EXTENSIONS_EXPORT +bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node); +/* For backwards compatibility */ +#define cmark_gfm_extensions_tasklist_is_checked cmark_gfm_extensions_get_tasklist_item_checked + +/** Sets whether a tasklist item is "checked" (completed), returning 1 on success and 0 on error. + */ CMARK_GFM_EXTENSIONS_EXPORT -bool cmark_gfm_extensions_tasklist_state_is_checked(cmark_node *node); +int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked); #ifdef __cplusplus } diff --git a/extensions/tasklist.c b/extensions/tasklist.c index 4af7637a7..65f7481fe 100644 --- a/extensions/tasklist.c +++ b/extensions/tasklist.c @@ -16,7 +16,24 @@ static const char *get_type_string(cmark_syntax_extension *extension, cmark_node return TYPE_STRING; } -bool cmark_gfm_extensions_tasklist_state_is_checked(cmark_node *node) { + +// Return 1 if state was set, 0 otherwise +int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked) { + // The node has to exist, and be an extension, and actually be the right type in order to get the value. 
+ if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING)) + return 0; + + if (is_checked) { + node->as.opaque = (void *)CMARK_TASKLIST_CHECKED; + return 1; + } + else { + node->as.opaque = (void *)CMARK_TASKLIST_NOCHECKED; + return 1; + } +} + +bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node) { if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING)) return false; From b47d804bd7d763c2f3ea3175d6ccce5ca2b3adef Mon Sep 17 00:00:00 2001 From: rysavyjan <46950152+rysavyjan@users.noreply.github.com> Date: Fri, 21 Jun 2019 08:22:45 +0200 Subject: [PATCH 173/218] Fixes Visual C++ 2019 compiler warnings for x64 targets (#166) warning C4311: 'type cast': pointer truncation from 'void *' to 'int' Fixes https://github.com/github/cmark-gfm/issues/165 --- extensions/tasklist.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/extensions/tasklist.c b/extensions/tasklist.c index 65f7481fe..df11c413d 100644 --- a/extensions/tasklist.c +++ b/extensions/tasklist.c @@ -37,7 +37,7 @@ bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node) { if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING)) return false; - if ((int)node->as.opaque == CMARK_TASKLIST_CHECKED) { + if (node->as.opaque == (void *)CMARK_TASKLIST_CHECKED) { return true; } else { @@ -110,7 +110,7 @@ static void commonmark_render(cmark_syntax_extension *extension, bool entering = (ev_type == CMARK_EVENT_ENTER); if (entering) { renderer->cr(renderer); - if ((int)node->as.opaque == CMARK_TASKLIST_CHECKED) { + if (node->as.opaque == (void *)CMARK_TASKLIST_CHECKED) { renderer->out(renderer, node, "- [x] ", false, LITERAL); } else { renderer->out(renderer, node, "- [ ] ", false, LITERAL); @@ -131,7 +131,7 @@ static void html_render(cmark_syntax_extension *extension, cmark_strbuf_puts(renderer->html, "<li"); cmark_html_render_sourcepos(node, renderer->html, options); 
cmark_strbuf_putc(renderer->html, '>'); - if ((int)node->as.opaque == CMARK_TASKLIST_CHECKED) { + if (node->as.opaque == (void *)CMARK_TASKLIST_CHECKED) { cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" checked=\"\" disabled=\"\" /> "); } else { cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" disabled=\"\" /> "); @@ -143,7 +143,7 @@ static void html_render(cmark_syntax_extension *extension, static const char *xml_attr(cmark_syntax_extension *extension, cmark_node *node) { - if ((int)node->as.opaque == CMARK_TASKLIST_CHECKED) { + if (node->as.opaque == (void *)CMARK_TASKLIST_CHECKED) { return " completed=\"true\""; } else { return " completed=\"false\""; From 0996db0d7d2bb812a952134c46a8bc7e5bbc584f Mon Sep 17 00:00:00 2001 From: Tim <NightFlyer@users.noreply.github.com> Date: Mon, 24 Jun 2019 17:49:26 -0700 Subject: [PATCH 174/218] Fix bug where tasklist extension was using union in two ways. (#169) The tasklist extension was using the "as" union both as a list (in `tasklist.c:parse_node_item_prefix`) and as an opaque (in `tasklist.c:open_tasklist_item`). This meant that strange bugs could occur because the underlying union memory was being overwritten. It manifested when using nested task lists indented by 4 spaces. To fix this, I added the "checked" field to the `cmark_list` structure. This allows the tasklist extension to use the `as.list` union member in all its operations. This is appropriate because a tasklist item is a list item in all essentials -- and even shares the CMARK_NODE_ITEM type. 
--- extensions/tasklist.c | 24 ++++++------------ src/node.h | 1 + test/extensions.txt | 58 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 17 deletions(-) diff --git a/extensions/tasklist.c b/extensions/tasklist.c index df11c413d..7bef45499 100644 --- a/extensions/tasklist.c +++ b/extensions/tasklist.c @@ -23,21 +23,15 @@ int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_che if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING)) return 0; - if (is_checked) { - node->as.opaque = (void *)CMARK_TASKLIST_CHECKED; - return 1; - } - else { - node->as.opaque = (void *)CMARK_TASKLIST_NOCHECKED; - return 1; - } + node->as.list.checked = is_checked; + return 1; } bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node) { if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING)) return false; - if (node->as.opaque == (void *)CMARK_TASKLIST_CHECKED) { + if (node->as.list.checked) { return true; } else { @@ -95,11 +89,7 @@ static cmark_node *open_tasklist_item(cmark_syntax_extension *self, cmark_parser_advance_offset(parser, (char *)input, 3, false); // Either an upper or lower case X means the task is completed. 
- if (strstr((char*)input, "[x]") || strstr((char*)input, "[X]")) { - parent_container->as.opaque = (void *)CMARK_TASKLIST_CHECKED; - } else { - parent_container->as.opaque = (void *)CMARK_TASKLIST_NOCHECKED; - } + parent_container->as.list.checked = (strstr((char*)input, "[x]") || strstr((char*)input, "[X]")); return NULL; } @@ -110,7 +100,7 @@ static void commonmark_render(cmark_syntax_extension *extension, bool entering = (ev_type == CMARK_EVENT_ENTER); if (entering) { renderer->cr(renderer); - if (node->as.opaque == (void *)CMARK_TASKLIST_CHECKED) { + if (node->as.list.checked) { renderer->out(renderer, node, "- [x] ", false, LITERAL); } else { renderer->out(renderer, node, "- [ ] ", false, LITERAL); @@ -131,7 +121,7 @@ static void html_render(cmark_syntax_extension *extension, cmark_strbuf_puts(renderer->html, "<li"); cmark_html_render_sourcepos(node, renderer->html, options); cmark_strbuf_putc(renderer->html, '>'); - if (node->as.opaque == (void *)CMARK_TASKLIST_CHECKED) { + if (node->as.list.checked) { cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" checked=\"\" disabled=\"\" /> "); } else { cmark_strbuf_puts(renderer->html, "<input type=\"checkbox\" disabled=\"\" /> "); @@ -143,7 +133,7 @@ static void html_render(cmark_syntax_extension *extension, static const char *xml_attr(cmark_syntax_extension *extension, cmark_node *node) { - if (node->as.opaque == (void *)CMARK_TASKLIST_CHECKED) { + if (node->as.list.checked) { return " completed=\"true\""; } else { return " completed=\"false\""; diff --git a/src/node.h b/src/node.h index 6b05df17a..6391db9f9 100644 --- a/src/node.h +++ b/src/node.h @@ -21,6 +21,7 @@ typedef struct { cmark_delim_type delimiter; unsigned char bullet_char; bool tight; + bool checked; // For task list extension } cmark_list; typedef struct { diff --git a/test/extensions.txt b/test/extensions.txt index 66fec2121..0d9993782 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -750,11 +750,59 @@ Autolink and tables. 
</ul> ```````````````````````````````` +Show that a task list and a regular list get processed the same in +the way that sublists are created. If something works in a list +item, then it should work the same way with a task. The only +difference should be the tasklist marker. So, if we use something +other than a space or x, it won't be recognized as a task item, and +so will be treated as a regular item. + ```````````````````````````````` example - [x] foo - [ ] bar - [x] baz - [ ] bim + +Show a regular (non task) list to show that it has the same structure +- [@] foo + - [@] bar + - [@] baz +- [@] bim +. +<ul> +<li><input type="checkbox" checked="" disabled="" /> foo +<ul> +<li><input type="checkbox" disabled="" /> bar</li> +<li><input type="checkbox" checked="" disabled="" /> baz</li> +</ul> +</li> +<li><input type="checkbox" disabled="" /> bim</li> +</ul> +<p>Show a regular (non task) list to show that it has the same structure</p> +<ul> +<li>[@] foo +<ul> +<li>[@] bar</li> +<li>[@] baz</li> +</ul> +</li> +<li>[@] bim</li> +</ul> +```````````````````````````````` +Use a larger indent -- a task list and a regular list should produce +the same structure. + +```````````````````````````````` example +- [x] foo + - [ ] bar + - [x] baz +- [ ] bim + +Show a regular (non task) list to show that it has the same structure +- [@] foo + - [@] bar + - [@] baz +- [@] bim . <ul> <li><input type="checkbox" checked="" disabled="" /> foo @@ -765,4 +813,14 @@ Autolink and tables. </li> <li><input type="checkbox" disabled="" /> bim</li> </ul> +<p>Show a regular (non task) list to show that it has the same structure</p> +<ul> +<li>[@] foo +<ul> +<li>[@] bar</li> +<li>[@] baz</li> +</ul> +</li> +<li>[@] bim</li> +</ul> ```````````````````````````````` From 9a5bbf7ac55e7fe25a94ddf65d93650f1927bd53 Mon Sep 17 00:00:00 2001 From: Ashe Connor <ashe@kivikakk.ee> Date: Tue, 25 Jun 2019 10:50:21 +1000 Subject: [PATCH 175/218] Revert "import spec changes" Making CI useful again for now. 
This reverts commit 4b6ceb68bc544684fbdf9ce220483162a2dd2ea1. --- test/spec.txt | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index 6244c8d73..582131d70 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -5995,12 +5995,12 @@ Here are some nonentities: ```````````````````````````````` example &nbsp &x; &#; &#x; -&#87654321; +&#987654321; &#abcdef0; &ThisIsNotDefined; &hi?; . <p>&amp;nbsp &amp;x; &amp;#; &amp;#x; -&amp;#87654321; +&amp;#987654321; &amp;#abcdef0; &amp;ThisIsNotDefined; &amp;hi?;</p> ```````````````````````````````` @@ -8232,8 +8232,9 @@ perform the *Unicode case fold*, strip leading and trailing matching reference link definitions, the one that comes first in the document is used. (It is desirable in such cases to emit a warning.) -The link's URI and title are provided by the matching [link -reference definition]. +The contents of the first link label are parsed as inlines, which are +used as the link's text. The link's URI and title are provided by the +matching [link reference definition]. Here is a simple example: @@ -8326,11 +8327,11 @@ emphasis grouping: ```````````````````````````````` example -[foo *bar][ref]* +[foo *bar][ref] [ref]: /uri . -<p><a href="/uri">foo *bar</a>*</p> +<p><a href="/uri">foo *bar</a></p> ```````````````````````````````` @@ -8378,11 +8379,11 @@ Matching is case-insensitive: Unicode case fold is used: ```````````````````````````````` example -[ẞ] +[Толпой][Толпой] is a Russian word. -[SS]: /url +[ТОЛПОЙ]: /url . -<p><a href="/url">ẞ</a></p> +<p><a href="/url">Толпой</a> is a Russian word.</p> ```````````````````````````````` From aed182ed089f1c4d42b75657064ae76904e9e024 Mon Sep 17 00:00:00 2001 From: apnadkarni <apnmbx-wits@yahoo.com> Date: Wed, 17 Jul 2019 06:31:48 +0530 Subject: [PATCH 176/218] Add link to Tcl bindings. 
(#171) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c5a12c0cc..a36d6f9b3 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ There are also libraries that wrap `libcmark` for [Perl](https://metacpan.org/release/CommonMark), [Python](https://pypi.python.org/pypi/paka.cmark), [R](https://cran.r-project.org/package=commonmark), +[Tcl](https://github.com/apnadkarni/tcl-cmark), [Scala](https://github.com/sparsetech/cmark-scala) and [Node.js](https://github.com/killa123/node-cmark). From 36c1553d2a1f04dc1628e76b18490edeff78b8d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dawid=20Gos=C5=82awski?= <dawid@dawidgoslawski.pl> Date: Thu, 5 Sep 2019 03:21:42 +0200 Subject: [PATCH 177/218] Correct path to artifact (#173) --- appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index fa3fc8621..f88e5cc40 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -14,8 +14,8 @@ build_script: - 'tools\appveyor-build.bat' artifacts: - - path: build/src/cmark.exe - name: cmark.exe + - path: build/src/cmark-gfm.exe + name: cmark-gfm.exe test_script: - 'nmake test' From 8a624e548a35c967846da252614e10738a1d0539 Mon Sep 17 00:00:00 2001 From: Arthur Schreiber <arthurschreiber@github.com> Date: Tue, 3 Mar 2020 10:56:13 +0100 Subject: [PATCH 178/218] Rebuild `ext_scanners.c` with latest `re2c`. --- extensions/ext_scanners.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/ext_scanners.c b/extensions/ext_scanners.c index c3de227ae..a9ed7b2a1 100644 --- a/extensions/ext_scanners.c +++ b/extensions/ext_scanners.c @@ -1,4 +1,4 @@ -/* Generated by re2c 1.1.1 */ +/* Generated by re2c 1.3 */ #include "ext_scanners.h" #include <stdlib.h> From db111875a0eac8d4df50066071f77d68dbf3e393 Mon Sep 17 00:00:00 2001 From: Jonas Wagner <jbw@google.com> Date: Tue, 3 Mar 2020 11:03:29 +0100 Subject: [PATCH 179/218] [PATCH] Fix `O(n*n)` corner-case runtime in GFM's table extension. 
Detected at Google by #autofuzz: https://google.github.io/oss-fuzz/ --- extensions/ext_scanners.c | 999 +++++++++++++------------------------ extensions/ext_scanners.re | 54 +- extensions/table.c | 69 +-- 3 files changed, 429 insertions(+), 693 deletions(-) diff --git a/extensions/ext_scanners.c b/extensions/ext_scanners.c index a9ed7b2a1..95e13649b 100644 --- a/extensions/ext_scanners.c +++ b/extensions/ext_scanners.c @@ -1,4 +1,5 @@ /* Generated by re2c 1.3 */ + #include "ext_scanners.h" #include <stdlib.h> @@ -39,276 +40,191 @@ bufsize_t _scan_table_start(const unsigned char *p) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - yych = *(marker = p); - if (yych <= '{') { - if (yych <= 0x1F) { - if (yych <= '\t') { - if (yych <= 0x08) - goto yy3; + yych = *p; + if (yych <= ' ') { + if (yych <= '\n') { + if (yych == '\t') goto yy4; - } else { - if (yych <= '\n') - goto yy2; - if (yych <= '\f') - goto yy4; - goto yy3; - } } else { - if (yych <= '-') { - if (yych <= ' ') - goto yy4; - if (yych <= ',') - goto yy3; - goto yy5; - } else { - if (yych == ':') - goto yy6; - goto yy3; - } + if (yych <= '\f') + goto yy4; + if (yych >= ' ') + goto yy4; } } else { - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '|') - goto yy4; - if (yych <= 0x7F) - goto yy3; - } else { - if (yych <= 0xDF) - goto yy7; - if (yych <= 0xE0) - goto yy9; - goto yy10; - } + if (yych <= '9') { + if (yych == '-') + goto yy5; } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy11; - if (yych <= 0xEF) - goto yy10; - goto yy12; - } else { - if (yych <= 0xF3) - goto yy13; - if (yych <= 0xF4) - goto yy14; - } + if (yych <= ':') + goto yy6; + if (yych == '|') + goto yy4; } } - yy2 : { return 0; } - yy3: ++p; - goto yy2; + yy3 : { return 0; } yy4: yych = *(marker = ++p); if (yybm[0 + yych] & 64) { - goto yy15; + goto yy7; } if (yych == '-') - goto yy17; + goto yy10; if (yych == ':') - goto yy19; - goto yy2; + goto yy12; + goto 
yy3; yy5: yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy17; + goto yy10; } if (yych <= ' ') { if (yych <= 0x08) - goto yy2; + goto yy3; if (yych <= '\r') - goto yy21; + goto yy14; if (yych <= 0x1F) - goto yy2; - goto yy21; + goto yy3; + goto yy14; } else { if (yych <= ':') { if (yych <= '9') - goto yy2; - goto yy20; + goto yy3; + goto yy13; } else { if (yych == '|') - goto yy21; - goto yy2; + goto yy14; + goto yy3; } } yy6: yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy17; + goto yy10; } - goto yy2; + goto yy3; yy7: - yych = *++p; - if (yych <= 0x7F) - goto yy8; - if (yych <= 0xBF) - goto yy3; - yy8: - p = marker; - goto yy2; - yy9: - yych = *++p; - if (yych <= 0x9F) - goto yy8; - if (yych <= 0xBF) - goto yy7; - goto yy8; - yy10: - yych = *++p; - if (yych <= 0x7F) - goto yy8; - if (yych <= 0xBF) - goto yy7; - goto yy8; - yy11: - yych = *++p; - if (yych <= 0x7F) - goto yy8; - if (yych <= 0x9F) - goto yy7; - goto yy8; - yy12: - yych = *++p; - if (yych <= 0x8F) - goto yy8; - if (yych <= 0xBF) - goto yy10; - goto yy8; - yy13: - yych = *++p; - if (yych <= 0x7F) - goto yy8; - if (yych <= 0xBF) - goto yy10; - goto yy8; - yy14: - yych = *++p; - if (yych <= 0x7F) - goto yy8; - if (yych <= 0x8F) - goto yy10; - goto yy8; - yy15: yych = *++p; if (yybm[0 + yych] & 64) { - goto yy15; + goto yy7; } if (yych == '-') - goto yy17; + goto yy10; if (yych == ':') - goto yy19; - goto yy8; - yy17: + goto yy12; + yy9: + p = marker; + goto yy3; + yy10: yych = *++p; if (yybm[0 + yych] & 128) { - goto yy17; + goto yy10; } if (yych <= 0x1F) { if (yych <= '\n') { if (yych <= 0x08) - goto yy8; + goto yy9; if (yych <= '\t') - goto yy20; - goto yy22; + goto yy13; + goto yy15; } else { if (yych <= '\f') - goto yy20; + goto yy13; if (yych <= '\r') - goto yy24; - goto yy8; + goto yy17; + goto yy9; } } else { if (yych <= ':') { if (yych <= ' ') - goto yy20; + goto yy13; if (yych <= '9') - goto yy8; - goto yy20; + goto yy9; + goto yy13; } else { if (yych == '|') - goto 
yy25; - goto yy8; + goto yy18; + goto yy9; } } - yy19: + yy12: yych = *++p; if (yybm[0 + yych] & 128) { - goto yy17; + goto yy10; } - goto yy8; - yy20: + goto yy9; + yy13: yych = *++p; - yy21: + yy14: if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) - goto yy8; - goto yy20; + goto yy9; + goto yy13; } else { if (yych <= '\n') - goto yy22; + goto yy15; if (yych <= '\f') - goto yy20; - goto yy24; + goto yy13; + goto yy17; } } else { if (yych <= ' ') { if (yych <= 0x1F) - goto yy8; - goto yy20; + goto yy9; + goto yy13; } else { if (yych == '|') - goto yy25; - goto yy8; + goto yy18; + goto yy9; } } - yy22: + yy15: ++p; { return (bufsize_t)(p - start); } - yy24: + yy17: yych = *++p; if (yych == '\n') - goto yy22; - goto yy8; - yy25: + goto yy15; + goto yy9; + yy18: yych = *++p; if (yybm[0 + yych] & 128) { - goto yy17; + goto yy10; } if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) - goto yy8; - goto yy25; + goto yy9; + goto yy18; } else { if (yych <= '\n') - goto yy22; + goto yy15; if (yych <= '\f') - goto yy25; - goto yy24; + goto yy18; + goto yy17; } } else { if (yych <= ' ') { if (yych <= 0x1F) - goto yy8; - goto yy25; + goto yy9; + goto yy18; } else { if (yych == ':') - goto yy19; - goto yy8; + goto yy12; + goto yy9; } } } } bufsize_t _scan_table_cell(const unsigned char *p) { - const unsigned char *marker = NULL; const unsigned char *start = p; { unsigned char yych; + unsigned int yyaccept = 0; static const unsigned char yybm[] = { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, @@ -326,53 +242,51 @@ bufsize_t _scan_table_cell(const unsigned char *p) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - yych = *(marker = p); + yych = *p; if (yybm[0 + yych] & 64) { - goto yy30; + goto yy22; } - if (yych <= 0xE0) { - if (yych <= '\\') { - if (yych <= '\n') - goto yy29; + if (yych <= 0xEC) { + if (yych <= 0xC1) { if (yych <= '\r') - goto 
yy32; - goto yy34; + goto yy25; + if (yych <= '\\') + goto yy27; + goto yy25; } else { - if (yych <= '|') - goto yy32; - if (yych <= 0xC1) - goto yy29; if (yych <= 0xDF) - goto yy36; - goto yy38; + goto yy29; + if (yych <= 0xE0) + goto yy30; + goto yy31; } } else { - if (yych <= 0xEF) { - if (yych == 0xED) - goto yy40; - goto yy39; + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy32; + if (yych <= 0xEF) + goto yy31; + goto yy33; } else { - if (yych <= 0xF0) - goto yy41; if (yych <= 0xF3) - goto yy42; + goto yy34; if (yych <= 0xF4) - goto yy43; + goto yy35; + goto yy25; } } - yy29 : { return (bufsize_t)(p - start); } - yy30: + yy22: + yyaccept = 0; yych = *(marker = ++p); if (yybm[0 + yych] & 64) { - goto yy30; + goto yy22; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\r') - goto yy29; + goto yy24; if (yych <= '\\') - goto yy34; - goto yy29; + goto yy27; } else { if (yych <= 0xDF) goto yy36; @@ -392,29 +306,31 @@ bufsize_t _scan_table_cell(const unsigned char *p) { goto yy42; if (yych <= 0xF4) goto yy43; - goto yy29; } } - yy32: + yy24 : { return (bufsize_t)(p - start); } + yy25: ++p; - { return 0; } - yy34: + yy26 : { return 0; } + yy27: + yyaccept = 0; yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy34; + goto yy27; } if (yych <= 0xDF) { if (yych <= '\f') { if (yych == '\n') - goto yy29; - goto yy30; + goto yy24; + goto yy22; } else { if (yych <= '\r') - goto yy29; + goto yy24; if (yych <= 0x7F) - goto yy30; + goto yy22; if (yych <= 0xC1) - goto yy29; + goto yy24; + goto yy36; } } else { if (yych <= 0xEF) { @@ -430,18 +346,77 @@ bufsize_t _scan_table_cell(const unsigned char *p) { goto yy42; if (yych <= 0xF4) goto yy43; - goto yy29; + goto yy24; } } + yy29: + yych = *++p; + if (yych <= 0x7F) + goto yy26; + if (yych <= 0xBF) + goto yy22; + goto yy26; + yy30: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x9F) + goto yy26; + if (yych <= 0xBF) + goto yy36; + goto yy26; + yy31: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 
0x7F) + goto yy26; + if (yych <= 0xBF) + goto yy36; + goto yy26; + yy32: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy26; + if (yych <= 0x9F) + goto yy36; + goto yy26; + yy33: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x8F) + goto yy26; + if (yych <= 0xBF) + goto yy39; + goto yy26; + yy34: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy26; + if (yych <= 0xBF) + goto yy39; + goto yy26; + yy35: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy26; + if (yych <= 0x8F) + goto yy39; + goto yy26; yy36: yych = *++p; if (yych <= 0x7F) goto yy37; if (yych <= 0xBF) - goto yy30; + goto yy22; yy37: p = marker; - goto yy29; + if (yyaccept == 0) { + goto yy24; + } else { + goto yy26; + } yy38: yych = *++p; if (yych <= 0x9F) @@ -488,12 +463,10 @@ bufsize_t _scan_table_cell(const unsigned char *p) { } bufsize_t _scan_table_cell_end(const unsigned char *p) { - const unsigned char *marker = NULL; const unsigned char *start = p; { unsigned char yych; - unsigned int yyaccept = 0; static const unsigned char yybm[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, @@ -509,115 +482,17 @@ bufsize_t _scan_table_cell_end(const unsigned char *p) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - yych = *(marker = p); - if (yych <= 0xDF) { - if (yych <= '{') { - if (yych != '\n') - goto yy47; - } else { - if (yych <= '|') - goto yy48; - if (yych <= 0x7F) - goto yy47; - if (yych >= 0xC2) - goto yy51; - } - } else { - if (yych <= 0xEF) { - if (yych <= 0xE0) - goto yy53; - if (yych == 0xED) - goto yy55; - goto yy54; - } else { - if (yych <= 0xF0) - goto yy56; - if (yych <= 0xF3) - goto yy57; - if (yych <= 0xF4) - goto yy58; - } - } - yy46 : { return 0; } - yy47: + yych = *p; + if (yych == '|') + goto yy48; ++p; - goto yy46; + { return 0; } yy48: - yyaccept = 1; - yych 
= *(marker = ++p); + yych = *++p; if (yybm[0 + yych] & 128) { goto yy48; } - if (yych <= 0x08) - goto yy50; - if (yych <= '\n') - goto yy59; - if (yych <= '\r') - goto yy60; - yy50 : { return (bufsize_t)(p - start); } - yy51: - yych = *++p; - if (yych <= 0x7F) - goto yy52; - if (yych <= 0xBF) - goto yy47; - yy52: - p = marker; - if (yyaccept == 0) { - goto yy46; - } else { - goto yy50; - } - yy53: - yych = *++p; - if (yych <= 0x9F) - goto yy52; - if (yych <= 0xBF) - goto yy51; - goto yy52; - yy54: - yych = *++p; - if (yych <= 0x7F) - goto yy52; - if (yych <= 0xBF) - goto yy51; - goto yy52; - yy55: - yych = *++p; - if (yych <= 0x7F) - goto yy52; - if (yych <= 0x9F) - goto yy51; - goto yy52; - yy56: - yych = *++p; - if (yych <= 0x8F) - goto yy52; - if (yych <= 0xBF) - goto yy54; - goto yy52; - yy57: - yych = *++p; - if (yych <= 0x7F) - goto yy52; - if (yych <= 0xBF) - goto yy54; - goto yy52; - yy58: - yych = *++p; - if (yych <= 0x7F) - goto yy52; - if (yych <= 0x8F) - goto yy54; - goto yy52; - yy59: - ++p; - goto yy50; - yy60: - yych = *++p; - if (yych == '\n') - goto yy59; - goto yy52; + { return (bufsize_t)(p - start); } } } @@ -642,138 +517,62 @@ bufsize_t _scan_table_row_end(const unsigned char *p) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - yych = *(marker = p); - if (yych <= 0xC1) { - if (yych <= '\f') { - if (yych <= 0x08) - goto yy64; - if (yych == '\n') - goto yy66; - goto yy65; - } else { - if (yych <= 0x1F) { - if (yych <= '\r') - goto yy68; - goto yy64; - } else { - if (yych <= ' ') - goto yy65; - if (yych <= 0x7F) - goto yy64; - } - } + yych = *p; + if (yych <= '\f') { + if (yych <= 0x08) + goto yy53; + if (yych == '\n') + goto yy56; + goto yy55; } else { - if (yych <= 0xED) { - if (yych <= 0xDF) - goto yy69; - if (yych <= 0xE0) - goto yy71; - if (yych <= 0xEC) - goto yy72; - goto yy73; - } else { - if (yych <= 0xF0) { - if (yych <= 0xEF) - goto yy72; - goto yy74; - } else { - if 
(yych <= 0xF3) - goto yy75; - if (yych <= 0xF4) - goto yy76; - } - } + if (yych <= '\r') + goto yy58; + if (yych == ' ') + goto yy55; } - yy63 : { return 0; } - yy64: + yy53: ++p; - goto yy63; - yy65: + yy54 : { return 0; } + yy55: yych = *(marker = ++p); if (yych <= 0x08) - goto yy63; + goto yy54; if (yych <= '\r') - goto yy78; + goto yy60; if (yych == ' ') - goto yy78; - goto yy63; - yy66: + goto yy60; + goto yy54; + yy56: ++p; { return (bufsize_t)(p - start); } - yy68: + yy58: yych = *++p; if (yych == '\n') - goto yy66; - goto yy63; - yy69: - yych = *++p; - if (yych <= 0x7F) - goto yy70; - if (yych <= 0xBF) - goto yy64; - yy70: - p = marker; - goto yy63; - yy71: - yych = *++p; - if (yych <= 0x9F) - goto yy70; - if (yych <= 0xBF) - goto yy69; - goto yy70; - yy72: - yych = *++p; - if (yych <= 0x7F) - goto yy70; - if (yych <= 0xBF) - goto yy69; - goto yy70; - yy73: - yych = *++p; - if (yych <= 0x7F) - goto yy70; - if (yych <= 0x9F) - goto yy69; - goto yy70; - yy74: - yych = *++p; - if (yych <= 0x8F) - goto yy70; - if (yych <= 0xBF) - goto yy72; - goto yy70; - yy75: - yych = *++p; - if (yych <= 0x7F) - goto yy70; - if (yych <= 0xBF) - goto yy72; - goto yy70; - yy76: - yych = *++p; - if (yych <= 0x7F) - goto yy70; - if (yych <= 0x8F) - goto yy72; - goto yy70; - yy77: + goto yy56; + goto yy54; + yy59: yych = *++p; - yy78: + yy60: if (yybm[0 + yych] & 128) { - goto yy77; + goto yy59; } if (yych <= 0x08) - goto yy70; + goto yy61; if (yych <= '\n') - goto yy66; - if (yych >= 0x0E) - goto yy70; + goto yy56; + if (yych <= '\r') + goto yy62; + yy61: + p = marker; + goto yy54; + yy62: yych = *++p; if (yych == '\n') - goto yy66; - goto yy70; + goto yy56; + goto yy61; } } + bufsize_t _scan_tasklist(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -798,361 +597,281 @@ bufsize_t _scan_tasklist(const unsigned char *p) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - yych = 
*(marker = p); - if (yych <= '/') { - if (yych <= 0x1F) { - if (yych <= '\t') { - if (yych <= 0x08) - goto yy83; - goto yy84; - } else { - if (yych <= '\n') - goto yy82; - if (yych <= '\f') - goto yy84; - goto yy83; - } + yych = *p; + if (yych <= ' ') { + if (yych <= '\n') { + if (yych == '\t') + goto yy67; } else { - if (yych <= '+') { - if (yych <= ' ') - goto yy84; - if (yych <= ')') - goto yy83; - goto yy85; - } else { - if (yych == '-') - goto yy85; - goto yy83; - } + if (yych <= '\f') + goto yy67; + if (yych >= ' ') + goto yy67; } } else { - if (yych <= 0xEC) { - if (yych <= 0xC1) { - if (yych <= '9') - goto yy86; - if (yych <= 0x7F) - goto yy83; - } else { - if (yych <= 0xDF) - goto yy87; - if (yych <= 0xE0) - goto yy89; - goto yy90; - } + if (yych <= ',') { + if (yych <= ')') + goto yy65; + if (yych <= '+') + goto yy68; } else { - if (yych <= 0xF0) { - if (yych <= 0xED) - goto yy91; - if (yych <= 0xEF) - goto yy90; - goto yy92; - } else { - if (yych <= 0xF3) - goto yy93; - if (yych <= 0xF4) - goto yy94; - } + if (yych <= '-') + goto yy68; + if (yych <= '/') + goto yy65; + if (yych <= '9') + goto yy69; } } - yy82 : { return 0; } - yy83: + yy65: ++p; - goto yy82; - yy84: + yy66 : { return 0; } + yy67: yych = *(marker = ++p); if (yybm[0 + yych] & 64) { - goto yy95; + goto yy70; } if (yych <= ',') { if (yych <= ')') - goto yy82; + goto yy66; if (yych <= '+') - goto yy97; - goto yy82; + goto yy73; + goto yy66; } else { if (yych <= '-') - goto yy97; + goto yy73; if (yych <= '/') - goto yy82; + goto yy66; if (yych <= '9') - goto yy98; - goto yy82; + goto yy74; + goto yy66; } - yy85: + yy68: yych = *(marker = ++p); if (yych <= '\n') { if (yych == '\t') - goto yy99; - goto yy82; + goto yy75; + goto yy66; } else { if (yych <= '\f') - goto yy99; + goto yy75; if (yych == ' ') - goto yy99; - goto yy82; + goto yy75; + goto yy66; } - yy86: + yy69: yych = *(marker = ++p); if (yych <= 0x1F) { if (yych <= '\t') { if (yych <= 0x08) - goto yy102; - goto yy97; + goto yy78; + 
goto yy73; } else { if (yych <= '\n') - goto yy82; + goto yy66; if (yych <= '\f') - goto yy97; - goto yy102; + goto yy73; + goto yy78; } } else { if (yych <= 0x7F) { if (yych <= ' ') - goto yy97; - goto yy102; + goto yy73; + goto yy78; } else { if (yych <= 0xC1) - goto yy82; + goto yy66; if (yych <= 0xF4) - goto yy102; - goto yy82; + goto yy78; + goto yy66; } } - yy87: - yych = *++p; - if (yych <= 0x7F) - goto yy88; - if (yych <= 0xBF) - goto yy83; - yy88: - p = marker; - goto yy82; - yy89: - yych = *++p; - if (yych <= 0x9F) - goto yy88; - if (yych <= 0xBF) - goto yy87; - goto yy88; - yy90: - yych = *++p; - if (yych <= 0x7F) - goto yy88; - if (yych <= 0xBF) - goto yy87; - goto yy88; - yy91: - yych = *++p; - if (yych <= 0x7F) - goto yy88; - if (yych <= 0x9F) - goto yy87; - goto yy88; - yy92: - yych = *++p; - if (yych <= 0x8F) - goto yy88; - if (yych <= 0xBF) - goto yy90; - goto yy88; - yy93: - yych = *++p; - if (yych <= 0x7F) - goto yy88; - if (yych <= 0xBF) - goto yy90; - goto yy88; - yy94: - yych = *++p; - if (yych <= 0x7F) - goto yy88; - if (yych <= 0x8F) - goto yy90; - goto yy88; - yy95: + yy70: yych = *++p; if (yybm[0 + yych] & 64) { - goto yy95; + goto yy70; } if (yych <= ',') { if (yych <= ')') - goto yy88; - if (yych >= ',') - goto yy88; + goto yy72; + if (yych <= '+') + goto yy73; } else { if (yych <= '-') - goto yy97; + goto yy73; if (yych <= '/') - goto yy88; + goto yy72; if (yych <= '9') - goto yy98; - goto yy88; + goto yy74; } - yy97: + yy72: + p = marker; + goto yy66; + yy73: yych = *++p; if (yych == '[') - goto yy88; - goto yy100; - yy98: + goto yy72; + goto yy76; + yy74: yych = *++p; if (yych <= '\n') { if (yych == '\t') - goto yy97; - goto yy102; + goto yy73; + goto yy78; } else { if (yych <= '\f') - goto yy97; + goto yy73; if (yych == ' ') - goto yy97; - goto yy102; + goto yy73; + goto yy78; } - yy99: + yy75: yych = *++p; - yy100: + yy76: if (yych <= '\f') { if (yych == '\t') - goto yy99; + goto yy75; if (yych <= '\n') - goto yy88; - goto yy99; + 
goto yy72; + goto yy75; } else { if (yych <= ' ') { if (yych <= 0x1F) - goto yy88; - goto yy99; + goto yy72; + goto yy75; } else { if (yych == '[') - goto yy110; - goto yy88; + goto yy86; + goto yy72; } } - yy101: + yy77: yych = *++p; - yy102: + yy78: if (yybm[0 + yych] & 128) { - goto yy101; + goto yy77; } if (yych <= 0xC1) { if (yych <= '\f') { if (yych <= 0x08) - goto yy97; + goto yy73; if (yych == '\n') - goto yy88; - goto yy99; + goto yy72; + goto yy75; } else { if (yych == ' ') - goto yy99; + goto yy75; if (yych <= 0x7F) - goto yy97; - goto yy88; + goto yy73; + goto yy72; } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy103; + goto yy79; if (yych <= 0xE0) - goto yy104; + goto yy80; if (yych <= 0xEC) - goto yy105; - goto yy106; + goto yy81; + goto yy82; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy105; - goto yy107; + goto yy81; + goto yy83; } else { if (yych <= 0xF3) - goto yy108; + goto yy84; if (yych <= 0xF4) - goto yy109; - goto yy88; + goto yy85; + goto yy72; } } } - yy103: + yy79: yych = *++p; if (yych <= 0x7F) - goto yy88; + goto yy72; if (yych <= 0xBF) - goto yy97; - goto yy88; - yy104: + goto yy73; + goto yy72; + yy80: yych = *++p; if (yych <= 0x9F) - goto yy88; + goto yy72; if (yych <= 0xBF) - goto yy103; - goto yy88; - yy105: + goto yy79; + goto yy72; + yy81: yych = *++p; if (yych <= 0x7F) - goto yy88; + goto yy72; if (yych <= 0xBF) - goto yy103; - goto yy88; - yy106: + goto yy79; + goto yy72; + yy82: yych = *++p; if (yych <= 0x7F) - goto yy88; + goto yy72; if (yych <= 0x9F) - goto yy103; - goto yy88; - yy107: + goto yy79; + goto yy72; + yy83: yych = *++p; if (yych <= 0x8F) - goto yy88; + goto yy72; if (yych <= 0xBF) - goto yy105; - goto yy88; - yy108: + goto yy81; + goto yy72; + yy84: yych = *++p; if (yych <= 0x7F) - goto yy88; + goto yy72; if (yych <= 0xBF) - goto yy105; - goto yy88; - yy109: + goto yy81; + goto yy72; + yy85: yych = *++p; if (yych <= 0x7F) - goto yy88; + goto yy72; if (yych <= 0x8F) - goto yy105; - goto yy88; - 
yy110: + goto yy81; + goto yy72; + yy86: yych = *++p; if (yych <= 'W') { if (yych != ' ') - goto yy88; + goto yy72; } else { if (yych <= 'X') - goto yy111; + goto yy87; if (yych != 'x') - goto yy88; + goto yy72; } - yy111: + yy87: yych = *++p; if (yych != ']') - goto yy88; + goto yy72; yych = *++p; if (yych <= '\n') { if (yych != '\t') - goto yy88; + goto yy72; } else { if (yych <= '\f') - goto yy113; + goto yy89; if (yych != ' ') - goto yy88; + goto yy72; } - yy113: + yy89: yych = *++p; if (yych <= '\n') { if (yych == '\t') - goto yy113; + goto yy89; } else { if (yych <= '\f') - goto yy113; + goto yy89; if (yych == ' ') - goto yy113; + goto yy89; } { return (bufsize_t)(p - start); } } diff --git a/extensions/ext_scanners.re b/extensions/ext_scanners.re index 94a4c6732..9d7706fba 100644 --- a/extensions/ext_scanners.re +++ b/extensions/ext_scanners.re @@ -1,3 +1,6 @@ +/*!re2c re2c:flags:no-debug-info = 1; */ +/*!re2c re2c:indent:string = ' '; */ + #include <stdlib.h> #include "ext_scanners.h" @@ -22,7 +25,6 @@ bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned cha re2c:define:YYCTYPE = "unsigned char"; re2c:define:YYCURSOR = p; re2c:define:YYMARKER = marker; - re2c:define:YYCTXMARKER = marker; re2c:yyfill:enable = 0; spacechar = [ \t\v\f]; @@ -30,7 +32,7 @@ bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned cha escaped_char = [\\][|!"#$%&'()*+,./:;<=>?@[\\\]^_`{}~-]; table_marker = (spacechar*[:]?[-]+[:]?spacechar*); - table_cell = (escaped_char|[^|\r\n])*; + table_cell = (escaped_char|[^|\r\n])+; tasklist = spacechar*("-"|"+"|"*"|[0-9]+.)spacechar+("[ ]"|"[x]")spacechar+; */ @@ -39,47 +41,51 @@ bufsize_t _scan_table_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; -/*!re2c - [|]? table_marker ([|] table_marker)* [|]? spacechar* newline { return (bufsize_t)(p - start); } - .? { return 0; } -*/ + /*!re2c + [|]? table_marker ([|] table_marker)* [|]? 
spacechar* newline { + return (bufsize_t)(p - start); + } + * { return 0; } + */ } bufsize_t _scan_table_cell(const unsigned char *p) { - const unsigned char *marker = NULL; const unsigned char *start = p; -/*!re2c - table_cell { return (bufsize_t)(p - start); } - .? { return 0; } -*/ + /*!re2c + // In fact, `table_cell` matches non-empty table cells only. The empty + // string is also a valid table cell, but is handled by the default rule. + // This approach prevents re2c's match-empty-string warning. + table_cell { return (bufsize_t)(p - start); } + * { return 0; } + */ } bufsize_t _scan_table_cell_end(const unsigned char *p) { - const unsigned char *marker = NULL; const unsigned char *start = p; -/*!re2c - [|] spacechar* newline? { return (bufsize_t)(p - start); } - .? { return 0; } -*/ + /*!re2c + [|] spacechar* { return (bufsize_t)(p - start); } + * { return 0; } + */ } bufsize_t _scan_table_row_end(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; -/*!re2c - spacechar* newline { return (bufsize_t)(p - start); } - .? { return 0; } -*/ + /*!re2c + spacechar* newline { return (bufsize_t)(p - start); } + * { return 0; } + */ } + bufsize_t _scan_tasklist(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; -/*!re2c - tasklist { return (bufsize_t)(p - start); } - .? { return 0; } -*/ + /*!re2c + tasklist { return (bufsize_t)(p - start); } + * { return 0; } + */ } diff --git a/extensions/table.c b/extensions/table.c index 0ea31cbc3..5606dc263 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -114,19 +114,32 @@ static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsi static table_row *row_from_string(cmark_syntax_extension *self, cmark_parser *parser, unsigned char *string, int len) { + // Parses a single table row. It has the following form: + // `delim? table_cell (delim table_cell)* delim? newline` + // Note that cells are allowed to be empty. 
+ // + // From the GitHub-flavored Markdown specification: + // + // > Each row consists of cells containing arbitrary text, in which inlines + // > are parsed, separated by pipes (|). A leading and trailing pipe is also + // > recommended for clarity of reading, and if there’s otherwise parsing + // > ambiguity. + table_row *row = NULL; bufsize_t cell_matched = 1, pipe_matched = 1, offset; int cell_end_offset; + int expect_more_cells = 1; row = (table_row *)parser->mem->calloc(1, sizeof(table_row)); row->n_columns = 0; row->cells = NULL; + // Scan past the (optional) leading pipe. offset = scan_table_cell_end(string, len, 0); // Parse the cells of the row. Stop if we reach the end of the input, or if we // cannot detect any more cells. - while (offset < len && (cell_matched || pipe_matched)) { + while (offset < len && expect_more_cells) { cell_matched = scan_table_cell(string, len, offset); pipe_matched = scan_table_cell_end(string, len, offset + cell_matched); @@ -140,6 +153,9 @@ static table_row *row_from_string(cmark_syntax_extension *self, row->cells = NULL; row->n_columns = 0; } else { + // We are guaranteed to have a cell, since (1) either we found some + // content and cell_matched, or (2) we found an empty cell followed by a + // pipe. 
cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset, cell_matched); cmark_strbuf_trim(cell_buf); @@ -147,7 +163,7 @@ static table_row *row_from_string(cmark_syntax_extension *self, node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell)); cell->buf = cell_buf; cell->start_offset = offset; - cell->end_offset = cell_end_offset; + cell->end_offset = offset + cell_matched - 1; while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') { --cell->start_offset; @@ -161,13 +177,16 @@ static table_row *row_from_string(cmark_syntax_extension *self, offset += cell_matched + pipe_matched; - if (!pipe_matched) { - pipe_matched = scan_table_row_end(string, len, offset); - offset += pipe_matched; + if (pipe_matched) { + expect_more_cells = 1; + } else { + // We've scanned the last cell. Skip over the final newline and stop. + offset += scan_table_row_end(string, len, offset); + expect_more_cells = 0; } } - if (offset != len || !row->n_columns) { + if (offset != len || row->n_columns == 0) { free_table_row(parser->mem, row); row = NULL; } @@ -199,8 +218,6 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, cmark_parser *parser, cmark_node *parent_container, unsigned char *input, int len) { - bufsize_t matched = - scan_table_start(input, len, cmark_parser_get_first_nonspace(parser)); cmark_node *table_header; table_row *header_row = NULL; table_row *marker_row = NULL; @@ -208,41 +225,37 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, const char *parent_string; uint16_t i; - if (!matched) - return parent_container; - - parent_string = cmark_node_get_string_content(parent_container); - - cmark_arena_push(); - - header_row = row_from_string(self, parser, (unsigned char *)parent_string, - (int)strlen(parent_string)); - - if (!header_row) { - free_table_row(parser->mem, header_row); - cmark_arena_pop(); + if (!scan_table_start(input, len, cmark_parser_get_first_nonspace(parser))) { return 
parent_container; } + // Since scan_table_start was successful, we must have a marker row. marker_row = row_from_string(self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); - assert(marker_row); - if (header_row->n_columns != marker_row->n_columns) { - free_table_row(parser->mem, header_row); + cmark_arena_push(); + + // Check for a matching header row. We call `row_from_string` with the entire + // (potentially long) parent container as input, but this should be safe since + // `row_from_string` bails out early if it does not find a row. + parent_string = cmark_node_get_string_content(parent_container); + header_row = row_from_string(self, parser, (unsigned char *)parent_string, + (int)strlen(parent_string)); + if (!header_row || header_row->n_columns != marker_row->n_columns) { free_table_row(parser->mem, marker_row); + free_table_row(parser->mem, header_row); cmark_arena_pop(); return parent_container; } if (cmark_arena_pop()) { + marker_row = row_from_string( + self, parser, input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); header_row = row_from_string(self, parser, (unsigned char *)parent_string, (int)strlen(parent_string)); - marker_row = row_from_string(self, parser, - input + cmark_parser_get_first_nonspace(parser), - len - cmark_parser_get_first_nonspace(parser)); } if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) { @@ -257,9 +270,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, } cmark_node_set_syntax_extension(parent_container, self); - parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table)); - set_n_table_columns(parent_container, header_row->n_columns); uint8_t *alignments = From 32673772b00b562f33b7b8d7aaf0a3258efc37a8 Mon Sep 17 00:00:00 2001 From: Arthur Schreiber <arthurschreiber@github.com> Date: Tue, 3 Mar 2020 10:45:23 +0100 Subject: [PATCH 180/218] Add a test. 
--- test/pathological_tests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/pathological_tests.py b/test/pathological_tests.py index 4312ea526..4d4d38939 100644 --- a/test/pathological_tests.py +++ b/test/pathological_tests.py @@ -87,6 +87,9 @@ def badhash(ref): "unclosed links B": ("[a](b" * 30000, re.compile("(\[a\]\(b){30000}")), + "tables": + ("aaa\rbbb\n-\v\n" * 30000, + re.compile("^<p>aaa</p>\n<table>\n<thead>\n<tr>\n<th>bbb</th>\n</tr>\n</thead>\n<tbody>\n(<tr>\n<td>aaa</td>\n</tr>\n<tr>\n<td>bbb</td>\n</tr>\n<tr>\n<td>-\x0b</td>\n</tr>\n){29999}</tbody>\n</table>\n$")), # "many references": # ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000, # re.compile("(\[0\] ){4999}")), @@ -106,7 +109,7 @@ def run_test(inp, regex): parser.add_argument('--library-dir', dest='library_dir', nargs='?', default=None, help='directory containing dynamic library') args = parser.parse_args(sys.argv[1:]) - cmark = CMark(prog=args.program, library_dir=args.library_dir) + cmark = CMark(prog=args.program, library_dir=args.library_dir, extensions="table") [rc, actual, err] = cmark.to_html(inp) if rc != 0: From ddf21bb54b6476075e4e6a010714ea8502913112 Mon Sep 17 00:00:00 2001 From: Arthur Schreiber <arthurschreiber@github.com> Date: Tue, 3 Mar 2020 11:29:10 +0100 Subject: [PATCH 181/218] Restore compatibility with other changes. 
--- extensions/ext_scanners.c | 1 + extensions/ext_scanners.re | 1 + extensions/table.c | 72 +++++++++++++++++++++----------------- 3 files changed, 42 insertions(+), 32 deletions(-) diff --git a/extensions/ext_scanners.c b/extensions/ext_scanners.c index 95e13649b..0d3ba2881 100644 --- a/extensions/ext_scanners.c +++ b/extensions/ext_scanners.c @@ -220,6 +220,7 @@ bufsize_t _scan_table_start(const unsigned char *p) { } bufsize_t _scan_table_cell(const unsigned char *p) { + const unsigned char *marker = NULL; const unsigned char *start = p; { diff --git a/extensions/ext_scanners.re b/extensions/ext_scanners.re index 9d7706fba..9144e5b48 100644 --- a/extensions/ext_scanners.re +++ b/extensions/ext_scanners.re @@ -51,6 +51,7 @@ bufsize_t _scan_table_start(const unsigned char *p) bufsize_t _scan_table_cell(const unsigned char *p) { + const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c // In fact, `table_cell` matches non-empty table cells only. The empty diff --git a/extensions/table.c b/extensions/table.c index 5606dc263..a5bb44067 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -127,8 +127,8 @@ static table_row *row_from_string(cmark_syntax_extension *self, table_row *row = NULL; bufsize_t cell_matched = 1, pipe_matched = 1, offset; - int cell_end_offset; int expect_more_cells = 1; + int row_end_offset = 0; row = (table_row *)parser->mem->calloc(1, sizeof(table_row)); row->n_columns = 0; @@ -144,35 +144,25 @@ static table_row *row_from_string(cmark_syntax_extension *self, pipe_matched = scan_table_cell_end(string, len, offset + cell_matched); if (cell_matched || pipe_matched) { - cell_end_offset = offset + cell_matched - 1; - - if (string[cell_end_offset] == '\n' || string[cell_end_offset] == '\r') { - row->paragraph_offset = cell_end_offset; - - cmark_llist_free_full(parser->mem, row->cells, (cmark_free_func)free_table_cell); - row->cells = NULL; - row->n_columns = 0; - } else { - // We are guaranteed to have a cell, since (1) 
either we found some - // content and cell_matched, or (2) we found an empty cell followed by a - // pipe. - cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset, - cell_matched); - cmark_strbuf_trim(cell_buf); - - node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell)); - cell->buf = cell_buf; - cell->start_offset = offset; - cell->end_offset = offset + cell_matched - 1; - - while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') { - --cell->start_offset; - ++cell->internal_offset; - } - - row->n_columns += 1; - row->cells = cmark_llist_append(parser->mem, row->cells, cell); + // We are guaranteed to have a cell, since (1) either we found some + // content and cell_matched, or (2) we found an empty cell followed by a + // pipe. + cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset, + cell_matched); + cmark_strbuf_trim(cell_buf); + + node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell)); + cell->buf = cell_buf; + cell->start_offset = offset; + cell->end_offset = offset + cell_matched - 1; + + while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') { + --cell->start_offset; + ++cell->internal_offset; } + + row->n_columns += 1; + row->cells = cmark_llist_append(parser->mem, row->cells, cell); } offset += cell_matched + pipe_matched; @@ -180,9 +170,27 @@ static table_row *row_from_string(cmark_syntax_extension *self, if (pipe_matched) { expect_more_cells = 1; } else { - // We've scanned the last cell. Skip over the final newline and stop. - offset += scan_table_row_end(string, len, offset); - expect_more_cells = 0; + // We've scanned the last cell. Check if we have reached the end of the row + row_end_offset = scan_table_row_end(string, len, offset); + offset += row_end_offset; + + // If the end of the row is not the end of the input, + // the row is not a real row but potentially part of the paragraph + // preceding the table. 
+ if (row_end_offset && offset != len) { + row->paragraph_offset = offset; + + cmark_llist_free_full(parser->mem, row->cells, (cmark_free_func)free_table_cell); + row->cells = NULL; + row->n_columns = 0; + + // Scan past the (optional) leading pipe. + offset += scan_table_cell_end(string, len, offset); + + expect_more_cells = 1; + } else { + expect_more_cells = 0; + } } } From 88a6f870ff8588929160d369c304fdfb39698760 Mon Sep 17 00:00:00 2001 From: Ashley Garland <acgarland@apple.com> Date: Wed, 23 Sep 2020 10:32:28 -0700 Subject: [PATCH 182/218] Add Swift Package Manager Support - Move headers to expected `include` directory - Modify source files to include headers by search path --- Package.swift | 42 ++++++++++ api_test/CMakeLists.txt | 7 +- api_test/cplusplus.cpp | 2 +- api_test/main.c | 4 +- extensions/CMakeLists.txt | 6 +- .../{ => include}/cmark-gfm-core-extensions.h | 0 .../include/cmark-gfm-extensions_export.h | 42 ++++++++++ src/CMakeLists.txt | 27 +------ src/{ => include}/buffer.h | 0 src/{ => include}/chunk.h | 0 src/{ => include}/cmark-gfm-extension_api.h | 0 src/{ => include}/cmark-gfm.h | 0 src/include/cmark-gfm_export.h | 42 ++++++++++ src/include/cmark-gfm_version.h | 7 ++ src/{ => include}/cmark_ctype.h | 0 src/include/config.h | 76 +++++++++++++++++++ src/{ => include}/footnotes.h | 0 src/{ => include}/houdini.h | 0 src/{ => include}/html.h | 0 src/{ => include}/inlines.h | 0 src/{ => include}/iterator.h | 0 src/{ => include}/map.h | 0 src/{ => include}/node.h | 0 src/{ => include}/parser.h | 0 src/{ => include}/plugin.h | 0 src/{ => include}/references.h | 0 src/{ => include}/registry.h | 0 src/{ => include}/render.h | 0 src/{ => include}/scanners.h | 0 src/{ => include}/syntax_extension.h | 0 src/{ => include}/utf8.h | 0 src/main.c | 2 +- 32 files changed, 224 insertions(+), 33 deletions(-) create mode 100644 Package.swift rename extensions/{ => include}/cmark-gfm-core-extensions.h (100%) create mode 100644 
extensions/include/cmark-gfm-extensions_export.h rename src/{ => include}/buffer.h (100%) rename src/{ => include}/chunk.h (100%) rename src/{ => include}/cmark-gfm-extension_api.h (100%) rename src/{ => include}/cmark-gfm.h (100%) create mode 100644 src/include/cmark-gfm_export.h create mode 100644 src/include/cmark-gfm_version.h rename src/{ => include}/cmark_ctype.h (100%) create mode 100644 src/include/config.h rename src/{ => include}/footnotes.h (100%) rename src/{ => include}/houdini.h (100%) rename src/{ => include}/html.h (100%) rename src/{ => include}/inlines.h (100%) rename src/{ => include}/iterator.h (100%) rename src/{ => include}/map.h (100%) rename src/{ => include}/node.h (100%) rename src/{ => include}/parser.h (100%) rename src/{ => include}/plugin.h (100%) rename src/{ => include}/references.h (100%) rename src/{ => include}/registry.h (100%) rename src/{ => include}/render.h (100%) rename src/{ => include}/scanners.h (100%) rename src/{ => include}/syntax_extension.h (100%) rename src/{ => include}/utf8.h (100%) diff --git a/Package.swift b/Package.swift new file mode 100644 index 000000000..6e9a89cb2 --- /dev/null +++ b/Package.swift @@ -0,0 +1,42 @@ +// swift-tools-version:5.3 +// The swift-tools-version declares the minimum version of Swift required to build this package. + +import PackageDescription + +let package = Package( + name: "cmark-gfm", + products: [ + // Products define the executables and libraries a package produces, and make them visible to other packages. 
+ .library( + name: "cmark-gfm", + targets: ["cmark-gfm"]), + .library( + name: "cmark-gfm-extensions", + targets: ["cmark-gfm-extensions"]), + ], + targets: [ + .target(name: "cmark-gfm", + path: "src", + exclude: [ + "main.c", + "scanners.re", + "libcmark-gfm.pc.in", + "config.h.in", + "CMakeLists.txt", + "cmark-gfm_version.h.in", + "case_fold_switch.inc", + "entities.inc", + ] + ), + .target(name: "cmark-gfm-extensions", + dependencies: [ + "cmark-gfm", + ], + path: "extensions", + exclude: [ + "CMakeLists.txt", + "ext_scanners.re", + ] + ), + ] +) diff --git a/api_test/CMakeLists.txt b/api_test/CMakeLists.txt index 55f33e087..2ae27d762 100644 --- a/api_test/CMakeLists.txt +++ b/api_test/CMakeLists.txt @@ -5,9 +5,10 @@ add_executable(api_test main.c ) include_directories( - ${PROJECT_SOURCE_DIR}/src - ${PROJECT_BINARY_DIR}/src - ${PROJECT_BINARY_DIR}/extensions + ${PROJECT_SOURCE_DIR}/src/include + ${PROJECT_BINARY_DIR}/src/include + ${PROJECT_SOURCE_DIR}/extensions/include + ${PROJECT_BINARY_DIR}/extensions/include ) if(CMARK_SHARED) target_link_libraries(api_test libcmark-gfm-extensions libcmark-gfm) diff --git a/api_test/cplusplus.cpp b/api_test/cplusplus.cpp index 480c75708..e06a4e766 100644 --- a/api_test/cplusplus.cpp +++ b/api_test/cplusplus.cpp @@ -1,6 +1,6 @@ #include <cstdlib> -#include "cmark-gfm.h" +#include <cmark-gfm.h> #include "cplusplus.h" #include "harness.h" diff --git a/api_test/main.c b/api_test/main.c index 62006eaa9..a43cf915b 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -3,9 +3,9 @@ #include <string.h> #define CMARK_NO_SHORT_NAMES -#include "cmark-gfm.h" +#include <cmark-gfm.h> #include "node.h" -#include "../extensions/cmark-gfm-core-extensions.h" +#include <cmark-gfm-core-extensions.h> #include "harness.h" #include "cplusplus.h" diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 0c007c706..f1d04a33b 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -14,13 +14,13 @@ 
set(LIBRARY_SOURCES ) include_directories( - ${PROJECT_SOURCE_DIR}/src - ${PROJECT_BINARY_DIR}/src + ${PROJECT_SOURCE_DIR}/src/include + ${PROJECT_BINARY_DIR}/src/include ) include (GenerateExportHeader) -include_directories(. ${CMAKE_CURRENT_BINARY_DIR}) +include_directories(include ${CMAKE_CURRENT_BINARY_DIR}) set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") diff --git a/extensions/cmark-gfm-core-extensions.h b/extensions/include/cmark-gfm-core-extensions.h similarity index 100% rename from extensions/cmark-gfm-core-extensions.h rename to extensions/include/cmark-gfm-core-extensions.h diff --git a/extensions/include/cmark-gfm-extensions_export.h b/extensions/include/cmark-gfm-extensions_export.h new file mode 100644 index 000000000..69c0bd787 --- /dev/null +++ b/extensions/include/cmark-gfm-extensions_export.h @@ -0,0 +1,42 @@ + +#ifndef CMARK_GFM_EXTENSIONS_EXPORT_H +#define CMARK_GFM_EXTENSIONS_EXPORT_H + +#ifdef CMARK_GFM_EXTENSIONS_STATIC_DEFINE +# define CMARK_GFM_EXTENSIONS_EXPORT +# define CMARK_GFM_EXTENSIONS_NO_EXPORT +#else +# ifndef CMARK_GFM_EXTENSIONS_EXPORT +# ifdef libcmark_gfm_extensions_EXPORTS + /* We are building this library */ +# define CMARK_GFM_EXTENSIONS_EXPORT __attribute__((visibility("default"))) +# else + /* We are using this library */ +# define CMARK_GFM_EXTENSIONS_EXPORT __attribute__((visibility("default"))) +# endif +# endif + +# ifndef CMARK_GFM_EXTENSIONS_NO_EXPORT +# define CMARK_GFM_EXTENSIONS_NO_EXPORT __attribute__((visibility("hidden"))) +# endif +#endif + +#ifndef CMARK_GFM_EXTENSIONS_DEPRECATED +# define CMARK_GFM_EXTENSIONS_DEPRECATED __attribute__ ((__deprecated__)) +#endif + +#ifndef CMARK_GFM_EXTENSIONS_DEPRECATED_EXPORT +# define CMARK_GFM_EXTENSIONS_DEPRECATED_EXPORT CMARK_GFM_EXTENSIONS_EXPORT CMARK_GFM_EXTENSIONS_DEPRECATED +#endif + +#ifndef CMARK_GFM_EXTENSIONS_DEPRECATED_NO_EXPORT +# define CMARK_GFM_EXTENSIONS_DEPRECATED_NO_EXPORT 
CMARK_GFM_EXTENSIONS_NO_EXPORT CMARK_GFM_EXTENSIONS_DEPRECATED +#endif + +#if 0 /* DEFINE_NO_DEPRECATED */ +# ifndef CMARK_GFM_EXTENSIONS_NO_DEPRECATED +# define CMARK_GFM_EXTENSIONS_NO_DEPRECATED +# endif +#endif + +#endif /* CMARK_GFM_EXTENSIONS_EXPORT_H */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84dd2a037..f5a2eae13 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,27 +6,7 @@ include(GNUInstallDirs) set(LIBRARY "libcmark-gfm") set(STATICLIBRARY "libcmark-gfm_static") -set(HEADERS - cmark-gfm.h - cmark-gfm-extension_api.h - parser.h - buffer.h - node.h - iterator.h - chunk.h - references.h - footnotes.h - map.h - utf8.h - scanners.h - inlines.h - houdini.h - cmark_ctype.h - render.h - registry.h - syntax_extension.h - plugin.h - ) +file(GLOB HEADERS include/*.h) set(LIBRARY_SOURCES cmark.c node.c @@ -62,9 +42,10 @@ set(LIBRARY_SOURCES set(PROGRAM "cmark-gfm") set(PROGRAM_SOURCES main.c) -include_directories(. ${CMAKE_CURRENT_BINARY_DIR}) +include_directories(include ${CMAKE_CURRENT_BINARY_DIR}) include_directories( - ${PROJECT_BINARY_DIR}/extensions + ${PROJECT_SOURCE_DIR}/extensions/include + ${PROJECT_BINARY_DIR}/extensions/include ) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmark-gfm_version.h.in diff --git a/src/buffer.h b/src/include/buffer.h similarity index 100% rename from src/buffer.h rename to src/include/buffer.h diff --git a/src/chunk.h b/src/include/chunk.h similarity index 100% rename from src/chunk.h rename to src/include/chunk.h diff --git a/src/cmark-gfm-extension_api.h b/src/include/cmark-gfm-extension_api.h similarity index 100% rename from src/cmark-gfm-extension_api.h rename to src/include/cmark-gfm-extension_api.h diff --git a/src/cmark-gfm.h b/src/include/cmark-gfm.h similarity index 100% rename from src/cmark-gfm.h rename to src/include/cmark-gfm.h diff --git a/src/include/cmark-gfm_export.h b/src/include/cmark-gfm_export.h new file mode 100644 index 000000000..699d737f7 --- /dev/null +++ 
b/src/include/cmark-gfm_export.h @@ -0,0 +1,42 @@ + +#ifndef CMARK_GFM_EXPORT_H +#define CMARK_GFM_EXPORT_H + +#ifdef CMARK_GFM_STATIC_DEFINE +# define CMARK_GFM_EXPORT +# define CMARK_GFM_NO_EXPORT +#else +# ifndef CMARK_GFM_EXPORT +# ifdef libcmark_gfm_EXPORTS + /* We are building this library */ +# define CMARK_GFM_EXPORT __attribute__((visibility("default"))) +# else + /* We are using this library */ +# define CMARK_GFM_EXPORT __attribute__((visibility("default"))) +# endif +# endif + +# ifndef CMARK_GFM_NO_EXPORT +# define CMARK_GFM_NO_EXPORT __attribute__((visibility("hidden"))) +# endif +#endif + +#ifndef CMARK_GFM_DEPRECATED +# define CMARK_GFM_DEPRECATED __attribute__ ((__deprecated__)) +#endif + +#ifndef CMARK_GFM_DEPRECATED_EXPORT +# define CMARK_GFM_DEPRECATED_EXPORT CMARK_GFM_EXPORT CMARK_GFM_DEPRECATED +#endif + +#ifndef CMARK_GFM_DEPRECATED_NO_EXPORT +# define CMARK_GFM_DEPRECATED_NO_EXPORT CMARK_GFM_NO_EXPORT CMARK_GFM_DEPRECATED +#endif + +#if 0 /* DEFINE_NO_DEPRECATED */ +# ifndef CMARK_GFM_NO_DEPRECATED +# define CMARK_GFM_NO_DEPRECATED +# endif +#endif + +#endif /* CMARK_GFM_EXPORT_H */ diff --git a/src/include/cmark-gfm_version.h b/src/include/cmark-gfm_version.h new file mode 100644 index 000000000..7e7bd823d --- /dev/null +++ b/src/include/cmark-gfm_version.h @@ -0,0 +1,7 @@ +#ifndef CMARK_GFM_VERSION_H +#define CMARK_GFM_VERSION_H + +#define CMARK_GFM_VERSION ((0 << 24) | (29 << 16) | (0 << 8) | 0) +#define CMARK_GFM_VERSION_STRING "0.29.0.gfm.0" + +#endif diff --git a/src/cmark_ctype.h b/src/include/cmark_ctype.h similarity index 100% rename from src/cmark_ctype.h rename to src/include/cmark_ctype.h diff --git a/src/include/config.h b/src/include/config.h new file mode 100644 index 000000000..d38c7c7a5 --- /dev/null +++ b/src/include/config.h @@ -0,0 +1,76 @@ +#ifndef CMARK_CONFIG_H +#define CMARK_CONFIG_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define HAVE_STDBOOL_H + +#ifdef HAVE_STDBOOL_H + #include <stdbool.h> +#elif 
!defined(__cplusplus) + typedef char bool; +#endif + +#define HAVE___BUILTIN_EXPECT + +#define HAVE___ATTRIBUTE__ + +#ifdef HAVE___ATTRIBUTE__ + #define CMARK_ATTRIBUTE(list) __attribute__ (list) +#else + #define CMARK_ATTRIBUTE(list) +#endif + +#ifndef CMARK_INLINE + #if defined(_MSC_VER) && !defined(__cplusplus) + #define CMARK_INLINE __inline + #else + #define CMARK_INLINE inline + #endif +#endif + +/* snprintf and vsnprintf fallbacks for MSVC before 2015, + due to Valentin Milea http://stackoverflow.com/questions/2915672/ +*/ + +#if defined(_MSC_VER) && _MSC_VER < 1900 + +#include <stdio.h> +#include <stdarg.h> + +#define snprintf c99_snprintf +#define vsnprintf c99_vsnprintf + +CMARK_INLINE int c99_vsnprintf(char *outBuf, size_t size, const char *format, va_list ap) +{ + int count = -1; + + if (size != 0) + count = _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap); + if (count == -1) + count = _vscprintf(format, ap); + + return count; +} + +CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ...) 
+{ + int count; + va_list ap; + + va_start(ap, format); + count = c99_vsnprintf(outBuf, size, format, ap); + va_end(ap); + + return count; +} + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/footnotes.h b/src/include/footnotes.h similarity index 100% rename from src/footnotes.h rename to src/include/footnotes.h diff --git a/src/houdini.h b/src/include/houdini.h similarity index 100% rename from src/houdini.h rename to src/include/houdini.h diff --git a/src/html.h b/src/include/html.h similarity index 100% rename from src/html.h rename to src/include/html.h diff --git a/src/inlines.h b/src/include/inlines.h similarity index 100% rename from src/inlines.h rename to src/include/inlines.h diff --git a/src/iterator.h b/src/include/iterator.h similarity index 100% rename from src/iterator.h rename to src/include/iterator.h diff --git a/src/map.h b/src/include/map.h similarity index 100% rename from src/map.h rename to src/include/map.h diff --git a/src/node.h b/src/include/node.h similarity index 100% rename from src/node.h rename to src/include/node.h diff --git a/src/parser.h b/src/include/parser.h similarity index 100% rename from src/parser.h rename to src/include/parser.h diff --git a/src/plugin.h b/src/include/plugin.h similarity index 100% rename from src/plugin.h rename to src/include/plugin.h diff --git a/src/references.h b/src/include/references.h similarity index 100% rename from src/references.h rename to src/include/references.h diff --git a/src/registry.h b/src/include/registry.h similarity index 100% rename from src/registry.h rename to src/include/registry.h diff --git a/src/render.h b/src/include/render.h similarity index 100% rename from src/render.h rename to src/include/render.h diff --git a/src/scanners.h b/src/include/scanners.h similarity index 100% rename from src/scanners.h rename to src/include/scanners.h diff --git a/src/syntax_extension.h b/src/include/syntax_extension.h similarity index 100% rename from 
src/syntax_extension.h rename to src/include/syntax_extension.h diff --git a/src/utf8.h b/src/include/utf8.h similarity index 100% rename from src/utf8.h rename to src/include/utf8.h diff --git a/src/main.c b/src/main.c index a62c4f2ca..ecb63cdde 100644 --- a/src/main.c +++ b/src/main.c @@ -10,7 +10,7 @@ #include "parser.h" #include "registry.h" -#include "../extensions/cmark-gfm-core-extensions.h" +#include <cmark-gfm-core-extensions.h> #if defined(__OpenBSD__) # include <sys/param.h> From b4e18c1d74cb33a98a5a8755ae550681cfaa29c0 Mon Sep 17 00:00:00 2001 From: Ashley Garland <acgarland@apple.com> Date: Tue, 6 Oct 2020 11:22:15 -0700 Subject: [PATCH 183/218] Use pre-set config.h header --- extensions/CMakeLists.txt | 4 ++-- extensions/include/cmark-gfm-core-extensions.h | 2 +- src/CMakeLists.txt | 9 +++++---- src/blocks.c | 2 +- src/buffer.c | 2 +- src/commonmark.c | 2 +- src/html.c | 2 +- src/include/buffer.h | 2 +- src/include/cmark-gfm.h | 4 ++-- src/include/{config.h => cmark-gfm_config.h} | 0 src/include/houdini.h | 2 +- src/include/registry.h | 2 +- src/include/syntax_extension.h | 2 +- src/inlines.c | 2 +- src/iterator.c | 2 +- src/latex.c | 2 +- src/main.c | 2 +- src/man.c | 2 +- src/node.c | 2 +- src/registry.c | 2 +- src/xml.c | 2 +- 21 files changed, 26 insertions(+), 25 deletions(-) rename src/include/{config.h => cmark-gfm_config.h} (100%) diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index f1d04a33b..4977c98c3 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -85,8 +85,8 @@ install(TARGETS ${CMARK_INSTALL} if (CMARK_SHARED OR CMARK_STATIC) install(FILES - cmark-gfm-core-extensions.h - ${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm-extensions_export.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/cmark-gfm-core-extensions.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/cmark-gfm-extensions_export.h DESTINATION include ) diff --git a/extensions/include/cmark-gfm-core-extensions.h b/extensions/include/cmark-gfm-core-extensions.h index 
0645915f9..51103e616 100644 --- a/extensions/include/cmark-gfm-core-extensions.h +++ b/extensions/include/cmark-gfm-core-extensions.h @@ -7,7 +7,7 @@ extern "C" { #include "cmark-gfm-extension_api.h" #include "cmark-gfm-extensions_export.h" -#include "config.h" // for bool +#include "cmark-gfm_config.h" // for bool #include <stdint.h> CMARK_GFM_EXTENSIONS_EXPORT diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f5a2eae13..874ad3822 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -149,10 +149,11 @@ if(CMARK_SHARED OR CMARK_STATIC) DESTINATION ${libdir}/pkgconfig) install(FILES - cmark-gfm.h - cmark-gfm-extension_api.h - ${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm_export.h - ${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm_version.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/cmark-gfm.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/cmark-gfm_config.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/cmark-gfm-extension_api.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/cmark-gfm_export.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/cmark-gfm_version.h DESTINATION include ) diff --git a/src/blocks.c b/src/blocks.c index 53e882f19..39949fc6d 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -11,7 +11,7 @@ #include "cmark_ctype.h" #include "syntax_extension.h" -#include "config.h" +#include "cmark-gfm_config.h" #include "parser.h" #include "cmark-gfm.h" #include "node.h" diff --git a/src/buffer.c b/src/buffer.c index c7934e57d..df6873731 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -7,7 +7,7 @@ #include <stdint.h> #include <limits.h> -#include "config.h" +#include "cmark-gfm_config.h" #include "cmark_ctype.h" #include "buffer.h" diff --git a/src/commonmark.c b/src/commonmark.c index f272d4d29..f078f1d3d 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -4,7 +4,7 @@ #include <stdint.h> #include <assert.h> -#include "config.h" +#include "cmark-gfm_config.h" #include "cmark-gfm.h" #include "node.h" #include "buffer.h" diff --git a/src/html.c b/src/html.c index ea1f6e189..110ba44dc 100644 --- 
a/src/html.c +++ b/src/html.c @@ -3,7 +3,7 @@ #include <string.h> #include <assert.h> #include "cmark_ctype.h" -#include "config.h" +#include "cmark-gfm_config.h" #include "cmark-gfm.h" #include "houdini.h" #include "scanners.h" diff --git a/src/include/buffer.h b/src/include/buffer.h index b85bb4406..96a8ba2fa 100644 --- a/src/include/buffer.h +++ b/src/include/buffer.h @@ -6,7 +6,7 @@ #include <string.h> #include <limits.h> #include <stdint.h> -#include "config.h" +#include "cmark-gfm_config.h" #include "cmark-gfm.h" #ifdef __cplusplus diff --git a/src/include/cmark-gfm.h b/src/include/cmark-gfm.h index 6fb28693c..f7f7b8038 100644 --- a/src/include/cmark-gfm.h +++ b/src/include/cmark-gfm.h @@ -111,13 +111,13 @@ typedef struct cmark_mem { * realloc and free. */ CMARK_GFM_EXPORT -cmark_mem *cmark_get_default_mem_allocator(); +cmark_mem *cmark_get_default_mem_allocator(void); /** An arena allocator; uses system calloc to allocate large * slabs of memory. Memory in these slabs is not reused at all. */ CMARK_GFM_EXPORT -cmark_mem *cmark_get_arena_mem_allocator(); +cmark_mem *cmark_get_arena_mem_allocator(void); /** Resets the arena allocator, quickly returning all used memory * to the operating system. 
diff --git a/src/include/config.h b/src/include/cmark-gfm_config.h similarity index 100% rename from src/include/config.h rename to src/include/cmark-gfm_config.h diff --git a/src/include/houdini.h b/src/include/houdini.h index 7625b045b..ff3c0887a 100644 --- a/src/include/houdini.h +++ b/src/include/houdini.h @@ -6,7 +6,7 @@ extern "C" { #endif #include <stdint.h> -#include "config.h" +#include "cmark-gfm_config.h" #include "buffer.h" #ifdef HAVE___BUILTIN_EXPECT diff --git a/src/include/registry.h b/src/include/registry.h index fece2b63f..ade02b9c0 100644 --- a/src/include/registry.h +++ b/src/include/registry.h @@ -6,7 +6,7 @@ extern "C" { #endif #include "cmark-gfm.h" -#include "plugin.h" +#include "cmark-gfm-extension_api.h" CMARK_GFM_EXPORT void cmark_register_plugin(cmark_plugin_init_func reg_fn); diff --git a/src/include/syntax_extension.h b/src/include/syntax_extension.h index a5fe11e57..081a54c9a 100644 --- a/src/include/syntax_extension.h +++ b/src/include/syntax_extension.h @@ -3,7 +3,7 @@ #include "cmark-gfm.h" #include "cmark-gfm-extension_api.h" -#include "config.h" +#include "cmark-gfm_config.h" struct cmark_syntax_extension { cmark_match_block_func last_block_matches; diff --git a/src/inlines.c b/src/inlines.c index c21430bde..17f10ad8b 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -3,7 +3,7 @@ #include <stdio.h> #include "cmark_ctype.h" -#include "config.h" +#include "cmark-gfm_config.h" #include "node.h" #include "parser.h" #include "references.h" diff --git a/src/iterator.c b/src/iterator.c index 13fdb7616..f107454df 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -1,7 +1,7 @@ #include <assert.h> #include <stdlib.h> -#include "config.h" +#include "cmark-gfm_config.h" #include "node.h" #include "cmark-gfm.h" #include "iterator.h" diff --git a/src/latex.c b/src/latex.c index 8be15b0d5..fc08e3bec 100644 --- a/src/latex.c +++ b/src/latex.c @@ -3,7 +3,7 @@ #include <string.h> #include <assert.h> -#include "config.h" +#include 
"cmark-gfm_config.h" #include "cmark-gfm.h" #include "node.h" #include "buffer.h" diff --git a/src/main.c b/src/main.c index ecb63cdde..2bab06284 100644 --- a/src/main.c +++ b/src/main.c @@ -2,7 +2,7 @@ #include <stdio.h> #include <string.h> #include <errno.h> -#include "config.h" +#include "cmark-gfm_config.h" #include "cmark-gfm.h" #include "node.h" #include "cmark-gfm-extension_api.h" diff --git a/src/man.c b/src/man.c index 441a96e49..f49b392e3 100644 --- a/src/man.c +++ b/src/man.c @@ -3,7 +3,7 @@ #include <string.h> #include <assert.h> -#include "config.h" +#include "cmark-gfm_config.h" #include "cmark-gfm.h" #include "node.h" #include "buffer.h" diff --git a/src/node.c b/src/node.c index 0118d6511..73f418ced 100644 --- a/src/node.c +++ b/src/node.c @@ -1,7 +1,7 @@ #include <stdlib.h> #include <string.h> -#include "config.h" +#include "cmark-gfm_config.h" #include "node.h" #include "syntax_extension.h" diff --git a/src/registry.c b/src/registry.c index f4f2040d6..5fb6c9b98 100644 --- a/src/registry.c +++ b/src/registry.c @@ -2,7 +2,7 @@ #include <stdlib.h> #include <string.h> -#include "config.h" +#include "cmark-gfm_config.h" #include "cmark-gfm.h" #include "syntax_extension.h" #include "registry.h" diff --git a/src/xml.c b/src/xml.c index 2975bf96c..1a2b48821 100644 --- a/src/xml.c +++ b/src/xml.c @@ -3,7 +3,7 @@ #include <string.h> #include <assert.h> -#include "config.h" +#include "cmark-gfm_config.h" #include "cmark-gfm.h" #include "node.h" #include "buffer.h" From c7734406d8d5b36210549d3a714927f23d33ad15 Mon Sep 17 00:00:00 2001 From: Ashley Garland <acgarland@apple.com> Date: Thu, 8 Oct 2020 10:06:58 -0700 Subject: [PATCH 184/218] Track opening backtick count for inline code spans --- src/.DS_Store | Bin 0 -> 6148 bytes src/include/cmark-gfm.h | 5 +++++ src/include/node.h | 1 + src/inlines.c | 1 + src/node.c | 4 ++++ 5 files changed, 11 insertions(+) create mode 100644 src/.DS_Store diff --git a/src/.DS_Store b/src/.DS_Store new file mode 100644 index 
0000000000000000000000000000000000000000..16fc3eeabd69994654190062bfefa5d6ff475c3d GIT binary patch literal 6148 zcmeHKJ5EC}5S)cbL`st~rLVvZtSC7_E)bCi5>iA8l>Swmi=#37DTp5FLIIkU)?=@C zY<Y^eZvoi)JUjwR0CTz{K75#(@4HXzt|CUH^NceFJiiR@ho{pd`{#gjAMlDd9B};3 zKOKEbCIzH`6p#W^KnmPgfhw@`#f{I^aZ*4E{Cx%d`_Sl)y>Lv7PX~u+0f;k(!#Iy# zg4jGj?1f_@BQ#4YF{xH9h9#ZxR(ZW}OiVhgnh&d+tvVEo+j)MAbXZT+C<Uazr2_Z4 zTzdWA(y#RYmn5yEfE4&w3fOG3+pPJds;!g9d97{qJ>7FY>290{g+r8MVw7Vpyd1A0 cDf61ox!()N#Go@CbfSI+To;)X_-zHg0H8z_bpQYW literal 0 HcmV?d00001 diff --git a/src/include/cmark-gfm.h b/src/include/cmark-gfm.h index f7f7b8038..2e096c910 100644 --- a/src/include/cmark-gfm.h +++ b/src/include/cmark-gfm.h @@ -353,6 +353,11 @@ const char *cmark_node_get_type_string(cmark_node *node); */ CMARK_GFM_EXPORT const char *cmark_node_get_literal(cmark_node *node); +/** Returns the number of backtick characters used to open the + node if it is an inline code span, otherwise returns 0. + */ +CMARK_GFM_EXPORT int cmark_node_get_backtick_count(cmark_node *node); + /** Sets the string contents of 'node'. Returns 1 on success, * 0 on failure. 
*/ diff --git a/src/include/node.h b/src/include/node.h index 6391db9f9..45d008a20 100644 --- a/src/include/node.h +++ b/src/include/node.h @@ -73,6 +73,7 @@ struct cmark_node { int internal_offset; uint16_t type; uint16_t flags; + int backtick_count; cmark_syntax_extension *extension; diff --git a/src/inlines.c b/src/inlines.c index 17f10ad8b..159c87bee 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -376,6 +376,7 @@ static cmark_node *handle_backticks(subject *subj, int options) { S_normalize_code(&buf); cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf)); + node->backtick_count = openticks.len; adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options); return node; } diff --git a/src/node.c b/src/node.c index 73f418ced..ee74f260d 100644 --- a/src/node.c +++ b/src/node.c @@ -349,6 +349,10 @@ const char *cmark_node_get_literal(cmark_node *node) { return NULL; } +int cmark_node_get_backtick_count(cmark_node *node) { + return node->backtick_count; +} + int cmark_node_set_literal(cmark_node *node, const char *content) { if (node == NULL) { return 0; From 1429a7f106dc8036e9d1a9317b5be99a62eb2bd4 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Mon, 7 Dec 2020 08:13:41 -0700 Subject: [PATCH 185/218] apply block offsets for autolink source position info --- api_test/main.c | 11 +++++++++-- src/inlines.c | 4 ++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index a43cf915b..1761e16ca 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1062,13 +1062,15 @@ static void source_pos_inlines(test_batch_runner *runner) { { static const char markdown[] = "*first*\n" - "second\n"; + "second\n" + "\n" + " <http://example.com>"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); STR_EQ(runner, xml, "<?xml 
version=\"1.0\" encoding=\"UTF-8\"?>\n" "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" - "<document sourcepos=\"1:1-2:6\" xmlns=\"http://commonmark.org/xml/1.0\">\n" + "<document sourcepos=\"1:1-4:23\" xmlns=\"http://commonmark.org/xml/1.0\">\n" " <paragraph sourcepos=\"1:1-2:6\">\n" " <emph sourcepos=\"1:1-1:7\">\n" " <text sourcepos=\"1:2-1:6\" xml:space=\"preserve\">first</text>\n" @@ -1076,6 +1078,11 @@ static void source_pos_inlines(test_batch_runner *runner) { " <softbreak />\n" " <text sourcepos=\"2:1-2:6\" xml:space=\"preserve\">second</text>\n" " </paragraph>\n" + " <paragraph sourcepos=\"4:4-4:23\">\n" + " <link sourcepos=\"4:4-4:23\" destination=\"http://example.com\" title=\"\">\n" + " <text sourcepos=\"4:5-4:22\" xml:space=\"preserve\">http://example.com</text>\n" + " </link>\n" + " </paragraph>\n" "</document>\n", "sourcepos are as expected"); free(xml); diff --git a/src/inlines.c b/src/inlines.c index 159c87bee..5b0c212b8 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -150,8 +150,8 @@ static CMARK_INLINE cmark_node *make_autolink(subject *subj, link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email); link->as.link.title = cmark_chunk_literal(""); link->start_line = link->end_line = subj->line; - link->start_column = start_column + 1; - link->end_column = end_column + 1; + link->start_column = start_column + 1 + subj->column_offset + subj->block_offset; + link->end_column = end_column + 1 + subj->column_offset + subj->block_offset; cmark_node_append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url)); return link; } From 57bee4e09abc0305fd9a4256c5f0e300eb64a878 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 8 Dec 2020 15:21:09 -0700 Subject: [PATCH 186/218] don't let blocks get end lines before their start lines rdar://72043555 --- api_test/main.c | 7 +++++-- src/blocks.c | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/api_test/main.c 
b/api_test/main.c index 1761e16ca..2b2e820a1 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1004,13 +1004,14 @@ static void source_pos(test_batch_runner *runner) { "> Sure.\n" ">\n" "> 2. Yes, okay.\n" - "> ![ok](hi \"yes\")\n"; + "> ![ok](hi \"yes\")\n" + "<!-- HTML Comment -->"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" - "<document sourcepos=\"1:1-10:20\" xmlns=\"http://commonmark.org/xml/1.0\">\n" + "<document sourcepos=\"1:1-11:21\" xmlns=\"http://commonmark.org/xml/1.0\">\n" " <heading sourcepos=\"1:1-1:13\" level=\"1\">\n" " <text sourcepos=\"1:3-1:5\" xml:space=\"preserve\">Hi </text>\n" " <emph sourcepos=\"1:6-1:12\">\n" @@ -1052,6 +1053,8 @@ static void source_pos(test_batch_runner *runner) { " </item>\n" " </list>\n" " </block_quote>\n" + " <html_block sourcepos=\"11:1-11:21\" xml:space=\"preserve\">&lt;!-- HTML Comment --&gt;\n" + "</html_block>\n" "</document>\n", "sourcepos are as expected"); free(xml); diff --git a/src/blocks.c b/src/blocks.c index 39949fc6d..ab714122c 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -289,7 +289,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { b->end_column = parser->last_line_length; } else if (S_type(b) == CMARK_NODE_DOCUMENT || (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) || - (S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext)) { + (S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext) || + (parser->line_number - 1 < b->end_line)) { b->end_line = parser->line_number; b->end_column = parser->curline.size; if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n') From 34895556dd13ab70d52321d20f61cd64ccd23cbf Mon Sep 17 00:00:00 2001 From: Tony Parker <anthony.parker@apple.com> Date: Tue, 15 Dec 2020 14:23:57 -0800 
Subject: [PATCH 187/218] Add inline directive syntax rdar://71571971 --- extensions/table.c | 2 +- man/man3/cmark-gfm.3 | 1 + src/cmark.c | 2 +- src/commonmark.c | 10 ++ src/html.c | 13 +++ src/include/cmark-gfm-extension_api.h | 6 +- src/include/cmark-gfm.h | 12 +++ src/include/node.h | 5 + src/inlines.c | 140 +++++++++++++++++++++++--- src/latex.c | 1 + src/man.c | 4 + src/node.c | 37 +++++++ src/plaintext.c | 4 + src/xml.c | 3 + 14 files changed, 222 insertions(+), 18 deletions(-) diff --git a/extensions/table.c b/extensions/table.c index a5bb44067..f5b4016aa 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -439,7 +439,7 @@ static int can_contain(cmark_syntax_extension *extension, cmark_node *node, } else if (node->type == CMARK_NODE_TABLE_CELL) { return child_type == CMARK_NODE_TEXT || child_type == CMARK_NODE_CODE || child_type == CMARK_NODE_EMPH || child_type == CMARK_NODE_STRONG || - child_type == CMARK_NODE_LINK || child_type == CMARK_NODE_IMAGE || + child_type == CMARK_NODE_LINK || child_type == CMARK_NODE_IMAGE || child_type == CMARK_NODE_ATTRIBUTE || child_type == CMARK_NODE_STRIKETHROUGH || child_type == CMARK_NODE_HTML_INLINE || child_type == CMARK_NODE_FOOTNOTE_REFERENCE; diff --git a/man/man3/cmark-gfm.3 b/man/man3/cmark-gfm.3 index 001b7c707..69af42e63 100644 --- a/man/man3/cmark-gfm.3 +++ b/man/man3/cmark-gfm.3 @@ -54,6 +54,7 @@ typedef enum { CMARK_NODE_LINK = CMARK_NODE_TYPE_INLINE | 0x0009, CMARK_NODE_IMAGE = CMARK_NODE_TYPE_INLINE | 0x000a, CMARK_NODE_FOOTNOTE_REFERENCE = CMARK_NODE_TYPE_INLINE | 0x000b, + CMARK_NODE_ATTRIBUTE = CMARK_NODE_TYPE_INLINE | 0x000c, } cmark_node_type; .RE \f[] diff --git a/src/cmark.c b/src/cmark.c index b3fad4b08..08477fa57 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -8,7 +8,7 @@ #include "buffer.h" cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_FOOTNOTE_DEFINITION; -cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_FOOTNOTE_REFERENCE; +cmark_node_type CMARK_NODE_LAST_INLINE = 
CMARK_NODE_ATTRIBUTE; int cmark_version() { return CMARK_GFM_VERSION; } diff --git a/src/commonmark.c b/src/commonmark.c index f078f1d3d..789ef57e7 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -474,6 +474,16 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_ATTRIBUTE: + if (entering) { + LIT("^["); + } else { + LIT("]("); + OUT(cmark_node_get_attributes(node), false, LITERAL); + LIT(")"); + } + break; + case CMARK_NODE_FOOTNOTE_REFERENCE: if (entering) { LIT("[^"); diff --git a/src/html.c b/src/html.c index 110ba44dc..5959d7a0b 100644 --- a/src/html.c +++ b/src/html.c @@ -389,6 +389,19 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_ATTRIBUTE: + // TODO: Output span, attributes potentially controlling class/id here. For now just output the main string. + /* + if (entering) { + cmark_strbuf_puts(html, "<span __attributes=\""); + cmark_strbuf_put(html, node->as.attribute.attributes.data, node->as.attribute.attributes.len); + cmark_strbuf_puts(html, "\">"); + } else { + cmark_strbuf_puts(html, "</span>"); + } + */ + break; + case CMARK_NODE_FOOTNOTE_DEFINITION: if (entering) { if (renderer->footnote_ix == 0) { diff --git a/src/include/cmark-gfm-extension_api.h b/src/include/cmark-gfm-extension_api.h index 9403c4f00..104b4d00c 100644 --- a/src/include/cmark-gfm-extension_api.h +++ b/src/include/cmark-gfm-extension_api.h @@ -635,10 +635,10 @@ void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset); CMARK_GFM_EXPORT struct cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser); -/** Returns 1 if the inline parser is currently in a bracket; pass 1 for 'image' - * if you want to know about an image-type bracket, 0 for link-type. */ +/** Returns 1 if the inline parser is currently in a bracket; pass 2 for attribute, + * 1 for 'image' if you want to know about an image-type bracket, 0 for link-type. 
*/ CMARK_GFM_EXPORT -int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image); +int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int type); /** Remove the last n characters from the last child of the given node. * This only works where all n characters are in the single last child, and the last diff --git a/src/include/cmark-gfm.h b/src/include/cmark-gfm.h index 2e096c910..31bfcaff9 100644 --- a/src/include/cmark-gfm.h +++ b/src/include/cmark-gfm.h @@ -66,6 +66,7 @@ typedef enum { CMARK_NODE_LINK = CMARK_NODE_TYPE_INLINE | 0x0009, CMARK_NODE_IMAGE = CMARK_NODE_TYPE_INLINE | 0x000a, CMARK_NODE_FOOTNOTE_REFERENCE = CMARK_NODE_TYPE_INLINE | 0x000b, + CMARK_NODE_ATTRIBUTE = CMARK_NODE_TYPE_INLINE | 0x000c, } cmark_node_type; extern cmark_node_type CMARK_NODE_LAST_BLOCK; @@ -453,6 +454,17 @@ CMARK_GFM_EXPORT const char *cmark_node_get_title(cmark_node *node); */ CMARK_GFM_EXPORT int cmark_node_set_title(cmark_node *node, const char *title); +/** Returns the attributes of an attribute 'node', or an empty string + if no attributes are set. Returns NULL if called on a node that is + not an attribute. + */ +CMARK_GFM_EXPORT const char *cmark_node_get_attributes(cmark_node *node); + +/** Sets the attributes of an attribute 'node'. Returns 1 on success, + * 0 on failure. + */ +CMARK_GFM_EXPORT int cmark_node_set_attributes(cmark_node *node, const char *attributes); + /** Returns the literal "on enter" text for a custom 'node', or an empty string if no on_enter is set. Returns NULL if called on a non-custom node. 
diff --git a/src/include/node.h b/src/include/node.h index 45d008a20..e2c0216d1 100644 --- a/src/include/node.h +++ b/src/include/node.h @@ -43,6 +43,10 @@ typedef struct { cmark_chunk title; } cmark_link; +typedef struct { + cmark_chunk attributes; +} cmark_attribute; + typedef struct { cmark_chunk on_enter; cmark_chunk on_exit; @@ -83,6 +87,7 @@ struct cmark_node { cmark_code code; cmark_heading heading; cmark_link link; + cmark_attribute attribute; cmark_custom custom; int html_block_type; void *opaque; diff --git a/src/inlines.c b/src/inlines.c index 5b0c212b8..daffe2f16 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -33,12 +33,18 @@ static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; #define MAXBACKTICKS 80 +typedef enum { + LINK, + IMAGE, + ATTRIBUTE +} bracket_type; + typedef struct bracket { struct bracket *previous; struct delimiter *previous_delimiter; cmark_node *inl_text; bufsize_t position; - bool image; + bracket_type type; bool active; bool bracket_after; } bracket; @@ -513,12 +519,12 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open, subj->last_delim = delim; } -static void push_bracket(subject *subj, bool image, cmark_node *inl_text) { +static void push_bracket(subject *subj, bracket_type type, cmark_node *inl_text) { bracket *b = (bracket *)subj->mem->calloc(1, sizeof(bracket)); if (subj->last_bracket != NULL) { subj->last_bracket->bracket_after = true; } - b->image = image; + b->type = type; b->active = true; b->inl_text = inl_text; b->previous = subj->last_bracket; @@ -1032,7 +1038,91 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset, return i - offset; } -// Return a link, an image, or a literal close bracket. 
+static bufsize_t manual_scan_attribute_attributes(cmark_chunk *input, bufsize_t offset, + cmark_chunk *output) { + bufsize_t i = offset; + size_t nb_p = 0; + + while (i < input->len) { + if (input->data[i] == '\\' && + i + 1 < input->len && + cmark_ispunct(input->data[i+1])) + i += 2; + else if (input->data[i] == '(') { + ++nb_p; + ++i; + if (nb_p > 32) + return -1; + } else if (input->data[i] == ')') { + if (nb_p == 0) + break; + --nb_p; + ++i; + } else { + ++i; + } + } + + if (i >= input->len) + return -1; + + { + cmark_chunk result = {input->data + offset, i - offset, 0}; + *output = result; + } + return i - offset; +} + +static cmark_node *handle_close_bracket_attribute(cmark_parser *parser, subject *subj, bracket *opener) { + bufsize_t startattributes, endattributes; + cmark_chunk attributes; + bufsize_t n; + cmark_node *inl; + cmark_chunk raw_label; + int found_label; + cmark_node *tmp, *tmpnext; + + // ^name[content](attributes) + // TODO: support name. we will not even enter this with a name because we fail the match first + + startattributes = subj->pos + 1; + + if (peek_char(subj) == '(' && + ((n = manual_scan_attribute_attributes(&subj->input, subj->pos + 1, + &attributes)) > -1)) { + + endattributes = subj->pos + 1 + n; + + if (peek_at(subj, endattributes) == ')') { + subj->pos = endattributes + 1; + attributes = cmark_chunk_dup(&subj->input, startattributes, endattributes - startattributes); + } + } + + inl = make_simple(subj->mem, CMARK_NODE_ATTRIBUTE); + inl->as.attribute.attributes = attributes; + inl->start_line = inl->end_line = subj->line; + inl->start_column = opener->inl_text->start_column; + inl->end_column = subj->pos + subj->column_offset + subj->block_offset; + cmark_node_insert_before(opener->inl_text, inl); + // Add content text: + tmp = opener->inl_text->next; + while (tmp) { + tmpnext = tmp->next; + cmark_node_append_child(inl, tmp); + tmp = tmpnext; + } + + // Free the bracket ^[: + cmark_node_free(opener->inl_text); + + 
process_emphasis(parser, subj, opener->previous_delimiter); + pop_bracket(subj); + + return NULL; +} + +// Return a link, an image, an attribute, or a literal close bracket. static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { bufsize_t initial_pos, after_link_text_pos; bufsize_t endurl, starttitle, endtitle, endall; @@ -1050,7 +1140,7 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { advance(subj); // advance past ] initial_pos = subj->pos; - // get last [ or ![ + // get last [ or ![ or ^[ opener = subj->last_bracket; if (opener == NULL) { @@ -1063,9 +1153,13 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]")); } + if (opener->type == ATTRIBUTE) { + return handle_close_bracket_attribute(parser, subj, opener); + } + // If we got here, we matched a potential link/image text. // Now we check to see if it's a link/image. - is_image = opener->image; + is_image = opener->type == IMAGE; after_link_text_pos = subj->pos; @@ -1188,7 +1282,7 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { if (!is_image) { opener = subj->last_bracket; while (opener != NULL) { - if (!opener->image) { + if (opener->type == LINK) { if (!opener->active) { break; } else { @@ -1341,7 +1435,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, case '[': advance(subj); new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("[")); - push_bracket(subj, false, new_inl); + push_bracket(subj, LINK, new_inl); break; case ']': new_inl = handle_close_bracket(parser, subj); @@ -1351,11 +1445,22 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, if (peek_char(subj) == '[' && peek_char_n(subj, 1) != '^') { advance(subj); new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("![")); - push_bracket(subj, true, 
new_inl); + push_bracket(subj, IMAGE, new_inl); } else { new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!")); } break; + case '^': + advance(subj); + // TODO: Support a name between ^ and [ + if (peek_char(subj) == '[') { + advance(subj); + new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("^[")); + push_bracket(subj, ATTRIBUTE, new_inl); + } else { + new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("^")); + } + break; default: new_inl = try_extensions(parser, parent, c, subj); if (new_inl != NULL) @@ -1604,10 +1709,19 @@ cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser) { return &parser->input; } -int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image) { - for (bracket *b = parser->last_bracket; b; b = b->previous) - if (b->active && b->image == (image != 0)) - return 1; +int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int type) { + for (bracket *b = parser->last_bracket; b; b = b->previous) { + if (b->active) { + switch (type) { + case 0: + return b->type == LINK; + case 1: + return b->type == IMAGE; + case 2: + return b->type == ATTRIBUTE; + } + } + } return 0; } diff --git a/src/latex.c b/src/latex.c index fc08e3bec..c204f6003 100644 --- a/src/latex.c +++ b/src/latex.c @@ -446,6 +446,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, case CMARK_NODE_FOOTNOTE_DEFINITION: case CMARK_NODE_FOOTNOTE_REFERENCE: + case CMARK_NODE_ATTRIBUTE: // TODO break; diff --git a/src/man.c b/src/man.c index f49b392e3..c60e1bfe7 100644 --- a/src/man.c +++ b/src/man.c @@ -256,6 +256,10 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_ATTRIBUTE: + OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); + break; + case CMARK_NODE_FOOTNOTE_DEFINITION: case CMARK_NODE_FOOTNOTE_REFERENCE: // TODO diff --git a/src/node.c b/src/node.c index ee74f260d..1d9b468cc 100644 --- 
a/src/node.c +++ b/src/node.c @@ -37,6 +37,7 @@ bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type) { case CMARK_NODE_STRONG: case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: + case CMARK_NODE_ATTRIBUTE: case CMARK_NODE_CUSTOM_INLINE: return CMARK_NODE_TYPE_INLINE_P(child_type); @@ -132,6 +133,9 @@ static void free_node_as(cmark_node *node) { cmark_chunk_free(NODE_MEM(node), &node->as.link.url); cmark_chunk_free(NODE_MEM(node), &node->as.link.title); break; + case CMARK_NODE_ATTRIBUTE: + cmark_chunk_free(NODE_MEM(node), &node->as.attribute.attributes); + break; case CMARK_NODE_CUSTOM_BLOCK: case CMARK_NODE_CUSTOM_INLINE: cmark_chunk_free(NODE_MEM(node), &node->as.custom.on_enter); @@ -256,6 +260,8 @@ const char *cmark_node_get_type_string(cmark_node *node) { return "link"; case CMARK_NODE_IMAGE: return "image"; + case CMARK_NODE_ATTRIBUTE: + return "attribute"; } return "<unknown>"; @@ -651,6 +657,37 @@ int cmark_node_set_title(cmark_node *node, const char *title) { return 0; } +const char *cmark_node_get_attributes(cmark_node *node) { + if (node == NULL) { + return NULL; + } + + switch (node->type) { + case CMARK_NODE_ATTRIBUTE: + return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.attribute.attributes); + default: + break; + } + + return NULL; +} + +int cmark_node_set_attributes(cmark_node *node, const char *attributes) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_ATTRIBUTE: + cmark_chunk_set_cstr(NODE_MEM(node), &node->as.attribute.attributes, attributes); + return 1; + default: + break; + } + + return 0; +} + const char *cmark_node_get_on_enter(cmark_node *node) { if (node == NULL) { return NULL; diff --git a/src/plaintext.c b/src/plaintext.c index b25e4a396..0edbd1406 100644 --- a/src/plaintext.c +++ b/src/plaintext.c @@ -191,6 +191,10 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, case CMARK_NODE_IMAGE: break; + case CMARK_NODE_ATTRIBUTE: + 
OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL); + break; + case CMARK_NODE_FOOTNOTE_REFERENCE: if (entering) { LIT("[^"); diff --git a/src/xml.c b/src/xml.c index 1a2b48821..d572290ae 100644 --- a/src/xml.c +++ b/src/xml.c @@ -133,6 +133,9 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, escape_xml(xml, node->as.link.title.data, node->as.link.title.len); cmark_strbuf_putc(xml, '"'); break; + case CMARK_NODE_ATTRIBUTE: + // TODO + break; default: break; } From f97a08caae2c995436da596475fa85ec5a7bf7eb Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Thu, 17 Dec 2020 14:06:06 -0700 Subject: [PATCH 188/218] properly set image/link sourcepos when spanning multiple lines --- api_test/main.c | 17 +++++++++++++++-- src/inlines.c | 3 ++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index 2b2e820a1..3199ab105 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1005,13 +1005,18 @@ static void source_pos(test_batch_runner *runner) { ">\n" "> 2. 
Yes, okay.\n" "> ![ok](hi \"yes\")\n" - "<!-- HTML Comment -->"; + "<!-- HTML Comment -->\n" + "\n" + "what happens if we spread a link [across multiple\n" + "lines][anchor]\n" + "\n" + "[anchor]: http://example.com\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" - "<document sourcepos=\"1:1-11:21\" xmlns=\"http://commonmark.org/xml/1.0\">\n" + "<document sourcepos=\"1:1-16:28\" xmlns=\"http://commonmark.org/xml/1.0\">\n" " <heading sourcepos=\"1:1-1:13\" level=\"1\">\n" " <text sourcepos=\"1:3-1:5\" xml:space=\"preserve\">Hi </text>\n" " <emph sourcepos=\"1:6-1:12\">\n" @@ -1055,6 +1060,14 @@ static void source_pos(test_batch_runner *runner) { " </block_quote>\n" " <html_block sourcepos=\"11:1-11:21\" xml:space=\"preserve\">&lt;!-- HTML Comment --&gt;\n" "</html_block>\n" + " <paragraph sourcepos=\"13:1-14:14\">\n" + " <text sourcepos=\"13:1-13:33\" xml:space=\"preserve\">what happens if we spread a link </text>\n" + " <link sourcepos=\"13:34-14:14\" destination=\"http://example.com\" title=\"\">\n" + " <text sourcepos=\"13:35-13:49\" xml:space=\"preserve\">across multiple</text>\n" + " <softbreak />\n" + " <text sourcepos=\"14:1-14:5\" xml:space=\"preserve\">lines</text>\n" + " </link>\n" + " </paragraph>\n" "</document>\n", "sourcepos are as expected"); free(xml); diff --git a/src/inlines.c b/src/inlines.c index daffe2f16..6bb88dca2 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1258,8 +1258,9 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { inl = make_simple(subj->mem, is_image ? 
CMARK_NODE_IMAGE : CMARK_NODE_LINK); inl->as.link.url = url; inl->as.link.title = title; - inl->start_line = inl->end_line = subj->line; + inl->start_line = opener->inl_text->start_line; inl->start_column = opener->inl_text->start_column; + inl->end_line = subj->line; inl->end_column = subj->pos + subj->column_offset + subj->block_offset; cmark_node_insert_before(opener->inl_text, inl); // Add link text: From 482d443c6654bbda3bc24f84a906f531bf33e306 Mon Sep 17 00:00:00 2001 From: Tony Parker <anthony.parker@apple.com> Date: Sun, 17 Jan 2021 12:42:05 -0800 Subject: [PATCH 189/218] Add ^ to special chars array This prevented attributes from being parsed properly. rdar://73301600 --- src/commonmark.c | 1 + src/inlines.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/commonmark.c b/src/commonmark.c index 789ef57e7..94fd4388f 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -38,6 +38,7 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || c == '>' || c == '\\' || c == '`' || c == '~' || c == '!' || (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') || + (c == '^' && nextc == '[') || (renderer->begin_content && (c == '-' || c == '+' || c == '=') && // begin_content doesn't get set to false til we've passed digits // at the beginning of line, so... 
diff --git a/src/inlines.c b/src/inlines.c index 6bb88dca2..6ef6ecfe9 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1325,7 +1325,7 @@ static int8_t SPECIAL_CHARS[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, From 39ae2a8c4e9a183a232afcd8cf06f37a6a04455e Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 3 Aug 2021 08:50:58 -0600 Subject: [PATCH 190/218] fixes existing data races rdar://75568341 --- extensions/core-extensions.c | 13 +++++++++++-- src/inlines.c | 11 +++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 846e2bc2b..16e36ec62 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -1,3 +1,5 @@ +#include <pthread.h> + #include "cmark-gfm-core-extensions.h" #include "autolink.h" #include "strikethrough.h" @@ -7,21 +9,28 @@ #include "registry.h" #include "plugin.h" +pthread_mutex_t extensions_lock; + static int core_extensions_registration(cmark_plugin *plugin) { + pthread_mutex_lock(&extensions_lock); cmark_plugin_register_syntax_extension(plugin, create_table_extension()); cmark_plugin_register_syntax_extension(plugin, create_strikethrough_extension()); cmark_plugin_register_syntax_extension(plugin, create_autolink_extension()); cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension()); cmark_plugin_register_syntax_extension(plugin, 
create_tasklist_extension()); + pthread_mutex_unlock(&extensions_lock); return 1; } -void cmark_gfm_core_extensions_ensure_registered(void) { - static int registered = 0; +pthread_mutex_t registered_lock; +static _Atomic int registered = 0; +void cmark_gfm_core_extensions_ensure_registered(void) { + pthread_mutex_lock(&registered_lock); if (!registered) { cmark_register_plugin(core_extensions_registration); registered = 1; } + pthread_mutex_unlock(&registered_lock); } diff --git a/src/inlines.c b/src/inlines.c index 6ef6ecfe9..736b53b6c 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1,6 +1,7 @@ #include <stdlib.h> #include <string.h> #include <stdio.h> +#include <pthread.h> #include "cmark_ctype.h" #include "cmark-gfm_config.h" @@ -64,7 +65,7 @@ typedef struct subject{ } subject; // Extensions may populate this. -static int8_t SKIP_CHARS[256]; +static _Atomic int8_t SKIP_CHARS[256]; static CMARK_INLINE bool S_is_line_end_char(char c) { return (c == '\n' || c == '\r'); @@ -1321,7 +1322,7 @@ static cmark_node *handle_newline(subject *subj) { } // "\r\n\\`&_*[]<!" 
-static int8_t SPECIAL_CHARS[256] = { +static _Atomic int8_t SPECIAL_CHARS[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -1349,6 +1350,8 @@ static char SMART_PUNCT_CHARS[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +pthread_mutex_t chars_lock; + static bufsize_t subject_find_special_char(subject *subj, int options) { bufsize_t n = subj->pos + 1; @@ -1364,15 +1367,19 @@ static bufsize_t subject_find_special_char(subject *subj, int options) { } void cmark_inlines_add_special_character(unsigned char c, bool emphasis) { + pthread_mutex_lock(&chars_lock); SPECIAL_CHARS[c] = 1; if (emphasis) SKIP_CHARS[c] = 1; + pthread_mutex_unlock(&chars_lock); } void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) { + pthread_mutex_lock(&chars_lock); SPECIAL_CHARS[c] = 0; if (emphasis) SKIP_CHARS[c] = 0; + pthread_mutex_unlock(&chars_lock); } static cmark_node *try_extensions(cmark_parser *parser, From d62fa12f069f119e966a01b8fe9998c2e3b65726 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 30 Mar 2021 10:04:30 -0600 Subject: [PATCH 191/218] add mutex initializer in new header --- extensions/core-extensions.c | 8 +++++++- src/include/mutex.h | 17 +++++++++++++++++ src/inlines.c | 27 ++++++++++++++++++++++----- 3 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 src/include/mutex.h diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 16e36ec62..801b5dafe 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -1,7 +1,9 @@ #include <pthread.h> +#include <stdatomic.h> #include "cmark-gfm-core-extensions.h" #include "autolink.h" +#include "mutex.h" #include "strikethrough.h" #include "table.h" #include "tagfilter.h" @@ -9,9 +11,11 @@ #include "registry.h" #include "plugin.h" 
-pthread_mutex_t extensions_lock; +static pthread_mutex_t extensions_lock; +static atomic_int extensions_latch = 0; static int core_extensions_registration(cmark_plugin *plugin) { + initialize_mutex_once(&extensions_lock, &extensions_latch); pthread_mutex_lock(&extensions_lock); cmark_plugin_register_syntax_extension(plugin, create_table_extension()); cmark_plugin_register_syntax_extension(plugin, @@ -24,9 +28,11 @@ static int core_extensions_registration(cmark_plugin *plugin) { } pthread_mutex_t registered_lock; +static atomic_int registered_latch = 0; static _Atomic int registered = 0; void cmark_gfm_core_extensions_ensure_registered(void) { + initialize_mutex_once(&registered_lock, &registered_latch); pthread_mutex_lock(&registered_lock); if (!registered) { cmark_register_plugin(core_extensions_registration); diff --git a/src/include/mutex.h b/src/include/mutex.h new file mode 100644 index 000000000..02ddb3d80 --- /dev/null +++ b/src/include/mutex.h @@ -0,0 +1,17 @@ +#ifndef CMARK_MUTEX_H +#define CMARK_MUTEX_H + +#include <pthread.h> +#include <stdatomic.h> + +#include "cmark-gfm_config.h" + +static CMARK_INLINE void initialize_mutex_once(pthread_mutex_t *m, atomic_int *latch) { + int expected = 0; + + if (atomic_compare_exchange_strong(latch, &expected, 1)) { + pthread_mutex_init(m, NULL); + } +} + +#endif diff --git a/src/inlines.c b/src/inlines.c index 736b53b6c..3ac09736a 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -2,6 +2,7 @@ #include <string.h> #include <stdio.h> #include <pthread.h> +#include <stdatomic.h> #include "cmark_ctype.h" #include "cmark-gfm_config.h" @@ -14,6 +15,7 @@ #include "scanners.h" #include "inlines.h" #include "syntax_extension.h" +#include "mutex.h" static const char *EMDASH = "\xE2\x80\x94"; static const char *ENDASH = "\xE2\x80\x93"; @@ -65,7 +67,10 @@ typedef struct subject{ } subject; // Extensions may populate this. 
-static _Atomic int8_t SKIP_CHARS[256]; +static int8_t SKIP_CHARS[256]; + +pthread_mutex_t chars_lock; +static atomic_int chars_latch = 0; static CMARK_INLINE bool S_is_line_end_char(char c) { return (c == '\n' || c == '\r'); @@ -405,6 +410,10 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, before_char = 10; } else { before_char_pos = subj->pos - 1; + + initialize_mutex_once(&chars_lock, &chars_latch); + pthread_mutex_lock(&chars_lock); + // walk back to the beginning of the UTF_8 sequence: while ((peek_at(subj, before_char_pos) >> 6 == 2 || SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) { before_char_pos -= 1; @@ -414,6 +423,8 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, if (len == -1 || (before_char < 256 && SKIP_CHARS[(unsigned char) before_char])) { before_char = 10; } + + pthread_mutex_unlock(&chars_lock); } if (c == '\'' || c == '"') { @@ -430,14 +441,20 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, after_char = 10; } else { after_char_pos = subj->pos; + + initialize_mutex_once(&chars_lock, &chars_latch); + pthread_mutex_lock(&chars_lock); + while (SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) { after_char_pos += 1; } len = cmark_utf8proc_iterate(subj->input.data + after_char_pos, subj->input.len - after_char_pos, &after_char); if (len == -1 || (after_char < 256 && SKIP_CHARS[(unsigned char) after_char])) { - after_char = 10; - } + after_char = 10; + } + + pthread_mutex_unlock(&chars_lock); } left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) && @@ -1350,8 +1367,6 @@ static char SMART_PUNCT_CHARS[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; -pthread_mutex_t chars_lock; - static bufsize_t subject_find_special_char(subject *subj, int options) { bufsize_t n = subj->pos + 1; @@ -1367,6 +1382,7 @@ static bufsize_t subject_find_special_char(subject *subj, int options) { } void 
cmark_inlines_add_special_character(unsigned char c, bool emphasis) { + initialize_mutex_once(&chars_lock, &chars_latch); pthread_mutex_lock(&chars_lock); SPECIAL_CHARS[c] = 1; if (emphasis) @@ -1375,6 +1391,7 @@ void cmark_inlines_add_special_character(unsigned char c, bool emphasis) { } void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) { + initialize_mutex_once(&chars_lock, &chars_latch); pthread_mutex_lock(&chars_lock); SPECIAL_CHARS[c] = 0; if (emphasis) From 437dcb0e0f8bd28830cf086b5542c78efe30f650 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 30 Mar 2021 10:08:30 -0600 Subject: [PATCH 192/218] add locks around arena ops --- src/arena.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/arena.c b/src/arena.c index 83a15255f..2a1e06f8a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,8 +1,14 @@ +#include <pthread.h> +#include <stdatomic.h> #include <stdlib.h> #include <string.h> #include <stdint.h> #include "cmark-gfm.h" #include "cmark-gfm-extension_api.h" +#include "mutex.h" + +static pthread_mutex_t arena_lock; +static atomic_int arena_latch = 0; static struct arena_chunk { size_t sz, used; @@ -24,10 +30,13 @@ static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev } void cmark_arena_push(void) { + initialize_mutex_once(&arena_lock, &arena_latch); + pthread_mutex_lock(&arena_lock); if (!A) return; A->push_point = 1; A = alloc_arena_chunk(10240, A); + pthread_mutex_unlock(&arena_lock); } int cmark_arena_pop(void) { @@ -68,6 +77,9 @@ static void *arena_calloc(size_t nmem, size_t size) { const size_t align = sizeof(size_t) - 1; sz = (sz + align) & ~align; + // the arena lock will have already been set up by a previous call to init_arena + pthread_mutex_lock(&arena_lock); + if (sz > A->sz) { A->prev = alloc_arena_chunk(sz, A->prev); return (uint8_t *) A->prev->ptr + sizeof(size_t); @@ -77,6 +89,9 @@ static void *arena_calloc(size_t nmem, size_t size) { } 
void *ptr = (uint8_t *) A->ptr + A->used; A->used += sz; + + pthread_mutex_unlock(&arena_lock); + *((size_t *) ptr) = sz - sizeof(size_t); return (uint8_t *) ptr + sizeof(size_t); } From 92fb896e604f754ea414c7562b8a34729cf57df3 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 30 Mar 2021 10:08:53 -0600 Subject: [PATCH 193/218] tweak definitions of statics in inlines.c --- src/inlines.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index 3ac09736a..1ae2023ba 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1339,7 +1339,7 @@ static cmark_node *handle_newline(subject *subj) { } // "\r\n\\`&_*[]<!" -static _Atomic int8_t SPECIAL_CHARS[256] = { +static int8_t SPECIAL_CHARS[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -1353,7 +1353,7 @@ static _Atomic int8_t SPECIAL_CHARS[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // " ' . 
- -static char SMART_PUNCT_CHARS[] = { +static const char SMART_PUNCT_CHARS[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, From cb88af7ed0b45a233001eb3b472ae47a8fe001bc Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 30 Mar 2021 10:09:20 -0600 Subject: [PATCH 194/218] add locks around extensions registry ops --- src/registry.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/registry.c b/src/registry.c index 5fb6c9b98..f38671526 100644 --- a/src/registry.c +++ b/src/registry.c @@ -1,9 +1,11 @@ +#include <stdatomic.h> #include <stdint.h> #include <stdlib.h> #include <string.h> #include "cmark-gfm_config.h" #include "cmark-gfm.h" +#include "mutex.h" #include "syntax_extension.h" #include "registry.h" #include "plugin.h" @@ -12,6 +14,9 @@ extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; static cmark_llist *syntax_extensions = NULL; +static pthread_mutex_t extensions_lock; +static atomic_int extensions_latch = 0; + void cmark_register_plugin(cmark_plugin_init_func reg_fn) { cmark_plugin *plugin = cmark_plugin_new(); @@ -23,15 +28,23 @@ void cmark_register_plugin(cmark_plugin_init_func reg_fn) { cmark_llist *syntax_extensions_list = cmark_plugin_steal_syntax_extensions(plugin), *it; + initialize_mutex_once(&extensions_lock, &extensions_latch); + pthread_mutex_lock(&extensions_lock); + for (it = syntax_extensions_list; it; it = it->next) { syntax_extensions = cmark_llist_append(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions, it->data); } + + pthread_mutex_unlock(&extensions_lock); cmark_llist_free(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions_list); cmark_plugin_free(plugin); } void cmark_release_plugins(void) { + initialize_mutex_once(&extensions_lock, &extensions_latch); + 
pthread_mutex_lock(&extensions_lock); + if (syntax_extensions) { cmark_llist_free_full( &CMARK_DEFAULT_MEM_ALLOCATOR, @@ -39,25 +52,40 @@ void cmark_release_plugins(void) { (cmark_free_func) cmark_syntax_extension_free); syntax_extensions = NULL; } + + pthread_mutex_unlock(&extensions_lock); } cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem) { cmark_llist *it; cmark_llist *res = NULL; + initialize_mutex_once(&extensions_lock, &extensions_latch); + pthread_mutex_lock(&extensions_lock); + for (it = syntax_extensions; it; it = it->next) { res = cmark_llist_append(mem, res, it->data); } + + pthread_mutex_unlock(&extensions_lock); return res; } cmark_syntax_extension *cmark_find_syntax_extension(const char *name) { cmark_llist *tmp; + cmark_syntax_extension *res = NULL; + initialize_mutex_once(&extensions_lock, &extensions_latch); + pthread_mutex_lock(&extensions_lock); + for (tmp = syntax_extensions; tmp; tmp = tmp->next) { cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; - if (!strcmp(ext->name, name)) - return ext; + if (!strcmp(ext->name, name)) { + res = ext; + break; + } } - return NULL; + + pthread_mutex_unlock(&extensions_lock); + return res; } From 562c89a20f0a905716636491f8317b7555b57311 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 30 Mar 2021 11:42:39 -0600 Subject: [PATCH 195/218] make locking a compile-time setting --- CMakeLists.txt | 1 + extensions/core-extensions.c | 16 +++++----------- src/arena.c | 15 +++++---------- src/config.h.in | 2 ++ src/include/cmark-gfm_config.h | 2 ++ src/include/mutex.h | 26 +++++++++++++++++++++++--- src/inlines.c | 25 +++++++++---------------- src/registry.c | 24 +++++++++--------------- 8 files changed, 56 insertions(+), 55 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c92bde52d..e30278cb2 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ option(CMARK_TESTS "Build cmark-gfm tests and enable testing" ON) option(CMARK_STATIC 
"Build static libcmark-gfm library" ON) option(CMARK_SHARED "Build shared libcmark-gfm library" ON) option(CMARK_LIB_FUZZER "Build libFuzzer fuzzing harness" OFF) +option(CMARK_THREADING "Add locks around static accesses via pthreads" OFF) add_subdirectory(src) add_subdirectory(extensions) diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 801b5dafe..544d8fa45 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -1,6 +1,3 @@ -#include <pthread.h> -#include <stdatomic.h> - #include "cmark-gfm-core-extensions.h" #include "autolink.h" #include "mutex.h" @@ -11,19 +8,17 @@ #include "registry.h" #include "plugin.h" -static pthread_mutex_t extensions_lock; -static atomic_int extensions_latch = 0; +CMARK_DEFINE_LOCK(extensions); static int core_extensions_registration(cmark_plugin *plugin) { - initialize_mutex_once(&extensions_lock, &extensions_latch); - pthread_mutex_lock(&extensions_lock); + CMARK_INITIALIZE_AND_LOCK(extensions); cmark_plugin_register_syntax_extension(plugin, create_table_extension()); cmark_plugin_register_syntax_extension(plugin, create_strikethrough_extension()); cmark_plugin_register_syntax_extension(plugin, create_autolink_extension()); cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension()); cmark_plugin_register_syntax_extension(plugin, create_tasklist_extension()); - pthread_mutex_unlock(&extensions_lock); + CMARK_UNLOCK(extensions); return 1; } @@ -32,11 +27,10 @@ static atomic_int registered_latch = 0; static _Atomic int registered = 0; void cmark_gfm_core_extensions_ensure_registered(void) { - initialize_mutex_once(&registered_lock, &registered_latch); - pthread_mutex_lock(&registered_lock); + CMARK_INITIALIZE_AND_LOCK(extensions); if (!registered) { cmark_register_plugin(core_extensions_registration); registered = 1; } - pthread_mutex_unlock(&registered_lock); + CMARK_UNLOCK(extensions); } diff --git a/src/arena.c b/src/arena.c index 2a1e06f8a..e2dbb1d1e 100644 --- 
a/src/arena.c +++ b/src/arena.c @@ -1,5 +1,3 @@ -#include <pthread.h> -#include <stdatomic.h> #include <stdlib.h> #include <string.h> #include <stdint.h> @@ -7,8 +5,7 @@ #include "cmark-gfm-extension_api.h" #include "mutex.h" -static pthread_mutex_t arena_lock; -static atomic_int arena_latch = 0; +CMARK_DEFINE_LOCK(arena) static struct arena_chunk { size_t sz, used; @@ -30,13 +27,12 @@ static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev } void cmark_arena_push(void) { - initialize_mutex_once(&arena_lock, &arena_latch); - pthread_mutex_lock(&arena_lock); + CMARK_INITIALIZE_AND_LOCK(arena); if (!A) return; A->push_point = 1; A = alloc_arena_chunk(10240, A); - pthread_mutex_unlock(&arena_lock); + CMARK_UNLOCK(arena); } int cmark_arena_pop(void) { @@ -77,8 +73,7 @@ static void *arena_calloc(size_t nmem, size_t size) { const size_t align = sizeof(size_t) - 1; sz = (sz + align) & ~align; - // the arena lock will have already been set up by a previous call to init_arena - pthread_mutex_lock(&arena_lock); + CMARK_INITIALIZE_AND_LOCK(arena); if (sz > A->sz) { A->prev = alloc_arena_chunk(sz, A->prev); @@ -90,7 +85,7 @@ static void *arena_calloc(size_t nmem, size_t size) { void *ptr = (uint8_t *) A->ptr + A->used; A->used += sz; - pthread_mutex_unlock(&arena_lock); + CMARK_UNLOCK(arena); *((size_t *) ptr) = sz - sizeof(size_t); return (uint8_t *) ptr + sizeof(size_t); diff --git a/src/config.h.in b/src/config.h.in index de1a4dd49..65702b2e0 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -17,6 +17,8 @@ extern "C" { #cmakedefine HAVE___ATTRIBUTE__ +#cmakedefine CMARK_THREADING + #ifdef HAVE___ATTRIBUTE__ #define CMARK_ATTRIBUTE(list) __attribute__ (list) #else diff --git a/src/include/cmark-gfm_config.h b/src/include/cmark-gfm_config.h index d38c7c7a5..1f96cc558 100644 --- a/src/include/cmark-gfm_config.h +++ b/src/include/cmark-gfm_config.h @@ -17,6 +17,8 @@ extern "C" { #define HAVE___ATTRIBUTE__ +#define CMARK_THREADING + #ifdef 
HAVE___ATTRIBUTE__ #define CMARK_ATTRIBUTE(list) __attribute__ (list) #else diff --git a/src/include/mutex.h b/src/include/mutex.h index 02ddb3d80..16b5cfb46 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -1,11 +1,13 @@ #ifndef CMARK_MUTEX_H #define CMARK_MUTEX_H +#include "cmark-gfm_config.h" + +#ifdef CMARK_THREADING + #include <pthread.h> #include <stdatomic.h> -#include "cmark-gfm_config.h" - static CMARK_INLINE void initialize_mutex_once(pthread_mutex_t *m, atomic_int *latch) { int expected = 0; @@ -14,4 +16,22 @@ static CMARK_INLINE void initialize_mutex_once(pthread_mutex_t *m, atomic_int *l } } -#endif +#define CMARK_DEFINE_LOCK(NAME) \ +static pthread_mutex_t NAME##_lock; \ +static atomic_int NAME##_latch = 0; + +#define CMARK_INITIALIZE_AND_LOCK(NAME) \ +initialize_mutex_once(&NAME##_lock, &NAME##_latch); \ +pthread_mutex_lock(&NAME##_lock); + +#define CMARK_UNLOCK(NAME) pthread_mutex_unlock(&NAME##_lock); + +#else // no threading support + +#define CMARK_DEFINE_LOCK(NAME) +#define CMARK_INITIALIZE_AND_LOCK(NAME) +#define CMARK_UNLOCK(NAME) + +#endif // CMARK_THREADING + +#endif // CMARK_MUTEX_H diff --git a/src/inlines.c b/src/inlines.c index 1ae2023ba..9405b0127 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1,8 +1,6 @@ #include <stdlib.h> #include <string.h> #include <stdio.h> -#include <pthread.h> -#include <stdatomic.h> #include "cmark_ctype.h" #include "cmark-gfm_config.h" @@ -69,8 +67,7 @@ typedef struct subject{ // Extensions may populate this. 
static int8_t SKIP_CHARS[256]; -pthread_mutex_t chars_lock; -static atomic_int chars_latch = 0; +CMARK_DEFINE_LOCK(chars); static CMARK_INLINE bool S_is_line_end_char(char c) { return (c == '\n' || c == '\r'); @@ -411,8 +408,7 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, } else { before_char_pos = subj->pos - 1; - initialize_mutex_once(&chars_lock, &chars_latch); - pthread_mutex_lock(&chars_lock); + CMARK_INITIALIZE_AND_LOCK(chars); // walk back to the beginning of the UTF_8 sequence: while ((peek_at(subj, before_char_pos) >> 6 == 2 || SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) { @@ -424,7 +420,7 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, before_char = 10; } - pthread_mutex_unlock(&chars_lock); + CMARK_UNLOCK(chars); } if (c == '\'' || c == '"') { @@ -442,8 +438,7 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, } else { after_char_pos = subj->pos; - initialize_mutex_once(&chars_lock, &chars_latch); - pthread_mutex_lock(&chars_lock); + CMARK_INITIALIZE_AND_LOCK(chars); while (SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) { after_char_pos += 1; @@ -454,7 +449,7 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, after_char = 10; } - pthread_mutex_unlock(&chars_lock); + CMARK_UNLOCK(chars); } left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) && @@ -1382,21 +1377,19 @@ static bufsize_t subject_find_special_char(subject *subj, int options) { } void cmark_inlines_add_special_character(unsigned char c, bool emphasis) { - initialize_mutex_once(&chars_lock, &chars_latch); - pthread_mutex_lock(&chars_lock); + CMARK_INITIALIZE_AND_LOCK(chars); SPECIAL_CHARS[c] = 1; if (emphasis) SKIP_CHARS[c] = 1; - pthread_mutex_unlock(&chars_lock); + CMARK_UNLOCK(chars); } void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) { - initialize_mutex_once(&chars_lock, &chars_latch); - 
pthread_mutex_lock(&chars_lock); + CMARK_INITIALIZE_AND_LOCK(chars); SPECIAL_CHARS[c] = 0; if (emphasis) SKIP_CHARS[c] = 0; - pthread_mutex_unlock(&chars_lock); + CMARK_UNLOCK(chars); } static cmark_node *try_extensions(cmark_parser *parser, diff --git a/src/registry.c b/src/registry.c index f38671526..91f79530a 100644 --- a/src/registry.c +++ b/src/registry.c @@ -1,4 +1,3 @@ -#include <stdatomic.h> #include <stdint.h> #include <stdlib.h> #include <string.h> @@ -14,8 +13,7 @@ extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; static cmark_llist *syntax_extensions = NULL; -static pthread_mutex_t extensions_lock; -static atomic_int extensions_latch = 0; +CMARK_DEFINE_LOCK(extensions); void cmark_register_plugin(cmark_plugin_init_func reg_fn) { cmark_plugin *plugin = cmark_plugin_new(); @@ -28,22 +26,20 @@ void cmark_register_plugin(cmark_plugin_init_func reg_fn) { cmark_llist *syntax_extensions_list = cmark_plugin_steal_syntax_extensions(plugin), *it; - initialize_mutex_once(&extensions_lock, &extensions_latch); - pthread_mutex_lock(&extensions_lock); + CMARK_INITIALIZE_AND_LOCK(extensions); for (it = syntax_extensions_list; it; it = it->next) { syntax_extensions = cmark_llist_append(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions, it->data); } - pthread_mutex_unlock(&extensions_lock); + CMARK_UNLOCK(extensions); cmark_llist_free(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions_list); cmark_plugin_free(plugin); } void cmark_release_plugins(void) { - initialize_mutex_once(&extensions_lock, &extensions_latch); - pthread_mutex_lock(&extensions_lock); + CMARK_INITIALIZE_AND_LOCK(extensions); if (syntax_extensions) { cmark_llist_free_full( @@ -53,21 +49,20 @@ void cmark_release_plugins(void) { syntax_extensions = NULL; } - pthread_mutex_unlock(&extensions_lock); + CMARK_UNLOCK(extensions); } cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem) { cmark_llist *it; cmark_llist *res = NULL; - initialize_mutex_once(&extensions_lock, &extensions_latch); - 
pthread_mutex_lock(&extensions_lock); + CMARK_INITIALIZE_AND_LOCK(extensions); for (it = syntax_extensions; it; it = it->next) { res = cmark_llist_append(mem, res, it->data); } - pthread_mutex_unlock(&extensions_lock); + CMARK_UNLOCK(extensions); return res; } @@ -75,8 +70,7 @@ cmark_syntax_extension *cmark_find_syntax_extension(const char *name) { cmark_llist *tmp; cmark_syntax_extension *res = NULL; - initialize_mutex_once(&extensions_lock, &extensions_latch); - pthread_mutex_lock(&extensions_lock); + CMARK_INITIALIZE_AND_LOCK(extensions); for (tmp = syntax_extensions; tmp; tmp = tmp->next) { cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; @@ -86,6 +80,6 @@ cmark_syntax_extension *cmark_find_syntax_extension(const char *name) { } } - pthread_mutex_unlock(&extensions_lock); + CMARK_UNLOCK(extensions); return res; } From 18b7394bdb6a59bca0b5022eb8e47ccd84404c1c Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 30 Mar 2021 14:57:43 -0600 Subject: [PATCH 196/218] add latch macros and use them for registering plugins --- extensions/core-extensions.c | 13 ++----------- src/include/mutex.h | 28 ++++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 544d8fa45..56081e88c 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -8,29 +8,20 @@ #include "registry.h" #include "plugin.h" -CMARK_DEFINE_LOCK(extensions); - static int core_extensions_registration(cmark_plugin *plugin) { - CMARK_INITIALIZE_AND_LOCK(extensions); cmark_plugin_register_syntax_extension(plugin, create_table_extension()); cmark_plugin_register_syntax_extension(plugin, create_strikethrough_extension()); cmark_plugin_register_syntax_extension(plugin, create_autolink_extension()); cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension()); cmark_plugin_register_syntax_extension(plugin, 
create_tasklist_extension()); - CMARK_UNLOCK(extensions); return 1; } -pthread_mutex_t registered_lock; -static atomic_int registered_latch = 0; -static _Atomic int registered = 0; +CMARK_DEFINE_LATCH(registered); void cmark_gfm_core_extensions_ensure_registered(void) { - CMARK_INITIALIZE_AND_LOCK(extensions); - if (!registered) { + if (CMARK_CHECK_LATCH(registered)) { cmark_register_plugin(core_extensions_registration); - registered = 1; } - CMARK_UNLOCK(extensions); } diff --git a/src/include/mutex.h b/src/include/mutex.h index 16b5cfb46..781584c4c 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -8,10 +8,17 @@ #include <pthread.h> #include <stdatomic.h> -static CMARK_INLINE void initialize_mutex_once(pthread_mutex_t *m, atomic_int *latch) { +static CMARK_INLINE bool check_latch(atomic_int *latch) { int expected = 0; - if (atomic_compare_exchange_strong(latch, &expected, 1)) { + return true; + } else { + return false; + } +} + +static CMARK_INLINE void initialize_mutex_once(pthread_mutex_t *m, atomic_int *latch) { + if (check_latch(latch)) { pthread_mutex_init(m, NULL); } } @@ -26,12 +33,29 @@ pthread_mutex_lock(&NAME##_lock); #define CMARK_UNLOCK(NAME) pthread_mutex_unlock(&NAME##_lock); +#define CMARK_DEFINE_LATCH(NAME) static atomic_int NAME = 0; + +#define CMARK_CHECK_LATCH(NAME) check_latch(&NAME) + #else // no threading support +static CMARK_INLINE bool check_latch(int *latch) { + if (!*latch) { + *latch = 1; + return true; + } else { + return false; + } +} + #define CMARK_DEFINE_LOCK(NAME) #define CMARK_INITIALIZE_AND_LOCK(NAME) #define CMARK_UNLOCK(NAME) +#define CMARK_DEFINE_LATCH static int NAME = 0; + +#define CMARK_CHECK_LATCH check_latch(&NAME) + #endif // CMARK_THREADING #endif // CMARK_MUTEX_H From fc95a68ec4de76a101eb270b83e03bee4c36f180 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 30 Mar 2021 15:14:56 -0600 Subject: [PATCH 197/218] fix deadlock in arena --- src/arena.c | 8 ++++---- 1 file 
changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index e2dbb1d1e..e28e680ea 100644 --- a/src/arena.c +++ b/src/arena.c @@ -28,10 +28,10 @@ static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev void cmark_arena_push(void) { CMARK_INITIALIZE_AND_LOCK(arena); - if (!A) - return; - A->push_point = 1; - A = alloc_arena_chunk(10240, A); + if (A) { + A->push_point = 1; + A = alloc_arena_chunk(10240, A); + } CMARK_UNLOCK(arena); } From 0489d3cc481c838be2adc878cd1ac5cda62ef60c Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Wed, 31 Mar 2021 14:48:48 -0600 Subject: [PATCH 198/218] use pthread_once instead of atomics --- extensions/core-extensions.c | 10 ++++++---- src/include/mutex.h | 29 +++++++---------------------- 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 56081e88c..131cdf402 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -18,10 +18,12 @@ static int core_extensions_registration(cmark_plugin *plugin) { return 1; } -CMARK_DEFINE_LATCH(registered); +CMARK_DEFINE_ONCE(registered); + +static void register_plugins(void) { + cmark_register_plugin(core_extensions_registration); +} void cmark_gfm_core_extensions_ensure_registered(void) { - if (CMARK_CHECK_LATCH(registered)) { - cmark_register_plugin(core_extensions_registration); - } + CMARK_RUN_ONCE(registered, register_plugins); } diff --git a/src/include/mutex.h b/src/include/mutex.h index 781584c4c..7415e76de 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -6,37 +6,22 @@ #ifdef CMARK_THREADING #include <pthread.h> -#include <stdatomic.h> -static CMARK_INLINE bool check_latch(atomic_int *latch) { - int expected = 0; - if (atomic_compare_exchange_strong(latch, &expected, 1)) { - return true; - } else { - return false; - } -} +#define CMARK_DEFINE_ONCE(NAME) static pthread_once_t NAME##_once = 
PTHREAD_ONCE_INIT; -static CMARK_INLINE void initialize_mutex_once(pthread_mutex_t *m, atomic_int *latch) { - if (check_latch(latch)) { - pthread_mutex_init(m, NULL); - } -} +#define CMARK_RUN_ONCE(NAME, FUNC) pthread_once(&NAME##_once, FUNC) #define CMARK_DEFINE_LOCK(NAME) \ static pthread_mutex_t NAME##_lock; \ -static atomic_int NAME##_latch = 0; +CMARK_DEFINE_ONCE(NAME); \ +static void initialize_##NAME() { pthread_mutex_init(&NAME##_lock, NULL); } #define CMARK_INITIALIZE_AND_LOCK(NAME) \ -initialize_mutex_once(&NAME##_lock, &NAME##_latch); \ +CMARK_RUN_ONCE(NAME, initialize_##NAME); \ pthread_mutex_lock(&NAME##_lock); #define CMARK_UNLOCK(NAME) pthread_mutex_unlock(&NAME##_lock); -#define CMARK_DEFINE_LATCH(NAME) static atomic_int NAME = 0; - -#define CMARK_CHECK_LATCH(NAME) check_latch(&NAME) - #else // no threading support static CMARK_INLINE bool check_latch(int *latch) { @@ -52,9 +37,9 @@ static CMARK_INLINE bool check_latch(int *latch) { #define CMARK_INITIALIZE_AND_LOCK(NAME) #define CMARK_UNLOCK(NAME) -#define CMARK_DEFINE_LATCH static int NAME = 0; +#define CMARK_DEFINE_ONCE(NAME) static int NAME = 0; -#define CMARK_CHECK_LATCH check_latch(&NAME) +#define CMARK_RUN_ONCE(NAME, FUNC) if (check_latch(&NAME)) FUNC(); #endif // CMARK_THREADING From a595ea902f9a3c29de4ee2cd3c633d3d6b20fb2b Mon Sep 17 00:00:00 2001 From: Jeremy Schonfeld <jschonfeld@apple.com> Date: Wed, 14 Apr 2021 14:52:13 -0700 Subject: [PATCH 199/218] Add preserve-whitespace and inline-only options rdar://76711302 --- api_test/main.c | 45 +++++++++++++++++++++++++++++++++++++++++ src/blocks.c | 37 +++++++++++++++++++++------------ src/include/cmark-gfm.h | 11 ++++++++++ src/inlines.c | 12 ++++++++--- 4 files changed, 89 insertions(+), 16 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index 3199ab105..89798c8dc 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1152,6 +1152,49 @@ static void ref_source_pos(test_batch_runner *runner) { cmark_node_free(doc); } +static void 
inline_only_opt(test_batch_runner *runner) { + static const char markdown[] = + "# My heading\n" + "> My block quote\n\n" + "- List item\n\n" + "[link](https://github.com)\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_INLINE_ONLY); + char *html = cmark_render_html(doc, CMARK_OPT_DEFAULT, 0); + STR_EQ(runner, html, "<p># My heading\n" + "&gt; My block quote\n" + "\n" + "- List item\n" + "\n" + "<a href=\"https://github.com\">link</a>\n" + "</p>\n", "html is as expected"); + free(html); + cmark_node_free(doc); +} + +static void check_markdown_plaintext(test_batch_runner *runner, char *markdown) { + cmark_node *doc = cmark_parse_document(markdown, strlen(markdown), CMARK_OPT_PRESERVE_WHITESPACE); + cmark_node *pg = cmark_node_first_child(doc); + INT_EQ(runner, cmark_node_get_type(pg), CMARK_NODE_PARAGRAPH, "markdown '%s' did not produce a paragraph node", markdown); + cmark_node *textNode = cmark_node_first_child(pg); + INT_EQ(runner, cmark_node_get_type(textNode), CMARK_NODE_TEXT, "markdown '%s' did not produce a text node inside the paragraph node", markdown); + const char *text = cmark_node_get_literal(textNode); + STR_EQ(runner, text, markdown, "markdown '%s' resulted in '%s'", markdown, text); +} + +static void preserve_whitespace_opt(test_batch_runner *runner) { + check_markdown_plaintext(runner, "hello"); + check_markdown_plaintext(runner, "hello "); + check_markdown_plaintext(runner, " hello"); + check_markdown_plaintext(runner, " hello"); + check_markdown_plaintext(runner, "hello "); + check_markdown_plaintext(runner, "hel\nlo"); + check_markdown_plaintext(runner, "hel\n\nlo"); + check_markdown_plaintext(runner, "hel\nworld\nlo"); + check_markdown_plaintext(runner, " hel \n world \n lo "); + check_markdown_plaintext(runner, " hello \n \n world "); +} + int main() { int retval; test_batch_runner *runner = test_batch_runner_new(); @@ -1182,6 +1225,8 @@ int main() { source_pos(runner); source_pos_inlines(runner); 
ref_source_pos(runner); + inline_only_opt(runner); + preserve_whitespace_opt(runner); test_print_summary(runner); retval = test_ok(runner) ? 0 : 1; diff --git a/src/blocks.c b/src/blocks.c index ab714122c..d10f7d20f 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -67,7 +67,7 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, bool eof); static void S_process_line(cmark_parser *parser, const unsigned char *buffer, - bufsize_t bytes); + bufsize_t bytes, bool ensureEndsInNewline); static cmark_node *make_block(cmark_mem *mem, cmark_node_type tag, int start_line, int start_column) { @@ -687,6 +687,7 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, bool eof) { const unsigned char *end = buffer + len; static const uint8_t repl[] = {239, 191, 189}; + bool preserveWhitespace = parser->options & CMARK_OPT_PRESERVE_WHITESPACE; if (parser->last_buffer_ended_with_cr && *buffer == '\n') { // skip NL if last buffer ended with CR ; see #117 @@ -714,10 +715,10 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, if (process) { if (parser->linebuf.size > 0) { cmark_strbuf_put(&parser->linebuf, buffer, chunk_len); - S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size); + S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size, !preserveWhitespace || !eof || eol < end); cmark_strbuf_clear(&parser->linebuf); } else { - S_process_line(parser, buffer, chunk_len); + S_process_line(parser, buffer, chunk_len, !preserveWhitespace || !eof || eol < end); } } else { if (eol < end && *eol == '\0') { @@ -1023,6 +1024,8 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input, *all_matched = false; cmark_node *container = parser->root; cmark_node_type cont_type; + + while (S_last_child_is_open(container)) { container = container->last_child; @@ -1337,7 +1340,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, // 
then treat this as a "lazy continuation line" and add it to // the open paragraph. if (parser->current != last_matched_container && - container == last_matched_container && !parser->blank && + container == last_matched_container && (!parser->blank || (parser->options & CMARK_OPT_PRESERVE_WHITESPACE)) && S_type(parser->current) == CMARK_NODE_PARAGRAPH) { add_line(parser->current, input, parser); } else { // not a lazy continuation @@ -1395,15 +1398,21 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, container->as.heading.setext == false) { chop_trailing_hashtags(input); } - S_advance_offset(parser, input, parser->first_nonspace - parser->offset, + if ((parser->options & CMARK_OPT_PRESERVE_WHITESPACE) == 0) + S_advance_offset(parser, input, parser->first_nonspace - parser->offset, false); add_line(container, input, parser); } else { // create paragraph container for line - container = add_child(parser, container, CMARK_NODE_PARAGRAPH, - parser->first_nonspace + 1); - S_advance_offset(parser, input, parser->first_nonspace - parser->offset, - false); + if (parser->options & CMARK_OPT_PRESERVE_WHITESPACE) { + container = add_child(parser, container, CMARK_NODE_PARAGRAPH, + parser->offset + 1); + } else { + container = add_child(parser, container, CMARK_NODE_PARAGRAPH, + parser->first_nonspace + 1); + S_advance_offset(parser, input, parser->first_nonspace - parser->offset, + false); + } add_line(container, input, parser); } @@ -1413,7 +1422,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, /* See http://spec.commonmark.org/0.24/#phase-1-block-structure */ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, - bufsize_t bytes) { + bufsize_t bytes, bool ensureEndsInNewline) { cmark_node *last_matched_container; bool all_matched = true; cmark_node *container; @@ -1430,7 +1439,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, bytes = 
parser->curline.size; // ensure line ends with a newline: - if (bytes == 0 || !S_is_line_end_char(parser->curline.ptr[bytes - 1])) + if (ensureEndsInNewline && (bytes == 0 || !S_is_line_end_char(parser->curline.ptr[bytes - 1]))) cmark_strbuf_putc(&parser->curline, '\n'); parser->offset = 0; @@ -1463,7 +1472,9 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, current = parser->current; - open_new_blocks(parser, &container, &input, all_matched); + // Only open new blocks if we're not limited to inline + if ((parser->options & CMARK_OPT_INLINE_ONLY) == 0) + open_new_blocks(parser, &container, &input, all_matched); /* parser->current might have changed if feed_reentrant was called */ if (current == parser->current) @@ -1490,7 +1501,7 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) { return NULL; if (parser->linebuf.size) { - S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size); + S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size, (parser->options & CMARK_OPT_PRESERVE_WHITESPACE) == 0); cmark_strbuf_clear(&parser->linebuf); } diff --git a/src/include/cmark-gfm.h b/src/include/cmark-gfm.h index 31bfcaff9..14ab95f4a 100644 --- a/src/include/cmark-gfm.h +++ b/src/include/cmark-gfm.h @@ -769,6 +769,17 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar */ #define CMARK_OPT_FULL_INFO_STRING (1 << 16) +/** Parse only inline markdown directives. Block directives will not be + * parsed (their literal representations will remain in the output). + */ +#define CMARK_OPT_INLINE_ONLY (1 << 18) + +/** Parse the markdown input without removing preceding/trailing whitespace and + * without converting newline characters to breaks. Using this option also + * enables the CMARK_OPT_INLINE_ONLY option. 
+ */ +#define CMARK_OPT_PRESERVE_WHITESPACE ((1 << 19) | CMARK_OPT_INLINE_ONLY) + /** * ## Version information */ diff --git a/src/inlines.c b/src/inlines.c index 9405b0127..a444586a7 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1424,7 +1424,12 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, switch (c) { case '\r': case '\n': - new_inl = handle_newline(subj); + if (options & CMARK_OPT_PRESERVE_WHITESPACE) { + advance(subj); + new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); + } else { + new_inl = handle_newline(subj); + } break; case '`': new_inl = handle_backticks(subj, options); @@ -1490,7 +1495,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, subj->pos = endpos; // if we're at a newline, strip trailing spaces. - if (S_is_line_end_char(peek_char(subj))) { + if ((options & CMARK_OPT_PRESERVE_WHITESPACE) == 0 && S_is_line_end_char(peek_char(subj))) { cmark_chunk_rtrim(&contents); } @@ -1511,7 +1516,8 @@ void cmark_parse_inlines(cmark_parser *parser, subject subj; cmark_chunk content = {parent->content.ptr, parent->content.size, 0}; subject_from_buf(parser->mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap); - cmark_chunk_rtrim(&subj.input); + if ((options & CMARK_OPT_PRESERVE_WHITESPACE) == 0) + cmark_chunk_rtrim(&subj.input); while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options)) ; From 471d20c2f4ae5172c6819e198cb09f41b8f05e16 Mon Sep 17 00:00:00 2001 From: Jeremy Schonfeld <jschonfeld@apple.com> Date: Wed, 5 May 2021 10:06:24 -0700 Subject: [PATCH 200/218] Allow all whitespace when preserving whitespace rdar://77383424 --- api_test/main.c | 10 +++++++++- src/blocks.c | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index 89798c8dc..8a35f6e02 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1179,10 
+1179,18 @@ static void check_markdown_plaintext(test_batch_runner *runner, char *markdown) cmark_node *textNode = cmark_node_first_child(pg); INT_EQ(runner, cmark_node_get_type(textNode), CMARK_NODE_TEXT, "markdown '%s' did not produce a text node inside the paragraph node", markdown); const char *text = cmark_node_get_literal(textNode); - STR_EQ(runner, text, markdown, "markdown '%s' resulted in '%s'", markdown, text); + OK(runner, text, "Text literal for '%s' was null", markdown); + if (text) { + STR_EQ(runner, text, markdown, "markdown '%s' resulted in '%s'", markdown, text); + } else { + SKIP(runner, 1); + } + cmark_node_free(doc); } static void preserve_whitespace_opt(test_batch_runner *runner) { + check_markdown_plaintext(runner, " "); + check_markdown_plaintext(runner, " "); check_markdown_plaintext(runner, "hello"); check_markdown_plaintext(runner, "hello "); check_markdown_plaintext(runner, " hello"); diff --git a/src/blocks.c b/src/blocks.c index d10f7d20f..8f5449fde 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -308,7 +308,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { case CMARK_NODE_PARAGRAPH: { has_content = resolve_reference_link_definitions(parser, b); - if (!has_content) { + if (!has_content && (parser->options & CMARK_OPT_PRESERVE_WHITESPACE) == 0) { // remove blank node (former reference def) cmark_node_free(b); } From 6df1faa852e3d89dfc0a11d284893bbcfb64b628 Mon Sep 17 00:00:00 2001 From: Charles Hu <charleshu@apple.com> Date: Mon, 17 May 2021 19:40:30 -0700 Subject: [PATCH 201/218] Don't emit an attribute node if it doesn't have parentheses rdar://77476197 --- api_test/main.c | 32 ++++++++++++++++++++++++++++++++ src/inlines.c | 14 +++++++++++++- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/api_test/main.c b/api_test/main.c index 8a35f6e02..4d747d19e 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1203,6 +1203,37 @@ static void preserve_whitespace_opt(test_batch_runner *runner) { 
check_markdown_plaintext(runner, " hello \n \n world "); } +static void check_markdown_attributes_node(test_batch_runner *runner, char *markdown, cmark_node_type expectedType, char *expectedAttributes) { + cmark_node *doc = cmark_parse_document(markdown, strlen(markdown), CMARK_OPT_DEFAULT); + cmark_node *pg = cmark_node_first_child(doc); + INT_EQ(runner, cmark_node_get_type(pg), CMARK_NODE_PARAGRAPH, "markdown '%s' did not produce a paragraph node", markdown); + cmark_node *attributeNode = cmark_node_first_child(pg); + cmark_node_type nodeType = cmark_node_get_type(attributeNode); + INT_EQ(runner, nodeType, expectedType, "markdown '%s' did not produce the correct node type: got %d, expecting %d", markdown, nodeType, expectedType); + const char *attributeContent = cmark_node_get_attributes(attributeNode); + if (attributeContent == NULL) { + OK(runner, expectedAttributes == NULL, "markdown '%s' produced an unexpected NULL attribute", markdown); + } else if (expectedAttributes == NULL) { + OK(runner, attributeContent == NULL, "markdown '%s' produced an unexpected NULL attribute", markdown); + } else { + STR_EQ(runner, attributeContent, expectedAttributes, "markdown '%s' did not produce the correct attributes: got %s, expecting: %s", markdown, attributeContent, expectedAttributes); + } + + cmark_node_free(doc); +} + +static void verify_custome_attributes_node(test_batch_runner *runner) { + // Should produce a TEXT node since there's no `()` to signify attributes + check_markdown_attributes_node(runner, "^[]", CMARK_NODE_TEXT, NULL); + check_markdown_attributes_node(runner, "^[](", CMARK_NODE_TEXT, NULL); + check_markdown_attributes_node(runner, "^[])", CMARK_NODE_TEXT, NULL); + check_markdown_attributes_node(runner, "^[])(", CMARK_NODE_TEXT, NULL); + // Should produce an ATTRIBUTE node with no attributes + check_markdown_attributes_node(runner, "^[]()", CMARK_NODE_ATTRIBUTE, ""); + // Should produce an ATTRIBUTE node with attributes + 
check_markdown_attributes_node(runner, "^[](rainbow: 'extreme')", CMARK_NODE_ATTRIBUTE, "rainbow: 'extreme'"); +} + int main() { int retval; test_batch_runner *runner = test_batch_runner_new(); @@ -1235,6 +1266,7 @@ int main() { ref_source_pos(runner); inline_only_opt(runner); preserve_whitespace_opt(runner); + verify_custome_attributes_node(runner); test_print_summary(runner); retval = test_ok(runner) ? 0 : 1; diff --git a/src/inlines.c b/src/inlines.c index a444586a7..ed57058cc 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1094,6 +1094,7 @@ static cmark_node *handle_close_bracket_attribute(cmark_parser *parser, subject cmark_chunk raw_label; int found_label; cmark_node *tmp, *tmpnext; + bool isAttributesNode = false; // ^name[content](attributes) // TODO: support name. we will not even enter this with a name because we fail the match first @@ -1108,9 +1109,20 @@ static cmark_node *handle_close_bracket_attribute(cmark_parser *parser, subject if (peek_at(subj, endattributes) == ')') { subj->pos = endattributes + 1; - attributes = cmark_chunk_dup(&subj->input, startattributes, endattributes - startattributes); + isAttributesNode = true; + if (endattributes - startattributes == 0) { + attributes = cmark_chunk_literal(NULL); + } else { + attributes = cmark_chunk_dup(&subj->input, startattributes, endattributes - startattributes); + } } } + + if (!isAttributesNode) { + // The current node can't be parsed as attribute node, turn it to a TEXT node instead. 
+ pop_bracket(subj); + return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]")); + } inl = make_simple(subj->mem, CMARK_NODE_ATTRIBUTE); inl->as.attribute.attributes = attributes; From 2afe77a010caff85cf2ee0aca5f031aae27c644b Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Wed, 12 May 2021 09:08:25 -0600 Subject: [PATCH 202/218] update use of mutexes --- src/arena.c | 48 ++++++++++++++++++++++++++++++------------------ src/inlines.c | 17 ++++++++++++----- 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/src/arena.c b/src/arena.c index e28e680ea..fa9adbdc0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -36,30 +36,39 @@ void cmark_arena_push(void) { } int cmark_arena_pop(void) { + int ret = 1; + CMARK_INITIALIZE_AND_LOCK(arena); if (!A) - return 0; - while (A && !A->push_point) { - free(A->ptr); - struct arena_chunk *n = A->prev; - free(A); - A = n; + ret = 0; + else { + while (A && !A->push_point) { + free(A->ptr); + struct arena_chunk *n = A->prev; + free(A); + A = n; + } + if (A) + A->push_point = 0; } - if (A) - A->push_point = 0; - return 1; + CMARK_UNLOCK(arena); + return ret; } static void init_arena(void) { + CMARK_INITIALIZE_AND_LOCK(arena); A = alloc_arena_chunk(4 * 1048576, NULL); + CMARK_UNLOCK(arena); } void cmark_arena_reset(void) { + CMARK_INITIALIZE_AND_LOCK(arena); while (A) { free(A->ptr); struct arena_chunk *n = A->prev; free(A); A = n; } + CMARK_UNLOCK(arena); } static void *arena_calloc(size_t nmem, size_t size) { @@ -74,20 +83,23 @@ static void *arena_calloc(size_t nmem, size_t size) { sz = (sz + align) & ~align; CMARK_INITIALIZE_AND_LOCK(arena); - + + void *ptr = NULL; + if (sz > A->sz) { A->prev = alloc_arena_chunk(sz, A->prev); - return (uint8_t *) A->prev->ptr + sizeof(size_t); - } - if (sz > A->sz - A->used) { - A = alloc_arena_chunk(A->sz + A->sz / 2, A); + ptr = (uint8_t *) A->prev->ptr; + } else { + if (sz > A->sz - A->used) { + A = alloc_arena_chunk(A->sz + A->sz / 2, A); 
+ } + ptr = (uint8_t *) A->ptr + A->used; + A->used += sz; + *((size_t *) ptr) = sz - sizeof(size_t); } - void *ptr = (uint8_t *) A->ptr + A->used; - A->used += sz; CMARK_UNLOCK(arena); - - *((size_t *) ptr) = sz - sizeof(size_t); + return (uint8_t *) ptr + sizeof(size_t); } diff --git a/src/inlines.c b/src/inlines.c index ed57058cc..92d9e3dcd 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1376,16 +1376,23 @@ static const char SMART_PUNCT_CHARS[] = { static bufsize_t subject_find_special_char(subject *subj, int options) { bufsize_t n = subj->pos + 1; + bufsize_t ret = subj->input.len; + CMARK_INITIALIZE_AND_LOCK(chars); while (n < subj->input.len) { - if (SPECIAL_CHARS[subj->input.data[n]]) - return n; - if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]]) - return n; + if (SPECIAL_CHARS[subj->input.data[n]]) { + ret = n; + break; + } + if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]]) { + ret = n; + break; + } n++; } + CMARK_UNLOCK(chars); - return subj->input.len; + return ret; } void cmark_inlines_add_special_character(unsigned char c, bool emphasis) { From 061a4d8ef40d70d4c873239ccc12cc1f9b7513a0 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 18 May 2021 16:39:16 -0600 Subject: [PATCH 203/218] move global characters arrays into the parser --- api_test/main.c | 61 ++++++++++++++++++++++++ src/blocks.c | 17 ++++++- src/include/inlines.h | 9 +++- src/include/parser.h | 4 ++ src/inlines.c | 107 +++++++++++++++++++----------------------- 5 files changed, 136 insertions(+), 62 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index 4d747d19e..71cfe6de3 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1234,6 +1234,66 @@ static void verify_custome_attributes_node(test_batch_runner *runner) { check_markdown_attributes_node(runner, "^[](rainbow: 'extreme')", CMARK_NODE_ATTRIBUTE, "rainbow: 'extreme'"); } +typedef void (*reentrant_call_func) (void); + +static cmark_node 
*reentrant_parse_inline_ext(cmark_syntax_extension *self, cmark_parser *parser, + cmark_node *parent, unsigned char character, + cmark_inline_parser *inline_parser) { + void *priv = cmark_syntax_extension_get_private(self); + if (priv) { + reentrant_call_func func = (reentrant_call_func)priv; + func(); + cmark_syntax_extension_set_private(self, NULL, NULL); + } + + return NULL; +} + +static void run_inner_parser() { + cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); + cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("strikethrough")); + + static const char markdown[] = "this is the ~~outer~~ inner document"; + cmark_parser_feed(parser, markdown, sizeof(markdown) - 1); + + cmark_node *doc = cmark_parser_finish(parser); + cmark_node_free(doc); + cmark_parser_free(parser); +} + +static void parser_interrupt(test_batch_runner *runner) { + cmark_gfm_core_extensions_ensure_registered(); + + cmark_syntax_extension *my_ext = cmark_syntax_extension_new("interrupt"); + cmark_syntax_extension_set_private(my_ext, run_inner_parser, NULL); + cmark_syntax_extension_set_match_inline_func(my_ext, reentrant_parse_inline_ext); + + cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); + cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("strikethrough")); + cmark_parser_attach_syntax_extension(parser, my_ext); + + static const char markdown[] = "this is the ~~inner~~ outer document"; + cmark_parser_feed(parser, markdown, sizeof(markdown) - 1); + + cmark_node *doc = cmark_parser_finish(parser); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT); + STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" + "<document xmlns=\"http://commonmark.org/xml/1.0\">\n" + " <paragraph>\n" + " <text xml:space=\"preserve\">this is the </text>\n" + " <strikethrough>\n" + " <text xml:space=\"preserve\">inner</text>\n" + " </strikethrough>\n" + " <text 
xml:space=\"preserve\"> outer document</text>\n" + " </paragraph>\n" + "</document>\n", "interrupting the parser should still allow extensions"); + + free(xml); + cmark_node_free(doc); + cmark_parser_free(parser); +} + int main() { int retval; test_batch_runner *runner = test_batch_runner_new(); @@ -1267,6 +1327,7 @@ int main() { inline_only_opt(runner); preserve_whitespace_opt(runner); verify_custome_attributes_node(runner); + parser_interrupt(runner); test_print_summary(runner); retval = test_ok(runner) ? 0 : 1; diff --git a/src/blocks.c b/src/blocks.c index 8f5449fde..9d00abb9c 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -94,6 +94,16 @@ int cmark_parser_attach_syntax_extension(cmark_parser *parser, cmark_syntax_extension *extension) { parser->syntax_extensions = cmark_llist_append(parser->mem, parser->syntax_extensions, extension); if (extension->match_inline || extension->insert_inline_from_delim) { + if (!parser->inline_syntax_extensions) { + // if we're loading an inline extension into this parser for the first time, + // allocate new buffers for the inline parser character arrays + parser->skip_chars = (int8_t *)parser->mem->calloc(sizeof(int8_t), 256); + cmark_set_default_skip_chars(&parser->skip_chars, true); + + parser->special_chars = (int8_t *)parser->mem->calloc(sizeof(int8_t), 256); + cmark_set_default_special_chars(&parser->special_chars, true); + } + parser->inline_syntax_extensions = cmark_llist_append( parser->mem, parser->inline_syntax_extensions, extension); } @@ -132,6 +142,9 @@ static void cmark_parser_reset(cmark_parser *parser) { parser->syntax_extensions = saved_exts; parser->inline_syntax_extensions = saved_inline_exts; parser->options = saved_options; + + cmark_set_default_skip_chars(&parser->skip_chars, false); + cmark_set_default_special_chars(&parser->special_chars, false); } cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { @@ -417,9 +430,9 @@ void cmark_manage_extensions_special_characters(cmark_parser 
*parser, int add) { for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) { unsigned char c = (unsigned char)(size_t)tmp_char->data; if (add) - cmark_inlines_add_special_character(c, ext->emphasis); + cmark_inlines_add_special_character(parser, c, ext->emphasis); else - cmark_inlines_remove_special_character(c, ext->emphasis); + cmark_inlines_remove_special_character(parser, c, ext->emphasis); } } } diff --git a/src/include/inlines.h b/src/include/inlines.h index 7dd91bf52..96f0919ea 100644 --- a/src/include/inlines.h +++ b/src/include/inlines.h @@ -5,6 +5,8 @@ extern "C" { #endif +#include <stdlib.h> +#include "cmark-gfm_config.h" #include "references.h" cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url); @@ -19,8 +21,11 @@ void cmark_parse_inlines(cmark_parser *parser, bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, cmark_map *refmap); -void cmark_inlines_add_special_character(unsigned char c, bool emphasis); -void cmark_inlines_remove_special_character(unsigned char c, bool emphasis); +void cmark_inlines_add_special_character(cmark_parser *parser, unsigned char c, bool emphasis); +void cmark_inlines_remove_special_character(cmark_parser *parser, unsigned char c, bool emphasis); + +void cmark_set_default_skip_chars(int8_t **skip_chars, bool use_memcpy); +void cmark_set_default_special_chars(int8_t **special_chars, bool use_memcpy); #ifdef __cplusplus } diff --git a/src/include/parser.h b/src/include/parser.h index 245580b85..ff24157d9 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -2,6 +2,7 @@ #define CMARK_PARSER_H #include <stdio.h> +#include <stdlib.h> #include "references.h" #include "node.h" #include "buffer.h" @@ -49,6 +50,9 @@ struct cmark_parser { cmark_llist *syntax_extensions; cmark_llist *inline_syntax_extensions; cmark_ispunct_func backslash_ispunct; + /* used when parsing inlines, can be populated by extensions if any are loaded */ + int8_t *skip_chars; + int8_t 
*special_chars; }; #ifdef __cplusplus diff --git a/src/inlines.c b/src/inlines.c index 92d9e3dcd..c558cc5aa 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -13,7 +13,6 @@ #include "scanners.h" #include "inlines.h" #include "syntax_extension.h" -#include "mutex.h" static const char *EMDASH = "\xE2\x80\x94"; static const char *ENDASH = "\xE2\x80\x93"; @@ -64,10 +63,14 @@ typedef struct subject{ bool scanned_for_backticks; } subject; -// Extensions may populate this. -static int8_t SKIP_CHARS[256]; +void cmark_set_default_skip_chars(int8_t **skip_chars, bool use_memcpy) { + static int8_t default_skip_chars[256]; -CMARK_DEFINE_LOCK(chars); + if (use_memcpy) + memcpy(*skip_chars, &default_skip_chars, 256); + else + *skip_chars = default_skip_chars; +} static CMARK_INLINE bool S_is_line_end_char(char c) { return (c == '\n' || c == '\r'); @@ -80,7 +83,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e, cmark_chunk *buffer, cmark_map *refmap); -static bufsize_t subject_find_special_char(subject *subj, int options); +static bufsize_t subject_find_special_char(cmark_parser *parser, subject *subj, int options); // Create an inline with a literal string value. static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t, @@ -394,8 +397,8 @@ static cmark_node *handle_backticks(subject *subj, int options) { // Scan ***, **, or * and return number scanned, or 0. // Advances position. 
-static int scan_delims(subject *subj, unsigned char c, bool *can_open, - bool *can_close) { +static int scan_delims(cmark_parser *parser, subject *subj, unsigned char c, + bool *can_open, bool *can_close) { int numdelims = 0; bufsize_t before_char_pos, after_char_pos; int32_t after_char = 0; @@ -408,19 +411,15 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, } else { before_char_pos = subj->pos - 1; - CMARK_INITIALIZE_AND_LOCK(chars); - // walk back to the beginning of the UTF_8 sequence: - while ((peek_at(subj, before_char_pos) >> 6 == 2 || SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) { + while ((peek_at(subj, before_char_pos) >> 6 == 2 || parser->skip_chars[peek_at(subj, before_char_pos)]) && before_char_pos > 0) { before_char_pos -= 1; } len = cmark_utf8proc_iterate(subj->input.data + before_char_pos, subj->pos - before_char_pos, &before_char); - if (len == -1 || (before_char < 256 && SKIP_CHARS[(unsigned char) before_char])) { + if (len == -1 || (before_char < 256 && parser->skip_chars[(unsigned char) before_char])) { before_char = 10; } - - CMARK_UNLOCK(chars); } if (c == '\'' || c == '"') { @@ -438,18 +437,14 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, } else { after_char_pos = subj->pos; - CMARK_INITIALIZE_AND_LOCK(chars); - - while (SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) { + while (parser->skip_chars[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) { after_char_pos += 1; } len = cmark_utf8proc_iterate(subj->input.data + after_char_pos, subj->input.len - after_char_pos, &after_char); - if (len == -1 || (after_char < 256 && SKIP_CHARS[(unsigned char) after_char])) { + if (len == -1 || (after_char < 256 && parser->skip_chars[(unsigned char) after_char])) { after_char = 10; } - - CMARK_UNLOCK(chars); } left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) && @@ -548,13 +543,13 @@ static void 
push_bracket(subject *subj, bracket_type type, cmark_node *inl_text) } // Assumes the subject has a c at the current position. -static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) { +static cmark_node *handle_delim(cmark_parser *parser, subject *subj, unsigned char c, bool smart) { bufsize_t numdelims; cmark_node *inl_text; bool can_open, can_close; cmark_chunk contents; - numdelims = scan_delims(subj, c, &can_open, &can_close); + numdelims = scan_delims(parser, subj, c, &can_open, &can_close); if (c == '\'' && smart) { contents = cmark_chunk_literal(RIGHTSINGLEQUOTE); @@ -1346,18 +1341,25 @@ static cmark_node *handle_newline(subject *subj) { } // "\r\n\\`&_*[]<!" -static int8_t SPECIAL_CHARS[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +void cmark_set_default_special_chars(int8_t **special_chars, bool use_memcpy) { + static int8_t default_special_chars[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + if (use_memcpy) + memcpy(*special_chars, &default_special_chars, 256); + else + *special_chars = default_special_chars; +} // " ' . - static const char SMART_PUNCT_CHARS[] = { @@ -1374,41 +1376,30 @@ static const char SMART_PUNCT_CHARS[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; -static bufsize_t subject_find_special_char(subject *subj, int options) { +static bufsize_t subject_find_special_char(cmark_parser *parser, subject *subj, int options) { bufsize_t n = subj->pos + 1; - bufsize_t ret = subj->input.len; - CMARK_INITIALIZE_AND_LOCK(chars); while (n < subj->input.len) { - if (SPECIAL_CHARS[subj->input.data[n]]) { - ret = n; - break; - } - if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]]) { - ret = n; - break; - } + if (parser->special_chars[subj->input.data[n]]) + return n; + if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]]) + return n; n++; } - CMARK_UNLOCK(chars); - return ret; + return subj->input.len; } -void cmark_inlines_add_special_character(unsigned char c, bool emphasis) { - CMARK_INITIALIZE_AND_LOCK(chars); - SPECIAL_CHARS[c] = 1; +void cmark_inlines_add_special_character(cmark_parser *parser, unsigned char c, bool emphasis) { + parser->special_chars[c] = 1; if (emphasis) - SKIP_CHARS[c] = 1; - CMARK_UNLOCK(chars); + parser->skip_chars[c] = 1; } -void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) { - CMARK_INITIALIZE_AND_LOCK(chars); - SPECIAL_CHARS[c] = 0; +void cmark_inlines_remove_special_character(cmark_parser *parser, 
unsigned char c, bool emphasis) { + parser->special_chars[c] = 0; if (emphasis) - SKIP_CHARS[c] = 0; - CMARK_UNLOCK(chars); + parser->skip_chars[c] = 0; } static cmark_node *try_extensions(cmark_parser *parser, @@ -1466,7 +1457,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, case '_': case '\'': case '"': - new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0); + new_inl = handle_delim(parser, subj, c, (options & CMARK_OPT_SMART) != 0); break; case '-': new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0); @@ -1508,7 +1499,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, if (new_inl != NULL) break; - endpos = subject_find_special_char(subj, options); + endpos = subject_find_special_char(parser, subj, options); contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); startpos = subj->pos; subj->pos = endpos; From fe9ba10ef3b008f3bf3b8743a5d9294269deb85c Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 1 Jun 2021 10:57:05 -0600 Subject: [PATCH 204/218] free special char blocks alongside the parser --- src/blocks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/blocks.c b/src/blocks.c index 9d00abb9c..1d76705d8 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -162,6 +162,12 @@ cmark_parser *cmark_parser_new(int options) { void cmark_parser_free(cmark_parser *parser) { cmark_mem *mem = parser->mem; + + if (parser->inline_syntax_extensions) { + mem->free(parser->special_chars); + mem->free(parser->skip_chars); + } + cmark_parser_dispose(parser); cmark_strbuf_free(&parser->curline); cmark_strbuf_free(&parser->linebuf); From 5d991687426fefe9b7e39c0002c81fe35ad50852 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 1 Jun 2021 10:57:56 -0600 Subject: [PATCH 205/218] don't reset the special-char blocks in parser_reset since cmark_parser_reset is called in cmark_parser_finish, this 
state would be inconsistent if you reused a parser with extensions multiple times. --- src/blocks.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index 1d76705d8..bcceac33d 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -142,9 +142,6 @@ static void cmark_parser_reset(cmark_parser *parser) { parser->syntax_extensions = saved_exts; parser->inline_syntax_extensions = saved_inline_exts; parser->options = saved_options; - - cmark_set_default_skip_chars(&parser->skip_chars, false); - cmark_set_default_special_chars(&parser->special_chars, false); } cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { @@ -152,6 +149,8 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { parser->mem = mem; parser->options = options; cmark_parser_reset(parser); + cmark_set_default_skip_chars(&parser->skip_chars, false); + cmark_set_default_special_chars(&parser->special_chars, false); return parser; } From ea07fb5634334c681fd029ef9c9bebf9b421820a Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 1 Jun 2021 14:42:18 -0600 Subject: [PATCH 206/218] add comment about freeing special-chars memory --- src/blocks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/blocks.c b/src/blocks.c index bcceac33d..eea98694b 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -162,6 +162,7 @@ cmark_parser *cmark_parser_new(int options) { void cmark_parser_free(cmark_parser *parser) { cmark_mem *mem = parser->mem; + // If any inline syntax extensions were added, free the memory allocated for the special-chars arrays if (parser->inline_syntax_extensions) { mem->free(parser->special_chars); mem->free(parser->skip_chars); From 6f75d912aea034a741577a46ae2c034c3d96975a Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Wed, 2 Jun 2021 16:29:17 -0600 Subject: [PATCH 207/218] save special_chars/skip_chars in parser_reset --- src/blocks.c | 7 ++++++- 1 file changed, 
6 insertions(+), 1 deletion(-) diff --git a/src/blocks.c b/src/blocks.c index eea98694b..c331b4ad3 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -124,6 +124,8 @@ static void cmark_parser_reset(cmark_parser *parser) { cmark_llist *saved_inline_exts = parser->inline_syntax_extensions; int saved_options = parser->options; cmark_mem *saved_mem = parser->mem; + int8_t *saved_specials = parser->special_chars; + int8_t *saved_skips = parser->skip_chars; cmark_parser_dispose(parser); @@ -142,15 +144,18 @@ static void cmark_parser_reset(cmark_parser *parser) { parser->syntax_extensions = saved_exts; parser->inline_syntax_extensions = saved_inline_exts; parser->options = saved_options; + + parser->special_chars = saved_specials; + parser->skip_chars = saved_skips; } cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { cmark_parser *parser = (cmark_parser *)mem->calloc(1, sizeof(cmark_parser)); parser->mem = mem; parser->options = options; - cmark_parser_reset(parser); cmark_set_default_skip_chars(&parser->skip_chars, false); cmark_set_default_special_chars(&parser->special_chars, false); + cmark_parser_reset(parser); return parser; } From 7586471ecddfe51efbff4f203e5db7d61a71d9dd Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Wed, 2 Jun 2021 16:41:49 -0600 Subject: [PATCH 208/218] don't leak my_ext in the parser_interrupt test --- api_test/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/api_test/main.c b/api_test/main.c index 71cfe6de3..bd2bc95af 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1292,6 +1292,7 @@ static void parser_interrupt(test_batch_runner *runner) { free(xml); cmark_node_free(doc); cmark_parser_free(parser); + cmark_syntax_extension_free(cmark_get_default_mem_allocator(), my_ext); } int main() { From cea3e642975a9b769a73daa4e999a2786201c313 Mon Sep 17 00:00:00 2001 From: Charles Hu <charleshu@apple.com> Date: Wed, 16 Jun 2021 11:58:07 -0700 Subject: [PATCH 209/218] Add custom attributes 
using ^[foo][N] syntax rdar://79015293 --- api_test/main.c | 102 ++++++++++++++++++++++++++++++++++++++- src/blocks.c | 7 +-- src/include/inlines.h | 4 ++ src/include/references.h | 4 ++ src/inlines.c | 90 ++++++++++++++++++++++++++++++++-- src/references.c | 27 +++++++++++ 6 files changed, 224 insertions(+), 10 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index bd2bc95af..448a1ef7f 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1222,7 +1222,7 @@ static void check_markdown_attributes_node(test_batch_runner *runner, char *mark cmark_node_free(doc); } -static void verify_custome_attributes_node(test_batch_runner *runner) { +static void verify_custom_attributes_node(test_batch_runner *runner) { // Should produce a TEXT node since there's no `()` to signify attributes check_markdown_attributes_node(runner, "^[]", CMARK_NODE_TEXT, NULL); check_markdown_attributes_node(runner, "^[](", CMARK_NODE_TEXT, NULL); @@ -1234,6 +1234,103 @@ static void verify_custome_attributes_node(test_batch_runner *runner) { check_markdown_attributes_node(runner, "^[](rainbow: 'extreme')", CMARK_NODE_ATTRIBUTE, "rainbow: 'extreme'"); } +static cmark_node* parse_custom_attributues_footnote(test_batch_runner *runner, const char *markdown, cmark_node **retdoc) { + cmark_node *doc = cmark_parse_document(markdown, strlen(markdown), CMARK_OPT_DEFAULT); + cmark_node *pg = cmark_node_first_child(doc); + INT_EQ(runner, cmark_node_get_type(pg), CMARK_NODE_PARAGRAPH, "markdown '%s' did not produce a paragraph node", markdown); + *retdoc = doc; + return cmark_node_first_child(pg); +} + +static void verify_custom_attributes_footnote_basic(test_batch_runner *runner) { + static const char markdown[] = + "^[caffe][1]\n" + "\n" + "^[1]: rainbow: 'extreme', colors: { r: 255, g: 0, b: 0 }, corgicopter: true"; + cmark_node *doc; + cmark_node *attributeNode = parse_custom_attributues_footnote(runner, markdown, &doc); + INT_EQ(runner, cmark_node_get_type(attributeNode), CMARK_NODE_ATTRIBUTE, 
"markdown '%s' did not produce an attribute node", markdown); + STR_EQ(runner, cmark_node_get_attributes(attributeNode), + "rainbow: 'extreme', colors: { r: 255, g: 0, b: 0 }, corgicopter: true", + "markdown '%s' did not produce the right attribute in footnote", markdown); + + cmark_node_free(doc); +} + +static void verify_custom_attributes_footnote_multiple_footnotes(test_batch_runner *runner) { + static const char markdown[] = + "^[food][1] and ^[drinks][2]\n" + "\n" + "^[1]: rainbow: 'fun'\n" + "^[2]: magic: 42"; + cmark_node *doc; + cmark_node *attributeNode1 = parse_custom_attributues_footnote(runner, markdown, &doc); + INT_EQ(runner, cmark_node_get_type(attributeNode1), CMARK_NODE_ATTRIBUTE, "markdown '%s' did not produce an attribute node", markdown); + STR_EQ(runner, cmark_node_get_attributes(attributeNode1), "rainbow: 'fun'", "markdown '%s' did not produce the right attribute in footnote", markdown); + cmark_node *textNode = cmark_node_next(attributeNode1); // "and" + cmark_node *attributeNode2 = cmark_node_next(textNode); + INT_EQ(runner, cmark_node_get_type(attributeNode2), CMARK_NODE_ATTRIBUTE, "markdown '%s' did not produce an attribute node", markdown); + STR_EQ(runner, cmark_node_get_attributes(attributeNode2), "magic: 42", "markdown '%s' did not produce the right attribute in footnote", markdown); + + cmark_node_free(doc); +} + +static void verify_custom_attributes_footnote_reuse(test_batch_runner *runner) { + static const char markdown[] = + "^[pizza][1], ^[sandwich][2], ^[ice cream][2], and ^[salad][1]\n" + "\n" + "^[1]: has_tomato: true\n" + "^[2]: price: 12"; + cmark_node *doc; + cmark_node *pizzaNode = parse_custom_attributues_footnote(runner, markdown, &doc); + INT_EQ(runner, cmark_node_get_type(pizzaNode), CMARK_NODE_ATTRIBUTE, "markdown '%s' did not produce an attribute node", markdown); + STR_EQ(runner, cmark_node_get_attributes(pizzaNode), "has_tomato: true", "markdown '%s' did not produce the right attribute in footnote", markdown); + 
cmark_node *sandwichNode = cmark_node_next(cmark_node_next(pizzaNode)); + INT_EQ(runner, cmark_node_get_type(sandwichNode), CMARK_NODE_ATTRIBUTE, "markdown '%s' did not produce an attribute node", markdown); + STR_EQ(runner, cmark_node_get_attributes(sandwichNode), "price: 12", "markdown '%s' did not produce the right attribute in footnote", markdown); + cmark_node *icecreamNode = cmark_node_next(cmark_node_next(sandwichNode)); + INT_EQ(runner, cmark_node_get_type(icecreamNode), CMARK_NODE_ATTRIBUTE, "markdown '%s' did not produce an attribute node", markdown); + STR_EQ(runner, cmark_node_get_attributes(icecreamNode), "price: 12", "markdown '%s' did not produce the right attribute in footnote", markdown); + cmark_node *saladNode = cmark_node_next(cmark_node_next(icecreamNode)); + INT_EQ(runner, cmark_node_get_type(saladNode), CMARK_NODE_ATTRIBUTE, "markdown '%s' did not produce an attribute node", markdown); + STR_EQ(runner, cmark_node_get_attributes(saladNode), "has_tomato: true", "markdown '%s' did not produce the right attribute in footnote", markdown); + + cmark_node_free(doc); +} + +static void verify_custom_attributes_footnote_mixed_content(test_batch_runner *runner) { + static const char markdown[] = + "^[attribute1][1], [a link][2], ^[attribute2][3], ^[attribute3][1]\n" + "\n" + "^[1]: rainbow: 'fun'\n" + "[2]: https://www.example.com\n" + "Lorem ipsum\n" + "\n" + "^[3]: universe: 42\n"; + cmark_node *doc; + cmark_node *attributeNode1 = parse_custom_attributues_footnote(runner, markdown, &doc); + INT_EQ(runner, cmark_node_get_type(attributeNode1), CMARK_NODE_ATTRIBUTE, "markdown '%s' did not produce an attribute node", markdown); + STR_EQ(runner, cmark_node_get_attributes(attributeNode1), "rainbow: 'fun'", "markdown '%s' did not produce the right attribute in footnote", markdown); + cmark_node *linkNode = cmark_node_next(cmark_node_next(attributeNode1)); + INT_EQ(runner, cmark_node_get_type(linkNode), CMARK_NODE_LINK, "markdown '%s' did not produce an link 
node", markdown); + STR_EQ(runner, cmark_node_get_url(linkNode), "https://www.example.com", "markdown '%s' did not produce the right link in footnote", markdown); + cmark_node *attributeNode2 = cmark_node_next(cmark_node_next(linkNode)); + INT_EQ(runner, cmark_node_get_type(attributeNode2), CMARK_NODE_ATTRIBUTE, "markdown '%s' did not produce an attribute node", markdown); + STR_EQ(runner, cmark_node_get_attributes(attributeNode2), "universe: 42", "markdown '%s' did not produce the right attribute in footnote", markdown); + cmark_node *attributeNode3 = cmark_node_next(cmark_node_next(attributeNode2)); + INT_EQ(runner, cmark_node_get_type(attributeNode3), CMARK_NODE_ATTRIBUTE, "markdown '%s' did not produce an attribute node", markdown); + STR_EQ(runner, cmark_node_get_attributes(attributeNode3), "rainbow: 'fun'", "markdown '%s' did not produce the right attribute in footnote", markdown); + + cmark_node_free(doc); +} + +static void verify_custom_attributes_node_with_footnote(test_batch_runner *runner) { + verify_custom_attributes_footnote_basic(runner); + verify_custom_attributes_footnote_multiple_footnotes(runner); + verify_custom_attributes_footnote_reuse(runner); + verify_custom_attributes_footnote_mixed_content(runner); +} + typedef void (*reentrant_call_func) (void); static cmark_node *reentrant_parse_inline_ext(cmark_syntax_extension *self, cmark_parser *parser, @@ -1327,7 +1424,8 @@ int main() { ref_source_pos(runner); inline_only_opt(runner); preserve_whitespace_opt(runner); - verify_custome_attributes_node(runner); + verify_custom_attributes_node(runner); + verify_custom_attributes_node_with_footnote(runner); parser_interrupt(runner); test_print_summary(runner); diff --git a/src/blocks.c b/src/blocks.c index c331b4ad3..8024f4d24 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -284,9 +284,10 @@ static bool resolve_reference_link_definitions( bufsize_t pos; cmark_strbuf *node_content = &b->content; cmark_chunk chunk = {node_content->ptr, node_content->size, 
0}; - while (chunk.len && chunk.data[0] == '[' && - (pos = cmark_parse_reference_inline(parser->mem, &chunk, - parser->refmap))) { + while ((chunk.len && chunk.data[0] == '[' && + (pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) || + (chunk.len && chunk.data[0] == '^' && chunk.data[1] == '[' && + (pos = cmark_parse_reference_attributes_inline(parser->mem, &chunk, parser->refmap)))) { chunk.data += pos; chunk.len -= pos; diff --git a/src/include/inlines.h b/src/include/inlines.h index 96f0919ea..d323af994 100644 --- a/src/include/inlines.h +++ b/src/include/inlines.h @@ -11,6 +11,7 @@ extern "C" { cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url); cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); +cmark_chunk cmark_clean_attributes(cmark_mem *mem, cmark_chunk *attributes); CMARK_GFM_EXPORT void cmark_parse_inlines(cmark_parser *parser, @@ -21,6 +22,9 @@ void cmark_parse_inlines(cmark_parser *parser, bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, cmark_map *refmap); +bufsize_t cmark_parse_reference_attributes_inline(cmark_mem *mem, cmark_chunk *input, + cmark_map *refmap); + void cmark_inlines_add_special_character(cmark_parser *parser, unsigned char c, bool emphasis); void cmark_inlines_remove_special_character(cmark_parser *parser, unsigned char c, bool emphasis); diff --git a/src/include/references.h b/src/include/references.h index def944dc7..81330ae7a 100644 --- a/src/include/references.h +++ b/src/include/references.h @@ -9,14 +9,18 @@ extern "C" { struct cmark_reference { cmark_map_entry entry; + bool is_attributes_reference; cmark_chunk url; cmark_chunk title; + cmark_chunk attributes; }; typedef struct cmark_reference cmark_reference; void cmark_reference_create(cmark_map *map, cmark_chunk *label, cmark_chunk *url, cmark_chunk *title); +void cmark_reference_create_attributes(cmark_map *map, cmark_chunk *label, + cmark_chunk *attributes); cmark_map 
*cmark_reference_map_new(cmark_mem *mem); #ifdef __cplusplus diff --git a/src/inlines.c b/src/inlines.c index c558cc5aa..eafd5d71b 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -878,6 +878,12 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) { return cmark_chunk_buf_detach(&buf); } +// Clean custom attributes. This function uses `cmark_clean_url` internally +// because the requirements are the same +cmark_chunk cmark_clean_attributes(cmark_mem *mem, cmark_chunk *attributes) { + return cmark_clean_url(mem, attributes); +} + // Parse an autolink or HTML tag. // Assumes the subject has a '<' character at the current position. static cmark_node *handle_pointy_brace(subject *subj, int options) { @@ -933,10 +939,19 @@ static cmark_node *handle_pointy_brace(subject *subj, int options) { // Note: unescaped brackets are not allowed in labels. // The label begins with `[` and ends with the first `]` character // encountered. Backticks in labels do not start code spans. -static int link_label(subject *subj, cmark_chunk *raw_label) { +static int link_label(subject *subj, cmark_chunk *raw_label, bool parse_attribute_label) { bufsize_t startpos = subj->pos; int length = 0; unsigned char c; + + // If we are parsing attribute label, advance past ^ + if (parse_attribute_label) { + if (peek_char(subj) == '^') { + advance(subj); + } else { + return 0; + } + } // advance past [ if (peek_char(subj) == '[') { @@ -963,8 +978,9 @@ static int link_label(subject *subj, cmark_chunk *raw_label) { } if (c == ']') { // match found + bufsize_t position = parse_attribute_label ? 
startpos + 2 : startpos + 1; *raw_label = - cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); + cmark_chunk_dup(&subj->input, position, subj->pos - position); cmark_chunk_trim(raw_label); advance(subj); // advance past ] return 1; @@ -1089,6 +1105,7 @@ static cmark_node *handle_close_bracket_attribute(cmark_parser *parser, subject cmark_chunk raw_label; int found_label; cmark_node *tmp, *tmpnext; + cmark_reference *ref = NULL; bool isAttributesNode = false; // ^name[content](attributes) @@ -1112,6 +1129,19 @@ static cmark_node *handle_close_bracket_attribute(cmark_parser *parser, subject } } } + + // If we can't match direct link, look for [link label] that matches in refmap + raw_label = cmark_chunk_literal(""); + found_label = link_label(subj, &raw_label, false); + if (found_label) { + ref = (cmark_reference *)cmark_map_lookup(subj->refmap, &raw_label); + cmark_chunk_free(subj->mem, &raw_label); + + if (ref && ref->is_attributes_reference) { + isAttributesNode = true; + attributes = chunk_clone(subj->mem, &ref->attributes); + } + } if (!isAttributesNode) { // The current node can't be parsed as attribute node, turn it to a TEXT node instead. @@ -1220,7 +1250,7 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { // Next, look for a following [link label] that matches in refmap. // skip spaces raw_label = cmark_chunk_literal(""); - found_label = link_label(subj, &raw_label); + found_label = link_label(subj, &raw_label, false); if (!found_label) { // If we have a shortcut reference link, back up // to before the spacse we skipped. 
@@ -1239,7 +1269,7 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { cmark_chunk_free(subj->mem, &raw_label); } - if (ref != NULL) { // found + if (ref != NULL && !ref->is_attributes_reference) { // found url = chunk_clone(subj->mem, &ref->url); title = chunk_clone(subj->mem, &ref->title); goto match; @@ -1568,7 +1598,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, subject_from_buf(mem, -1, 0, &subj, input, NULL); // parse label: - if (!link_label(&subj, &lab) || lab.len == 0) + if (!link_label(&subj, &lab, false) || lab.len == 0) return 0; // colon: @@ -1616,6 +1646,56 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, return subj.pos; } +bufsize_t cmark_parse_reference_attributes_inline(cmark_mem *mem, cmark_chunk *input, + cmark_map *refmap) { + subject subj; + + cmark_chunk lab; + cmark_chunk attributes; + + bufsize_t matchlen = 0; + unsigned char c; + + subject_from_buf(mem, -1, 0, &subj, input, NULL); + + // parse attribute label: + if (!link_label(&subj, &lab, true) || lab.len == 0) { + return 0; + } + + // Colon: + if (peek_char(&subj) == ':') { + advance(&subj); + } else { + return 0; + } + + // parse attributes + spnl(&subj); + // read until next newline + bufsize_t startpos = subj.pos; + while ((c = peek_char(&subj)) && !S_is_line_end_char(c)) { + advance(&subj); + matchlen++; + } + + if (matchlen == 0) { + return 0; + } + + attributes = cmark_chunk_dup(&subj.input, startpos, matchlen); + + // parse final spaces and newline: + skip_spaces(&subj); + if (!skip_line_end(&subj)) { + return 0; + } + + // insert reference into refmap + cmark_reference_create_attributes(refmap, &lab, &attributes); + return subj.pos; +} + unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser) { return peek_char(parser); } diff --git a/src/references.c b/src/references.c index 7e7f34b38..5104c4190 100644 --- a/src/references.c +++ b/src/references.c @@ -11,6 +11,7 @@ 
static void reference_free(cmark_map *map, cmark_map_entry *_ref) { mem->free(ref->entry.label); cmark_chunk_free(mem, &ref->url); cmark_chunk_free(mem, &ref->title); + cmark_chunk_free(mem, &ref->attributes); mem->free(ref); } } @@ -28,8 +29,10 @@ void cmark_reference_create(cmark_map *map, cmark_chunk *label, ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref)); ref->entry.label = reflabel; + ref->is_attributes_reference = false; ref->url = cmark_clean_url(map->mem, url); ref->title = cmark_clean_title(map->mem, title); + ref->attributes = cmark_chunk_literal(""); ref->entry.age = map->size; ref->entry.next = map->refs; @@ -37,6 +40,30 @@ void cmark_reference_create(cmark_map *map, cmark_chunk *label, map->size++; } +void cmark_reference_create_attributes(cmark_map *map, cmark_chunk *label, + cmark_chunk *attributes) { + cmark_reference *ref; + unsigned char *reflabel = normalize_map_label(map->mem, label); + + /* empty reference name, or composed from only whitespace */ + if (reflabel == NULL) + return; + + assert(map->sorted == NULL); + + ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref)); + ref->entry.label = reflabel; + ref->is_attributes_reference = true; + ref->url = cmark_chunk_literal(""); + ref->title = cmark_chunk_literal(""); + ref->attributes = cmark_clean_attributes(map->mem, attributes); + ref->entry.age = map->size; + ref->entry.next = map->refs; + + map->refs = (cmark_map_entry *)ref; + map->size++; +} + cmark_map *cmark_reference_map_new(cmark_mem *mem) { return cmark_map_new(mem, reference_free); } From 1424dac16be513a4e81024fbcebfc2894e6828c0 Mon Sep 17 00:00:00 2001 From: Jeremy Schonfeld <jschonfeld@apple.com> Date: Mon, 14 Jun 2021 11:29:14 -0700 Subject: [PATCH 210/218] Preserve leading newlines when CMARK_OPT_PRESERVE_WHITESPACE is set rdar://79148254 --- api_test/main.c | 5 +++++ src/blocks.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/api_test/main.c b/api_test/main.c index 448a1ef7f..cb3dd0ffd 
100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1201,6 +1201,11 @@ static void preserve_whitespace_opt(test_batch_runner *runner) { check_markdown_plaintext(runner, "hel\nworld\nlo"); check_markdown_plaintext(runner, " hel \n world \n lo "); check_markdown_plaintext(runner, " hello \n \n world "); + check_markdown_plaintext(runner, "\n"); + check_markdown_plaintext(runner, "\n\n\n"); + check_markdown_plaintext(runner, "\nHello"); + check_markdown_plaintext(runner, "Hello\n"); + check_markdown_plaintext(runner, "\nHello\n"); } static void check_markdown_attributes_node(test_batch_runner *runner, char *markdown, cmark_node_type expectedType, char *expectedAttributes) { diff --git a/src/blocks.c b/src/blocks.c index 8024f4d24..6e87f19cb 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -1416,7 +1416,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, container = finalize(parser, container); assert(parser->current != NULL); } - } else if (parser->blank) { + } else if (parser->blank && (parser->options & CMARK_OPT_PRESERVE_WHITESPACE) == 0) { // ??? 
do nothing } else if (accepts_lines(S_type(container))) { if (S_type(container) == CMARK_NODE_HEADING && From e9b454e95532af31f02e2467d94b38c9d1eb9f80 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 15 Jun 2021 11:55:28 -0600 Subject: [PATCH 211/218] add cmark-gfm-bin target to Package.swift --- Package.swift | 14 +++++++++++++- {src => bin}/main.c | 0 src/CMakeLists.txt | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) rename {src => bin}/main.c (100%) diff --git a/Package.swift b/Package.swift index 6e9a89cb2..a16c6ea5c 100644 --- a/Package.swift +++ b/Package.swift @@ -13,12 +13,14 @@ let package = Package( .library( name: "cmark-gfm-extensions", targets: ["cmark-gfm-extensions"]), + .executable( + name: "cmark-gfm-bin", + targets: ["cmark-gfm-bin"]), ], targets: [ .target(name: "cmark-gfm", path: "src", exclude: [ - "main.c", "scanners.re", "libcmark-gfm.pc.in", "config.h.in", @@ -38,5 +40,15 @@ let package = Package( "ext_scanners.re", ] ), + .target(name: "cmark-gfm-bin", + dependencies: [ + "cmark-gfm", + "cmark-gfm-extensions", + ], + path: "bin", + sources: [ + "main.c", + ] + ), ] ) diff --git a/src/main.c b/bin/main.c similarity index 100% rename from src/main.c rename to bin/main.c diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 874ad3822..b7018903c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,7 +40,7 @@ set(LIBRARY_SOURCES ) set(PROGRAM "cmark-gfm") -set(PROGRAM_SOURCES main.c) +set(PROGRAM_SOURCES "${PROJECT_SOURCE_DIR}/bin/main.c") include_directories(include ${CMAKE_CURRENT_BINARY_DIR}) include_directories( From 8a631d3e74c81554351f1780fececb93b858c666 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 15 Jun 2021 11:58:55 -0600 Subject: [PATCH 212/218] add .build/.swiftpm to gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index d503137bb..784c04b6a 100644 --- a/.gitignore +++ b/.gitignore @@ 
-39,3 +39,7 @@ alltests.md progit/ bench/benchinput.md test/afl_results/ + +# Build directories for SwiftPM and Xcode +.swiftpm +.build From 4e7da18f62d5c0ddbf1d163686174ca1b8165fa9 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 15 Jun 2021 12:28:33 -0600 Subject: [PATCH 213/218] add api_test to Package.swift --- Package.swift | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Package.swift b/Package.swift index a16c6ea5c..cb0474d09 100644 --- a/Package.swift +++ b/Package.swift @@ -16,6 +16,8 @@ let package = Package( .executable( name: "cmark-gfm-bin", targets: ["cmark-gfm-bin"]), + .executable(name: "api_test", + targets: ["api_test"]) ], targets: [ .target(name: "cmark-gfm", @@ -50,5 +52,15 @@ let package = Package( "main.c", ] ), + .target(name: "api_test", + dependencies: [ + "cmark-gfm", + "cmark-gfm-extensions", + ], + path: "api_test", + exclude: [ + "CMakeLists.txt", + ] + ) ] ) From f0c2fb10a2719536e77554cb44c8979e67f2b266 Mon Sep 17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 15 Jun 2021 14:41:33 -0600 Subject: [PATCH 214/218] fix warning in api_test --- api_test/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api_test/main.c b/api_test/main.c index cb3dd0ffd..9897c2d85 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1179,7 +1179,7 @@ static void check_markdown_plaintext(test_batch_runner *runner, char *markdown) cmark_node *textNode = cmark_node_first_child(pg); INT_EQ(runner, cmark_node_get_type(textNode), CMARK_NODE_TEXT, "markdown '%s' did not produce a text node inside the paragraph node", markdown); const char *text = cmark_node_get_literal(textNode); - OK(runner, text, "Text literal for '%s' was null", markdown); + OK(runner, (text != NULL), "Text literal for '%s' was null", markdown); if (text) { STR_EQ(runner, text, markdown, "markdown '%s' resulted in '%s'", markdown, text); } else { From 4c9a330c21c62b6db5ebc9d42b21564bd4c04c2a Mon Sep 
17 00:00:00 2001 From: Victoria Mitchell <victoria_m@apple.com> Date: Tue, 15 Jun 2021 15:30:51 -0600 Subject: [PATCH 215/218] add explicit modulemap for cmark-gfm this silences the "header should be renamed to be used as an umbrella header" warning --- src/include/module.modulemap | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/include/module.modulemap diff --git a/src/include/module.modulemap b/src/include/module.modulemap new file mode 100644 index 000000000..1fc033abb --- /dev/null +++ b/src/include/module.modulemap @@ -0,0 +1,25 @@ +module cmark_gfm { + umbrella header "cmark-gfm.h" + header "cmark-gfm_config.h" + header "cmark-gfm-extension_api.h" + header "buffer.h" + header "chunk.h" + header "cmark_ctype.h" + header "footnotes.h" + header "houdini.h" + header "html.h" + header "inlines.h" + header "iterator.h" + header "map.h" + header "mutex.h" + header "node.h" + header "parser.h" + header "plugin.h" + header "references.h" + header "registry.h" + header "render.h" + header "scanners.h" + header "syntax_extension.h" + header "utf8.h" + export * +} From 44090a3606e6d4be67c9a5be4128b2087428597a Mon Sep 17 00:00:00 2001 From: Junior Bontognali <bontojr@apple.com> Date: Fri, 30 Jul 2021 11:07:59 +0200 Subject: [PATCH 216/218] add intention to take upstream changes rdar://81302358 --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index a36d6f9b3..5ab690059 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ cmark-gfm repository adds GitHub Flavored Markdown extensions to [the upstream implementation], as defined in [the spec]. +Our plan is to take the changes into this repository as the upstream project evolve. + The rest of the README is preserved as-is from the upstream source. Note that the library and binaries produced by this fork are suffixed with `-gfm` in order to distinguish them from the upstream. 
From 2190baadafe6e83e50f8f97c2909eb48fd3eae66 Mon Sep 17 00:00:00 2001 From: Franklin Schrans <fschrans@apple.com> Date: Mon, 2 Aug 2021 12:49:45 +0200 Subject: [PATCH 217/218] Update README.md --- README.md | 12 ++++++++---- src/.DS_Store | Bin 6148 -> 0 bytes 2 files changed, 8 insertions(+), 4 deletions(-) delete mode 100644 src/.DS_Store diff --git a/README.md b/README.md index 5ab690059..374b10586 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,24 @@ cmark-gfm ========= -[![Build Status]](https://travis-ci.org/github/cmark-gfm) -[![Windows Build Status]](https://ci.appveyor.com/project/github/cmark) - `cmark-gfm` is an extended version of the C reference implementation of [CommonMark], a rationalized version of Markdown syntax with a spec. This repository adds GitHub Flavored Markdown extensions to [the upstream implementation], as defined in [the spec]. -Our plan is to take the changes into this repository as the upstream project evolve. +Changes upstream in `cmark` will be pulled into this `cmark-gfm` project repository +as the upstream project evolves. The original `cmark` repository can be found here: +<https://github.com/commonmark/cmark>. The rest of the README is preserved as-is from the upstream source. Note that the library and binaries produced by this fork are suffixed with `-gfm` in order to distinguish them from the upstream. +## License + +The original `cmark` code is released under a BSD2 license. This same license +applies to the Swift code included in this `cmark-gfm` repository. 
+ --- It provides a shared library (`libcmark`) with functions for parsing diff --git a/src/.DS_Store b/src/.DS_Store deleted file mode 100644 index 16fc3eeabd69994654190062bfefa5d6ff475c3d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKJ5EC}5S)cbL`st~rLVvZtSC7_E)bCi5>iA8l>Swmi=#37DTp5FLIIkU)?=@C zY<Y^eZvoi)JUjwR0CTz{K75#(@4HXzt|CUH^NceFJiiR@ho{pd`{#gjAMlDd9B};3 zKOKEbCIzH`6p#W^KnmPgfhw@`#f{I^aZ*4E{Cx%d`_Sl)y>Lv7PX~u+0f;k(!#Iy# zg4jGj?1f_@BQ#4YF{xH9h9#ZxR(ZW}OiVhgnh&d+tvVEo+j)MAbXZT+C<Uazr2_Z4 zTzdWA(y#RYmn5yEfE4&w3fOG3+pPJds;!g9d97{qJ>7FY>290{g+r8MVw7Vpyd1A0 cDf61ox!()N#Go@CbfSI+To;)X_-zHg0H8z_bpQYW From e21b3ebcd577a61eab828803fbf426ae4d31b43b Mon Sep 17 00:00:00 2001 From: kyle <kyle201817146@gmail.com> Date: Mon, 8 Nov 2021 00:58:46 +0800 Subject: [PATCH 218/218] [Bugfix] Fix the backticks bug --- src/inlines.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/inlines.c b/src/inlines.c index eafd5d71b..75cd11016 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -373,13 +373,14 @@ static void S_normalize_code(cmark_strbuf *s) { // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. static cmark_node *handle_backticks(subject *subj, int options) { + bufsize_t initPos = subj->pos; cmark_chunk openticks = take_while(subj, isbacktick); bufsize_t startpos = subj->pos; bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len); if (endpos == 0) { // not found subj->pos = startpos; // rewind - return make_str(subj, subj->pos, subj->pos, openticks); + return make_str(subj, initPos, initPos + openticks.len - 1, openticks); } else { cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);