ADDED: Add file transclusion

This commit is contained in:
Fletcher T. Penney 2017-02-11 17:53:46 -05:00
parent 9e6b7e0134
commit 384874cd3a
19 changed files with 674 additions and 10 deletions

View File

@ -180,6 +180,7 @@ set(src_files
src/stack.c
src/token.c
src/token_pairs.c
src/transclude.c
src/writer.c
)
@ -197,6 +198,7 @@ set(header_files
src/stack.h
src/token.h
src/token_pairs.h
src/transclude.h
src/uthash.h
src/writer.h
)

View File

@ -1017,7 +1017,7 @@ void mmd_export_token_html(DString * out, const char * source, token * t, size_t
temp_bool = true;
if (t->type == PAIR_BRACKET) {
// This is a locator for subsequent ciation
// This is a locator for subsequent citation
temp_char = text_inside_pair(source, t);
temp_char2 = label_from_string(temp_char);

View File

@ -98,6 +98,14 @@ token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byt
void mmd_engine_parse_string(mmd_engine * e);
/// Does the text have metadata?
bool mmd_has_metadata(mmd_engine * e, size_t * end);
/// Extract desired metadata as string value
char * metavalue_for_key(mmd_engine * e, const char * key);
void mmd_export_token_tree(DString * out, mmd_engine * e, short format);
@ -287,6 +295,7 @@ enum smart_quotes_language {
enum output_format {
FORMAT_MMD,
FORMAT_HTML,
FORMAT_LATEX,
FORMAT_ODF,
@ -314,6 +323,7 @@ enum parser_extensions {
EXT_ESCAPED_LINE_BREAKS = 1 << 17, //!< Escaped line break
EXT_NO_STRONG = 1 << 18, //!< Don't allow nested \<strong\>'s
EXT_NO_EMPH = 1 << 19, //!< Don't allow nested \<emph\>'s
EXT_TRANSCLUDE = 1 << 20, //!< Perform transclusion(s)
EXT_FAKE = 1 << 31, //!< 31 is highest number allowed
};

View File

@ -54,6 +54,7 @@
*/
#include <ctype.h>
#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -66,6 +67,7 @@
#include "html.h"
#include "mmd.h"
#include "token.h"
#include "transclude.h"
#include "version.h"
#define kBUFFERSIZE 4096 // How many bytes to read at a time
@ -97,7 +99,7 @@ DString * stdin_buffer() {
}
DString * scan_file(const char * fname) {
static DString * scan_file(const char * fname) {
/* Read from a file and return a GString *
`buffer` will need to be freed elsewhere */
@ -177,7 +179,7 @@ char * mmd_process(DString * buffer, unsigned long extensions, short format, sho
int main(int argc, char** argv) {
int exitcode = EXIT_SUCCESS;
char * binname = "multimarkdown";
short format = 0;
short format = FORMAT_HTML;
short language = LC_EN;
// Initialize argtable structs
@ -247,7 +249,7 @@ int main(int argc, char** argv) {
// Parse options
unsigned long extensions = EXT_SMART | EXT_NOTES | EXT_CRITIC;
unsigned long extensions = EXT_SMART | EXT_NOTES | EXT_CRITIC | EXT_TRANSCLUDE;
if (a_compatibility->count > 0) {
// Compatibility mode disables certain features
@ -338,6 +340,15 @@ int main(int argc, char** argv) {
break;
}
// Perform transclusion(s)
if (extensions & EXT_TRANSCLUDE) {
	// dirname() may modify the string it is given and returns a pointer
	// that must NOT be passed to free() (it points into its argument or
	// into static storage).  Work on a private copy of the filename and
	// release only that copy.
	char * path_copy = strdup(a_file->filename[i]);

	if (path_copy != NULL) {
		transclude_source(buffer, dirname(path_copy), format, NULL, NULL);
		free(path_copy);
	}
}
result = mmd_process(buffer, extensions, format, language);
if (!(output_stream = fopen(output_filename, "w"))) {
@ -378,6 +389,15 @@ int main(int argc, char** argv) {
buffer = stdin_buffer();
}
if ((extensions & EXT_TRANSCLUDE) && (a_file->count == 1)) {
	// Perform transclusion(s)
	//
	// dirname() may modify the string it is given and returns a pointer
	// that must NOT be passed to free() (it points into its argument or
	// into static storage).  Work on a private copy of the filename and
	// release only that copy.
	char * path_copy = strdup(a_file->filename[0]);

	if (path_copy != NULL) {
		transclude_source(buffer, dirname(path_copy), format, NULL, NULL);
		free(path_copy);
	}
}
result = mmd_process(buffer, extensions, format, language);
// Where does output go?

View File

@ -729,7 +729,12 @@ void strip_quote_markers_from_block(mmd_engine * e, token * block) {
/// Create a token chain from source string
token * mmd_tokenize_string(mmd_engine * e, const char * str, size_t len) {
/// stop_on_empty_line allows us to stop parsing part of the way through
token * mmd_tokenize_string(mmd_engine * e, const char * str, size_t len, bool stop_on_empty_line) {
// Reset metadata flag
e->allow_meta = (e->extensions & EXT_COMPATIBILITY) ? false : true;
// Create a scanner (for re2c)
Scanner s;
s.start = str;
@ -793,6 +798,11 @@ token * mmd_tokenize_string(mmd_engine * e, const char * str, size_t len) {
mmd_assign_line_type(e, line);
token_append_child(root, line);
if (stop_on_empty_line) {
if (line->type == LINE_EMPTY)
return root;
}
line = token_new(0,s.cur - str,0);
break;
default:
@ -1709,7 +1719,7 @@ token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byt
e->definition_stack->size = 0;
// Tokenize the string
token * doc = mmd_tokenize_string(e, &e->dstr->str[byte_start], byte_len);
token * doc = mmd_tokenize_string(e, &e->dstr->str[byte_start], byte_len, false);
// Parse tokens into blocks
mmd_parse_token_chain(e, doc);
@ -1751,3 +1761,64 @@ void mmd_engine_parse_string(mmd_engine * e) {
e->root = mmd_engine_parse_substring(e, 0, e->dstr->currentStringLength);
}
/// Report whether the engine's text starts with a metadata block.
///
/// Only the text up to the first empty line is tokenized and parsed.
/// When the first block is BLOCK_META the function returns true and, if
/// `end` is not NULL, stores that block's `len` in `*end`.  When no
/// metadata is found `*end` is left untouched.
bool mmd_has_metadata(mmd_engine * e, size_t * end) {
	// Discard any parse tree left over from an earlier run
	// NOTE(review): e->root is not reset after being freed here -- confirm
	// nothing reads it before the parser assigns it again.
	if (e->root) {
		token_tree_free(e->root);
	}

#ifdef kUseObjectPool
	// Ensure token pool is available and ready
	token_pool_init();
#endif

	// Tokenize only the leading portion of the text, then build blocks
	token * tree = mmd_tokenize_string(e, &e->dstr->str[0], e->dstr->currentStringLength, true);

	mmd_parse_token_chain(e, tree);

	if (tree == NULL) {
		return false;
	}

	bool found = false;
	token * first = tree->child;

	if (first && first->type == BLOCK_META) {
		found = true;

		if (end != NULL) {
			*end = first->len;
		}
	}

	token_tree_free(tree);

	return found;
}
/// Grab metadata without processing entire document
///
/// If the engine's metadata stack is empty, the start of the document is
/// scanned for metadata first.  `key` is normalised with
/// label_from_string() and compared against each stored key.
///
/// Returns the stored value for the first matching key, or NULL when the
/// document has no metadata or no key matches.
/// Returned char * does not need to be freed (it points at the value held
/// in the engine's metadata stack).
char * metavalue_for_key(mmd_engine * e, const char * key) {
	if (e->metadata_stack->size == 0) {
		// Ensure we have checked for metadata
		if (!mmd_has_metadata(e, NULL)) {
			return NULL;
		}
	}

	// The normalised key is a temporary owned by this function; it was
	// previously leaked on every path.
	char * clean = label_from_string(key);

	if (clean == NULL) {
		return NULL;
	}

	char * result = NULL;

	for (int i = 0; i < e->metadata_stack->size; ++i) {
		meta * m = stack_peek_index(e->metadata_stack, i);

		if (strcmp(clean, m->key) == 0) {
			// We have a match
			result = m->value;
			break;
		}
	}

	free(clean);

	return result;
}

View File

@ -1114,6 +1114,7 @@ static void yy_reduce(
break;
case 2: /* blocks ::= block */
{
engine->root = yymsp[0].minor.yy0;
strip_line_tokens_from_block(engine, yymsp[0].minor.yy0);
#ifndef NDEBUG
fprintf(stderr, "First block %d\n", yymsp[0].minor.yy0->type);

View File

@ -84,6 +84,7 @@ blocks(A) ::= blocks(B) block(C).
}
blocks(A) ::= block(B).
{
engine->root = B; // In case the first block is metadata and we just want to know if it exists
strip_line_tokens_from_block(engine, B);
#ifndef NDEBUG
fprintf(stderr, "First block %d\n", B->type);

View File

@ -76,10 +76,6 @@ void token_pool_init(void) {
if (token_pool == NULL) {
// No pool exists
token_pool = pool_new(sizeof(token));
} else {
// Pool exists, ensure it's drained
// NOTE: This invalidates any tokens currently in use.
token_pool_drain();
}
}

440
src/transclude.c Normal file
View File

@ -0,0 +1,440 @@
/**
MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
@file transclude.c
@brief
@author Fletcher T. Penney
@bug
**/
/*
Copyright © 2016 - 2017 Fletcher T. Penney.
The `MultiMarkdown 6` project is released under the MIT License..
GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
https://github.com/fletcher/MultiMarkdown-4/
MMD 4 is released under both the MIT License and GPL.
CuTest is released under the zlib/libpng license. See CuTest.c for the text
of the license.
## The MIT License ##
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "d_string.h"
#include "libMultiMarkdown.h"
#include "transclude.h"
#if defined(__WIN32)
#include <windows.h>
#endif
#define kBUFFERSIZE 4096 // How many bytes to read at a time
/// Report whether `c` is a path separator on the current platform.
///
/// `/` always counts.  When built for Windows (`__WIN32`), `\` counts as
/// well, since either may be used there -- thanks to t-beckmann on github
/// for suggesting a fix for this.
bool is_separator(char c) {
	if (c == '/') {
		return true;
	}

#if defined(__WIN32)

	if (c == '\\') {
		return true;
	}

#endif

	return false;
}
#ifdef TEST
/// Check is_separator() against a letter, a forward slash and a backslash.
void Test_is_separator(CuTest* tc) {
	char * sample = "a/\\";

	// A letter is never a separator; a forward slash always is
	CuAssertIntEquals(tc, false, is_separator(sample[0]));
	CuAssertIntEquals(tc, true, is_separator(sample[1]));

	// A backslash only counts when built for Windows
#if defined(__WIN32)
	CuAssertIntEquals(tc, true, is_separator(sample[2]));
#else
	CuAssertIntEquals(tc, false, is_separator(sample[2]));
#endif
}
#endif
/// Ensure that a non-empty path ends in a path separator.
///
/// Appends `\` on Windows builds (`__WIN32`) and `/` elsewhere, unless the
/// last character already satisfies is_separator().  A NULL or empty path
/// is left untouched: there is no last character to inspect, and appending
/// a separator to an empty string would turn a relative path into one
/// rooted at the filesystem root.
void add_trailing_sep(DString * path) {
#if defined(__WIN32)
	char sep = '\\';
#else
	char sep = '/';
#endif

	// Guard against reading str[-1]
	if (path == NULL || path->currentStringLength == 0) {
		return;
	}

	// Ensure that folder ends in separator
	if (!is_separator(path->str[path->currentStringLength - 1])) {
		d_string_append_c(path, sep);
	}
}
/// Combine directory and base filename to create a full path */
char * path_from_dir_base(char * dir, char * base) {
if (!dir && !base)
return NULL;
DString * path = NULL;
char * result = NULL;
if ((base != NULL) && (is_separator(base[0]))) {
// We have an absolute path
path = d_string_new(base);
} else {
// We have a directory and relative path
path = d_string_new(dir);
// Ensure that folder ends in separator
add_trailing_sep(path);
// Append filename (if present)
if (base)
d_string_append(path, base);
}
result = path->str;
d_string_free(path, false);
return result;
}
#ifdef TEST
/// Exercise path_from_dir_base() with a relative base, an absolute base,
/// and no input at all.
void Test_path_from_dir_base(CuTest* tc) {
	char folder[10] = "/foo";
	char name[10] = "bar";
	char * joined;

	// Relative base: joined to the folder with the platform separator
	joined = path_from_dir_base(folder, name);

#if defined(__WIN32)
	CuAssertStrEquals(tc, "/foo\\bar", joined);
#else
	CuAssertStrEquals(tc, "/foo/bar", joined);
#endif

	free(joined);

	// Base starting with a separator: returned on its own
	strcpy(name, "/bar");
	joined = path_from_dir_base(folder, name);

	CuAssertStrEquals(tc, "/bar", joined);

	free(joined);

	// Neither directory nor base: nothing to build
	joined = path_from_dir_base(NULL, NULL);

	CuAssertStrEquals(tc, NULL, joined);
}
#endif
/// Copy `len` bytes starting at `src` into a newly allocated,
/// NUL-terminated string.  Returns NULL if the allocation fails.
static char * split_path_copy_span(const char * src, size_t len) {
	char * copy = malloc(len + 1);

	if (copy != NULL) {
		memcpy(copy, src, len);
		copy[len] = '\0';
	}

	return copy;
}

/// Separate filename and directory from a full path
///
/// `*dir` receives everything up to and including the last separator (an
/// empty string when there is no separator); `*file` receives whatever
/// follows it.  Both are newly allocated and must be released with free();
/// either is set to NULL if its allocation fails.  `dir` or `file` may be
/// NULL when that part is not wanted.  A NULL `path` is treated as "".
///
/// On Windows builds (`__WIN32`) both `\` and `/` are separators;
/// elsewhere only `/` is.
///
/// Unlike the original loop (which started searching at `path + 1`), the
/// scan starts at the first character, so an empty path no longer reads
/// past its terminator and a root-level path such as "/foo" is split into
/// "/" and "foo".
///
/// See http://stackoverflow.com/questions/1575278/function-to-split-a-filepath-into-path-and-file
void split_path_file(char ** dir, char ** file, char * path) {
#if defined(__WIN32)
	const char sep[] = "\\/";	// Windows allows either variant
#else
	const char sep[] = "/";
#endif

	const char * whole = (path != NULL) ? path : "";

	// `name` tracks the character just after the last separator seen
	const char * name = whole;
	const char * hit = strpbrk(whole, sep);

	while (hit != NULL) {
		name = hit + 1;
		hit = strpbrk(name, sep);
	}

	if (dir != NULL) {
		*dir = split_path_copy_span(whole, (size_t)(name - whole));
	}

	if (file != NULL) {
		*file = split_path_copy_span(name, strlen(name));
	}
}
#ifdef TEST
/// Exercise split_path_file() with forward-slash and backslash paths.
void Test_split_path_file(CuTest* tc) {
	char * dir, * file;

	char * path = "/foo/bar.txt";
	split_path_file(&dir, &file, path);

	CuAssertStrEquals(tc, "/foo/", dir);
	CuAssertStrEquals(tc, "bar.txt", file);

	// Both results are heap allocated -- release them before reusing the
	// pointers for the next case (they were previously leaked)
	free(dir);
	free(file);

	path = "\\foo\\bar.txt";
	split_path_file(&dir, &file, path);

	// Backslashes only act as separators on Windows builds
#if defined(__WIN32)
	CuAssertStrEquals(tc, "\\foo\\", dir);
	CuAssertStrEquals(tc, "bar.txt", file);
#else
	CuAssertStrEquals(tc, "", dir);
	CuAssertStrEquals(tc, "\\foo\\bar.txt", file);
#endif

	free(dir);
	free(file);
}
#endif
/// Read the whole of file `fname` into a newly created DString.
///
/// Returns NULL if the file cannot be opened.  Otherwise the returned
/// buffer will need to be freed elsewhere by the caller.
DString * scan_file(const char * fname) {
	FILE * input;

#if defined(__WIN32)
	// `fname` is treated as UTF-8 and converted to wide characters for
	// _wfopen; the first call only measures the required length.
	int wchars_num = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
	wchar_t wstr[wchars_num];
	MultiByteToWideChar(CP_UTF8, 0, fname, -1, wstr, wchars_num);

	input = _wfopen(wstr, L"r");
#else
	input = fopen(fname, "r");
#endif

	if (input == NULL) {
		return NULL;
	}

	DString * contents = d_string_new("");
	char block[kBUFFERSIZE];

	// Append fixed-size chunks until fread reports nothing more was read
	for (;;) {
		size_t got = fread(block, 1, kBUFFERSIZE, input);

		if (got == 0) {
			break;
		}

		d_string_append_c_array(contents, block, got);
	}

	fclose(input);

	return contents;
}
/// Recursively transclude source text, given a search directory.
/// Track files to prevent infinite recursive loops
///
/// Every `{{name}}` token in `source` (other than `{{TOC}}`) whose file can
/// be read is replaced, in place, by that file's contents -- after the file
/// has itself been transcluded and had its leading metadata (or, failing
/// that, a UTF-8 BOM) stripped.  Tokens whose file cannot be read are left
/// as they are.
///
/// @param source    text to modify in place
/// @param dir       directory used to resolve relative names; a
///                  `transclude base` metadata value in `source` is
///                  combined with it via path_from_dir_base()
/// @param format    output format; when non-zero, a trailing `.*` on a name
///                  is replaced by `.html`, `.tex` or `.txt`
/// @param parsed    stack of file paths currently being expanded, or NULL
///                  to have a temporary one created and freed here
/// @param manifest  optional stack that receives a strdup()'d copy of each
///                  distinct file path encountered; may be NULL
void transclude_source(DString * source, char * dir, short format, stack * parsed, stack * manifest) {
	DString * file_path;
	DString * buffer;

	// Ensure folder is tidied up
	char * folder = path_from_dir_base(dir, NULL);

	char * start, * stop;
	char text[1100];

	char * temp;

	size_t offset;
	size_t text_len;
	size_t insert_at;

	// Set before any `goto exit` so the cleanup code below never looks at
	// an indeterminate pointer
	stack * parse_stack = parsed;

	// Metadata in this source may override the search directory
	mmd_engine * e = mmd_engine_create_with_dstring(source, EXT_TRANSCLUDE);

	if (mmd_has_metadata(e, &offset)) {
		temp = metavalue_for_key(e, "transclude base");

		if (temp) {
			free(folder);

			folder = path_from_dir_base(dir, temp);
		}
	}

	mmd_engine_free(e, false);

	if (folder == NULL) {
		// We don't have anywhere to search, so nothing to do
		goto exit;
	}

	// Make sure we use a parse tree for children
	if (parse_stack == NULL) {
		// Create temporary stack
		parse_stack = stack_new(0);
	}

	// Iterate through source text, looking for `{{foo}}`
	start = strstr(source->str, "{{");

	while (start != NULL) {
		stop = strstr(start, "}}");

		if (stop == NULL) {
			break;
		}

		// Where will we start next search?
		offset = stop + 2 - source->str;

		// Ensure we have a reasonable match -- cap at 1000 characters
		if (stop - start < 1000) {
			// Grab text between the braces (no NUL can occur inside it,
			// since strstr found `}}` beyond it)
			text_len = stop - start - 2;
			memcpy(text, start + 2, text_len);
			text[text_len] = '\0';

			// Is this just {{TOC}}
			if (strcmp("TOC", text) == 0) {
				start = strstr(stop, "{{");
				continue;
			}

			// Is this an absolute path or relative path?
			if (is_separator(text[0])) {
				// Absolute path
				file_path = d_string_new(text);
			} else {
				// Relative path
				file_path = d_string_new(folder);

				// Ensure that folder ends in separator (an empty folder
				// has no last character to inspect)
				if (file_path->currentStringLength > 0) {
					add_trailing_sep(file_path);
				}

				d_string_append(file_path, text);
			}

			// Adjust file wildcard extension for output format
			// e.g. `foo.*`
			// The length check keeps the index non-negative for names
			// shorter than two characters (e.g. `{{}}` or `{{a}}`).
			if (format && (text_len >= 2) && strncmp(&text[text_len - 2], ".*", 2) == 0) {
				// Trim '.*'
				d_string_erase(file_path, file_path->currentStringLength - 2, 2);

				switch (format) {
					case FORMAT_HTML:
						d_string_append(file_path, ".html");
						break;

					case FORMAT_LATEX:
						d_string_append(file_path, ".tex");
						break;

					default:
						d_string_append(file_path, ".txt");
						break;
				}
			}

			// Prevent infinite recursive loops
			for (int i = 0; i < parse_stack->size; ++i) {
				temp = stack_peek_index(parse_stack, i);

				if (strcmp(file_path->str, temp) == 0) {
					// We have parsed this file already, don't recurse infinitely
					goto finish_file;
				}
			}

			// Add this file to stack (removed again below, before
			// file_path is freed)
			stack_push(parse_stack, file_path->str);

			// Add file to the manifest?
			if (manifest) {
				bool add = true;

				for (int i = 0; i < manifest->size; ++i) {
					temp = stack_peek_index(manifest, i);

					if (strcmp(file_path->str, temp) == 0) {
						// Already on manifest, don't duplicate
						add = false;
					}
				}

				// Add path to manifest
				if (add) {
					stack_push(manifest, strdup(file_path->str));
				}
			}

			// Read the file
			buffer = scan_file(file_path->str);

			// Substitute buffer for transclusion token
			if (buffer) {
				// Remember the token position as an index: inserting text
				// below may reallocate source->str, after which `start`
				// can no longer be used for pointer arithmetic.
				insert_at = start - source->str;

				// Erase transclusion token from current source
				d_string_erase(source, insert_at, 2 + stop - start);

				// Recursively check this file for transclusions
				transclude_source(buffer, folder, format, parse_stack, manifest);

				// Strip metadata from buffer now that we have parsed it
				e = mmd_engine_create_with_dstring(buffer, EXT_TRANSCLUDE);

				if (mmd_has_metadata(e, &offset)) {
					d_string_erase(buffer, 0, offset);
				} else {
					// Do we need to strip BOM?
					if (strncmp(buffer->str, "\xef\xbb\xbf",3) == 0) {
						d_string_erase(buffer, 0, 3);
					}
				}

				mmd_engine_free(e, false);

				// Insert file text
				d_string_insert(source, insert_at, buffer->str);

				// Shift search point past the inserted text
				offset = insert_at + buffer->currentStringLength;

				d_string_free(buffer, true);
			}

			// Remove this file from stack
			stack_pop(parse_stack);

finish_file:
			d_string_free(file_path, true);
		} else {
			// Match was too long to be reasonable file name
		}

		start = strstr(source->str + offset, "{{");
	}

exit:

	if ((parsed == NULL) && (parse_stack != NULL)) {
		// Free temp stack
		stack_free(parse_stack);
	}

	free(folder);
}

74
src/transclude.h Normal file
View File

@ -0,0 +1,74 @@
/**
MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
@file transclude.h
@brief
@author Fletcher T. Penney
@bug
**/
/*
Copyright © 2016 - 2017 Fletcher T. Penney.
The `MultiMarkdown 6` project is released under the MIT License..
GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
https://github.com/fletcher/MultiMarkdown-4/
MMD 4 is released under both the MIT License and GPL.
CuTest is released under the zlib/libpng license. See CuTest.c for the text
of the license.
## The MIT License ##
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef TRANSCLUDE_MULTIMARKDOWN_6_H
#define TRANSCLUDE_MULTIMARKDOWN_6_H

// The prototypes below use DString, so pull in its declaration here rather
// than relying on every includer to have included d_string.h first.
#include "d_string.h"
#include "stack.h"

#ifdef TEST
#include "CuTest.h"
#endif

/// Combine directory and base filename to create a full path
char * path_from_dir_base(char * dir, char * base);

/// Recursively transclude source text, given a search directory.
/// Track files to prevent infinite recursive loops
void transclude_source(DString * source, char * dir, short format, stack * parsed, stack * manifest);

#endif

View File

@ -211,5 +211,7 @@ token * manual_label_from_header(token * h, const char * source);
char * label_from_string(const char * str);
char * clean_string(const char * str, bool lowercase);
#endif

View File

@ -0,0 +1,15 @@
<p>This text is included in <code>foo.txt</code>.</p>
<p>This should not be transcluded to avoid an infinite loop &#8211; {{foo.txt}}</p>
<p>This text is included in <code>bar.txt</code>.</p>
<p>This can be transcluded without causing an infinite loop &#8211; {{foo.txt}}</p>
<pre><code>This is a file with no metadata.
</code></pre>
<pre><code>This is a file with no metadata.
</code></pre>
<p>This is <em>HTML</em>.</p>

View File

@ -0,0 +1,5 @@
<p>{{foo.txt}}</p>
<p>{{bar.txt}}</p>
<p>{{transclusion/bat.*}}</p>

View File

@ -0,0 +1,5 @@
{{foo.txt}}
{{bar.txt}}
{{transclusion/bat.*}}

14
tests/MMD6Tests/bar.txt Normal file
View File

@ -0,0 +1,14 @@
Title: bar
transclude base: transclusion
This text is included in `bar.txt`.
This can be transcluded without causing an infinite loop -- {{foo.txt}}
```
{{nometa.txt}}
```
```
{{nometa-bom8.txt}}
```

5
tests/MMD6Tests/foo.txt Normal file
View File

@ -0,0 +1,5 @@
Title: foo
This text is included in `foo.txt`.
This should not be transcluded to avoid an infinite loop -- {{foo.txt}}

View File

@ -0,0 +1 @@
<p>This is <em>HTML</em>.</p>

View File

@ -0,0 +1 @@
This is a file with no metadata.

View File

@ -0,0 +1 @@
This is a file with no metadata.