MultiMarkdown-6/Sources/libMultiMarkdown/token.c

710 lines
14 KiB
C

/**
Parser-Template -- Boilerplate parser example using re2c lexer and lemon parser.
@file token.c
@brief Structure and functions to manage tokens representing portions of a
text string.
@author Fletcher T. Penney
@bug
**/
/*
Copyright © 2016 - 2017 Fletcher T. Penney.
The `MultiMarkdown 6` project is released under the MIT License.
GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
https://github.com/fletcher/MultiMarkdown-4/
MMD 4 is released under both the MIT License and GPL.
CuTest is released under the zlib/libpng license. See CuTest.c for the text
of the license.
## The MIT License ##
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include "char.h"
#include "token.h"
#ifdef kUseObjectPool
//!< Use an object pool to allocate tokens more efficiently to improve
//!< performance.
#include "object_pool.h"
static pool * token_pool = NULL; //!< Pointer to our object pool
/// Number of token_pool_init() calls not yet matched by a
/// token_pool_drain() call.  The pool is only drained or freed
/// while this counter is zero.
static short token_pool_count = 0;
/// Initialize the object pool used for token allocation.
///
/// The pool itself is only created when none exists yet; every call
/// increases the usage counter that token_pool_drain() decreases.
void token_pool_init(void) {
	if (!token_pool) {
		// First user -- create a pool sized for `token` objects
		token_pool = pool_new(sizeof(token));
	}

	// One more user of the pool
	token_pool_count += 1;
}
/// Release one use of the token pool and, once no users remain,
/// drain the pool so it is ready for another parse.
void token_pool_drain(void) {
	// One user fewer
	token_pool_count -= 1;

	if (token_pool_count != 0) {
		// Somebody else is still relying on the pooled tokens
		return;
	}

	pool_drain(token_pool);
}
/// Free the token allocator pool.
///
/// The pool is only released (and the pointer reset to NULL) when the usage
/// counter is zero.  Otherwise nothing is freed and an error is reported on
/// stderr.
void token_pool_free(void) {
	if (token_pool_count != 0) {
		// The message names the operation actually attempted (free, not drain)
		fprintf(stderr, "ERROR: Attempted to free token pool while still in use.\n");
		return;
	}

	pool_free(token_pool);
	token_pool = NULL;
}
#endif
/// Allocate and initialize a new token.
///
/// The token gets the given type, start and length, has no neighbors,
/// children or mate, and is its own tail.  Returns NULL when the
/// underlying allocation yields NULL.
token * token_new(unsigned short type, size_t start, size_t len) {
#ifdef kUseObjectPool
	token * fresh = pool_allocate_object(token_pool);
#else
	token * fresh = malloc(sizeof(token));
#endif

	if (fresh == NULL) {
		return fresh;
	}

	// Position and kind
	fresh->type = type;
	fresh->start = start;
	fresh->len = len;

	// Not linked to anything yet; a lone token is the tail of its own chain
	fresh->prev = NULL;
	fresh->next = NULL;
	fresh->child = NULL;
	fresh->tail = fresh;

	// Default to true -- we assume openers can open and closers can close
	// unless specified otherwise (e.g. for ambidextrous tokens)
	fresh->can_open = true;
	fresh->can_close = true;

	// No mate has been assigned
	fresh->unmatched = true;
	fresh->mate = NULL;

	return fresh;
}
/// Create a parent token of the given type for a chain of tokens.
///
/// With a NULL `child`, an empty token (start 0, length 0) is returned.
/// Otherwise the parent starts where `child` starts and its length reaches
/// the end of the last token reachable from `child` via `next`.
/// `child->prev` is cleared so that `child` heads the parent's child chain.
///
/// Returns NULL if the parent could not be allocated; `child` is left
/// untouched in that case.
token * token_new_parent(token * child, unsigned short type) {
	if (child == NULL) {
		return token_new(type, 0, 0);
	}

	token * t = token_new(type, child->start, 0);

	if (t == NULL) {
		// token_new() can return NULL -- do not dereference it
		return NULL;
	}

	t->child = child;
	child->prev = NULL;

	// Ensure that parent length correctly includes all children:
	// walk to the last sibling and measure up to its end
	token * last = child;

	while (last->next != NULL) {
		last = last->next;
	}

	t->len = last->start + last->len - t->start;

	return t;
}
/// Append token `t` to the end of the chain that begins at `chain_start`.
/// `t` may itself be the first token of a chain, in which case the whole
/// chain is attached.  Does nothing if either argument is NULL.
void token_chain_append(token * chain_start, token * t) {
	if (!chain_start || !t) {
		return;
	}

	token * old_tail = chain_start->tail;

	// Hook t in after the current tail
	old_tail->next = t;
	t->prev = old_tail;

	// The chain now ends wherever t's own chain ends
	chain_start->tail = t->tail;
}
/// Append token `t` to the end of `parent`'s child chain and extend the
/// parent's length to the end of the chain's tail token.  `t` may itself
/// be the start of a chain.  Does nothing if either argument is NULL.
void token_append_child(token * parent, token * t) {
	if (!parent || !t) {
		return;
	}

	if (parent->child != NULL) {
		// Add to the end of the existing child chain
		token_chain_append(parent->child, t);
	} else {
		// First child of this parent
		parent->child = t;
	}

	// Parent spans from its own start to the end of the last child
	token * last = parent->child->tail;
	parent->len = last->start + last->len - parent->start;
}
/// Detach and free the first child of `parent`.
/// The following sibling (if any) becomes the new first child and takes
/// over the tail marker of the removed token.
void token_remove_first_child(token * parent) {
	if (parent == NULL) {
		return;
	}

	token * first = parent->child;

	if (first == NULL) {
		return;
	}

	token * second = first->next;
	parent->child = second;

	if (second != NULL) {
		// New head of the chain: nothing before it, same tail as before
		second->prev = NULL;
		second->tail = first->tail;
	}

	token_free(first);
}
/// Detach and free the last child of `parent` (the tail of its child chain).
///
/// If the removed token was the only child, `parent->child` is reset to
/// NULL so the parent never keeps a pointer to a freed token.  The parent's
/// length is not adjusted.
void token_remove_last_child(token * parent) {
	if ((parent == NULL) || (parent->child == NULL)) {
		return;
	}

	token * head = parent->child;
	token * t = head->tail;

	if (t->prev) {
		// Unhook t from its predecessor
		t->prev->next = NULL;
	}

	if (t == head) {
		// The tail is the first child itself -- parent has no children left
		parent->child = NULL;
	} else if (t->prev) {
		// Predecessor becomes the new end of the child chain
		head->tail = t->prev;
	}

	token_free(t);
}
/// Detach and free the last token of the chain starting at `head`.
/// Nothing happens when `head` is NULL or is its own tail.
void token_remove_tail(token * head) {
	if (!head) {
		return;
	}

	token * last = head->tail;

	if (last == head) {
		// Single-token chain: the head is never removed here
		return;
	}

	token * before = last->prev;

	if (before != NULL) {
		before->next = NULL;
		head->tail = before;
	}

	token_free(last);
}
/// Pop a token out of its chain, connecting its former neighbors directly
/// to each other.  The popped token ends up as a stand-alone token (no
/// prev/next, its own tail).  It is not freed here; the caller must free
/// it if it is no longer needed.
/// \todo: If t is the tail token of a chain, the tail is no longer correct on the start of chain.
void token_pop_link_from_chain(token * t) {
	if (!t) {
		return;
	}

	// Remember the neighbors before isolating t
	token * before = t->prev;
	token * after = t->next;

	// Isolate t
	t->next = NULL;
	t->prev = NULL;
	t->tail = t;

	// Close the gap
	if (before != NULL) {
		before->next = after;
	}

	if (after != NULL) {
		after->prev = before;
	}
}
/// Remove the tokens from `first` through `last` from their chain and
/// free them.  The tokens on either side of the removed span are linked to
/// each other.  Does nothing if either argument is NULL.
void tokens_prune(token * first, token * last) {
	if (!first || !last) {
		return;
	}

	token * before = first->prev;
	token * after = last->next;

	// Bridge over the span being removed
	if (before) {
		before->next = after;
	}

	if (after) {
		after->prev = before;
	}

	// Cut the span loose so that freeing stops at `last`
	first->prev = NULL;
	last->next = NULL;

	token_tree_free(first);
}
/// Given a start/stop point in a token chain, wrap the tokens from `first`
/// through `last` in a new container token of type `container_type`.
///
/// The container takes the place of the span in the chain, and the span
/// becomes the container's child chain.  The container can neither open
/// nor close.  If `first` had no predecessor, the container inherits
/// `first`'s tail marker.
///
/// Returns the new container.  Returns `first` unchanged when either
/// argument is NULL, and NULL (leaving the chain untouched) when the
/// container could not be allocated.
token * token_prune_graft(token * first, token * last, unsigned short container_type) {
	if (first == NULL || last == NULL) {
		return first;
	}

	token * prev = first->prev;
	token * next = last->next;

	// If we are head of chain, remember tail
	token * tail = NULL;

	if (prev == NULL) {
		tail = first->tail;
	}

	token * container = token_new(container_type, first->start, last->start + last->len - first->start);

	if (container == NULL) {
		// token_new() can return NULL -- nothing has been modified yet
		return NULL;
	}

	container->child = first;
	container->next = next;
	container->prev = prev;
	container->can_close = false;
	container->can_open = false;

	if (tail) {
		container->tail = tail;
	}

	if (prev) {
		prev->next = container;
	}

	// The span is now a free-standing child chain
	first->prev = NULL;
	last->next = NULL;

	if (next) {
		next->prev = container;
	}

	return container;
}
/// Free a token together with its child tree.
/// When the object pool is in use this does nothing; NULL is ignored.
void token_free(token * t) {
#ifdef kUseObjectPool
	return;
#else

	if (t != NULL) {
		// Children first, then the token itself
		token_tree_free(t->child);
		free(t);
	}

#endif
}
/// Free a token and every token following it via `next` (each one
/// including its children).  When the object pool is in use this does
/// nothing.
void token_tree_free(token * t) {
#ifdef kUseObjectPool
	return;
#else

	// Grab the successor before the current token is released
	for (token * following = NULL; t != NULL; t = following) {
		following = t->next;
		token_free(t);
	}

#endif
}
/// Forward declaration
void print_token_tree(token * t, unsigned short depth, const char * string);

/// Print one token to stderr, followed by its children one level deeper.
///
/// The line is indented with `depth` tab characters and shows the token
/// type, start and length.  When `string` is not NULL, the `len` bytes of
/// `string` starting at the token's start are printed as well.
/// A NULL token prints nothing.
void print_token(token * t, unsigned short depth, const char * string) {
	if (t == NULL) {
		return;
	}

	for (int i = 0; i < depth; ++i) {
		fprintf(stderr, "\t");
	}

	// Offsets are printed with %zu (and cast to size_t) so that the format
	// matches the argument type on every platform
	if (string == NULL) {
		fprintf(stderr, "* (%d) %zu:%zu\n", t->type, (size_t) t->start, (size_t) t->len);
	} else {
		fprintf(stderr, "* (%d) %zu:%zu\t'%.*s'\n", t->type, (size_t) t->start, (size_t) t->len, (int)t->len, &string[t->start]);
	}

	if (t->child != NULL) {
		print_token_tree(t->child, depth + 1, string);
	}
}
/// Print a token and all tokens following it via `next` at the given
/// depth, based on the specified string.
void print_token_tree(token * t, unsigned short depth, const char * string) {
	for (token * cur = t; cur != NULL; cur = cur->next) {
		print_token(cur, depth, string);
	}
}
/// Print a description of a single token at indentation depth 0,
/// based on the specified string.
void token_describe(token * t, const char * string) {
	const unsigned short top_level = 0;

	print_token(t, top_level, string);
}
/// Print a description of a token and all tokens following it, framed by
/// "=====>" and "<=====" marker lines on stderr.
void token_tree_describe(token * t, const char * string) {
	fprintf(stderr, "=====>\n");

	for (token * cur = t; cur != NULL; cur = cur->next) {
		print_token(cur, 0, string);
	}

	fprintf(stderr, "<=====\n");
}
/// Find the child of `parent` whose range contains the given offset.
/// Returns NULL when `parent` is NULL, when the offset lies outside the
/// parent, or when no child contains it.
token * token_child_for_offset(
	token * parent,		//!< Pointer to parent token
	size_t offset		//!< Search position
) {
	if (parent == NULL) {
		return NULL;
	}

	// Offset before the parent, or beyond its end?
	if (offset < parent->start) {
		return NULL;
	}

	if (offset > parent->start + parent->len) {
		return NULL;
	}

	for (token * kid = parent->child; kid != NULL; kid = kid->next) {
		if (kid->start > offset) {
			// This child already starts after the offset -- stop searching
			return NULL;
		}

		if (kid->start + kid->len > offset) {
			return kid;
		}
	}

	return NULL;
}
/// Report whether two character ranges (start + length) overlap.
/// Ranges that merely touch do not count as intersecting.
static bool ranges_intersect(size_t start1, size_t len1, size_t start2, size_t len2) {
	if (start1 >= start2 + len2) {
		// First range begins at or after the end of the second
		return false;
	}

	if (start2 >= start1 + len1) {
		// Second range begins at or after the end of the first
		return false;
	}

	return true;
}
/// Find the first child of `parent` that intersects the given offset range.
/// Returns NULL when `parent` is NULL, when the range lies outside the
/// parent, or when no intersecting child is found.
token * token_first_child_in_range(
	token * parent,		//!< Pointer to parent token
	size_t start,		//!< Start search position
	size_t len			//!< Search length
) {
	if (parent == NULL) {
		return NULL;
	}

	// Range entirely before or entirely after the parent?
	if (parent->start > start + len) {
		return NULL;
	}

	if (parent->start + parent->len < start) {
		return NULL;
	}

	for (token * kid = parent->child; kid != NULL; kid = kid->next) {
		if (ranges_intersect(start, len, kid->start, kid->len)) {
			return kid;
		}

		if (kid->start > start) {
			// Started past the search start without intersecting -- give up
			return NULL;
		}
	}

	return NULL;
}
/// Find the last child of `parent` that intersects the given offset range.
/// Returns NULL when `parent` is NULL, when the range lies outside the
/// parent, or when no intersecting child is found.
token * token_last_child_in_range(
	token * parent,		//!< Pointer to parent token
	size_t start,		//!< Start search position
	size_t len			//!< Search length
) {
	if (parent == NULL) {
		return NULL;
	}

	// Range entirely before or entirely after the parent?
	if (parent->start > start + len) {
		return NULL;
	}

	if (parent->start + parent->len < start) {
		return NULL;
	}

	token * match = NULL;

	for (token * kid = parent->child; kid != NULL; kid = kid->next) {
		if (ranges_intersect(start, len, kid->start, kid->len)) {
			// Keep overwriting so the latest hit wins
			match = kid;
		}

		if (kid->start > start + len) {
			// Past the end of the search range -- stop looking
			break;
		}
	}

	return match;
}
/// Shrink token `t` from the front while the character of `string` at its
/// start is whitespace (as judged by char_is_whitespace) and the token is
/// not empty.
void token_trim_leading_whitespace(token * t, const char * string) {
	while (t->len != 0) {
		if (!char_is_whitespace(string[t->start])) {
			break;
		}

		// Drop one character from the front
		t->start += 1;
		t->len -= 1;
	}
}
/// Shrink token `t` from the back while the last character it covers in
/// `string` is whitespace (as judged by char_is_whitespace) and the token
/// is not empty.
void token_trim_trailing_whitespace(token * t, const char * string) {
	while (t->len != 0) {
		if (!char_is_whitespace(string[t->start + t->len - 1])) {
			break;
		}

		// Drop one character from the back
		t->len -= 1;
	}
}
/// Trim whitespace from both ends of token `t`: leading first, then
/// trailing.
void token_trim_whitespace(token * t, const char * string) {
	// Front of the token
	token_trim_leading_whitespace(t, string);

	// Back of the token
	token_trim_trailing_whitespace(t, string);
}
/// Check whether the first token in the chain matches the given type.
/// If so, return that token and advance `*t` to the next token;
/// otherwise return NULL and leave `*t` alone.
token * token_chain_accept(token ** t, short type) {
	if (t == NULL || *t == NULL) {
		return NULL;
	}

	token * head = *t;

	if (head->type != type) {
		return NULL;
	}

	// Match -- consume it
	*t = head->next;
	return head;
}
/// Like token_chain_accept(), but tries each of the `n` token types passed
/// as variadic `int` arguments, in order, and stops at the first one that
/// is accepted.  Returns the accepted token, or NULL if none matched.
token * token_chain_accept_multiple(token ** t, int n, ...) {
	token * accepted = NULL;
	va_list candidates;

	va_start(candidates, n);

	for (int left = n; left > 0 && accepted == NULL; --left) {
		accepted = token_chain_accept(t, va_arg(candidates, int));
	}

	va_end(candidates);

	return accepted;
}
/// Advance `*t` along the chain until it points at a token of the given
/// type, or becomes NULL when the chain is exhausted.
void token_skip_until_type(token ** t, short type) {
	for (;;) {
		token * cur = *t;

		if (cur == NULL || cur->type == type) {
			return;
		}

		*t = cur->next;
	}
}
/// Advance `*t` along the chain until it points at a token whose type is
/// one of the `n` types passed as variadic `int` arguments, or becomes NULL
/// when the chain is exhausted.
///
/// With `n <= 0` no type can match, so the chain is skipped to its end.
/// Does nothing when `t` itself is NULL.
void token_skip_until_type_multiple(token ** t, int n, ...) {
	if (t == NULL) {
		return;
	}

	// A variable length array must have at least one element, so the
	// storage is never sized below 1 even when no types were passed
	int count = (n > 0) ? n : 0;
	int type[(count > 0) ? count : 1];

	va_list valist;
	va_start(valist, n);

	// Load target types
	for (int i = 0; i < count; ++i) {
		type[i] = va_arg(valist, int);
	}

	// All arguments are consumed -- finish the list here so that every
	// exit path below has a matching va_end()
	va_end(valist);

	for (; *t != NULL; *t = (*t)->next) {
		for (int i = 0; i < count; ++i) {
			if ((*t)->type == type[i]) {
				return;
			}
		}
	}
}
/// Split token `t` into several tokens of the same type at each occurrence
/// of character `c` in the text it covers in `source`.
///
/// The separator character itself is not part of any resulting token.  The
/// last character of the original token is never treated as a separator.
/// New tokens are inserted directly after the token they were split from,
/// with both `next` and `prev` links maintained.  The `tail` marker of the
/// enclosing chain is not updated.
///
/// Does nothing when `t` or `source` is NULL; stops splitting if a new
/// token cannot be allocated.
void token_split_on_char(token * t, const char * source, const char c) {
	if (!t || !source) {
		return;
	}

	size_t start = t->start;	// Start of the original, unsplit token
	size_t stop = t->len;		// Length of the original, unsplit token

	for (size_t pos = 0; pos + 1 < stop; ++pos) {
		if (source[start + pos] != c) {
			continue;
		}

		// Remainder after the separator, up to the end of the original token
		token * piece = token_new(t->type, start + pos + 1, stop - (pos + 1));

		if (piece == NULL) {
			return;
		}

		// Insert piece right after t, keeping the chain doubly linked
		piece->prev = t;
		piece->next = t->next;

		if (piece->next) {
			piece->next->prev = piece;
		}

		t->next = piece;

		// `pos` is relative to the original start, but after the first split
		// `t` begins later -- measure the length from t's own start
		t->len = (start + pos) - t->start;

		t = piece;
	}
}
/// Split a token so that the range [start, start + len) becomes a token of
/// type `new_type`, creating new tokens as needed.
///
/// Nothing happens when `t` is NULL or the range is not fully inside `t`.
/// Depending on where the range sits, the result is:
///  - range == whole token:     `t` simply changes type
///  - range at the front of t:  `t` shrinks to the range and changes type,
///                              followed by a new token of the old type
///  - range at the end of t:    `t` shrinks, followed by a new token of
///                              `new_type`
///  - range in the middle:      `t` shrinks, followed by a new token of
///                              `new_type` and a new token of the old type
/// In every case `t` stays the first token, so pointers to it (and thus to
/// this position in the chain) remain valid.
void token_split(token * t, size_t start, size_t len, unsigned short new_type) {
	if (!t) {
		return;
	}

	size_t stop = start + len;
	size_t t_stop = t->start + t->len;

	// Range must lie completely inside t
	if (start < t->start) {
		return;
	}

	if (stop > t_stop) {
		return;
	}

	// Will we need a leading token?
	bool inset_start = (start > t->start);
	// Will we need a lagging token?
	bool inset_stop = (stop < t_stop);

	if (!inset_start && !inset_stop) {
		// Range covers t exactly
		t->type = new_type;
		return;
	}

	if (!inset_start) {
		// Range sits at the front of t.  Rather than inserting a token before
		// t, t itself takes the new type and the remainder keeps the old one.
		token * rest = token_new(t->type, stop, t_stop - stop);

		rest->prev = t;
		rest->next = t->next;
		t->next = rest;

		if (rest->next) {
			rest->next->prev = rest;
		}

		t->len = stop - t->start;
		t->type = new_type;
		return;
	}

	// Range starts inside t: t keeps the leading part, A is the new token
	token * A = token_new(new_type, start, len);
	token * last = A;	// Last of the newly inserted tokens

	if (inset_stop) {
		// Remainder behind the range keeps the old type
		token * T2 = token_new(t->type, stop, t_stop - stop);

		A->next = T2;
		T2->prev = A;
		last = T2;
	}

	// Splice the new token(s) in between t and its old successor
	last->next = t->next;

	if (t->next) {
		t->next->prev = last;
	}

	t->next = A;
	A->prev = t;

	t->len = start - t->start;
}