Merge branch 'test' of github.com:urbit/urbit into test

This commit is contained in:
Galen Wolfe-Pauly 2015-01-27 09:01:07 -08:00
commit 0bc50d1302
134 changed files with 51771 additions and 805 deletions

0
.d/j/g/.gitignore vendored Normal file
View File

View File

@ -75,10 +75,19 @@ CFLAGS= $(COSFLAGS) -O3 -msse3 -ffast-math \
-Ioutside/re2 \
-Ioutside/cre2/src/src \
-Ioutside/ed25519/src \
-Ioutside/commonmark/src \
-Ioutside/commonmark/build/src \
$(DEFINES) \
$(MDEFINES)
CWFLAGS=-Wall
# TODO remove -Wno-*
CWFLAGS=-Wall \
-Wextra \
-Wno-sign-compare \
-Wno-unused-parameter \
-Wno-missing-field-initializers \
-Wno-error=unused-result \
-Werror
ifdef NO_SILENT_RULES
%.o: %.c $(CORE)
@ -248,6 +257,10 @@ J_F_OFILES_UT=\
j/f/ut_tock.o \
j/f/ut_wrap.o
J_G_OFILES=\
j/g/dawn.o \
j/g/sqar.o
J_OFILES=\
$(J_A_OFILES) \
$(J_B_OFILES) \
@ -257,6 +270,7 @@ J_OFILES=\
$(J_E_OFILES_ED) \
$(J_F_OFILES) \
$(J_F_OFILES_UT) \
$(J_G_OFILES) \
j/tree.o
BASE_OFILES=$(N_OFILES) $(J_OFILES)
@ -330,6 +344,8 @@ LIBED25519=outside/ed25519/ed25519.a
LIBANACHRONISM=outside/anachronism/build/libanachronism.a
LIBCOMMONMARK=outside/commonmark/build/src/libcmark.a
all: vere
.MAKEFILE-VERSION: Makefile make.conf
@ -357,25 +373,28 @@ $(LIBED25519):
$(LIBANACHRONISM):
$(MAKE) -C outside/anachronism static
$(LIBCOMMONMARK):
$(MAKE) -C outside/commonmark
$(CRE2_OFILES): outside/cre2/src/src/cre2.cpp outside/cre2/src/src/cre2.h $(LIBRE2)
$(CXX) $(CXXFLAGS) -c $< $(LIBRE2) -o $@
$(V_OFILES): i/v/vere.h
ifdef NO_SILENT_RULES
$(BIN)/vere: $(LIBCRE) $(VERE_OFILES) $(LIBUV) $(LIBRE2) $(LIBED25519) $(LIBANACHRONISM)
$(BIN)/vere: $(LIBCRE) $(LIBCOMMONMARK) $(VERE_OFILES) $(LIBUV) $(LIBRE2) $(LIBED25519) $(LIBANACHRONISM)
mkdir -p $(BIN)
$(CLD) $(CLDOSFLAGS) -o $(BIN)/vere $(VERE_OFILES) $(LIBUV) $(LIBCRE) $(LIBRE2) $(LIBED25519) $(LIBANACHRONISM) $(LIBS)
$(CLD) $(CLDOSFLAGS) -o $(BIN)/vere $(VERE_OFILES) $(LIBUV) $(LIBCRE) $(LIBRE2) $(LIBED25519) $(LIBANACHRONISM) $(LIBS) $(LIBCOMMONMARK)
else
$(BIN)/vere: $(LIBCRE) $(VERE_OFILES) $(LIBUV) $(LIBRE2) $(LIBED25519) $(LIBANACHRONISM)
$(BIN)/vere: $(LIBCRE) $(LIBCOMMONMARK) $(VERE_OFILES) $(LIBUV) $(LIBRE2) $(LIBED25519) $(LIBANACHRONISM)
@echo " CCLD $(BIN)/vere"
@mkdir -p $(BIN)
@$(CLD) $(CLDOSFLAGS) -o $(BIN)/vere $(VERE_OFILES) $(LIBUV) $(LIBCRE) $(LIBRE2) $(LIBED25519) $(LIBANACHRONISM) $(LIBS)
@$(CLD) $(CLDOSFLAGS) -o $(BIN)/vere $(VERE_OFILES) $(LIBUV) $(LIBCRE) $(LIBRE2) $(LIBED25519) $(LIBANACHRONISM) $(LIBS) $(LIBCOMMONMARK)
endif
$(BIN)/meme: $(LIBCRE) $(MEME_OFILES) $(LIBUV) $(LIBRE2) $(LIBED25519) $(LIBANACHRONISM)
$(BIN)/meme: $(LIBCRE) $(LIBCOMMONMARK) $(MEME_OFILES) $(LIBUV) $(LIBRE2) $(LIBED25519) $(LIBANACHRONISM)
mkdir -p $(BIN)
$(CLD) $(CLDOSFLAGS) -o $(BIN)/meme $(MEME_OFILES) $(LIBUV) $(LIBCRE) $(LIBRE2) $(LIBED25519) $(LIBANACHRONISM) $(LIBS)
$(CLD) $(CLDOSFLAGS) -o $(BIN)/meme $(MEME_OFILES) $(LIBUV) $(LIBCRE) $(LIBRE2) $(LIBED25519) $(LIBANACHRONISM) $(LIBS) $(LIBCOMMONMARK)
tags:
ctags -R -f .tags --exclude=root

View File

@ -12,6 +12,8 @@ Urbit is a new programming and execution environment designed from scratch. Any
resemblance to existing languages or operating systems is coincidental,
cosmetic, or inevitable.
All of the source code is entirely in the public domain.
Install
-------------------
@ -25,21 +27,24 @@ Urbit depends on:
openssl
automake
autoconf
ragel
cmake
re2c
libtool
libssl-dev (Linux only)
ncurses (Linux only)
####Ubuntu or Debian
sudo apt-get install libgmp3-dev libsigsegv-dev openssl libssl-dev libncurses5-dev git make exuberant-ctags automake autoconf libtool g++ ragel
sudo apt-get install libgmp3-dev libsigsegv-dev openssl libssl-dev libncurses5-dev git make exuberant-ctags automake autoconf libtool g++ ragel cmake re2c
####Fedora
sudo yum install gcc gcc-c++ git gmp-devel openssl-devel openssl ncurses-devel libsigsegv-devel ctags automake autoconf libtool
sudo yum install gcc gcc-c++ git gmp-devel openssl-devel openssl ncurses-devel libsigsegv-devel ctags automake autoconf libtool cmake re2c
####AWS
sudo yum --enablerepo epel install gcc git gmp-devel openssl-devel ncurses-devel libsigsegv-devel ctags automake autoconf libtool
sudo yum --enablerepo epel install gcc git gmp-devel openssl-devel ncurses-devel libsigsegv-devel ctags automake autoconf libtool cmake re2c
####OS X
@ -415,7 +420,6 @@ To start chatting, simply type
~waclux-tomwyc/try=> :chat
and type `?` for the list of commands once `:chat` is running.
Most of us are hanging out on `:chat` regularly. We can answer any questions you might have and help you get oriented in this new environment.

View File

@ -61,7 +61,7 @@ types are wordy and annoying. We've replaced them with:
typedef uint8_t c3_t; // boolean
typedef uint8_t c3_o; // loobean
typedef uint8_t c3_g; // 32-bit log - 0-31 bits
typedef uint8_t c3_g; // 5-bit atom for a 32-bit log.
typedef uint32_t c3_l; // little; 31-bit unsigned integer
typedef uint32_t c3_m; // mote; also c3_l; LSB first a-z 4-char string.
@ -93,8 +93,9 @@ modularize the definitions. Keep them alphabetical, though.
### c3: variables and variable naming
The C3 style uses Hoon style TLV variable names, with a quasi
Hungarian syntax. This is weird, but works really well, as
long as what you're doing isn't hideous.
Hungarian syntax. This is weird, but works really well, as long
as what you're doing isn't hideously complicated. (Then it works
badly, but we shouldn't need anything hideous in u3.)
A TLV variable name is a random pronounceable three-letter
string, sometimes with some vague relationship to its meaning,
@ -295,7 +296,7 @@ nouns, which is obviously what most functions do.
In general, though, in most places it's not worth thinking about
what your function does. There is a convention for it, which
depends on where it is, not what it is. Follow the convention.
depends on where it is, not what it does. Follow the convention.
### u3: reference conventions
@ -563,6 +564,27 @@ remote node, render the stacktrace as a consequence of the user's
action - even if its direct cause was (for instance) a Unix
SIGINT or SIGALRM.
### u3: C structures on the loom
Normally, all data on the loom is nouns. Sometimes we break this
rule just a little, though - eg, in the `u3h` hashtables.
To point to non-noun C structs on the loom, we use a `u3_post`,
which is just a loom word offset. A macro lets us declare this
as if it was a pointer:
typedef c3_w u3_post;
#define u3p(type) u3_post
Some may regard this as clever, others as pointless. Anyway, use
`u3to()` and `u3of()` to convert to and from pointers.
When using C structs on the loom - generally a bad idea - make
sure anything which could be on the surface road is structurally
portable, eg, won't change size when the pointer size changes.
(Note also: we consider little-endian, rightly or wrongly, to
have won the endian wars.)
## u3: API overview by prefix
Let's run through the `u3` modules one by one. All public
@ -623,7 +645,6 @@ For descending into a subroad *without* Nock virtualization,
use `u3m_hate()` and `u3m_love` respectively. Hating enters
a subroad; loving leaves it, copying out a product noun.
Other miscellaneous tools in `u3m`: `u3m_file()` loads a Unix
file as a Nock atom; `u3m_water()` measures the boundaries of
the loom in current use (ie, watermarks); and a variety of
@ -631,6 +652,12 @@ prettyprinting routines, none perfect, are available, mainly for
debugging printfs: `u3m_pretty()`, `u3m_p()`, `u3m_tape()` and
`u3m_wall()`.
It's sometimes nice to run a mark-and-sweep garbage collector,
`u3m_grab()`, which collects the world from a list of roots,
and asserts if it finds any leaks or incorrect refcounts. This
tool is for debugging and long-term maintenance only; refcounts
should never err.
### u3j: jets
The jet system, `u3j`, is what makes `u3` and `nock` in any sense
@ -638,9 +665,9 @@ a useful computing environment. Except perhaps `u3a` (there is
really no such thing as a trivial allocator, though `u3a` is
dumber than most) - `u3j` is the most interesting code in `u3`.
Let's consider the minor miracle of jet binding which lets `u3j`
work - and decrement not be `O(n)` - without violating the
precisely defined semantics of pure Nock, *ever*.
Let's consider the minor miracle of driver-to-battery binding
which lets `u3j` work - and decrement not be `O(n)` - without
violating the precisely defined semantics of pure Nock, *ever*.
It's easy to assume that jets represent an architectural coupling
between Hoon language semantics and Nock interpreter internals.
@ -658,7 +685,29 @@ itself.
Except for the arbitrary decision to make a core `[code data]`,
(or as we sometimes say, `[battery payload]`), instead of `[data
code]`, any high-level language transforming itself to Nock would
use this design. So jets are in fact fully general.
use this design.
So jets are in fact fully general. Broadly speaking, the jet
system works by matching a C *driver* to a battery. When the
battery is invoked with Nock operator `9`, it must be found in
associative memory and linked to its driver. Then we link the
formula axis of the operation (`a` in `[9 a b]`) to a specific
function in the driver.
To validate this jet binding, we need to know two things. One,
we need to know the C function actually is a perfect semantic
match for the Nock formula. This can be developed with driver
test flags, which work, and locked down with a secure formula
hash in the driver, which we haven't bothered with just yet.
(You could also try to develop a formal method for verifying
that C functions and Nock formulas are equivalent, but this is
a research problem for the future.)
Two, we need to validate that the payload is appropriate for the
battery. We should note that jets are a Nock feature and have no
reference to Hoon. A driver which relies on the Hoon type system
to only pair it with valid payloads is a broken driver, and
breaks the Nock compliance of the system as a whole. So don't.
Now, a casual observer might look at `[battery payload]` and
expect the simplest case of it to be `[formula subject]`. That
@ -726,8 +775,7 @@ Cold state is associated with the logical execution history of
the pier. It consists entirely of nouns and ignores restarts.
Warm state contains all dependencies between cold and hot
state. It consists of C structures allocated on the loom (with
`u3_post`, ie, a word pointer relative to the loom).
state. It consists of C structures allocated on the loom.
Warm state is purely a function of cold and hot states, and
we can wipe and regenerate it at any time. On any restart where
@ -741,7 +789,7 @@ numbering itself on process initialization. This structure -
which embeds function pointers to all the jets - is defined
in `j/tree.c`. The data structures:
/* u3j_harm: jet arm.
/* u3j_harm: driver arm.
*/
typedef struct _u3j_harm {
c3_c* fcs_c; // `.axe` or name
@ -751,7 +799,7 @@ in `j/tree.c`. The data structures:
c3_o liv; // live (enabled)
} u3j_harm;
/* u3j_core: driver definition.
/* u3j_core: C core driver.
*/
typedef struct _u3j_core {
c3_c* cos_c; // control string
@ -765,9 +813,9 @@ in `j/tree.c`. The data structures:
*/
typedef struct _u3e_dash {
u3j_core* dev_u; // null-terminated static list
c3_l len_l; // dynamic array length
c3_l all_l; // allocated length
u3j_core* ray_u; // dynamic array by axis
c3_l len_l; // ray_u filled length
c3_l all_l; // ray_u allocated length
u3j_core* ray_u; // dynamic driver array
} u3j_dash;
Warm and cold state is *per road*. In other words, as we nest
@ -781,32 +829,27 @@ roads, we also nest jet state. The jet state in the road is:
In case you understand Hoon, `das` (cold state) is a `++dash`,
and `har_p` (warm state) is a map from battery to `++calx`:
++ bane ,@tas :: battery name
++ bash ,@uvH :: ctx identity hash
++ bosh ,@uvH :: local battery hash
++ batt ,* :: battery
++ calx :: cached by battery
$: jax=,@ud :: jet index
pax=,@ud :: parent axis or 0
hap=(map ,@ud ,@ud) :: axis/jet
huc=(map term nock) :: name/tool
== ::
++ chum $? lef=term :: jet name
[std=term kel=@] :: kelvin version
[ven=term pro=term kel=@] :: vendor and product
[ven=term pro=term ver=@ kel=@] :: all of the above
== ::
++ clue (trel chum nock (list (pair term nock))):: battery definition
++ clog (pair cope (map batt (map term nock))) :: identity record
++ cope (trel bane axis (each bash noun)) :: core pattern
++ dash :: jet system
$: sys=(map batt bash) :: battery/identity
haw=(map bash clog) :: identity/core
== ::
++ bane ,@tas :: battery name
++ bash ,@uvH :: label hash
++ bosh ,@uvH :: local battery hash
++ batt ,* :: battery
++ calf ::
$: jax=,@ud :: hot core index
hap=(map ,@ud ,@ud) :: axis/hot arm index
lab=path :: label as path
jit=* :: arbitrary data
== ::
++ calx (trel calf (pair bash cope) club) :: cached by battery
++ clog (pair cope (map batt club)) :: identity record
++ club (pair corp (map term nock)) :: battery pattern
++ cope (trel bane axis (each bash noun)) :: core pattern
++ core ,* :: core
++ corp (each core batt) :: parent or static
++ dash (map bash clog) :: jet system
The jet index in a `++calx` is an index into `ray_u` in the
dashboard - ie, a pointer into hot state. This is why the
warm state has to be reset when we reload the pier.
The driver index `jax` in a `++calx` is an index into `ray_u` in the
dashboard - ie, a pointer into hot state. This is why the warm
state has to be reset when we reload the pier in a new process.
Why is jet state nested? Nock of course is a functional system,
so as we compute we don't explicitly create state. Jet state is
@ -880,10 +923,23 @@ instance, if the core is a Hoon gate - a function - we will call
### u3j: the cold jet dashboard
For even more fun, the jet tree is not actually a tree of
batteries. Rather, *multiple batteries* may share any node in
the jet tree. For instance, it's normal to have two equivalent
Nock batteries at the same time in one pier: one battery compiled
with debugging hints, one not.
batteries. It's a tree of battery *labels*, where a label is
an [axis term] path from the root of the tree. (At the root,
if the core pattern is always followed properly, is a core whose
payload is an atomic constant, conventionally the Hoon version.)
Under each of these labels, it's normal to have an arbitrary
number of different Nock batteries (not just multiple copies
of the same noun, a situation we *do* strive to avoid). For
instance, one might be compiled with debugging hints, one not.
We might even have changed the semantics of the battery without
changing the label - so long as those semantics don't invalidate
any attached driver.
Put another way, multiple batteries may share any node in the
jet tree. For instance, it's normal to have
two equivalent Nock batteries at the same time in one pier: one
battery compiled with debugging hints, one not.
Rather, the jet tree is a semantic hierarchy. The root of the
hierarchy is a constant, by convention the Hoon kernel version
@ -908,41 +964,68 @@ again, it's important to remember that we track jet bindings not
by the core, which may not be static, but by the battery, which
is always static.
(And if you're wondering how we can use a phat noun like a Nock
(And if you're wondering how we can use a deep noun like a Nock
formula or battery as a key in a key-value table, remember
`mug_w`, the lazily computed short hash, in all boxed nouns.)
In any case, `das`, the dashboard, contains `sys`, a map from
battery to battery identity hash (`++bash`), and `haw`, a map
from `bash` to battery record (`++clog`).
In any case, `das`, the dashboard, is a map from `bash` to jet
location record (`++clog`). A `clog` in turn contains two kinds
of information: the `++cope`, or per-location noun; and a map of
batteries to a per-battery `++club`.
A `clog` is a cell whose tail is a hook map, straight from the
user's clue. The head is a `++cope`, which is a triple of
`++bane` (battery name, right now just a `term`); `++axis`,
the axis, within *this* core, of the parent; and `(each bash
noun)`, which is either `[0 bash]` if the parent is another
core, or `[1 noun]`, for the constant noun (like `164`) if
there is no parent core.
The `cope` is a triple of `++bane` (battery name, right now just
a `term`); `++axis`, the axis, within *this* core, of the parent;
and `(each bash noun)`, which is either `[0 bash]` if the parent
is another core, or `[1 noun]`, for the constant noun (like
`164`) if there is no parent core.
A `bash` is just the noun hash (`++sham`) of a `cope`, which
uniquely expresses the battery's self-declared hierarchical
identity without depending on the actual battery code.
uniquely expresses the battery's hierarchical location without
depending on the actual formulas.
The `club` contains a `++corp`, which we use to actually validate
the core. Obviously jet execution has to be perfectly compatible
with Nock. We search on the battery, but getting the battery
right is not enough - a typical battery is dependent on its
context. For example, your jet-propelled library function is
very likely to call `++dec` or other advanced kernel technology.
If you've replaced the kernel in your context with something
else, we need to detect this and not run the jet.
There are two cases for a jet-propelled core - either the entire
core is a static constant, or it isn't. Hence the definition
of `corp`:
++ corp (each core batt) :: parent or static
Ie, a `corp` is `[0 core]` or `[1 batt]`. If it's static -
meaning that the jet only works with one specific core, ie, the
parent axis of each location in the hierarchy is `3` - we can
validate with a single comparison. Otherwise, we have to recurse
upward by checking the parent.
Note that there is at present no way to force a jet to depend on
static *data*.
### u3j: the warm jet dashboard
We don't use the cold state to match jets as we call them; we use
the cold state to register jets as we find them, and also to
We don't use the cold state to match jets as we call them. We
use the cold state to register jets as we find them, and also to
rebuild the warm state after the hot state is reset.
What we actually use at runtime is the warm state, `jed->har_p`,
which is a `u3h` (built-in hashtable), allocated on the loom,
from battery to `++calx`.
A `calx` is a quadruple of `jax`, the jet index, an index into
`ray_u` in `u3j_dash`; `pax`, the parent axis (as in
`cope` above); `hap`, a table from arm axis (ie, the axis of each
formula within the battery) to jet arm index (into `arm_u` in
`u3j_core`); and `huc`, the hook table (as in `clog`).
A `calx` is a triple of a `++calf`, a `[bash cope]` cell, and a
`club`. The latter two are both straight from cold state.
The `calf` contains warm data dependent on hot state. It's a
quadruple of `jax`, the hot driver index (in `ray_u` in
`u3j_dash`); `hap`, a table from arm axis (ie, the axis of each
formula within the battery) to driver arm index (into `arm_u` in
`u3j_core`); `lab`, the complete label path; and `jit`, any
other dynamic data that may speed up execution.
We construct `hap`, when we create the calx, by iterating through
the arms registered in the `u3j_core`. Note the way a `u3j_harm`
@ -955,6 +1038,10 @@ and it would be sad to have to manage their axes by hand. To use
an `fcs_c` with a named arm, it's sufficient to make sure the
name is bound to a formula `[0 axis]` in the hook table.
`jit`, as its name suggests, is a stub where any sort of
optimization data computed on battery registration might go. To
use it, fill in the `_cj_jit()` function.
### u3j: the hot dashboard
Now it should be easy to see how we actually invoke jets. Every
@ -965,21 +1052,22 @@ will try to execute them.
Because nouns with a reference count of 1 are precious,
`u3j_kick()` has a tricky reference control definition. It
reserves the right to return `u3_none` in the case where there is
no jet, or the jet does not apply for this case; in this case, it
does not consume its argument `cor`. Otherwise, it does.
no driver, or the driver does not apply for this case; in this
case, it retains argument `cor`. If it succeeds, though, it
transfers `cor`.
`u3j_kick()` searches for the battery (always the head of the
core, of course) in the hot dashboard. If the battery is
registered, it searches for the axis in `hap` in the `calx`.
If it exists, the core has a driver and the driver has a jet for
this arm, which we can try to call. If not, we return `u3_none`.
If it exists, the core matches a driver and the driver jets this
arm. If not, we return `u3_none`.
Otherwise, we call `fun_f` in our `u3j_harm`. This obeys the
same protocol as `u3j_kick()`; it can refuse to function by
returning `u3_none`, or consume the noun.
Besides the actual function pointer `fun_f`, we have some flags
in the `u3j_harm` which tell us how to call the jet.
in the `u3j_harm` which tell us how to call the arm function.
If `ice` is yes (`&`, `0`), the jet is known to be perfect and we
can just trust the product of `fun_f`. Otherwise, we need to run
@ -992,13 +1080,13 @@ When auto-testing jets in this way, the principle is that the
test is on the outermost layer of recursion.)
(Note also that anyone who multi-threads this execution
environment has a slight locking problem with these flags if jet
environment has a slight locking problem with these flags if arm
testing is multi-threaded.)
If `tot` is yes, (`&`, `0`), the jet is *total* and has to return
properly (though it can still return *u3_none*). Otherwise, it
is *partial* and can `u3_cm_bail()` out with c3__punt. This
feature has a cost: the jet runs in a subroad.
If `tot` is yes, (`&`, `0`), the arm function is *total* and has
to return properly (though it can still return *u3_none*).
Otherwise, it is *partial* and can `u3m_bail()` out with
c3__punt. This feature has a cost: the jet runs in a subroad.
Finally, if `liv` is no (`|`, 1), the jet is off and doesn't run.
@ -1014,7 +1102,7 @@ then appears ready for action.
### u3j: jet functions
At present, all jets are compiled statically into `u3`. This is
At present, all drivers are compiled statically into `u3`. This is
not a long-term permanent solution or anything. However, it will
always be the case with a certain amount of core functionality.
@ -1031,7 +1119,8 @@ shared.
`u3w` interfaces use the same protocol as `fun_f` above: the
caller passes the entire core, which is retained if the function
returns `u3_none`, transferred otherwise.
returns `u3_none`, transferred otherwise. Why? Again, use
counts of 1 are special and precious for performance hackers.
`u3q` interfaces break the core into C arguments, *retain* noun
arguments, and *transfer* noun returns. `u3k` interfaces are the
@ -1055,8 +1144,8 @@ transfer both arguments and results.
### u3a: allocation functions
`u3a` allocates on the current road (u3R). Its internal structures
are entirely uninteresting and typical of a naive allocator.
`u3a` allocates on the current road (u3R). Its internal
structures are uninteresting and typical of a naive allocator.
The two most-used `u3a` functions are `u3a_gain()` to add a
reference count, and `u3a_lose()` to release one (and free the
@ -1067,7 +1156,415 @@ the macros `u3k()` and `u3z()` respectively.
Normally we create nouns through `u3i` functions, and don't call
the `u3a` allocators directly. But if you do:
One, there are *two* sets of allocators: the word-aligned alloca
One, there are *two* sets of allocators: the word-aligned
allocators and the fully-aligned (ie, malloc compatible)
allocators. For instance, on a typical OS X setup, malloc
produces 16-byte aligned results - needed for some SSE
instructions.
These allocators are *not compatible*. For 32-bit alignment
as used in nouns, call
/* u3a_walloc(): allocate storage measured in words.
*/
void*
u3a_walloc(c3_w len_w);
/* u3a_wfree(): free storage.
*/
void
u3a_wfree(void* lag_v);
/* u3a_wealloc(): word realloc.
*/
void*
u3a_wealloc(void* lag_v, c3_w len_w);
For full alignment, call:
/* u3a_malloc(): aligned storage measured in bytes.
*/
void*
u3a_malloc(size_t len_i);
/* u3a_realloc(): aligned realloc in bytes.
*/
void*
u3a_realloc(void* lag_v, size_t len_i);
/* u3a_realloc2(): gmp-shaped realloc.
*/
void*
u3a_realloc2(void* lag_v, size_t old_i, size_t new_i);
/* u3a_free(): free for aligned malloc.
*/
void
u3a_free(void* tox_v);
/* u3a_free2(): gmp-shaped free.
*/
void
u3a_free2(void* tox_v, size_t siz_i);
There is also a set of special-purpose allocators for building
atoms. When building atoms, please remember that it's incorrect
to have a high 0 word - the word length in the atom structure
must be strictly correct.
Of course, we don't always know how large our atom will be.
Therefore, the standard way of building large atoms is to
allocate a block of raw space with `u3a_slab()`, then chop off
the end with `u3a_malt()` (which does the measuring itself)
or `u3a_mint()` in case you've measured it yourself.
Once again, *do not call `malloc()`* (or C++ `new`) within any
code that may be run within a jet. This will cause rare sporadic
corruption when we interrupt execution within a `malloc()`. We'd
just override the symbol, but `libuv` uses `malloc()` across
threads within its own synchronization primitives - for this to
work with `u3a_malloc()`, we'd have to introduce our own locks on
the surface-level road (which might be a viable solution).
### u3n: nock execution
The `u3n` routines execute Nock itself. On the inside, they have
a surprising resemblance to the spec proper (the only interesting
detail is how we handle tail-call elimination) and are, as one
would expect, quite slow. (There is no such thing as a fast tree
interpreter.)
There is only one Nock, but there are lots of ways to call it.
(Remember that all `u3n` functions *transfer* C arguments and
returns.)
The simplest interpreter, `u3n_nock_on(u3_noun bus, u3_noun fol)`
invokes Nock on `bus` (the subject) and `fol` (the formula).
(Why is it `[subject formula]`, not `[formula subject]`? The same
reason `0` is true and `1` is false.)
A close relative is `u3n_slam_on(u3_noun gat, u3_noun sam)`,
which slams a *gate* (`gat`) on a sample (`sam`). (In a normal
programming language which didn't talk funny and was retarded,
`u3n_slam_on()` would call a function on an argument.) We could
write it most simply as:
u3_noun
u3n_slam_on(u3_noun gat, u3_noun sam)
{
u3_noun pro = u3n_nock_on
(u3nc(u3k(u3h(gat)),
u3nc(sam, u3k(u3t(u3t(gat))))),
u3k(u3h(gat)));
u3z(gat);
return pro;
}
Simpler is `u3n_kick_on(u3_noun gat)`, which slams a gate (or,
more generally, a *trap* - because sample structure is not even
needed here) without changing its sample:
u3_noun
u3n_kick_on(u3_noun gat)
{
return u3n_nock_on(gat, u3k(u3h(gat)));
}
The `_on` functions in `u3n` are all defined as pure Nock. But
actually, even though we say we don't extend Nock, we do. But we
don't. But we do.
Note that `u3` has a well-developed error handling system -
`u3m_bail()` to throw an exception, `u3m_soft_*` to catch one.
But Nock has no exception model at all. That's okay - all it
means is that if an `_on` function bails, the exception is an
exception in the caller.
However, `u3`'s exception handling happens to match a convenient
virtual super-Nock in `hoon.hoon`, the infamous `++mock`. Of
course, Nock is slow, and `mock` is Nock in Nock, so it is
(logically) super-slow. Then again, so is decrement.
With the power of `u3`, we nest arbitrary layers of `mock`
without any particular performance cost. Moreover, we simply
treat Nock proper as a special case of `mock`. (More precisely,
the internal VM loop is `++mink` and the error compiler is
`++mook`. But we call the whole sandbox system `mock`.)
The nice thing about `mock` functions is that (by executing
within `u3m_soft_run()`, which as you may recall uses a nested
road) they provide both exceptions and the namespace operator -
`.^` in Hoon, which becomes operator `11` in `mock`.
`11` requires a namespace function, or `fly`, which produces a
`++unit` - `~` (`0`) for no binding, or `[0 value]`. The sample
to a `fly` is a `++path`, just a list of text `span`.
`mock` functions produce a `++toon`. Fully elaborated:
++ noun ,* :: any noun
++ path (list ,@ta) :: namespace path
++ span ,@ta :: text-atom (ASCII)
++ toon $% [%0 p=noun] :: success
[%1 p=(list path)] :: blocking paths
[%2 p=(list tank)] :: stack trace
== ::
++ tank :: printable
$% [%leaf p=tape] :: flat text
$: %palm :: backstep list
p=[p=tape q=tape r=tape s=tape] :: mid cap open close
q=(list tank) :: contents
== ::
$: %rose :: straight list
p=[p=tape q=tape r=tape] :: mid open close
q=(list tank) :: contents
== ::
==
(Note that `tank` is overdesigned and due for replacement.)
What does a `toon` mean? Either your computation succeeded (`[0
noun]`), or could not finish because it blocked on one or more
global paths (`[1 (list path)]`), or it exited with a stack trace
(`[2 (list tank)]`).
Note that of all the `u3` exceptions, only `%exit` is produced
deterministically by the Nock definition. Therefore, only
`%exit` produces a `2` result. Any other argument to
`u3m_bail()` will unwind the virtualization stack all the way to
the top - or to be more exact, to `u3m_soft_top()`.
In any case, the simplest `mock` functions are `u3n_nock_un()`
and `u3n_slam_un()`. These provide exception control without
any namespace change, as you can see by the code:
/* u3n_nock_un(): produce .*(bus fol), as ++toon.
*/
u3_noun
u3n_nock_un(u3_noun bus, u3_noun fol)
{
u3_noun fly = u3nt(u3nt(11, 0, 6), 0, 0); // |=(a=* .^(a))
return u3n_nock_in(fly, bus, fol);
}
/* u3n_slam_un(): produce (gat sam), as ++toon.
*/
u3_noun
u3n_slam_un(u3_noun gat, u3_noun sam)
{
u3_noun fly = u3nt(u3nt(11, 0, 6), 0, 0); // |=(a=* .^(a))
return u3n_slam_in(fly, gat, sam);
}
The `fly` is added as the first argument to `u3n_nock_in()` and
`u3n_slam_in()`. Of course, logically, `fly` executes in the
caller's exception layer. (Maintaining this illusion is slightly
nontrivial.) Finally, `u3n_nock_an()` is a sandbox with a null
namespace.
### u3e: persistence
The only `u3e` function you should need to call is `u3e_save()`,
which saves the loom. As it can be restored on any platform,
please make sure you don't have any state in the loom that is
bound to your process or architecture - except for exceptions
like the warm jet state, which is actively purged on reboot.
### u3r: reading nouns (weak)
As befits accessors, which don't make anything, `u3r` noun reading
functions always retain their arguments and their returns. They
never bail; rather, when they don't work, they return a `u3_weak`
result.
Most of these functions are straightforward and do only what
their comments say. A few are interesting enough to discuss.
`u3r_at()` is the familiar tree fragment function, `/` from the
Nock spec. For taking complex nouns apart, `u3r_mean()` is a
relatively funky way of deconstructing nouns with a varargs list
of `axis`, `u3_noun *`. For cells, triples, etc, decompose with
`u3r_cell()`, `u3r_trel()`, etc. For the tagged equivalents, use
`u3r_pq()` and friends.
`u3r_sing(u3_noun a, u3_noun b)` (true if `a` and `b` are a
*single* noun) is interesting because it uses mugs to help it
out. Clearly, different nouns may have the same mug, but the
same nouns cannot have a different mug. It's important to
understand the performance characteristics of `u3r_sing()`:
the worst possible case is a comparison of duplicate nouns,
which have the same value but were created separately. In this
case, the tree is traversed in full.
`u3r_sung()` is a deeply funky and frightening version of
`u3r_sing()` that unifies pointers to the duplicate nouns it
finds, freeing the second copy. Obviously, do not use
`u3r_sung()` when you have live, but not reference counted, noun
references from C - if they match a noun with a refcount of 1
that gets freed, bad things happen.
It's important to remember that `u3r_mug()`, which produces a
31-bit, nonzero insecure hash, uses the `mug_w` slot in any boxed
noun as a lazy cache. There are a number of variants of
`u3r_mug()` that can get you out of building unneeded nouns.
### u3x: reading nouns (bail)
`u3x` functions are like `u3r` functions, but instead of
returning `u3_none` when (for instance) we try to take the head
of an atom, they bail with `%exit`. In other words, they do what
the same operation would do in Nock.
### u3h: hash tables.
We can of course use the Hoon `map` structure as an associative
array. This is a balanced treap and reasonably fast. However,
it's considerably inferior to a custom structure like an HAMT
(hash array-mapped trie). We use `u3_post` to allocate HAMT
structures on the loom.
(Our HAMT implements the classic Bagwell algorithm which depends
on the `gcc` standard directive `__builtin_popcount()`. On a CPU
which doesn't support popcount or an equivalent instruction, some
other design would probably be preferable.)
There's no particular rocket science in the API. `u3h_new()`
creates a hashtable; `u3h_free()` destroys one; `u3h_put()`
inserts, `u3h_get()` retrieves. You can transform values in a
hashtable with `u3h_walk()`.
The only funky function is `u3h_gut()`, which unifies keys with
`u3r_sung()`. As with all cases of `u3r_sung()`, this must be
used with extreme caution.
### u3z: memoization
Connected to the `~+` rune in Hoon, via the Nock `%memo` hint,
the memoization facility is a general-purpose cache.
(It's also used for partial memoization - a feature that'll
probably be removed, in which conservative worklist algorithms
(which would otherwise be exponential) memoize everything in the
subject *except* the worklist. This is used heavily in the Hoon
compiler jets (j/f/*.c). Unfortunately, it's probably not
possible to make this work perfectly in that it can't be abused
to violate Nock, so we'll probably remove it at a later date,
instead making `++ut` keep its own monadic cache.)
Each `u3z` function comes with a `c3_m` mote which disambiguates
the function mapping key to value. For Nock itself, use 0. For
extra speed, small tuples are split out in C; thus, find with
u3_weak u3z_find(c3_m, u3_noun);
u3_weak u3z_find_2(c3_m, u3_noun, u3_noun);
u3_weak u3z_find_3(c3_m, u3_noun, u3_noun, u3_noun);
u3_weak u3z_find_4(c3_m, u3_noun, u3_noun, u3_noun, u3_noun);
and save with
u3_noun u3z_save(c3_m, u3_noun, u3_noun);
u3_noun u3z_save_2(c3_m, u3_noun, u3_noun, u3_noun);
u3_noun u3z_save_3(c3_m, u3_noun, u3_noun, u3_noun, u3_noun);
u3_noun u3z_save_4(c3_m, u3_noun, u3_noun, u3_noun, u3_noun, u3_noun);
where the value is the last argument. To eliminate duplicate
nouns, there is also
u3_noun
u3z_uniq(u3_noun);
`u3z` functions retain keys and transfer values.
The `u3z` cache, built on `u3h` hashes, is part of the current
road, and goes away when it goes away. (In future, we may wish
to promote keys/values which outlive the road, as we do with jet
state.) There is no cache reclamation at present, so be careful.
### u3t: tracing and profiling.
TBD.
### u3v: the Arvo kernel
An Arvo kernel - or at least, a core that complies with the Arvo
interface - is part of the global `u3` state. What is an Arvo
core? Slightly pseudocoded:
++ arvo
|%
++ come |= [yen=@ ova=(list ovum) nyf=pone] :: 11
^- [(list ovum) _+>]
!!
++ keep |= [now=@da hap=path] :: 4
^- (unit ,@da)
!!
++ load |= [yen=@ ova=(list ovum) nyf=pane] :: 86
^- [(list ovum) _+>]
!!
++ peek |= [now=@da path] :: 87
^- (unit)
!!
++ poke |= [now=@da ovo=ovum] :: 42
^- [(list ovum) _+>]
!!
++ wish |= txt=@ta :: 20
^- *
!!
--
++ card ,[p=@tas q=*] :: typeless card
++ ovum ,[p=wire q=card] :: Arvo event
++ wire path :: event cause
This is the Arvo ABI in a very real sense. Arvo is a core with
these six arms. To use these arms, we hardcode the axis of the
formula (`11`, `4`, `86`, etc) into the C code that calls Arvo,
because otherwise we'd need type metadata - which we can get, by
calling Arvo.
It's important to understand the Arvo event/action structure, or
`++ovum`. An `ovum` is a cell of a `++wire` - a `path` which
indicates the location of the event - and a `card`, which is any
`[term noun]` cell. At the Unix level, the `wire` corresponds to a system
module or context. For input events, this is the module that
caused the event; for output actions, it's the module that
performs the action.
`++poke` sends Arvo an event `ovum`, producing a cell of action
ova and a new Arvo core.
`++peek` dereferences the Arvo namespace. It takes a date and a
key, and produces `~` (`0`) or `[~ value]`.
`++keep` asks Arvo the next time it wants to be woken up, for the
given `wire`. (This input will probably be eliminated in favor
of a single global timer.)
`++wish` compiles a string of Hoon source. While just a
convenience, it's a very convenient convenience.
`++come` and `++load` are used by Arvo to reset itself (more
precisely, to shift the Arvo state from an old kernel to a new
one); there is no need to call them from C.
Now that we understand the Arvo kernel interface, let's look at
the `u3v` API. As usual, all the functions in `u3v` are
commented, but unfortunately it's hard to describe this API as
clean at present. The problem is that `u3v` remains design
coupled to the old `vere` event handling code written for `u2`.
But let's describe the functions you should be calling, assuming
you're not writing the next event system. There are only two.
`u3v_wish(str_c)` wraps the `++wish` functionality in a cache
(which is read-only unless you're on the surface road).
`u3v_do()` uses `wish` to provide a convenient interface for
calling Hoon kernel functions by name. Even more conveniently,
we tend to call `u3v_do()` with these convenient aliases:
#define u3do(txt_c, arg) u3v_do(txt_c, arg)
#define u3dc(txt_c, a, b) u3v_do(txt_c, u3nc(a, b))
#define u3dt(txt_c, a, b, c) u3v_do(txt_c, u3nt(a, b, c))
#define u3dq(txt_c, a, b, c, d) u3v_do(txt_c, u3nq(a, b, c, d))
Most of its functions are

View File

@ -20,6 +20,7 @@
# include "n/j.h" // u3j: jet control
# include "n/m.h" // u3m: master state
# include "n/n.h" // u3n: nock execution
# include "n/o.h" // u3o: config options
# include "n/r.h" // u3r: noun access (error returns)
# include "n/t.h" // u3t: profiling / tracing
# include "n/x.h" // u3x: noun access (error crashes)

View File

@ -79,6 +79,7 @@
# define c3__blin c3_s4('b','l','i','n')
# define c3__blit c3_s4('b','l','i','t')
# define c3__blog c3_s4('b','l','o','g')
# define c3__bloq c3_s4('b','l','o','q')
# define c3__blot c3_s4('b','l','o','t')
# define c3__blue c3_s4('b','l','u','e')
# define c3__blur c3_s4('b','l','u','r')
@ -272,6 +273,7 @@
# define c3__dec c3_s3('d','e','c')
# define c3__deem c3_s4('d','e','e','m')
# define c3__deep c3_s4('d','e','e','p')
# define c3__defn c3_s4('d','e','f','n')
# define c3__del c3_s3('d','e','l')
# define c3__delc c3_s4('d','e','l','c')
# define c3__delt c3_s4('d','e','l','t')
@ -339,6 +341,7 @@
# define c3__edit c3_s4('e','d','i','t')
# define c3__elm c3_s3('e','l','m')
# define c3__else c3_s4('e','l','s','e')
# define c3__emph c3_s4('e','m','p','h')
# define c3__end c3_s3('e','n','d')
# define c3__eq c3_s2('e','q')
# define c3__esh c3_s3('e','s','h')
@ -497,6 +500,7 @@
# define c3__hosc c3_s4('h','o','s','c')
# define c3__hose c3_s4('h','o','s','e')
# define c3__howl c3_s4('h','o','w','l')
# define c3__hrul c3_s4('h','r','u','l')
# define c3__hsbn c3_s4('h','s','b','n')
# define c3__hsbr c3_s4('h','s','b','r')
# define c3__hscn c3_s4('h','s','c','n')
@ -510,6 +514,8 @@
# define c3__hsts c3_s4('h','s','t','s')
# define c3__htcn c3_s4('h','t','c','n')
# define c3__htls c3_s4('h','t','l','s')
# define c3__html c3_s4('h','t','m','l')
# define c3__htmt c3_s4('h','t','m','t')
# define c3__http c3_s4('h','t','t','p')
# define c3__hume c3_s4('h','u','m','e')
# define c3__hunk c3_s4('h','u','n','k')
@ -530,6 +536,7 @@
# define c3__inuk c3_s4('i','n','u','k')
# define c3__iron c3_s4('i','r','o','n')
# define c3__is c3_s2('i','s')
# define c3__item c3_s4('i','t','e','m')
# define c3__ix c3_s2('i','x')
# define c3__jack c3_s4('j','a','c','k')
# define c3__jamx c3_s4('j','a','m','x')
@ -610,6 +617,7 @@
# define c3__lint c3_s4('l','i','n','t')
# define c3__liqd c3_s4('l','i','q','d')
# define c3__lisc c3_s4('l','i','s','c')
# define c3__list c3_s4('l','i','s','t')
# define c3__lite c3_s4('l','i','t','e')
# define c3__live c3_s4('l','i','v','e')
# define c3__load c3_s4('l','o','a','d')
@ -743,6 +751,7 @@
# define c3__pane c3_s4('p','a','n','e')
# define c3__pang c3_s4('p','a','n','g')
# define c3__pank c3_s4('p','a','n','k')
# define c3__para c3_s4('p','a','r','a')
# define c3__parq c3_s4('p','a','r','q')
# define c3__part c3_s4('p','a','r','t')
# define c3__pass c3_s4('p','a','s','s')

View File

@ -17,6 +17,7 @@
*** C file.
**/
# if defined(U3_OS_linux)
# include <inttypes.h>
# include <stdlib.h>
# include <string.h>
# include <stdarg.h>
@ -32,6 +33,7 @@
# include <sys/mman.h>
# elif defined(U3_OS_osx)
# include <inttypes.h>
# include <stdlib.h>
# include <string.h>
# include <stdarg.h>
@ -48,6 +50,7 @@
# include <sys/mman.h>
# elif defined(U3_OS_bsd)
# include <inttypes.h>
# include <stdlib.h>
# include <string.h>
# include <stdarg.h>
@ -70,7 +73,7 @@
/** Address space layout.
**/
# if defined(U3_OS_linux)
# define U3_OS_LoomBase 0x404db000
# define U3_OS_LoomBase 0x36000000
# define U3_OS_LoomBits 29 // ie, 2^29 words == 2GB
# elif defined(U3_OS_osx)
# ifdef __LP64__

View File

@ -195,3 +195,9 @@
u3_noun u3wfu_snub(u3_noun);
u3_noun u3wfu_tock(u3_noun);
u3_noun u3wfu_wrap(u3_noun);
/** Tier 7.
**/
u3_noun u3wg_dawn(u3_noun);
u3_noun u3wg_sqar(u3_noun);

40
i/n/a.h
View File

@ -41,7 +41,7 @@
/* u3a_fbox_no: number of free lists per size.
*/
# define u3a_fbox_no 28
# define u3a_fbox_no 27
/** Structures.
@ -125,6 +125,7 @@
struct { // allocation pools
u3p(u3a_fbox) fre_p[u3a_fbox_no]; // heap by node size log
u3p(u3a_fbox) cel_p; // custom cell allocator
c3_w fre_w; // number of free words
} all;
@ -144,8 +145,8 @@
struct { // profile stack
c3_d nox_d; // nock steps
u3_noun don; // ++path
u3_noun day; // profile data, ++doss
u3_noun don; // (list batt)
u3_noun day; // doss, only in u3H (moveme)
} pro;
struct { // memoization
@ -154,13 +155,10 @@
} u3a_road;
typedef u3a_road u3_road;
/* u3a_flag: flags for how.fag_w.
/* u3a_flag: flags for how.fag_w. All arena related.
*/
enum u3a_flag {
u3a_flag_debug = 0x1, // debug memory
u3a_flag_gc = 0x2, // garbage collect once
u3a_flag_sand = 0x4, // sand mode, bump allocation
u3a_flag_die = 0x8 // process was asked to exit
u3a_flag_sand = 0x1, // bump allocation (XX not impl)
};
@ -207,9 +205,6 @@
# define u3a_into(x) ((void *)(u3_Loom + (x)))
# define u3a_outa(p) (((c3_w*)(void*)(p)) - u3_Loom)
# define u3to(type, x) ((type *) u3a_into(x))
# define u3of(type, x) (u3a_outa((type *)x))
# define u3a_is_north(r) __(r->cap_p > r->hat_p)
# define u3a_is_south(r) !u3a_is_north(r)
@ -282,10 +277,15 @@
void*
u3a_walloc(c3_w len_w);
/* u3a_wdrop(): free storage.
/* u3a_celloc(): allocate a cell. Faster, sometimes.
*/
c3_w*
u3a_celloc(void);
/* u3a_wfree(): free storage.
*/
void
u3a_wdrop(void* lag_v);
u3a_wfree(void* lag_v);
/* u3a_wealloc(): word realloc.
*/
@ -299,6 +299,11 @@
void*
u3a_malloc(size_t len_i);
/* u3a_calloc(): aligned storage measured in bytes.
*/
void*
u3a_calloc(size_t num_i, size_t len_i);
/* u3a_realloc(): aligned realloc in bytes.
*/
void*
@ -351,6 +356,11 @@
c3_w
u3a_use(u3_noun som);
/* u3a_luse(): check refcount sanity.
*/
void
u3a_luse(u3_noun som);
/* u3a_mark_ptr(): mark a pointer for gc. Produce size.
*/
c3_w
@ -369,7 +379,7 @@
/* u3a_sweep(): sweep a fully marked road.
*/
void
u3a_sweep(c3_c* cap_c);
u3a_sweep(void);
/* u3a_sane(): check allocator sanity.
*/
@ -398,7 +408,7 @@
c3_w*
u3a_slab(c3_w len_w);
/* u3a_slaq(): u3a_slaq() with a defined blocksize.
/* u3a_slaq(): u3a_slab() with a defined blocksize.
*/
c3_w*
u3a_slaq(c3_g met_g, c3_w len_w);

17
i/n/e.h
View File

@ -18,7 +18,7 @@
c3_w nor_w; // new page count north
c3_w sou_w; // new page count south
c3_w pgs_w; // number of changed pages
u3e_line mem_u[0]; // per page
u3e_line mem_u[0]; // per page
} u3e_control;
/* u3_cs_patch: memory change, top level.
@ -73,21 +73,6 @@
c3_o
u3e_live(c3_o nuu_o, c3_c* dir_c);
/* u3e_boot(): start the u3 system.
*/
void
u3e_boot(c3_o nuu_o, c3_o bug_o, c3_c* dir_c);
/* u3e_init(): start the environment, with/without checkpointing.
*/
void
u3e_init(c3_o chk_o);
/* u3e_grab(): garbage-collect the world, plus extra roots.
*/
void
u3e_grab(c3_c* cap_c, u3_noun som, ...); // terminate with u3_none
/* u3e_dirty(): count dirty pages.
*/
c3_w

View File

@ -92,11 +92,18 @@
/* u3h_get(): read from hashtable.
**
** `key` is RETAINED.
** `key` is RETAINED; result is PRODUCED.
*/
u3_weak
u3h_get(u3p(u3h_root) har_p, u3_noun key);
/* u3h_git(): read from hashtable, retaining result.
**
** `key` is RETAINED; result is RETAINED.
*/
u3_weak
u3h_git(u3p(u3h_root) har_p, u3_noun key);
/* u3h_gut(): read from hashtable, unifying key nouns.
**
** `key` is RETAINED.

20
i/n/j.h
View File

@ -6,18 +6,20 @@
**/
#if 0
++ bane ,@tas :: battery name
++ bash ,@uvH :: ctx identity hash
++ bosh ,@uvH :: local battery hash
++ bash ,@uvH :: label hash
++ bosh ,@uvH :: battery hash
++ batt ,* :: battery
++ calf ::
$: jax=,@ud :: hot core index
hap=(map ,@ud ,@ud) :: axis/hot arm index
lab=path :: label as path
jit=* :: arbitrary data
== ::
++ calx (trel calf (pair bash cope) club) :: cached by battery
++ clog (pair cope (map batt club)) :: identity record
++ clog (pair cope (map batt club)) :: label record
++ club (pair corp (map term nock)) :: battery pattern
++ cope (trel bane axis (each bash noun)) :: core pattern
++ core ,*
++ corp (each core batt) :: parent or static
++ dash (map bash clog) :: jet system
#endif
@ -42,7 +44,7 @@
/* u3j_core: driver definition.
*/
typedef struct _u3j_core {
c3_c* cos_c; // control string
c3_c* cos_c; // control string
struct _u3j_harm* arm_u; // blank-terminated static list
struct _u3j_core* dev_u; // blank-terminated static list
struct _u3j_core* par_u; // dynamic parent pointer
@ -66,7 +68,6 @@
extern u3j_dash u3j_Dash;
# define u3D u3j_Dash
/** Functions.
**/
/* u3j_boot(): initialize jet system.
@ -95,11 +96,9 @@
u3j_soft(u3_noun cor,
const c3_c* tam_c);
/* u3j_find(): battery to driver number, or 0.
**
** `bat` is RETAINED by the caller.
/* u3j_find(): in warm state, return u3_none or calx. RETAINS.
*/
c3_l
u3_weak
u3j_find(u3_noun bat);
/* u3j_kick(): try to kick by jet. If no kick, produce u3_none.
@ -108,8 +107,7 @@
** is no kick, TRANSFERRED if one.
*/
u3_weak
u3j_kick(u3_noun cor,
u3_noun axe);
u3j_kick(u3_noun cor, u3_noun axe);
/* u3j_kink(): kick either by jet or by nock.
*/

View File

@ -81,6 +81,11 @@
u3_noun
u3m_soft_esc(u3_noun sam);
/* u3m_grab(): garbage-collect the world, plus extra roots.
*/
void
u3m_grab(u3_noun som, ...); // terminate with u3_none
/* u3m_water(): produce high and low watermarks. Asserts u3R == u3H.
*/
void

35
i/n/o.h Normal file
View File

@ -0,0 +1,35 @@
/* i/n/o.h
**
** This file is in the public domain.
*/
/** Data structures.
**/
/* u3o_config: process / system configuration.
**
** A single global instance, u3o_Config (aliased as u3C), is declared
** below. `wag_w` holds a bitwise OR of `enum u3o_flag` values.
*/
typedef struct _u3o_config {
u3_noun who; // single identity
c3_c* dir_c; // execution directory (pier)
c3_w wag_w; // flags (both ways), see enum u3o_flag
} u3o_config;
/* u3o_flag: bit flags stored in u3o_config.wag_w.
**
**   _debug flags are set outside u3 and heard inside it.
**   _check flags are set inside u3 and heard outside it.
**
**   Each flag occupies its own bit, so flags combine with `|` and
**   are tested with `&`.
*/
  enum u3o_flag {
    u3o_debug_ram     = 1 << 0,       //  debug: gc
    u3o_debug_cpu     = 1 << 1,       //  debug: profile
    u3o_check_corrupt = 1 << 2,       //  check: gc memory
    u3o_check_fatal   = 1 << 3,       //  check: unrecoverable
    u3o_verbose       = 1 << 4,       //  be remarkably wordy
    u3o_dryrun        = 1 << 5        //  don't touch checkpoint
  };
/** Globals.
**/
/* u3o_Config / u3C: global process configuration.
*/
c3_global u3o_config u3o_Config;
# define u3C u3o_Config

174
i/n/r.h
View File

@ -22,8 +22,7 @@
/* u3r_at(): fragment `a` of `b`, or u3_none.
*/
u3_weak
u3r_at(u3_atom a,
u3_weak b);
u3r_at(u3_atom a, u3_weak b);
/* u3r_mean():
**
@ -31,8 +30,7 @@
** Axes must be sorted in tree order.
*/
c3_o
u3r_mean(u3_noun a,
...);
u3r_mean(u3_noun a, ...);
/* u3r_mug():
**
@ -54,7 +52,7 @@
*/
c3_w
u3r_mug_words(const c3_w *buf_w,
c3_w len_w);
c3_w len_w);
/* u3r_mug_cell():
**
@ -62,7 +60,7 @@
*/
c3_w
u3r_mug_cell(u3_noun a,
u3_noun b);
u3_noun b);
/* u3r_mug_trel():
**
@ -70,8 +68,8 @@
*/
c3_w
u3r_mug_trel(u3_noun a,
u3_noun b,
u3_noun c);
u3_noun b,
u3_noun c);
/* u3r_mug_qual():
**
@ -79,9 +77,9 @@
*/
c3_w
u3r_mug_qual(u3_noun a,
u3_noun b,
u3_noun c,
u3_noun d);
u3_noun b,
u3_noun c,
u3_noun d);
/* u3r_mug_both():
**
@ -98,7 +96,7 @@
*/
c3_o
u3r_fing(u3_noun a,
u3_noun b);
u3_noun b);
/* u3r_fing_cell():
**
@ -106,8 +104,8 @@
*/
c3_o
u3r_fing_cell(u3_noun p,
u3_noun q,
u3_noun b);
u3_noun q,
u3_noun b);
/* u3r_fing_mixt():
**
@ -115,8 +113,8 @@
*/
c3_o
u3r_fing_mixt(const c3_c* p_c,
u3_noun q,
u3_noun b);
u3_noun q,
u3_noun b);
/* u3r_fing_trel():
**
@ -124,9 +122,9 @@
*/
c3_o
u3r_fing_trel(u3_noun p,
u3_noun q,
u3_noun r,
u3_noun b);
u3_noun q,
u3_noun r,
u3_noun b);
/* u3r_fing_qual():
**
@ -134,18 +132,17 @@
*/
c3_o
u3r_fing_qual(u3_noun p,
u3_noun q,
u3_noun r,
u3_noun s,
u3_noun b);
u3_noun q,
u3_noun r,
u3_noun s,
u3_noun b);
/* u3r_sing():
**
** Yes iff (a) and (b) are the same noun.
*/
c3_o
u3r_sing(u3_noun a,
u3_noun b);
u3r_sing(u3_noun a, u3_noun b);
/* u3r_sung(): yes iff (a) and (b) are the same noun, unifying equals.
**
@ -153,8 +150,7 @@
** within (a) or (b)!
*/
c3_o
u3r_sung(u3_noun a,
u3_noun b);
u3r_sung(u3_noun a, u3_noun b);
/* u3r_sing_c):
**
@ -162,7 +158,7 @@
*/
c3_o
u3r_sing_c(const c3_c* a_c,
u3_noun b);
u3_noun b);
/* u3r_sing_cell():
**
@ -170,8 +166,8 @@
*/
c3_o
u3r_sing_cell(u3_noun p,
u3_noun q,
u3_noun b);
u3_noun q,
u3_noun b);
/* u3r_sing_mixt():
**
@ -179,8 +175,8 @@
*/
c3_o
u3r_sing_mixt(const c3_c* p_c,
u3_noun q,
u3_noun b);
u3_noun q,
u3_noun b);
/* u3r_sing_trel():
**
@ -188,9 +184,9 @@
*/
c3_o
u3r_sing_trel(u3_noun p,
u3_noun q,
u3_noun r,
u3_noun b);
u3_noun q,
u3_noun r,
u3_noun b);
/* u3r_sing_qual():
**
@ -198,10 +194,10 @@
*/
c3_o
u3r_sing_qual(u3_noun p,
u3_noun q,
u3_noun r,
u3_noun s,
u3_noun b);
u3_noun q,
u3_noun r,
u3_noun s,
u3_noun b);
/* u3r_nord():
**
@ -209,7 +205,7 @@
*/
u3_atom
u3r_nord(u3_noun a,
u3_noun b);
u3_noun b);
/* u3r_mold():
**
@ -217,8 +213,8 @@
*/
c3_o
u3r_mold(u3_noun a,
u3_noun* b,
u3_noun* c);
u3_noun* b,
u3_noun* c);
/* u3r_cell():
**
@ -226,8 +222,8 @@
*/
c3_o
u3r_cell(u3_noun a,
u3_noun* b,
u3_noun* c);
u3_noun* b,
u3_noun* c);
/* u3r_trel():
**
@ -235,9 +231,9 @@
*/
c3_o
u3r_trel(u3_noun a,
u3_noun* b,
u3_noun* c,
u3_noun* d);
u3_noun* b,
u3_noun* c,
u3_noun* d);
/* u3r_qual():
**
@ -245,10 +241,10 @@
*/
c3_o
u3r_qual(u3_noun a,
u3_noun* b,
u3_noun* c,
u3_noun* d,
u3_noun* e);
u3_noun* b,
u3_noun* c,
u3_noun* d,
u3_noun* e);
/* u3r_quil():
**
@ -256,11 +252,11 @@
*/
c3_o
u3r_quil(u3_noun a,
u3_noun* b,
u3_noun* c,
u3_noun* d,
u3_noun* e,
u3_noun* f);
u3_noun* b,
u3_noun* c,
u3_noun* d,
u3_noun* e,
u3_noun* f);
/* u3r_p():
**
@ -268,8 +264,8 @@
*/
c3_o
u3r_p(u3_noun a,
u3_noun b,
u3_noun* c);
u3_noun b,
u3_noun* c);
/* u3r_bush():
**
@ -277,8 +273,8 @@
*/
c3_o
u3r_bush(u3_noun a,
u3_noun* b,
u3_noun* c);
u3_noun* b,
u3_noun* c);
/* u3r_pq():
**
@ -286,9 +282,9 @@
*/
c3_o
u3r_pq(u3_noun a,
u3_noun b,
u3_noun* c,
u3_noun* d);
u3_noun b,
u3_noun* c,
u3_noun* d);
/* u3r_pqr():
**
@ -296,10 +292,10 @@
*/
c3_o
u3r_pqr(u3_noun a,
u3_noun b,
u3_noun* c,
u3_noun* d,
u3_noun* e);
u3_noun b,
u3_noun* c,
u3_noun* d,
u3_noun* e);
/* u3r_pqrs():
**
@ -307,11 +303,11 @@
*/
c3_o
u3r_pqrs(u3_noun a,
u3_noun b,
u3_noun* c,
u3_noun* d,
u3_noun* e,
u3_noun* f);
u3_noun b,
u3_noun* c,
u3_noun* d,
u3_noun* e,
u3_noun* f);
/* u3r_met():
**
@ -322,7 +318,7 @@
*/
c3_w
u3r_met(c3_y a_y,
u3_atom b);
u3_atom b);
/* u3r_bit():
**
@ -330,7 +326,7 @@
*/
c3_b
u3r_bit(c3_w a_w,
u3_atom b);
u3_atom b);
/* u3r_byte():
**
@ -338,7 +334,7 @@
*/
c3_y
u3r_byte(c3_w a_w,
u3_atom b);
u3_atom b);
/* u3r_bytes():
**
@ -346,9 +342,9 @@
*/
void
u3r_bytes(c3_w a_w,
c3_w b_w,
c3_y* c_y,
u3_atom d);
c3_w b_w,
c3_y* c_y,
u3_atom d);
/* u3r_chop():
**
@ -358,11 +354,11 @@
*/
void
u3r_chop(c3_g met_g,
c3_w fum_w,
c3_w wid_w,
c3_w tou_w,
c3_w* dst_w,
u3_atom src);
c3_w fum_w,
c3_w wid_w,
c3_w tou_w,
c3_w* dst_w,
u3_atom src);
/* u3r_mp():
**
@ -370,7 +366,7 @@
*/
void
u3r_mp(mpz_t a_mp,
u3_atom b);
u3_atom b);
/* u3r_word():
**
@ -378,7 +374,7 @@
*/
c3_w
u3r_word(c3_w a_w,
u3_atom b);
u3_atom b);
/* u3r_chub():
**
@ -386,7 +382,7 @@
*/
c3_d
u3r_chub(c3_w a_w,
u3_atom b);
u3_atom b);
/* u3r_words():
**
@ -394,9 +390,9 @@
*/
void
u3r_words(c3_w a_w,
c3_w b_w,
c3_w* c_w,
u3_atom d);
c3_w b_w,
c3_w* c_w,
u3_atom d);
/* u3r_string(): `a`, a text atom, as malloced C string.
*/

60
i/n/t.h
View File

@ -2,8 +2,53 @@
**
** This file is in the public domain.
*/
/** Options.
**/
/* U3_CPU_DEBUG: activate profiling.
*/
# define U3_CPU_DEBUG
/** Data structures.
**/
/* u3t_trace: fast execution flags.
**
** One flag per region of the runtime. The flags of the global
** instance (u3t_Trace, aliased as u3T) are flipped by the u3t_on()
** and u3t_off() macros below.
*/
typedef struct _u3t_trace {
c3_o noc_o; // now executing in nock interpreter
c3_o glu_o; // now executing in jet glue
c3_o mal_o; // now executing in allocator
c3_o far_o; // now executing in fragmentor
c3_o coy_o; // now executing in copy
c3_o euq_o; // now executing in equal
} u3t_trace;
/** Macros.
**/
/* u3t_on(): mark trace flag `var` of u3T as active.
**
** When u3o_debug_cpu is set in u3C.wag_w, `var` must currently be
** c3n and becomes c3y; any other value aborts the process. When
** the flag is clear, `var` is left unchanged. Expands to nothing
** if U3_CPU_DEBUG is not defined.
*/
# ifdef U3_CPU_DEBUG
# define u3t_on(var) \
(u3T.var = (u3C.wag_w & u3o_debug_cpu) \
? (c3n == u3T.var) ? c3y : (abort(), 0) \
: u3T.var)
# else
# define u3t_on(var)
#endif
/* u3t_off(): mark trace flag `var` of u3T as inactive.
**
** When u3o_debug_cpu is set in u3C.wag_w, `var` must currently be
** c3y and becomes c3n; any other value aborts the process. When
** the flag is clear, `var` is left unchanged. Expands to nothing
** if U3_CPU_DEBUG is not defined.
*/
# ifdef U3_CPU_DEBUG
# define u3t_off(var) \
(u3T.var = (u3C.wag_w & u3o_debug_cpu) \
? (c3y == u3T.var) ? c3n : (abort(), 0) \
: u3T.var)
# else
# define u3t_off(var)
#endif
/** Functions.
**/
/* u3t_init(): initialize tracing layer.
*/
void
u3t_init(void);
/* u3t_push(): push on trace stack.
*/
void
@ -34,10 +79,10 @@
void
u3t_samp(void);
/* u3t_come(): push on profile stack.
/* u3t_come(): push on profile stack; return yes if active push. RETAIN.
*/
void
u3t_come(u3_atom cog);
c3_o
u3t_come(u3_noun bat);
/* u3t_flee(): pop off profile stack.
*/
@ -58,3 +103,12 @@
*/
void
u3t_boot(void);
/** Globals.
**/
/* u3t_Trace / u3T: global trace state.
*/
c3_global u3t_trace u3t_Trace;
# define u3T u3t_Trace

View File

@ -100,3 +100,9 @@
# define u3dc(txt_c, a, b) u3v_do(txt_c, u3nc(a, b))
# define u3dt(txt_c, a, b, c) u3v_do(txt_c, u3nt(a, b, c))
# define u3dq(txt_c, a, b, c, d) u3v_do(txt_c, u3nt(a, b, c, d))
/* u3to(), u3of(): offset/pointer conversion.
*/
# define u3to(type, x) ((type *) u3a_into(x))
# define u3of(type, x) (u3a_outa((type *)x))

12
i/n/v.h
View File

@ -9,10 +9,10 @@
struct _u3v_arvo;
typedef struct _u3v_cart {
u3_noun vir; // effects of ovum
c3_o did; // cart considered for commit?
c3_o cit; // cart committed?
c3_d ent_d; // entry in raft queue?
u3_noun vir; // effects of ovum
c3_o did; // cart considered for commit?
c3_o cit; // cart committed?
c3_d ent_d; // event number
u3p(struct _u3v_cart) nex_p;
} u3v_cart;
@ -34,8 +34,8 @@
u3_noun roc; // kernel core
struct { // ova waiting to process
u3p(u3v_cart) egg_p; // exit of ovum queue
u3p(u3v_cart) geg_p; // entry of ovum queue
u3p(u3v_cart) egg_p; // exit of ovum queue
u3p(u3v_cart) geg_p; // entry of ovum queue
} ova;
} u3v_arvo;

View File

@ -522,11 +522,12 @@
c3_o abo; // -a
c3_o bat; // -b, batch create
c3_o gab; // -g
c3_o dem; // -d, dem
c3_o dem; // -d, daemon
c3_o dry; // -D, dry compute
c3_o fog; // -Xwtf, skip last event
c3_o fak; // -F, fake carrier
c3_o loh; // -L, local-only networking
c3_o pro; // , profile
c3_o pro; // -P, profile
c3_o veb; // -v, verbose (inverse of -q)
c3_o nuu; // -c, new pier
c3_o vno; // -V
@ -565,11 +566,6 @@
c3_global c3_c* u3_Local;
c3_global c3_c* u3_System;
c3_global c3_o u3_Flag_Abort;
c3_global c3_o u3_Flag_Garbage;
c3_global c3_o u3_Flag_Profile;
c3_global c3_o u3_Flag_Verbose;
/** Functions.
**/
/* Urbit time: 128 bits, leap-free.

View File

@ -14,7 +14,7 @@
{
#if 0
if ( b == 3 && a == 2684227708 ) {
printf("dword at 0x27ff84ff8 is %llu\r\n", *(c3_d *)0x27ff84ff8);
printf("dword at 0x27ff84ff8 is %" PRIu64 "\r\n", *(c3_d *)0x27ff84ff8);
*(c3_d *)0x27ff84ff8 = 25;
printf("see, we modified it\r\n");
}

View File

@ -16,19 +16,19 @@
return u3m_bail(c3__fail);
}
else {
u3_noun acc;
u3_noun acc = u3_blip;
c3_w i_w = a;
if ( !i_w )
return u3_nul;
return u3_nul;
while ( i_w ) {
if ( c3n == u3du(b) ) {
return u3_nul;
}
acc = u3i_cell( u3h(b), acc );
b = u3t(b);
i_w--;
acc = u3i_cell( u3h(b), acc );
b = u3t(b);
i_w--;
}
return u3kb_flop(acc);

View File

@ -18,6 +18,10 @@
return u3m_bail(c3__exit);
}
else {
return u3n_nock_in(u3k(fly), u3k(bus), u3k(fol));
u3_noun som;
som = u3n_nock_in(u3k(fly), u3k(bus), u3k(fol));
return som;
}
}

View File

@ -556,13 +556,15 @@
u3nc(c3__germ, u3k(p_gen)),
u3k(q_gen));
}
#if 0
_open_do_pq(sgbr) // ~|
{
return u3nt
(c3__sggr,
u3nc(c3__yelp, u3k(p_gen)),
u3nc(c3__mean, u3k(p_gen)),
u3k(q_gen));
}
#endif
_open_do_pq(sggl) // ~>
{
return u3nt
@ -652,15 +654,23 @@
}
_open_do_pqrs(sgwt) // ~?
{
return u3nq(c3__tsgl,
u3k(s_gen),
c3__wtdt,
u3nq(u3k(q_gen),
u3nc(u3_nul, 1),
c3__sgpm,
u3nt(u3k(p_gen),
u3k(r_gen),
u3nc(u3_nul, 1))));
return u3nt
(c3__tsls,
u3nq(c3__wtdt,
u3k(q_gen),
u3nc(c3__bczp, c3__null),
u3nc(u3nc(c3__bczp, c3__null), u3k(r_gen))),
u3nq(c3__wtsg,
u3nc(u3nc(u3_nul, 2),u3_nul),
u3nt(c3__tsgr,
u3nc(u3_nul, 3),
u3k(s_gen)),
u3nq(c3__sgpm,
u3k(p_gen),
u3nc(u3_nul, 5),
u3nt(c3__tsgr,
u3nc(u3_nul, 3),
u3k(s_gen)))));
}
/***
****
@ -713,7 +723,7 @@
u3nt(c3__tsls,
u3nt(c3__ktts, c3__a,
u3nt(c3__tsgr, u3nc(c3__cnzy, c3__v),
u3k(p_gen))),
u3nc(c3__bccm, u3k(p_gen)))),
u3nt(c3__tsls,
u3nt(c3__ktts, c3__b,
u3nt(c3__tsgr,
@ -823,7 +833,7 @@
_open_pq (kthp);
_open_pq (sgts);
_open_pq (sgbr);
// _open_pq (sgbr);
_open_pq (sggl);
_open_pq (sgbc);
_open_pq (sgcb);

View File

@ -229,7 +229,7 @@
u3_noun q_yor = u3t(yor);
u3_noun puq_hax, quq_hax;
u3_noun puq_yor, quq_yor;
u3_noun ret;
u3_noun ret = 0;
if ( u3_nul != q_hax ) {
u3r_mean(q_hax, 6, &puq_hax, 7, &quq_hax, 0);
@ -582,7 +582,7 @@
u3_noun q_yor = u3t(yor);
u3_noun puq_hax, quq_hax;
u3_noun puq_yor, quq_yor;
u3_noun ret;
u3_noun ret = 0;
if ( u3_nul != q_hax ) {
u3r_mean(q_hax, 6, &puq_hax, 7, &quq_hax, 0);

227
j/g/dawn.c Normal file
View File

@ -0,0 +1,227 @@
/* j/g/dawn.c
**
** This file is in the public domain.
*/
#include "all.h"
#include <cmark.h>
#include <node.h>
#include <buffer.h>
u3_noun node_to_noun(cmark_node * nod);
/* list_elems_to_noun(): convert the children of `nod` to a list.
**
**   The children are visited from last to first and each converted
**   child is prepended, so the resulting u3_nul-terminated list is
**   in document order.
*/
u3_noun list_elems_to_noun(cmark_node * nod)
{
  u3_noun      lis = u3_nul;
  cmark_node * kid = nod->last_child;

  while ( kid ) {
    lis = u3nc(node_to_noun(kid), lis);
    kid = kid->prev;
  }
  return lis;
}
/* document_to_noun(): a document is just the list of its children.
*/
u3_noun document_to_noun(cmark_node * nod)
{
  u3_noun kids = list_elems_to_noun(nod);

  return kids;
}
/* block_quote_to_noun(): produce [[%bloq ~] kids].
*/
u3_noun block_quote_to_noun(cmark_node * nod)
{
  u3_noun tag  = u3nc(c3__bloq, u3_nul);
  u3_noun kids = list_elems_to_noun(nod);

  return u3nc(tag, kids);
}
/* list_to_noun(): produce [[%list tight style] kids].
**
**   `style` is the bullet character for a bullet list; otherwise it
**   is the cell [start delimiter], where the delimiter is '.' for
**   CMARK_PERIOD_DELIM and ')' for anything else.
*/
u3_noun list_to_noun(cmark_node * nod)
{
  u3_noun sty;

  if ( nod->as.list.list_type == CMARK_BULLET_LIST ) {
    sty = nod->as.list.bullet_char;   /* XX convert? */
  }
  else {
    u3_noun del = (nod->as.list.delimiter == CMARK_PERIOD_DELIM) ? '.' : ')';

    sty = u3nc(nod->as.list.start, del);
  }
  return u3nc(u3nt(c3__list, __(nod->as.list.tight), sty),
              list_elems_to_noun(nod));
}
/* list_item_to_noun(): produce [[%item ~] kids].
*/
u3_noun list_item_to_noun(cmark_node * nod)
{
  u3_noun tag  = u3nc(c3__item, u3_nul);
  u3_noun kids = list_elems_to_noun(nod);

  return u3nc(tag, kids);
}
/* code_block_to_noun(): produce [%code fence lines].
**
**   `fence` is [~ fence_char fence_length info] for a fenced block
**   and ~ otherwise.  `lines` is the block's text passed through
**   u3qe_lore(); the temporary text atom is released afterwards.
*/
u3_noun code_block_to_noun(cmark_node * nod)
{
  /* XX u3i_bytes */
  u3_atom txt = u3i_string((c3_c *) nod->string_content.ptr);
  u3_noun fen;
  u3_noun pro;

  if ( nod->as.code.fenced ) {
    fen = u3nq(u3_nul,
               nod->as.code.fence_char,
               nod->as.code.fence_length,
               u3i_tape((c3_c *) nod->as.code.info.ptr));
  }
  else {
    fen = u3_nul;
  }
  pro = u3nt(c3__code, fen, u3qe_lore(txt));

  u3z(txt);
  return pro;
}
/* html_to_noun(): produce [%html lines].
**
**   `lines` is the block's text passed through u3qe_lore(); the
**   temporary text atom is released afterwards.
*/
u3_noun html_to_noun(cmark_node * nod)
{
  /* XX u3i_bytes */
  u3_atom txt = u3i_string((c3_c *) nod->string_content.ptr);
  u3_noun lin = u3qe_lore(txt);
  u3_noun pro = u3nc(c3__html, lin);

  u3z(txt);
  return pro;
}
/* paragraph_to_noun(): produce [%para kids].
*/
u3_noun paragraph_to_noun(cmark_node * nod)
{
  u3_noun kids = list_elems_to_noun(nod);

  return u3nc(c3__para, kids);
}
/* header_to_noun(): produce [%head level kids].
**
**   nod->as.header.setext is available but not represented.
*/
u3_noun header_to_noun(cmark_node * nod)
{
  u3_noun lev  = nod->as.header.level;
  u3_noun kids = list_elems_to_noun(nod);

  return u3nt(c3__head, lev, kids);
}
/* hrule_to_noun(): produce [%hrul ~]; `nod` is not inspected.
*/
u3_noun hrule_to_noun(cmark_node * nod)
{
  u3_noun pro = u3nc(c3__hrul, u3_nul);

  return pro;
}
/* reference_def_to_noun(): produce [%defn ~]; `nod` is not inspected.
*/
u3_noun reference_def_to_noun(cmark_node * nod)
{
  u3_noun pro = u3nc(c3__defn, u3_nul);

  return pro;
}
/* text_to_noun(): produce [u3_blip tape] from the node's literal.
*/
u3_noun text_to_noun(cmark_node * nod)
{
  c3_c*   tex_c = (c3_c *) cmark_chunk_to_cstr(&nod->as.literal);
  u3_noun tap   = u3i_tape(tex_c);

  return u3nc(u3_blip, tap);
}
/* softbreak_to_noun(): produce the constant [0 10 0].  XXX
**
**   `nod` is not inspected.  NOTE(review): this looks like a text
**   node whose tape is a single newline (byte 10) - confirm.
*/
u3_noun softbreak_to_noun(cmark_node * nod)
{
  u3_noun pro = u3nt(0, 10, 0);

  return pro;
}
/* linebreak_to_noun(): produce [%line ~]; `nod` is not inspected.
*/
u3_noun linebreak_to_noun(cmark_node * nod)
{
  u3_noun pro = u3nc(c3__line, u3_nul);

  return pro;
}
/* inline_code_to_noun(): produce [%code tape] from the node's literal.
*/
u3_noun inline_code_to_noun(cmark_node * nod)
{
  c3_c*   tex_c = (c3_c *) cmark_chunk_to_cstr(&nod->as.literal);
  u3_noun tap   = u3i_tape(tex_c);

  return u3nc(c3__code, tap);
}
/* inline_html_to_noun(): produce [%htmt text] from the node's literal.  XXX
**
**   Unlike the other inline converters, the text is built with
**   u3i_string() rather than u3i_tape().
*/
u3_noun inline_html_to_noun(cmark_node * nod)
{
  c3_c*   tex_c = (c3_c *) cmark_chunk_to_cstr(&nod->as.literal);
  u3_noun txt   = u3i_string(tex_c);

  return u3nc(c3__htmt, txt);
}
/* emph_to_noun(): produce [[%emph c3n] kids].
*/
u3_noun emph_to_noun(cmark_node * nod)
{
  u3_noun tag  = u3nc(c3__emph, c3n);
  u3_noun kids = list_elems_to_noun(nod);

  return u3nc(tag, kids);
}
/* strong_to_noun(): produce [[%emph c3y] kids].
*/
u3_noun strong_to_noun(cmark_node * nod)
{
  u3_noun tag  = u3nc(c3__emph, c3y);
  u3_noun kids = list_elems_to_noun(nod);

  return u3nc(tag, kids);
}
/* link_to_noun(): produce [[%link url title] kids].
**
**   `url` is a tape, or ~ when the node has no URL.  `title` is
**   [~ tape], or ~ when the node has no title.
*/
u3_noun link_to_noun(cmark_node * nod)
{
  u3_noun url = u3_nul;
  u3_noun tit = u3_nul;

  if ( nod->as.link.url ) {
    url = u3i_tape((c3_c *) nod->as.link.url);
  }
  if ( nod->as.link.title ) {
    tit = u3nc(u3_nul, u3i_tape((c3_c *) nod->as.link.title));
  }
  return u3nc(u3nt(c3__link, url, tit),
              list_elems_to_noun(nod));
}
/* image_to_noun(): produce [[%blot url title] kids].
**
**   `url` is a tape, or ~ when the node has no URL.  `title` is
**   [~ tape], or ~ when the node has no title.  Both pointers are
**   checked before conversion, exactly as link_to_noun() does, so
**   that a null URL is never handed to u3i_tape().
*/
u3_noun image_to_noun(cmark_node * nod)
{
  return
    u3nc(
      u3nt(
        c3__blot,
        nod->as.link.url
          ? u3i_tape((c3_c *) nod->as.link.url)
          : u3_nul,
        nod->as.link.title
          ? u3nc(u3_nul, u3i_tape((c3_c *) nod->as.link.title))
          : u3_nul),
      list_elems_to_noun(nod));
}
/* node_to_noun(): convert one cmark node by dispatching on its type.
**
**   A null node, or a node type with no converter, logs to stderr
**   and bails with %fail.
*/
u3_noun node_to_noun(cmark_node * nod)
{
  if ( 0 == nod ) {
    fprintf(stderr, "markdown null node");
    return u3m_bail(c3__fail);
  }

  switch ( nod->type ) {
    default: break;

    /* Block */
    case CMARK_NODE_DOCUMENT:       return document_to_noun(nod);
    case CMARK_NODE_BLOCK_QUOTE:    return block_quote_to_noun(nod);
    case CMARK_NODE_LIST:           return list_to_noun(nod);
    case CMARK_NODE_LIST_ITEM:      return list_item_to_noun(nod);
    case CMARK_NODE_CODE_BLOCK:     return code_block_to_noun(nod);
    case CMARK_NODE_HTML:           return html_to_noun(nod);
    case CMARK_NODE_PARAGRAPH:      return paragraph_to_noun(nod);
    case CMARK_NODE_HEADER:         return header_to_noun(nod);
    case CMARK_NODE_HRULE:          return hrule_to_noun(nod);
    case CMARK_NODE_REFERENCE_DEF:  return reference_def_to_noun(nod);

    /* Inline */
    case CMARK_NODE_TEXT:           return text_to_noun(nod);
    case CMARK_NODE_SOFTBREAK:      return softbreak_to_noun(nod);
    case CMARK_NODE_LINEBREAK:      return linebreak_to_noun(nod);
    case CMARK_NODE_INLINE_CODE:    return inline_code_to_noun(nod);
    case CMARK_NODE_INLINE_HTML:    return inline_html_to_noun(nod);
    case CMARK_NODE_EMPH:           return emph_to_noun(nod);
    case CMARK_NODE_STRONG:         return strong_to_noun(nod);
    case CMARK_NODE_LINK:           return link_to_noun(nod);
    case CMARK_NODE_IMAGE:          return image_to_noun(nod);
  }

  /* no converter for this node type */
  fprintf(stderr, "bad markdown parsing");
  return u3m_bail(c3__fail);
}
/* functions
*/
/* u3qg_dawn(): parse the text atom `a` as markdown; produce the
** document's top-level elements as a list.
**
**   u3r_string() hands back a malloc'd C string, which is released
**   here once the parse tree has been converted.  A null parse
**   result bails with %fail instead of being dereferenced.
**
**   NOTE(review): if a converter bails part-way through, `tex` and
**   `doc` are not released - confirm whether that matters here.
*/
  u3_noun
  u3qg_dawn(u3_atom a)
  {
    c3_c*        tex = u3r_string(a);
    /* XX better strlen */
    cmark_node * doc = cmark_parse_document(tex, strlen(tex));
    u3_noun      res;

    if ( !doc ) {
      free(tex);
      return u3m_bail(c3__fail);
    }
    res = document_to_noun(doc);

    cmark_node_free(doc);
    free(tex);
    return res;
  }
/* u3wg_dawn(): jet entry point for dawn.
**
**   Reads the noun at axis u3x_sam of `cor`.  Bails with %exit when
**   it is missing or fails the u3ud() test; otherwise hands it to
**   u3qg_dawn().
*/
  u3_noun
  u3wg_dawn(u3_noun cor)
  {
    u3_noun sam = u3r_at(u3x_sam, cor);

    if ( u3_none == sam ) {
      return u3m_bail(c3__exit);
    }
    if ( c3n == u3ud(sam) ) {
      return u3m_bail(c3__exit);
    }
    return u3qg_dawn(sam);
  }

34
j/g/sqar.c Normal file
View File

@ -0,0 +1,34 @@
/* j/g/sqar.c
**
** This file is in the public domain.
*/
#include "all.h"
/* functions
*/
/* u3qg_sqar(): produce `a` multiplied by itself.
**
**   Logs a debug line to stderr first.  The value printed is the raw
**   noun word, cast to unsigned so that it matches the %u conversion.
**   NOTE(review): for an atom too large to be stored directly this is
**   presumably a reference, not the number itself - confirm.
*/
  u3_noun
  u3qg_sqar(u3_atom a)
  {
    mpz_t a_mp;

    fprintf(stderr, "C squared %u!\r\n", (unsigned)a);

    u3r_mp(a_mp, a);
    mpz_mul(a_mp, a_mp, a_mp);

    return u3i_mp(a_mp);
  }
/* u3wg_sqar(): jet entry point for sqar.
**
**   Reads the noun at axis u3x_sam of `cor`.  Bails with %exit when
**   it is missing or fails the u3ud() test; otherwise hands it to
**   u3qg_sqar().
*/
  u3_noun
  u3wg_sqar(u3_noun cor)
  {
    u3_noun sam = u3r_at(u3x_sam, cor);

    if ( u3_none == sam ) {
      return u3m_bail(c3__exit);
    }
    if ( c3n == u3ud(sam) ) {
      return u3m_bail(c3__exit);
    }
    return u3qg_sqar(sam);
  }

View File

@ -117,7 +117,7 @@ static u3j_harm _mood__hoon_rexp_a[] = {{".2", u3we_rexp}, {}};
static u3j_harm _mood__hoon_trip_a[] = {{".2", u3we_trip}, {}};
static u3j_harm _mood__hoon__aesc_en_a[] = {{".2", u3wea_en}, {}};
static u3j_harm _mood__hoon__aesc_de_a[] = {{".2", u3wea_en}, {}};
static u3j_harm _mood__hoon__aesc_de_a[] = {{".2", u3wea_de}, {}};
static u3j_core _mood__hoon__aesc_d[] =
{ { "en", _mood__hoon__aesc_en_a },
{ "de", _mood__hoon__aesc_de_a },
@ -333,6 +333,32 @@ static u3j_harm _mood__hoon__al_a[] =
};
#endif
/* util: jet drivers for the `util` core.
**
** `sqar` binds arm ".2" to u3wg_sqar. NOTE(review): the third
** initializer (c3y) sets a per-arm flag whose meaning is not
** visible here - confirm against the u3j_harm definition.
*/
static u3j_harm _util_sqar_a[] = {{".2", u3wg_sqar, c3y}, {}};
static u3j_core _util_d[] =
{ { "sqar", _util_sqar_a },
{}
};
/* utyl: jet drivers for the `utyl` core.
**
** `dawn` binds arm ".2" to u3wg_dawn. NOTE(review): the third
** initializer (c3y) sets a per-arm flag whose meaning is not
** visible here - confirm against the u3j_harm definition.
*/
static u3j_harm _utyl_dawn_a[] = {{".2", u3wg_dawn, c3y}, {}};
static u3j_core _utyl_d[] =
{
{ "dawn", _utyl_dawn_a },
{}
};
#if 0
static u3j_core _zuse_d[] =
{ { "util", 0, _util_d },
{}
};
#endif
/* arvo: child cores registered under "arvo".
**
** Each entry has a control string, no arms of its own (0), and a
** blank-terminated list of child cores.
*/
static u3j_core _arvo_d[] =
{ { "util", 0, _util_d },
{ "utyl", 0, _utyl_d },
{}
};
static u3j_core _mood__hoon_d[] =
{ { "add", _mood__hoon_add_a },
{ "dec", _mood__hoon_dec_a },
@ -451,6 +477,7 @@ static u3j_core _mood__hoon_d[] =
{ "ap", _mood__hoon__ap_a },
// { "al", _mood__hoon__al_a },
{ "ut", _mood__hoon__ut_a, _mood__hoon__ut_d },
{ "arvo", 0, _arvo_d },
#endif
{}
};
@ -460,13 +487,13 @@ static u3j_core _mood_d[] =
{}
};
static u3j_core _k164_d[] =
static u3j_core _k163_d[] =
{ { "mood", 0, _mood_d },
{}
};
static u3j_core _d[] = {
{ "k164", 0, _k164_d},
{ "k163", 0, _k163_d},
{}
};

189
n/a.c
View File

@ -4,7 +4,6 @@
*/
#include "all.h"
/* _box_slot(): select the right free list to search for a block.
*/
c3_w
@ -336,8 +335,33 @@ _ca_willoc(c3_w len_w, c3_w ald_w, c3_w alp_w)
static void*
_ca_walloc(c3_w len_w, c3_w ald_w, c3_w alp_w)
{
void* ptr_v = _ca_willoc(len_w, ald_w, alp_w);
void* ptr_v;
u3t_on(mal_o);
ptr_v = _ca_willoc(len_w, ald_w, alp_w);
u3t_off(mal_o);
#if 0
if ( SUB ) {
fprintf(stderr, "sub: at %p; kid %p\r\n",
ptr_v,
u3R->kid_u);
fprintf(stderr, "this: hat %p, cap %p, rut %p, mat %p\r\n",
u3a_into(u3R->hat_p),
u3a_into(u3R->cap_p),
u3a_into(u3R->rut_p),
u3a_into(u3R->mat_p));
if ( u3R->kid_u ) {
fprintf(stderr, "kids: hat %p, cap %p, rut %p, mat %p\r\n\n",
u3a_into(u3R->kid_u->hat_p),
u3a_into(u3R->kid_u->cap_p),
u3a_into(u3R->kid_u->rut_p),
u3a_into(u3R->kid_u->mat_p));
}
}
#endif
#if 0
if ( u3a_botox(ptr_v) == (u3a_box*)(void *)0x27f50a02c ) {
static int xuc_i;
@ -350,14 +374,14 @@ _ca_walloc(c3_w len_w, c3_w ald_w, c3_w alp_w)
return ptr_v;
}
int FOO;
/* u3a_walloc(): allocate storage words on hat.
*/
void*
u3a_walloc(c3_w len_w)
{
void* ptr_v = _ca_walloc(len_w, 1, 0);
void* ptr_v;
ptr_v = _ca_walloc(len_w, 1, 0);
#if 0
if ( (703 == u3_Code) &&
@ -369,9 +393,7 @@ u3a_walloc(c3_w len_w)
u3a_box* box_u = u3a_botox(ptr_v);
box_u->cod_w = 999;
FOO = 1;
}
// if ( 9 == xuc_i ) { FOO = 1; }
xuc_i++;
}
#endif
@ -397,23 +419,28 @@ u3a_wealloc(void* lag_v, c3_w len_w)
for ( i_w = 0; i_w < tiz_w; i_w++ ) {
new_w[i_w] = old_w[i_w];
}
u3a_wdrop(lag_v);
u3a_wfree(lag_v);
return new_w;
}
}
}
/* u3a_wdrop(): free storage.
/* u3a_wfree(): free storage.
*/
void
u3a_wdrop(void* tox_v)
u3a_wfree(void* tox_v)
{
u3a_box* box_u = u3a_botox(tox_v);
c3_w* box_w = (c3_w *)(void *)box_u;
u3t_on(mal_o);
c3_assert(box_u->use_w != 0);
box_u->use_w -= 1;
if ( 0 != box_u->use_w ) return;
if ( 0 != box_u->use_w ) {
u3t_off(mal_o);
return;
}
#if 0
/* Clear the contents of the block, for debugging.
@ -487,30 +514,21 @@ u3a_wdrop(void* tox_v)
_box_attach(box_u);
}
}
u3t_off(mal_o);
}
#if 0
/* u3a_malloc(): allocate storage measured in bytes.
/* u3a_calloc(): allocate and zero-initialize array
*/
void*
u3a_malloc(size_t len_i)
u3a_calloc(size_t num_i, size_t len_i)
{
c3_w len_w = (c3_w)len_i;
size_t byt_i = num_i * len_i;
c3_w* out_w = u3a_malloc(byt_i);
memset(out_w, 0, byt_i);
return u3a_walloc((len_w + 3) >> 2);
return out_w;
}
/* u3a_realloc(): realloc in bytes.
*/
void*
u3a_realloc(void* lag_v, size_t len_i)
{
c3_w len_w = (c3_w)len_i;
return u3a_wealloc(lag_v, (len_w + 3) >> 2);
}
#else
/* u3a_malloc(): aligned storage measured in bytes.
*/
void*
@ -536,6 +554,43 @@ u3a_malloc(size_t len_i)
return out_w;
}
/* u3a_celloc(): allocate a cell.
*/
c3_w*
u3a_celloc(void)
{
u3p(u3a_fbox) cel_p;
if ( (u3R == &(u3H->rod_u)) || !(cel_p = u3R->all.cel_p) ) {
return u3a_walloc(c3_wiseof(u3a_cell));
}
else {
u3a_box* box_u = &(u3to(u3a_fbox, cel_p)->box_u);
box_u->use_w = 1;
u3R->all.cel_p = u3to(u3a_fbox, cel_p)->nex_p;
return u3a_boxto(box_u);
}
}
/* u3a_cfree(): free a cell.
**
**   On the home road (u3R == &u3H->rod_u) the cell is released through
**   u3a_wfree().  On any other road the cell's box is pushed onto the
**   head of the road's free-cell list (u3R->all.cel_p), the list that
**   u3a_celloc() pops from.
*/
void
u3a_cfree(c3_w* cel_w)
{
  if ( u3R == &(u3H->rod_u) ) {
    //  plain call: ISO C does not allow `return <expr>;` in a void
    //  function, even when <expr> itself has type void.
    //
    u3a_wfree(cel_w);
  }
  else {
    u3a_box*      box_u = u3a_botox(cel_w);
    u3p(u3a_fbox) fre_p = u3of(u3a_fbox, box_u);

    //  link the freed box in front of the current list head
    //
    u3to(u3a_fbox, fre_p)->nex_p = u3R->all.cel_p;
    u3R->all.cel_p = fre_p;
  }
}
/* u3a_realloc(): aligned realloc in bytes.
*/
void*
@ -559,7 +614,7 @@ u3a_realloc(void* lag_v, size_t len_i)
for ( i_w = 0; i_w < tiz_w; i_w++ ) {
new_w[i_w] = old_w[i_w];
}
u3a_wdrop(org_w);
u3a_wfree(org_w);
return new_w;
}
}
@ -581,12 +636,15 @@ u3a_realloc2(void* lag_v, size_t old_i, size_t new_i)
void
u3a_free(void* tox_v)
{
if (NULL == tox_v)
return;
c3_w* tox_w = tox_v;
c3_w pad_w = tox_w[-1];
c3_w* org_w = tox_w - (pad_w + 1);
// printf("free %p %p\r\n", org_w, tox_w);
u3a_wdrop(org_w);
u3a_wfree(org_w);
}
/* u3a_free2(): gmp-shaped free.
@ -596,7 +654,6 @@ u3a_free2(void* tox_v, size_t siz_i)
{
return u3a_free(tox_v);
}
#endif
#if 1
/* _me_wash_north(): clean up mug slots after copy.
@ -1002,9 +1059,14 @@ u3a_take(u3_noun som)
return som;
}
else {
return _(u3a_is_north(u3R))
u3t_on(coy_o);
som = _(u3a_is_north(u3R))
? _me_take_north(som)
: _me_take_south(som);
u3t_off(coy_o);
return som;
}
}
@ -1095,14 +1157,14 @@ top:
if ( !_(u3a_is_cat(h_dog)) ) {
_me_lose_north(h_dog);
}
u3a_wdrop(dog_w);
u3a_cfree(dog_w);
if ( !_(u3a_is_cat(t_dog)) ) {
dog = t_dog;
goto top;
}
}
else {
u3a_wdrop(dog_w);
u3a_wfree(dog_w);
}
}
}
@ -1135,14 +1197,14 @@ top:
if ( !_(u3a_is_cat(h_dog)) ) {
_me_lose_south(h_dog);
}
u3a_wdrop(dog_w);
u3a_cfree(dog_w);
if ( !_(u3a_is_cat(t_dog)) ) {
dog = t_dog;
goto top;
}
}
else {
u3a_wdrop(dog_w);
u3a_wfree(dog_w);
}
}
}
@ -1154,16 +1216,17 @@ top:
u3_noun
u3a_gain(u3_noun som)
{
// u3t_on(mal_o);
c3_assert(u3_none != som);
if ( _(u3a_is_cat(som)) ) {
return som;
}
else {
return _(u3a_is_north(u3R))
if ( !_(u3a_is_cat(som)) ) {
som = _(u3a_is_north(u3R))
? _me_gain_north(som)
: _me_gain_south(som);
}
// u3t_off(mal_o);
return som;
}
/* u3a_lose(): lose a reference count.
@ -1171,6 +1234,7 @@ u3a_gain(u3_noun som)
void
u3a_lose(u3_noun som)
{
// u3t_on(mal_o);
if ( !_(u3a_is_cat(som)) ) {
if ( _(u3a_is_north(u3R)) ) {
_me_lose_north(som);
@ -1178,6 +1242,7 @@ u3a_lose(u3_noun som)
_me_lose_south(som);
}
}
// u3t_off(mal_o);
}
/* u3a_use(): reference count.
@ -1196,6 +1261,21 @@ u3a_use(u3_noun som)
}
}
/* u3a_luse(): check refcount sanity.
**
**   Walks `som` head-first.  If u3a_use() reports a use count of zero
**   for `som` or for any noun reached through u3h()/u3t(), a diagnostic
**   is printed to stderr and the process aborts.
*/
void
u3a_luse(u3_noun som)
{
  while ( 1 ) {
    if ( 0 == u3a_use(som) ) {
      fprintf(stderr, "luse: insane %d 0x%x\r\n", som, som);
      abort();
    }

    //  atoms have no children: done
    //
    if ( !_(u3du(som)) ) {
      return;
    }

    //  recurse into the head, then continue down the tail in place
    //
    u3a_luse(u3h(som));
    som = u3t(som);
  }
}
/* u3a_mark_ptr(): mark a pointer for gc. Produce size if first mark.
*/
c3_w
@ -1326,9 +1406,12 @@ u3a_print_memory(c3_c* cap_c, c3_w wor_w)
/* u3a_sweep(): sweep a fully marked road.
*/
void
u3a_sweep(c3_c* cap_c)
u3a_sweep(void)
{
c3_w neg_w, pos_w, leq_w, weq_w, tot_w, caf_w;
c3_w neg_w, pos_w, leq_w, weq_w;
#if 0
c3_w tot_w, caf_w;
#endif
/* Measure allocated memory by counting the free list.
*/
@ -1423,6 +1506,7 @@ u3a_sweep(c3_c* cap_c)
}
}
#if 0
tot_w = _(u3a_is_north(u3R))
? u3R->mat_p - u3R->rut_p
: u3R->rut_p - u3R->mat_p;
@ -1430,9 +1514,10 @@ u3a_sweep(c3_c* cap_c)
? u3R->mat_p - u3R->cap_p
: u3R->cap_p - u3R->mat_p;
// u3a_print_memory("available", (tot_w - pos_w));
// u3a_print_memory("allocated", pos_w);
// u3a_print_memory("volatile", caf_w);
u3a_print_memory("available", (tot_w - pos_w));
u3a_print_memory("allocated", pos_w);
u3a_print_memory("volatile", caf_w);
#endif
u3a_print_memory("leaked", leq_w);
u3a_print_memory("weaked", weq_w);
@ -1446,7 +1531,7 @@ u3a_sweep(c3_c* cap_c)
c3_w*
u3a_slab(c3_w len_w)
{
c3_w* nov_w = u3a_walloc(len_w + c3_wiseof(u3a_atom));
c3_w* nov_w = u3a_walloc(len_w + c3_wiseof(u3a_atom));
u3a_atom* pug_u = (void *)nov_w;
pug_u->mug_w = 0;
@ -1494,17 +1579,17 @@ u3a_malt(c3_w* sal_w)
u3_noun
u3a_moot(c3_w* sal_w)
{
c3_w* nov_w = (sal_w - c3_wiseof(u3a_atom));
c3_w* nov_w = (sal_w - c3_wiseof(u3a_atom));
u3a_atom* nov_u = (void*)nov_w;
c3_w len_w = nov_u->len_w;
c3_w las_w = nov_u->buf_w[len_w - 1];
c3_w len_w = nov_u->len_w;
c3_w las_w = nov_u->buf_w[len_w - 1];
c3_assert(0 != len_w);
c3_assert(0 != las_w);
if ( 1 == len_w ) {
if ( _(u3a_is_cat(las_w)) ) {
u3a_wdrop(nov_w);
u3a_wfree(nov_w);
return las_w;
}
@ -1569,7 +1654,7 @@ u3a_mint(c3_w* sal_w, c3_w len_w)
/* See if we can free the slab entirely.
*/
if ( len_w == 0 ) {
u3a_wdrop(nov_w);
u3a_wfree(nov_w);
return 0;
}
@ -1577,7 +1662,7 @@ u3a_mint(c3_w* sal_w, c3_w len_w)
c3_w low_w = nov_u->buf_w[0];
if ( _(u3a_is_cat(low_w)) ) {
u3a_wdrop(nov_w);
u3a_wfree(nov_w);
return low_w;
}

131
n/e.c
View File

@ -201,7 +201,7 @@ _ce_image_open(u3e_image* img_u, c3_o nuu_o)
}
else {
if ( siz_d != (pgs_d << (c3_d)(u3a_page + 2)) ) {
fprintf(stderr, "%s: corrupt size %llx\r\n", ful_c, siz_d);
fprintf(stderr, "%s: corrupt size %" PRIx64 "\r\n", ful_c, siz_d);
return c3n;
}
img_u->pgs_w = (c3_w) pgs_d;
@ -751,6 +751,12 @@ u3e_save(void)
{
u3_ce_patch* pat_u;
// In dry-run mode, we never touch this stuff.
//
if ( u3C.wag_w & u3o_dryrun ) {
return;
}
// Write all dirty pages to disk; clear protection and dirty bits.
//
// This has to block the main thread. All further processing can happen
@ -814,70 +820,81 @@ u3e_live(c3_o nuu_o, c3_c* dir_c)
u3P.nor_u.nam_c = "north";
u3P.sou_u.nam_c = "south";
/* Open and apply any patches.
*/
if ( _(nuu_o) ) {
if ( (c3n == _ce_image_open(&u3P.nor_u, c3y)) ||
(c3n == _ce_image_open(&u3P.sou_u, c3y)) )
{
printf("boot: image failed\r\n");
exit(1);
}
}
else {
u3_ce_patch* pat_u;
/* Open image files.
#if 0
if ( u3C.wag_w & u3o_dryrun ) {
return c3y;
} else
#endif
{
/* Open and apply any patches.
*/
{
if ( (c3n == _ce_image_open(&u3P.nor_u, c3n)) ||
(c3n == _ce_image_open(&u3P.sou_u, c3n)) )
if ( _(nuu_o) ) {
if ( (c3n == _ce_image_open(&u3P.nor_u, c3y)) ||
(c3n == _ce_image_open(&u3P.sou_u, c3y)) )
{
fprintf(stderr, "boot: no image\r\n");
return u3e_live(c3y, dir_c);
printf("boot: image failed\r\n");
exit(1);
}
}
else {
u3_ce_patch* pat_u;
/* Load any patch files; apply them to images.
*/
if ( 0 != (pat_u = _ce_patch_open()) ) {
printf("_ce_patch_apply\r\n");
_ce_patch_apply(pat_u);
printf("_ce_image_sync\r\n");
_ce_image_sync(&u3P.nor_u);
_ce_image_sync(&u3P.sou_u);
printf("_ce_patch_delete\r\n");
_ce_patch_delete();
printf("_ce_patch_free\r\n");
_ce_patch_free(pat_u);
}
/* Write image files to memory; reinstate protection.
*/
{
_ce_image_blit(&u3P.nor_u,
u3_Loom,
(1 << u3a_page));
_ce_image_blit(&u3P.sou_u,
(u3_Loom + (1 << u3a_bits) - (1 << u3a_page)),
-(1 << u3a_page));
if ( 0 != mprotect((void *)u3_Loom, u3a_bytes, PROT_READ) ) {
perror("protect");
c3_assert(0);
/* Open image files.
*/
{
if ( (c3n == _ce_image_open(&u3P.nor_u, c3n)) ||
(c3n == _ce_image_open(&u3P.sou_u, c3n)) )
{
fprintf(stderr, "boot: no image\r\n");
return u3e_live(c3y, dir_c);
}
}
printf("protected loom\r\n");
}
/* Load any patch files; apply them to images.
*/
if ( 0 != (pat_u = _ce_patch_open()) ) {
printf("_ce_patch_apply\r\n");
_ce_patch_apply(pat_u);
/* If the images were empty, we are logically booting.
*/
if ( (0 == u3P.nor_u.pgs_w) && (0 == u3P.sou_u.pgs_w) ) {
printf("logical boot\r\n");
nuu_o = c3y;
printf("_ce_image_sync\r\n");
_ce_image_sync(&u3P.nor_u);
_ce_image_sync(&u3P.sou_u);
printf("_ce_patch_delete\r\n");
_ce_patch_delete();
printf("_ce_patch_free\r\n");
_ce_patch_free(pat_u);
}
/* Write image files to memory; reinstate protection.
*/
{
_ce_image_blit(&u3P.nor_u,
u3_Loom,
(1 << u3a_page));
_ce_image_blit(&u3P.sou_u,
(u3_Loom + (1 << u3a_bits) - (1 << u3a_page)),
-(1 << u3a_page));
if ( 0 != mprotect((void *)u3_Loom, u3a_bytes, PROT_READ) ) {
perror("protect");
c3_assert(0);
}
printf("protected loom\r\n");
}
/* If the images were empty, we are logically booting.
*/
if ( (0 == u3P.nor_u.pgs_w) && (0 == u3P.sou_u.pgs_w) ) {
printf("live: logical boot\r\n");
nuu_o = c3y;
}
else {
u3a_print_memory("live: loaded",
(u3P.nor_u.pgs_w + u3P.sou_u.pgs_w) << u3a_page);
}
}
}
return nuu_o;
}

51
n/h.c
View File

@ -72,7 +72,7 @@ _ch_buck_add(u3h_buck* hab_u, u3_noun kev)
bah_u->kev[i_w + 1] = hab_u->kev[i_w];
}
u3a_wdrop(hab_u);
u3a_wfree(hab_u);
return bah_u;
}
}
@ -150,7 +150,7 @@ _ch_node_add(u3h_node* han_u, c3_w lef_w, c3_w rem_w, u3_noun kev)
for ( i_w = inx_w; i_w < len_w; i_w++ ) {
nah_u->sot_w[i_w + 1] = han_u->sot_w[i_w];
}
u3a_wdrop(han_u);
u3a_wfree(han_u);
return nah_u;
}
}
@ -301,25 +301,25 @@ u3h_hum(u3p(u3h_root) har_p, c3_w mug_w)
}
}
/* _ch_buck_get(): read in bucket.
/* _ch_buck_git(): read in bucket.
*/
static u3_weak
_ch_buck_get(u3h_buck* hab_u, u3_noun key)
_ch_buck_git(u3h_buck* hab_u, u3_noun key)
{
c3_w i_w;
for ( i_w = 0; i_w < hab_u->len_w; i_w++ ) {
if ( _(u3r_sing(key, u3h(hab_u->kev[i_w]))) ) {
return u3a_gain(u3t(hab_u->kev[i_w]));
return u3t(hab_u->kev[i_w]);
}
}
return u3_none;
}
/* _ch_node_get(): read in node.
/* _ch_node_git(): read in node.
*/
static u3_weak
_ch_node_get(u3h_node* han_u, c3_w lef_w, c3_w rem_w, u3_noun key)
_ch_node_git(u3h_node* han_u, c3_w lef_w, c3_w rem_w, u3_noun key)
{
c3_w bit_w, map_w;
@ -339,7 +339,7 @@ _ch_node_get(u3h_node* han_u, c3_w lef_w, c3_w rem_w, u3_noun key)
u3_noun kev = u3h_slot_to_noun(sot_w);
if ( _(u3r_sing(key, u3h(kev))) ) {
return u3a_gain(u3t(kev));
return u3t(kev);
}
else {
return u3_none;
@ -349,19 +349,19 @@ _ch_node_get(u3h_node* han_u, c3_w lef_w, c3_w rem_w, u3_noun key)
void* hav_v = u3h_slot_to_node(sot_w);
if ( 0 == lef_w ) {
return _ch_buck_get(hav_v, key);
return _ch_buck_git(hav_v, key);
}
else return _ch_node_get(hav_v, lef_w, rem_w, key);
else return _ch_node_git(hav_v, lef_w, rem_w, key);
}
}
}
/* u3h_get(): read from hashtable.
/* u3h_git(): read from hashtable.
**
** `key` is RETAINED.
** `key` is RETAINED; result is RETAINED.
*/
u3_weak
u3h_get(u3p(u3h_root) har_p, u3_noun key)
u3h_git(u3p(u3h_root) har_p, u3_noun key)
{
u3h_root* har_u = u3to(u3h_root, har_p);
c3_w mug_w = u3r_mug(key);
@ -377,7 +377,7 @@ u3h_get(u3p(u3h_root) har_p, u3_noun key)
if ( _(u3r_sing(key, u3h(kev))) ) {
har_u->sot_w[inx_w] = u3h_noun_be_warm(sot_w);
return u3a_gain(u3t(kev));
return u3t(kev);
}
else {
return u3_none;
@ -386,10 +386,25 @@ u3h_get(u3p(u3h_root) har_p, u3_noun key)
else {
u3h_node* han_u = u3h_slot_to_node(sot_w);
return _ch_node_get(han_u, 25, rem_w, key);
return _ch_node_git(han_u, 25, rem_w, key);
}
}
/* u3h_get(): read from hashtable.
**
**   `key` is RETAINED; result is PRODUCED.
**
**   Looks `key` up with u3h_git(); on a hit, a reference is added to the
**   value with u3a_gain() before it is returned.  On a miss, u3_none.
*/
u3_weak
u3h_get(u3p(u3h_root) har_p, u3_noun key)
{
  u3_noun val = u3h_git(har_p, key);

  if ( u3_none == val ) {
    return val;
  }

  u3a_gain(val);
  return val;
}
/* _ch_buck_gut(): read in bucket, unifying key nouns.
*/
static u3_weak
@ -489,7 +504,7 @@ _ch_free_buck(u3h_buck* hab_u)
for ( i_w = 0; i_w < hab_u->len_w; i_w++ ) {
u3a_lose(hab_u->kev[i_w]);
}
u3a_wdrop(hab_u);
u3a_wfree(hab_u);
}
/* _ch_free_node(): free node.
@ -520,7 +535,7 @@ _ch_free_node(u3h_node* han_u, c3_w lef_w)
}
}
}
u3a_wdrop(han_u);
u3a_wfree(han_u);
}
/* u3h_free(): free hashtable.
@ -545,7 +560,7 @@ u3h_free(u3p(u3h_root) har_p)
_ch_free_node(han_u, 25);
}
}
u3a_wdrop(har_u);
u3a_wfree(har_u);
}
/* _ch_walk_buck(): walk bucket for gc.

3
n/i.c
View File

@ -208,7 +208,8 @@ u3i_cell(u3_noun a, u3_noun b)
c3_assert(!_(u3a_is_junior(u3R, b)));
{
c3_w* nov_w = u3a_walloc(c3_wiseof(u3a_cell));
// c3_w* nov_w = u3a_walloc(c3_wiseof(u3a_cell));
c3_w* nov_w = u3a_celloc();
u3a_cell* nov_u = (void *)nov_w;
u3_noun pro;

537
n/j.c
View File

@ -196,10 +196,10 @@ _cj_sham(u3_noun som) // XX wrong, does not match ++sham
return haf;
}
/* _cj_warm_fend(): in warm state, return u3_none or calx. RETAINS.
/* u3j_find(): in warm state, return u3_none or calx. RETAINS.
*/
u3_weak
_cj_warm_fend(u3_noun bat)
u3j_find(u3_noun bat)
{
u3a_road* rod_u = u3R;
@ -237,7 +237,7 @@ _cj_warm_hump(c3_l jax_l, u3_noun huc)
if ( '.' == *(jet_u->fcs_c) ) {
c3_d axe_d = 0;
if ( (1 != sscanf(jet_u->fcs_c+1, "%llu", &axe_d)) ||
if ( (1 != sscanf(jet_u->fcs_c+1, "%" SCNu64, &axe_d)) ||
axe_d >> 32ULL ||
((1 << 31) & (axe_l = (c3_w)axe_d)) ||
(axe_l < 2) )
@ -265,12 +265,12 @@ _cj_warm_hump(c3_l jax_l, u3_noun huc)
return hap;
}
/* _cj_boil_mean(): in parent, declare a core. RETAINS.
/* _cj_hot_mean(): in parent, declare a core. RETAINS.
**
** XX bat is used only for printing, remove.
*/
static c3_l
_cj_boil_mean(c3_l par_l, u3_noun mop, u3_noun bat)
_cj_hot_mean(c3_l par_l, u3_noun mop, u3_noun bat)
{
u3j_core* par_u;
u3j_core* dev_u;
@ -291,7 +291,7 @@ _cj_boil_mean(c3_l par_l, u3_noun mop, u3_noun bat)
while ( (cop_u = &dev_u[i_l])->cos_c ) {
if ( _(u3r_sing_c(cop_u->cos_c, u3h(mop))) ) {
#if 0
fprintf(stderr, "boil: bound jet %d/%s/%s/%x\r\n",
fprintf(stderr, "hot: bound jet %d/%s/%s/%x\r\n",
cop_u->jax_l,
cop_u->cos_c,
par_u ? par_u->cos_c : "~",
@ -305,10 +305,10 @@ _cj_boil_mean(c3_l par_l, u3_noun mop, u3_noun bat)
return 0;
}
/* _cj_boil_mine(): in boiling state, declare a core. RETAINS.
/* _cj_hot_mine(): in hot state, declare a core. RETAINS.
*/
static c3_l
_cj_boil_mine(u3_noun mop, u3_noun cor)
_cj_hot_mine(u3_noun mop, u3_noun cor)
{
u3_noun p_mop, q_mop, r_mop, hr_mop, tr_mop;
@ -320,141 +320,17 @@ _cj_boil_mine(u3_noun mop, u3_noun cor)
// Calculate parent axis.
//
if ( c3y == hr_mop ) {
u3_noun cax = _cj_warm_fend(u3h(u3r_at(q_mop, cor)));
u3_noun cax = u3j_find(u3h(u3r_at(q_mop, cor)));
par_l = u3h(u3h(cax));
u3z(cax);
}
else par_l = 0;
return _cj_boil_mean(par_l, mop, u3h(cor));
return _cj_hot_mean(par_l, mop, u3h(cor));
}
}
static c3_l _cj_warm_ream_at(u3_noun soh, u3_noun cag);
/* _cj_warm_ream_be(): install battery; RETAINS.
*/
static void
_cj_warm_ream_be(c3_l jax_l,
u3_noun soh,
u3_noun mop,
u3_noun bat,
u3_noun cuz)
{
u3h_put(u3R->jed.har_p,
bat,
u3nt(u3nt(jax_l, _cj_warm_hump(jax_l, u3t(cuz)), u3_nul),
u3nc(u3k(soh), u3k(mop)),
u3k(cuz)));
}
/* _cj_warm_ream_is(): reream battery; RETAINS.
*/
static void
_cj_warm_ream_is(c3_l jax_l,
u3_noun soh,
u3_noun mop,
u3_noun sab)
{
if ( u3_nul != sab ) {
u3_noun n_sab, l_sab, r_sab, pn_sab, qn_sab;
u3x_trel(sab, &n_sab, &l_sab, &r_sab);
u3x_cell(n_sab, &pn_sab, &qn_sab);
_cj_warm_ream_be(jax_l, soh, mop, pn_sab, qn_sab);
_cj_warm_ream_is(jax_l, soh, mop, l_sab);
_cj_warm_ream_is(jax_l, soh, mop, r_sab);
}
}
/* _cj_warm_ream_un(): reream under `soh`; RETAINS.
*/
static c3_l
_cj_warm_ream_un(u3_noun soh)
{
u3_noun cag = u3kdb_got(u3k(u3R->jed.das), u3k(soh));
u3_noun sab = u3t(cag);
u3_noun cax;
c3_l jax_l;
if ( u3_none != (cax = u3h_get(u3R->jed.har_p, u3h(u3h(sab)))) ) {
jax_l = u3h(u3h(cax));
u3z(cax);
}
else {
jax_l = _cj_warm_ream_at(soh, cag);
}
u3z(cag);
return jax_l;
}
/* _cj_warm_ream_at(): reream at `soh` and `cag`; RETAINS.
*/
static c3_l
_cj_warm_ream_at(u3_noun soh, u3_noun cag)
{
u3_noun mop = u3h(cag);
u3_noun sab = u3t(cag);
u3_noun p_mop, q_mop, r_mop, hr_mop, tr_mop;
u3x_trel(mop, &p_mop, &q_mop, &r_mop);
u3x_cell(r_mop, &hr_mop, &tr_mop);
{
c3_l par_l, jax_l;
if ( c3y == hr_mop ) {
par_l = _cj_warm_ream_un(tr_mop);
}
else par_l = 0;
jax_l = _cj_boil_mean(par_l, mop, 0);
_cj_warm_ream_is(jax_l, soh, mop, sab);
return jax_l;
}
}
/* _cj_warm_ream_in(): reream in `taw`; RETAINS.
*/
static void
_cj_warm_ream_in(u3_noun taw)
{
if ( u3_nul != taw ) {
u3_noun n_taw, l_taw, r_taw, pn_taw, qn_taw;
u3x_trel(taw, &n_taw, &l_taw, &r_taw);
u3x_cell(n_taw, &pn_taw, &qn_taw);
_cj_warm_ream_at(pn_taw, qn_taw);
_cj_warm_ream_in(l_taw);
_cj_warm_ream_in(r_taw);
}
}
/* _cj_warm_ream(): reream warm from cold state.
*/
static void
_cj_warm_ream(void)
{
c3_assert(u3R == &(u3H->rod_u));
{
_cj_warm_ream_in(u3R->jed.das);
}
}
/* u3j_ream(): reream after restoring from checkpoint.
*/
void
u3j_ream(void)
{
u3h_free(u3R->jed.har_p);
u3R->jed.har_p = u3h_new();
_cj_warm_ream();
}
/* u3j_boot(): initialize jet system.
*/
void
@ -472,42 +348,6 @@ u3j_boot(void)
fprintf(stderr, "boot: installed %d jets\n", jax_l);
}
/* _cj_find(): search for jet, old school. `bat` is RETAINED.
*/
c3_l
_cj_find(u3_noun bat)
{
u3a_road* rod_u = u3R;
while ( 1 ) {
u3_weak jaw = u3h_gut(rod_u->jed.har_p, bat);
if ( u3_none != jaw ) {
u3_assure(u3a_is_cat(u3h(jaw)));
#if 0
if ( rod_u != u3R ) {
fprintf(stderr, "got: %x in %p/%p, %d\r\n",
bat, rod_u, rod_u->jed.har_p, jax);
}
#endif
return (c3_l)u3h(jaw);
}
if ( rod_u->par_u ) {
rod_u = rod_u->par_u;
}
else return 0;
}
}
/* u3j_find(): search for jet. `bat` is RETAINED.
*/
c3_l
u3j_find(u3_noun bat)
{
return _cj_find(bat);
}
/* _cj_soft(): kick softly by arm axis.
*/
static u3_noun
@ -590,7 +430,7 @@ _cj_hook_in(u3_noun cor,
if ( !_(u3du(cor)) ) { return u3m_bail(c3__fail); }
{
u3_weak cax = _cj_warm_fend(bat);
u3_weak cax = u3j_find(bat);
if ( u3_none == cax ) { return u3m_bail(c3__fail); }
{
@ -634,7 +474,7 @@ _cj_hook_in(u3_noun cor,
axe_l))) )
{
if ( 0 == axe_l ) {
u3z(cax);
u3z(cax);
return u3n_nock_on(cor, fol);
} else {
// Tricky: the above case would work here too, but would
@ -678,6 +518,56 @@ u3j_hook(u3_noun cor,
return pro;
}
/* _cj_fine(): validate core.  RETAIN.
**
**   If the head of `cup` is c3y, the core must equal the tail of `cup`.
**
**   Otherwise take `pax` (head of the tail of `mop`) and `par` (tail of
**   the tail of `mop`):
**
**     - if the head of `par` is c3n, `pax` is asserted to be 3 and the
**       tail of `par` must equal the tail of `cor`;
**     - else the noun at axis `pax` in `cor` must be a cell whose head is
**       known to u3j_find(); validation then repeats on that noun using
**       the `cup` and `mop` read out of the calx that was found.
**
**   Produces c3y when the core validates, c3n otherwise.
*/
static c3_o
_cj_fine(u3_noun cup, u3_noun mop, u3_noun cor)
{
  if ( c3y == u3h(cup) ) {
    return u3r_sing(cor, u3t(cup));
  }
  else {
    u3_noun par = u3t(u3t(mop));
    u3_noun pax = u3h(u3t(mop));

    if ( c3n == u3h(par) ) {
      c3_o sam_o;

      c3_assert(3 == pax);

      //  compare once; the result feeds both the diagnostic and the
      //  return value.
      //
      sam_o = u3r_sing(u3t(par), u3t(cor));
      if ( c3n == sam_o ) {
        fprintf(stderr, "fine: core mismatch: %x, %x\r\n",
                        u3t(par), u3t(cor));
      }
      return sam_o;
    }
    else {
      u3_weak pac = u3r_at(pax, cor);

      if ( u3_none == pac || !_(u3du(pac)) ) {
        fprintf(stderr, "fail 1\r\n");
        return c3n;
      }
      else {
        u3_weak cax = u3j_find(u3h(pac));

        if ( u3_none == cax ) {
          fprintf(stderr, "fine: parent not found (%x)\r\n", u3r_mug(u3h(pac)));
          return c3n;
        }
        else {
          c3_o pro_o;

          //  `cup` and `mop` point into `cax`, so `cax` is released only
          //  after the recursive check has finished with them.
          //
          cup = u3h(u3t(u3t(cax)));
          mop = u3t(u3h(u3t(cax)));
          cor = pac;

          pro_o = _cj_fine(cup, mop, cor);
          u3z(cax);
          return pro_o;
        }
      }
    }
  }
}
/* u3j_kick(): new kick.
**
** `axe` is RETAINED by the caller; `cor` is RETAINED iff there
@ -687,29 +577,79 @@ u3_weak
u3j_kick(u3_noun cor, u3_noun axe)
{
if ( !_(u3du(cor)) ) { return u3_none; }
u3t_on(glu_o);
{
u3_noun bat = u3h(cor);
u3_weak cax = _cj_warm_fend(bat);
u3_weak cax = u3j_find(bat);
if ( u3_none == cax ) { return u3_none; }
if ( u3_none == cax ) { u3t_off(glu_o); return u3_none; }
{
u3_noun mop = u3t(u3h(u3t(cax)));
u3_noun hap = u3h(u3t(u3h(cax)));
u3_noun cup = u3h(u3t(u3t(cax)));
u3_noun inx = u3kdb_get(u3k(hap), u3k(axe));
if ( u3_none == inx ) {
u3z(cax); return u3_none;
u3z(cax);
u3t_off(glu_o);
{
c3_o pof_o = __(u3C.wag_w & u3o_debug_cpu);
if ( _(pof_o) ) {
pof_o = u3t_come(bat);
}
if ( _(pof_o) ) {
u3_noun pro = u3n_nock_on(cor, u3nq(9, u3k(axe), 0, 1));
u3t_flee();
return pro;
}
else {
return u3_none;
}
}
}
#if 1
else if ( !_(_cj_fine(cup, mop, cor)) ) {
fprintf(stderr, "improper core %x\r\n", u3r_mug(cor));
u3m_p("improper label", u3h(u3t(u3t(u3h(cax)))));
u3m_bail(c3__exit);
u3t_off(glu_o);
return u3_none;
}
#endif
else {
c3_l jax_l = u3h(u3h(cax));
c3_l jax_l = u3h(u3h(cax));
u3j_core* cop_u = &u3D.ray_u[jax_l];
c3_l inx_l = inx;
c3_l inx_l = inx;
u3j_harm* ham_u = &cop_u->arm_u[inx_l];
u3_noun pro;
c3_o pof_o = __(u3C.wag_w & u3o_debug_cpu);
u3_noun pro;
u3z(cax);
if ( _(pof_o) ) {
pof_o = u3t_come(bat);
}
u3t_off(glu_o);
pro = _cj_kick_z(cor, cop_u, ham_u, axe);
if ( u3_none == pro ) {
if ( _(pof_o) ) {
pro = u3n_nock_on(cor, u3nq(9, u3k(axe), 0, 1));
return pro;
u3t_flee();
return pro;
}
else return u3_none;
}
else {
if ( _(pof_o) ) {
u3t_flee();
}
return pro;
}
}
}
}
@ -733,7 +673,7 @@ u3j_kink(u3_noun cor,
/* _cj_jit(): generate arbitrary warm jet-associated data. RETAIN.
*/
static u3_noun
_cj_jit(c3_l jax_l, u3_noun cor)
_cj_jit(c3_l jax_l, u3_noun bat)
{
return u3_nul;
}
@ -754,10 +694,12 @@ _cj_mine(u3_noun cey, u3_noun cor)
u3_noun cup; // ++corp
u3_noun soh; // ++bash
u3_noun cuz; // ++club
u3_noun lab; // ++path
if ( 0 == q_cey ) {
mop = u3nq(u3k(p_cey), 3, c3n, u3k(u3t(cor)));
cup = u3nc(c3n, u3k(cor));
lab = u3_nul;
}
else {
u3_weak rah = u3r_at(q_cey, cor);
@ -768,7 +710,7 @@ _cj_mine(u3_noun cey, u3_noun cor)
}
else {
u3_noun tab = u3h(rah);
u3_weak cax = _cj_warm_fend(tab);
u3_weak cax = u3j_find(tab);
if ( u3_none == cax ) {
fprintf(stderr, "fund: in %s, parent %x not found at %d\r\n",
@ -788,6 +730,7 @@ _cj_mine(u3_noun cey, u3_noun cor)
else {
cup = u3nc(c3n, u3k(tab));
}
lab = u3k(u3h(u3t(u3t(u3h(cax)))));
u3z(cax);
}
}
@ -815,13 +758,20 @@ _cj_mine(u3_noun cey, u3_noun cor)
// Save warm state.
//
{
c3_l jax_l = _cj_boil_mine(mop, cor);
c3_l jax_l = _cj_hot_mine(mop, cor);
u3_noun bal = u3nc(u3k(p_cey), lab);
#if 0
u3m_p("new jet", bal);
fprintf(stderr, " bat %x, jax %d\r\n", u3r_mug(bat), jax_l);
#endif
u3h_put(u3R->jed.har_p,
bat,
u3nt(u3nt(jax_l,
u3nt(u3nq(jax_l,
_cj_warm_hump(jax_l, r_cey),
_cj_jit(jax_l, cor)),
bal,
_cj_jit(jax_l, bat)),
u3nc(soh, mop),
cuz));
}
@ -836,10 +786,11 @@ u3j_mine(u3_noun clu, u3_noun cor)
u3_noun bat = u3h(cor);
u3_noun cax;
u3t_on(glu_o);
if ( !_(u3du(cor)) ) {
u3z(clu);
}
else if ( u3_none != (cax = _cj_warm_fend(bat)) ) {
else if ( u3_none != (cax = u3j_find(bat)) ) {
u3z(cax); u3z(clu);
}
else {
@ -851,15 +802,16 @@ u3j_mine(u3_noun clu, u3_noun cor)
}
}
u3z(cor);
u3t_off(glu_o);
}
/* _cj_cold_reap_to: reap clog map. RETAINS `sab`, TRANSFERS `bas`.
/* _cj_cold_reap_to: reap clog list. RETAINS `sab`, TRANSFERS `bam`.
*/
static u3_noun
_cj_cold_reap_to(u3_noun sab, u3_noun bas)
_cj_cold_reap_to(u3_noun sab, u3_noun bam)
{
if ( u3_nul == sab ) {
return bas;
return bam;
}
else {
u3_noun n_sab, l_sab, r_sab, pn_sab, qn_sab;
@ -867,8 +819,8 @@ _cj_cold_reap_to(u3_noun sab, u3_noun bas)
u3x_trel(sab, &n_sab, &l_sab, &r_sab);
u3x_cell(n_sab, &pn_sab, &qn_sab);
{
bas = _cj_cold_reap_to(l_sab, bas);
bas = _cj_cold_reap_to(r_sab, bas);
bam = _cj_cold_reap_to(l_sab, bam);
bam = _cj_cold_reap_to(r_sab, bam);
// If the battery is not junior, or if it has been
// already collected for the product, promote it.
@ -876,23 +828,52 @@ _cj_cold_reap_to(u3_noun sab, u3_noun bas)
if ( _(u3a_left(pn_sab)) ) {
u3_noun bat = u3a_take(pn_sab);
bas = u3kdb_put(bas, bat, u3a_take(qn_sab));
bam = u3nc(u3nc(bat, u3a_take(qn_sab)), bam);
}
return bas;
return bam;
}
}
}
/* _cj_cold_reap_with(): unify old and new battery maps.  TRANSFERS.
**
**   `bam` is a list of cells.  For each item, the head and tail are
**   inserted into `sab` with u3kdb_put(), each with a fresh reference.
**   `bam` is then released and the updated `sab` is produced.
*/
static u3_noun
_cj_cold_reap_with(u3_noun sab, u3_noun bam)
{
  u3_noun lis;

  for ( lis = bam; u3_nul != lis; lis = u3t(lis) ) {
    u3_noun kev = u3h(lis);
    u3_noun key = u3k(u3h(kev));
    u3_noun val = u3k(u3t(kev));

    sab = u3kdb_put(sab, key, val);
  }

  u3z(bam);
  return sab;
}
/* _cj_cold_reap_at(): reap haw node. RETAINS.
*/
static void
_cj_cold_reap_at(u3_noun soh, u3_noun cag)
{
u3_noun sab = _cj_cold_reap_to(u3t(cag), u3_nul);
u3_noun bam = _cj_cold_reap_to(u3t(cag), u3_nul);
if ( u3_nul != bam ) {
u3_noun hoe, sab;
if ( u3_nul != sab ) {
soh = u3a_take(soh);
cag = u3nc(u3a_take(u3h(cag)), sab);
hoe = u3kdb_get(u3k(u3R->jed.das), u3k(soh));
if ( u3_none == hoe ) {
sab = _cj_cold_reap_with(u3_nul, bam);
cag = u3nc(u3a_take(u3h(cag)), sab);
}
else {
sab = _cj_cold_reap_with(u3k(u3t(hoe)), bam);
cag = u3nc(u3k(u3h(hoe)), sab);
}
u3z(hoe);
u3R->jed.das = u3kdb_put(u3R->jed.das, soh, cag);
}
@ -924,17 +905,22 @@ _cj_warm_reap(u3_noun kev)
u3_noun cax = u3t(kev);
if ( _(u3a_left(bat)) ) {
u3_noun tab = u3a_take(bat);
u3_noun xac = u3a_take(cax);
if ( !_(u3a_is_junior(u3R, bat)) &&
(u3_none != u3h_git(u3R->jed.har_p, bat)) ) {
fprintf(stderr, "reap: promote collision (bat %x)\r\n", u3r_mug(bat));
u3m_p("collision", u3h(u3t(u3t(u3h(cax)))));
}
else {
u3_noun tab = u3a_take(bat);
u3_noun xac = u3a_take(cax);
#if 0
fprintf(stderr, "reap: bat %x (%d, %d), cax %x\r\n",
u3r_mug(tab),
u3a_is_junior(u3R, bat),
u3a_use(tab),
u3r_mug(xac));
u3m_p("hot jet", u3h(u3t(u3t(u3h(cax)))));
fprintf(stderr, " bat %x\r\n", u3r_mug(tab));
#endif
u3h_put(u3R->jed.har_p, tab, xac);
u3z(tab);
u3h_put(u3R->jed.har_p, tab, xac);
u3z(tab);
}
}
}
@ -946,3 +932,146 @@ u3j_reap(u3_noun das, u3p(u3h_root) har_p)
_cj_cold_reap_in(das);
u3h_walk(har_p, _cj_warm_reap);
}
static c3_l _cj_warm_ream_at(u3_noun soh, u3_noun* lab, u3_noun cag);
/* _cj_warm_ream_be(): install battery; RETAINS.
**
**   Stores under key `bat`, in the hashtable u3R->jed.har_p, the noun
**
**     [[jax_l hap lab jit] [soh mop] cuz]
**
**   where `hap` is _cj_warm_hump() of the tail of `cuz` and `jit` is
**   _cj_jit() for this battery.  `lab`, `soh`, `mop` and `cuz` are
**   stored with fresh references.
*/
static void
_cj_warm_ream_be(c3_l    jax_l,
                 u3_noun soh,
                 u3_noun lab,
                 u3_noun mop,
                 u3_noun bat,
                 u3_noun cuz)
{
  u3_noun hap, jit, cax;

#if 0
  u3m_p("old jet", lab);
  fprintf(stderr, " bat %x, soh %x, jax %d\r\n",
                  u3r_mug(bat), u3r_mug(soh), jax_l);
#endif

  hap = _cj_warm_hump(jax_l, u3t(cuz));
  jit = _cj_jit(jax_l, bat);

  cax = u3nt(u3nq(jax_l, hap, u3k(lab), jit),
             u3nc(u3k(soh), u3k(mop)),
             u3k(cuz));

  u3h_put(u3R->jed.har_p, bat, cax);
}
/* _cj_warm_ream_is(): reream battery; RETAINS.
**
**   `sab` is a tree of [node left right] whose node is a [key value]
**   cell.  Every node is installed with _cj_warm_ream_be(), visiting the
**   node first, then its left subtree, then its right subtree.
*/
static void
_cj_warm_ream_is(c3_l    jax_l,
                 u3_noun soh,
                 u3_noun lab,
                 u3_noun mop,
                 u3_noun sab)
{
  while ( u3_nul != sab ) {
    u3_noun n_sab, l_sab, r_sab, pn_sab, qn_sab;

    u3x_trel(sab, &n_sab, &l_sab, &r_sab);
    u3x_cell(n_sab, &pn_sab, &qn_sab);

    _cj_warm_ream_be(jax_l, soh, lab, mop, pn_sab, qn_sab);
    _cj_warm_ream_is(jax_l, soh, lab, mop, l_sab);

    //  the right subtree is handled by the next loop iteration
    //
    sab = r_sab;
  }
}
/* _cj_warm_ream_un(): reream under `soh`; RETAINS, transfers `*lab`.
**
**   Fetches the entry for `soh` from u3R->jed.das and takes the key of
**   the root node of its battery tree.  If that key is already present
**   in u3R->jed.har_p, the jet index and label are read from the stored
**   entry; otherwise the entry is rereamed with _cj_warm_ream_at().
**   Produces the jet index; `*lab` receives the label.
*/
static c3_l
_cj_warm_ream_un(u3_noun soh, u3_noun* lab)
{
  u3_noun cag = u3kdb_got(u3k(u3R->jed.das), u3k(soh));
  u3_noun bat = u3h(u3h(u3t(cag)));
  u3_noun cax = u3h_get(u3R->jed.har_p, bat);
  c3_l    jax_l;

  if ( u3_none == cax ) {
    jax_l = _cj_warm_ream_at(soh, lab, cag);
  }
  else {
    //  head of the entry is [jax hap lab jit]
    //
    u3_noun hed = u3h(cax);

    jax_l = u3h(hed);
    *lab  = u3k(u3h(u3t(u3t(hed))));
    u3z(cax);
  }

  u3z(cag);
  return jax_l;
}
/* _cj_warm_ream_at(): reream at `soh` and `cag`; RETAINS, transfers `*lab`.
**
**   `cag` is a cell of `mop` and a battery tree.  `mop` is a triple whose
**   third element is a cell [hr tr].  When `hr` is c3y the parent `tr`
**   is resolved first with _cj_warm_ream_un(), giving the parent jet
**   index and parent label; otherwise both default to 0 / u3_nul.
**   `*lab` becomes [p.mop parent-label], the core is declared with
**   _cj_hot_mean(), and every battery in the tree is installed.
*/
static c3_l
_cj_warm_ream_at(u3_noun soh, u3_noun* lab, u3_noun cag)
{
  u3_noun mop = u3h(cag);
  u3_noun sab = u3t(cag);
  u3_noun p_mop, q_mop, r_mop, hr_mop, tr_mop;

  u3x_trel(mop, &p_mop, &q_mop, &r_mop);
  u3x_cell(r_mop, &hr_mop, &tr_mop);
  {
    c3_l    par_l = 0;
    u3_noun pal   = u3_nul;
    c3_l    jax_l;

    if ( c3y == hr_mop ) {
      par_l = _cj_warm_ream_un(tr_mop, &pal);
    }

    *lab  = u3nc(u3k(p_mop), pal);
    jax_l = _cj_hot_mean(par_l, mop, 0);

    _cj_warm_ream_is(jax_l, soh, *lab, mop, sab);
    return jax_l;
  }
}
/* _cj_warm_ream_in(): reream in `taw`; RETAINS.
**
**   `taw` is a tree of [node left right] whose node is a [key value]
**   cell.  Each node is rereamed with _cj_warm_ream_at() and the label
**   it hands back is released; the node is visited first, then the left
**   subtree, then the right subtree.
*/
static void
_cj_warm_ream_in(u3_noun taw)
{
  while ( u3_nul != taw ) {
    u3_noun n_taw, l_taw, r_taw, pn_taw, qn_taw;
    u3_noun lab;

    u3x_trel(taw, &n_taw, &l_taw, &r_taw);
    u3x_cell(n_taw, &pn_taw, &qn_taw);

    _cj_warm_ream_at(pn_taw, &lab, qn_taw);
    u3z(lab);

    _cj_warm_ream_in(l_taw);

    //  the right subtree is handled by the next loop iteration
    //
    taw = r_taw;
  }
}
/* _cj_warm_ream(): reream warm from cold state.
**
**   Asserts that the current road is the home road, then rereams every
**   entry of u3R->jed.das.
*/
static void
_cj_warm_ream(void)
{
  c3_assert(u3R == &(u3H->rod_u));

  _cj_warm_ream_in(u3R->jed.das);
}
/* u3j_ream(): reream after restoring from checkpoint.
*/
void
u3j_ream(void)
{
u3h_free(u3R->jed.har_p);
u3R->jed.har_p = u3h_new();
_cj_warm_ream();
}

172
n/m.c
View File

@ -18,7 +18,7 @@
c3_o
u3m_trap(void);
#else
# define u3m_trap() (u3_noun)(setjmp(u3R->esc.buf))
# define u3m_trap() (u3_noun)(_setjmp(u3R->esc.buf))
#endif
/* u3m_signal(): treat a nock-level exception as a signal interrupt.
@ -204,7 +204,7 @@ _cm_signal_recover(c3_l sig_l, u3_noun arg)
// A top-level crash - rather odd. We should GC.
//
_cm_emergency("recover: top", sig_l);
u3H->rod_u.how.fag_w |= u3a_flag_gc;
u3C.wag_w |= u3o_check_corrupt;
// Reset the top road - the problem could be a fat cap.
//
@ -278,6 +278,8 @@ _cm_signal_deep(c3_w sec_w)
setitimer(ITIMER_VIRTUAL, &itm_u, 0);
signal(SIGVTALRM, _cm_signal_handle_alrm);
}
u3t_boot();
}
/* _cm_signal_done():
@ -299,6 +301,7 @@ _cm_signal_done()
setitimer(ITIMER_VIRTUAL, &itm_u, 0);
}
u3_unix_ef_move();
u3t_boff();
}
/* u3m_signal(): treat a nock-level exception as a signal interrupt.
@ -447,10 +450,6 @@ _cm_pave(c3_o nuu_o, c3_o bug_o)
u3a_words - 1);
u3R = &u3H->rod_u;
}
if ( _(bug_o) ) {
u3R->how.fag_w |= u3a_flag_debug;
}
}
#if 0
@ -535,15 +534,10 @@ u3m_bail(u3_noun how)
if ( (c3__exit == how) && (u3R == &u3H->rod_u) ) {
abort();
}
if ( c3__fail == how ) {
abort();
}
if ( c3__foul == how ) {
abort();
}
/* Printf some metadata.
*/
if ( c3__exit != how ) {
if ( c3__exit != how && (_(u3ud(how)) || 1 != u3h(how)) ) {
if ( _(u3ud(how)) ) {
c3_c str_c[5];
@ -562,8 +556,12 @@ u3m_bail(u3_noun how)
}
}
if ( c3__oops == how ) {
abort();
switch ( how ) {
case c3__fail:
case c3__foul:
case c3__meme:
case c3__oops:
abort();
}
if ( &(u3H->rod_u) == u3R ) {
@ -623,12 +621,14 @@ u3m_leap(c3_w pad_w)
/* Measure the pad - we'll need it.
*/
{
#if 0
if ( pad_w < u3R->all.fre_w ) {
pad_w = 0;
}
else {
pad_w -= u3R->all.fre_w;
}
#endif
if ( (pad_w + c3_wiseof(u3a_road)) >= u3a_open(u3R) ) {
u3m_bail(c3__meme);
}
@ -677,9 +677,6 @@ u3m_leap(c3_w pad_w)
/* Set up the new road.
*/
{
if ( u3R->how.fag_w & u3a_flag_debug ) {
rod_u->how.fag_w |= u3a_flag_debug;
}
u3R = rod_u;
_pave_parts();
}
@ -728,18 +725,19 @@ u3m_hate(c3_w pad_w)
u3_noun
u3m_love(u3_noun pro)
{
u3_noun das = u3R->jed.das;
u3p(u3h_root) har_p = u3R->jed.har_p;
{
u3_noun das = u3R->jed.das;
u3p(u3h_root) har_p = u3R->jed.har_p;
u3m_fall();
u3m_fall();
pro = u3a_take(pro);
pro = u3a_take(pro);
u3j_reap(das, har_p);
u3R->cap_p = u3R->ear_p;
u3R->ear_p = 0;
u3j_reap(das, har_p);
u3R->cap_p = u3R->ear_p;
u3R->ear_p = 0;
}
return pro;
}
@ -794,9 +792,9 @@ u3m_water(c3_w* low_w, c3_w* hig_w)
*/
u3_noun
u3m_soft_top(c3_w sec_w, // timer seconds
c3_w pad_w, // base memory pad
u3_funk fun_f,
u3_noun arg)
c3_w pad_w, // base memory pad
u3_funk fun_f,
u3_noun arg)
{
u3_noun why, pro;
c3_l sig_l;
@ -806,6 +804,10 @@ u3m_soft_top(c3_w sec_w, // timer seconds
_cm_signal_deep(0);
if ( 0 != (sig_l = sigsetjmp(u3_Signal, 1)) ) {
// reinitialize trace state
//
u3t_init();
// return to blank state
//
_cm_signal_done();
@ -821,13 +823,13 @@ u3m_soft_top(c3_w sec_w, // timer seconds
/* Trap for ordinary nock exceptions.
*/
if ( 0 == (why = (u3_noun)setjmp(u3R->esc.buf)) ) {
if ( 0 == (why = (u3_noun)_setjmp(u3R->esc.buf)) ) {
pro = fun_f(arg);
/* Make sure the inner routine did not create garbage.
*/
if ( u3R->how.fag_w & u3a_flag_debug ) {
u3e_grab("top", pro, u3_none);
if ( u3C.wag_w & u3o_debug_ram ) {
u3m_grab(pro, u3_none);
}
/* Revert to external signal regime.
@ -862,7 +864,7 @@ u3m_soft_top(c3_w sec_w, // timer seconds
u3_noun
u3m_soft_sure(u3_funk fun_f, u3_noun arg)
{
u3_noun pro, pru = u3m_soft_top(0, 32768, fun_f, arg);
u3_noun pro, pru = u3m_soft_top(0, (1 << 18), fun_f, arg);
c3_assert(_(u3du(pru)));
pro = u3k(u3t(pru));
@ -893,15 +895,15 @@ u3m_soft_nock(u3_noun bus, u3_noun fol)
*/
u3_noun
u3m_soft_run(u3_noun fly,
u3_funq fun_f,
u3_noun aga,
u3_noun agb)
u3_funq fun_f,
u3_noun aga,
u3_noun agb)
{
u3_noun why, pro;
u3_noun why = 0, pro;
/* Record the cap, and leap.
*/
u3m_hate(32768);
u3m_hate(1 << 18);
/* Configure the new road.
*/
@ -910,21 +912,21 @@ u3m_soft_run(u3_noun fly,
u3R->pro.don = u3R->par_u->pro.don;
u3R->bug.tax = 0;
}
u3t_on(coy_o);
/* Trap for exceptions.
*/
if ( 0 == (why = (u3_noun)setjmp(u3R->esc.buf)) ) {
if ( 0 == (why = (u3_noun)_setjmp(u3R->esc.buf)) ) {
u3t_off(coy_o);
pro = fun_f(aga, agb);
if ( u3R->how.fag_w & u3a_flag_debug ) {
u3e_grab("top", pro, u3_none);
}
/* Produce success, on the old road.
*/
pro = u3nc(0, u3m_love(pro));
}
else {
u3t_init();
/* Produce - or fall again.
*/
{
@ -989,7 +991,7 @@ u3m_soft_esc(u3_noun sam)
/* Record the cap, and leap.
*/
u3m_hate(32768);
u3m_hate(1 << 18);
/* Configure the new road.
*/
@ -1001,7 +1003,7 @@ u3m_soft_esc(u3_noun sam)
/* Trap for exceptions.
*/
if ( 0 == (why = (u3_noun)setjmp(u3R->esc.buf)) ) {
if ( 0 == (why = (u3_noun)_setjmp(u3R->esc.buf)) ) {
pro = u3n_slam_on(fly, sam);
/* Fall back to the old road, leaving temporary memory intact.
@ -1009,6 +1011,8 @@ u3m_soft_esc(u3_noun sam)
pro = u3m_love(pro);
}
else {
u3t_init();
/* Push the error back up to the calling context - not the run we
** are in, but the caller of the run, matching pure nock semantics.
*/
@ -1024,6 +1028,34 @@ u3m_soft_esc(u3_noun sam)
return pro;
}
/* u3m_grab(): garbage-collect the world, plus extra roots.
**
**   Marks via u3v_mark() and u3m_mark(), then marks `som` and each
**   following variadic noun up to (not including) the first u3_none,
**   then calls u3a_sweep().  The argument list must be terminated
**   with u3_none; a u3_none `som` means "no extra roots".
*/
void
u3m_grab(u3_noun som, ...)  // terminate with u3_none
{
  va_list arg_u;
  u3_noun rot;

  // u3h_free(u3R->cax.har_p);
  // u3R->cax.har_p = u3h_new();

  u3v_mark();
  u3m_mark();

  //  Mark the caller's extra roots.  va_arg() is only consulted after
  //  a non-sentinel root, so nothing is read past the terminator.
  //
  va_start(arg_u, som);
  for ( rot = som; u3_none != rot; rot = va_arg(arg_u, u3_noun) ) {
    u3a_mark_noun(rot);
  }
  va_end(arg_u);

  u3a_sweep();
}
/* u3m_soft(): top-level wrapper.
**
** Produces [0 product] or [%error (list tank)], top last.
@ -1035,7 +1067,7 @@ u3m_soft(c3_w sec_w,
{
u3_noun why;
why = u3m_soft_top(sec_w, (1 << 17), fun_f, arg); // 512K pad
why = u3m_soft_top(sec_w, (1 << 18), fun_f, arg); // 512K pad
if ( 0 == u3h(why) ) {
return why;
@ -1328,6 +1360,22 @@ _cm_signals(void)
exit(1);
}
// signal(SIGINT, _loom_stop);
// Block SIGPROF, so that if/when we reactivate it on the
// main thread for profiling, we won't get hits in parallel
// on other threads.
{
sigset_t set;
sigemptyset(&set);
sigaddset(&set, SIGPROF);
if ( 0 != pthread_sigmask(SIG_BLOCK, &set, NULL) ) {
perror("pthread_sigmask");
exit(1);
}
}
}
/* _cm_init(): start the environment, with/without checkpointing.
@ -1372,34 +1420,6 @@ _cm_init(c3_o chk_o)
}
}
/* u3e_grab(): garbage-collect the world, plus extra roots, then
**             sweep, handing the caption `cap_c` to u3a_sweep().
**
**   Marks via u3v_mark() and u3m_mark(), then marks `som` and each
**   following variadic noun up to (not including) the first u3_none.
**   The argument list must be terminated with u3_none.
*/
void
u3e_grab(c3_c* cap_c, u3_noun som, ...)  // terminate with u3_none
{
  va_list arg_u;
  u3_noun rot;

  // u3h_free(u3R->cax.har_p);
  // u3R->cax.har_p = u3h_new();

  u3v_mark();
  u3m_mark();

  //  Mark the caller's extra roots.  va_arg() is only consulted after
  //  a non-sentinel root, so nothing is read past the terminator.
  //
  va_start(arg_u, som);
  for ( rot = som; u3_none != rot; rot = va_arg(arg_u, u3_noun) ) {
    u3a_mark_noun(rot);
  }
  va_end(arg_u);

  u3a_sweep(cap_c);
}
/* u3m_boot(): start the u3 system.
*/
void
@ -1413,6 +1433,10 @@ u3m_boot(c3_o nuu_o, c3_o bug_o, c3_c* dir_c)
*/
nuu_o = u3e_live(nuu_o, dir_c);
/* Activate tracing.
*/
u3t_init();
/* Construct or activate the allocator.
*/
_cm_pave(nuu_o, bug_o);

94
n/n.c
View File

@ -4,15 +4,15 @@
*/
#include "all.h"
extern int FOO;
static u3_noun _n_nock_on(u3_noun bus, u3_noun fol);
/* _cn_hint(): process hint.
/* _n_hint(): process hint.
*/
static u3_noun
_cn_hint(u3_noun zep,
u3_noun hod,
u3_noun bus,
u3_noun nex)
_n_hint(u3_noun zep,
u3_noun hod,
u3_noun bus,
u3_noun nex)
{
switch ( zep ) {
default: {
@ -20,7 +20,7 @@ _cn_hint(u3_noun zep,
u3a_lose(zep);
u3a_lose(hod);
return u3n_nock_on(bus, nex);
return _n_nock_on(bus, nex);
}
case c3__hunk:
@ -40,19 +40,21 @@ _cn_hint(u3_noun zep,
u3t(u3t(u3t(hod))));
}
#endif
pro = u3n_nock_on(bus, nex);
pro = _n_nock_on(bus, nex);
u3t_drop();
return pro;
}
case c3__slog: {
u3t_off(noc_o);
u3t_slog(hod);
return u3n_nock_on(bus, nex);
u3t_on(noc_o);
return _n_nock_on(bus, nex);
}
case c3__germ: {
u3_noun pro = u3n_nock_on(bus, nex);
u3_noun pro = _n_nock_on(bus, nex);
if ( c3y == u3r_sing(pro, hod) ) {
u3z(pro); return hod;
@ -62,16 +64,19 @@ _cn_hint(u3_noun zep,
}
case c3__fast: {
u3_noun pro = u3n_nock_on(bus, nex);
u3_noun pro = _n_nock_on(bus, nex);
u3t_off(noc_o);
u3j_mine(hod, u3k(pro));
u3t_on(noc_o);
return pro;
}
case c3__memo: {
u3z(hod);
#if 0
return u3n_nock_on(bus, nex);
return _n_nock_on(bus, nex);
#else
{
u3_noun pro = u3z_find_2(c3__nock, bus, nex);
@ -80,7 +85,7 @@ _cn_hint(u3_noun zep,
u3z(bus); u3z(nex);
return pro;
}
pro = u3n_nock_on(u3k(bus), u3k(nex));
pro = _n_nock_on(u3k(bus), u3k(nex));
u3z_save_2(c3__nock, bus, nex, pro);
u3z(bus); u3z(nex);
@ -93,7 +98,7 @@ _cn_hint(u3_noun zep,
case c3__sole: {
u3z(hod);
{
u3_noun pro = u3n_nock_on(bus, nex);
u3_noun pro = _n_nock_on(bus, nex);
// return u3z_uniq(pro);
return pro;
@ -102,12 +107,10 @@ _cn_hint(u3_noun zep,
}
}
extern u3_noun BAD;
/* u3n_nock_on(): produce .*(bus fol). Do not virtualize.
/* _n_nock_on(): produce .*(bus fol). Do not virtualize.
*/
u3_noun
u3n_nock_on(u3_noun bus, u3_noun fol)
_n_nock_on(u3_noun bus, u3_noun fol)
{
u3_noun hib, gal;
@ -120,8 +123,8 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
if ( c3y == u3r_du(hib) ) {
u3_noun poz, riv;
poz = u3n_nock_on(u3k(bus), u3k(hib));
riv = u3n_nock_on(bus, u3k(gal));
poz = _n_nock_on(u3k(bus), u3k(hib));
riv = _n_nock_on(bus, u3k(gal));
u3a_lose(fol);
return u3i_cell(poz, riv);
@ -151,8 +154,8 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
c3_assert(!"not reached");
case 2: {
u3_noun nex = u3n_nock_on(u3k(bus), u3k(u3t(gal)));
u3_noun seb = u3n_nock_on(bus, u3k(u3h(gal)));
u3_noun nex = _n_nock_on(u3k(bus), u3k(u3t(gal)));
u3_noun seb = _n_nock_on(bus, u3k(u3h(gal)));
u3a_lose(fol);
bus = seb;
@ -164,7 +167,7 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
case 3: {
u3_noun gof, pro;
gof = u3n_nock_on(bus, u3k(gal));
gof = _n_nock_on(bus, u3k(gal));
pro = u3r_du(gof);
u3a_lose(gof); u3a_lose(fol);
@ -175,7 +178,7 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
case 4: {
u3_noun gof, pro;
gof = u3n_nock_on(bus, u3k(gal));
gof = _n_nock_on(bus, u3k(gal));
pro = u3i_vint(gof);
u3a_lose(fol);
@ -184,7 +187,7 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
c3_assert(!"not reached");
case 5: {
u3_noun wim = u3n_nock_on(bus, u3k(gal));
u3_noun wim = _n_nock_on(bus, u3k(gal));
u3_noun pro = u3r_sing(u3h(wim), u3t(wim));
u3a_lose(wim); u3a_lose(fol);
@ -197,7 +200,7 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
u3x_trel(gal, &b_gal, &c_gal, &d_gal);
{
u3_noun tys = u3n_nock_on(u3k(bus), u3k(b_gal));
u3_noun tys = _n_nock_on(u3k(bus), u3k(b_gal));
u3_noun nex;
if ( 0 == tys ) {
@ -218,7 +221,7 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
u3x_cell(gal, &b_gal, &c_gal);
{
u3_noun bod = u3n_nock_on(bus, u3k(b_gal));
u3_noun bod = _n_nock_on(bus, u3k(b_gal));
u3_noun nex = u3k(c_gal);
u3a_lose(fol);
@ -234,7 +237,7 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
u3x_cell(gal, &b_gal, &c_gal);
{
u3_noun heb = u3n_nock_on(u3k(bus), u3k(b_gal));
u3_noun heb = _n_nock_on(u3k(bus), u3k(b_gal));
u3_noun bod = u3nc(heb, bus);
u3_noun nex = u3k(c_gal);
@ -251,8 +254,12 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
u3x_cell(gal, &b_gal, &c_gal);
{
u3_noun seb = u3n_nock_on(bus, u3k(c_gal));
u3_noun pro = u3j_kick(seb, b_gal);
u3_noun seb = _n_nock_on(bus, u3k(c_gal));
u3_noun pro;
u3t_off(noc_o);
pro = u3j_kick(seb, b_gal);
u3t_on(noc_o);
if ( u3_none != pro ) {
u3a_lose(fol);
@ -288,7 +295,7 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
u3_noun d_gal = q_gal;
zep = u3k(b_gal);
hod = u3n_nock_on(u3k(bus), u3k(c_gal));
hod = _n_nock_on(u3k(bus), u3k(c_gal));
nex = u3k(d_gal);
}
else {
@ -301,13 +308,17 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
}
u3a_lose(fol);
return _cn_hint(zep, hod, bus, nex);
return _n_hint(zep, hod, bus, nex);
}
}
case 11: {
u3_noun gof = u3n_nock_on(bus, u3k(gal));
u3_noun val = u3m_soft_esc(gof);
u3_noun gof = _n_nock_on(bus, u3k(gal));
u3_noun val;
u3t_off(noc_o);
val = u3m_soft_esc(u3k(gof));
u3t_on(noc_o);
if ( !_(u3du(val)) ) {
u3m_bail(u3nt(1, gof, 0));
@ -315,6 +326,7 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
else {
u3_noun pro;
u3z(gof);
u3z(fol);
pro = u3k(u3t(val));
u3z(val);
@ -327,6 +339,20 @@ u3n_nock_on(u3_noun bus, u3_noun fol)
}
}
/* u3n_nock_on(): produce .*(bus fol).  Do not virtualize.
**
**   Public entry point: runs _n_nock_on(bus, fol) bracketed by
**   u3t_on(noc_o) / u3t_off(noc_o) and returns its product.
*/
u3_noun
u3n_nock_on(u3_noun bus, u3_noun fol)
{
  u3_noun ret;

  u3t_on(noc_o);
  ret = _n_nock_on(bus, fol);

  //  NOTE(review): not reached if _n_nock_on() leaves non-locally;
  //  the soft wrappers appear to reset trace state with u3t_init()
  //  in that case -- confirm.
  //
  u3t_off(noc_o);

  return ret;
}
/* u3n_kick_on(): fire `gat` without changing the sample.
*/
u3_noun

28
n/r.c
View File

@ -55,21 +55,25 @@ _frag_deep(c3_w a_w, u3_noun b)
** Return fragment (a) of (b), or u3_none if not applicable.
*/
u3_weak
u3r_at(u3_atom a,
u3_noun b)
u3r_at(u3_atom a, u3_noun b)
{
c3_assert(u3_none != a);
c3_assert(u3_none != b);
u3t_on(far_o);
if ( 0 == a ) {
u3t_off(far_o);
return u3_none;
}
if ( _(u3a_is_cat(a)) ) {
u3t_off(far_o);
return _frag_word(a, b);
}
else {
if ( !_(u3a_is_pug(a)) ) {
u3t_off(far_o);
return u3_none;
}
else {
@ -83,11 +87,15 @@ u3r_at(u3_atom a,
b = _frag_deep(a_u->buf_w[len_w - 1], b);
if ( u3_none == b ) {
u3t_off(far_o);
return b;
} else {
len_w--;
}
}
u3t_off(far_o);
return b;
}
}
@ -387,7 +395,7 @@ u3r_mug(u3_noun veb)
if ( _(u3a_is_cat(veb)) ) {
c3_w x_w = veb;
return _mug_words(2166136261, (veb ? 1 : 0), &x_w);
return _mug_words(2166136261U, (veb ? 1 : 0), &x_w);
} else {
u3a_noun* veb_u = u3a_to_ptr(veb);
@ -407,7 +415,7 @@ u3r_mug(u3_noun veb)
u3a_atom* veb_u = u3a_to_ptr(veb);
c3_w len_w = veb_u->len_w;
veb_u->mug_w = _mug_words_buf(2166136261, len_w, veb);
veb_u->mug_w = _mug_words_buf(2166136261U, len_w, veb);
return veb_u->mug_w;
}
}
@ -422,7 +430,7 @@ c3_w
u3r_mug_words(const c3_w *buf_w,
c3_w len_w)
{
return _mug_words(2166136261, len_w, buf_w);
return _mug_words(2166136261U, len_w, buf_w);
}
/* u3r_mug_string():
@ -432,7 +440,7 @@ u3r_mug_words(const c3_w *buf_w,
c3_w
u3r_mug_string(const c3_c *a_c)
{
return _mug_bytes(2166136261, strlen(a_c), (c3_y *)a_c);
return _mug_bytes(2166136261U, strlen(a_c), (c3_y *)a_c);
}
/* u3r_mug_cell():
@ -703,7 +711,13 @@ c3_o
u3r_sing(u3_noun a,
u3_noun b)
{
return _sing_x(a, b);
c3_o ret_o;
u3t_on(euq_o);
ret_o = _sing_x(a, b);
u3t_off(euq_o);
return ret_o;
}
/* u3r_sung(): yes iff (a) and (b) are the same noun, unifying equals.

281
n/t.c
View File

@ -3,6 +3,7 @@
** This file is in the public domain.
*/
#include "all.h"
#include <pthread.h>
/* u3t_push(): push on trace stack.
*/
@ -60,31 +61,187 @@ u3t_slog(u3_noun hod)
void
u3t_heck(u3_atom cog)
{
printf("ct: heck %s\r\n", u3r_string(cog));
// Profile sampling, because it allocates on the home road,
// only works when we're not at home.
//
if ( &(u3H->rod_u) != u3R ) {
u3a_road* rod_u;
rod_u = u3R;
u3R = &(u3H->rod_u);
{
if ( 0 == u3R->pro.day ) { u3R->pro.day = u3v_do("doss", 0); }
u3R->pro.day = u3dc("pi-heck", cog, u3R->pro.day);
if ( 0 == u3R->pro.day ) {
u3R->pro.day = u3v_do("doss", 0);
}
u3R->pro.day = u3dc("pi-heck", cog, u3R->pro.day);
}
u3R = rod_u;
}
}
/* _t_jet_label(): look up the label recorded for battery `bat`.
**
**   Searches `rod_u->jed.har_p` with u3h_git(), then the same table on
**   each parent road (par_u) in turn.  On the first hit `cax`, produces
**   u3h(u3t(u3t(u3h(cax)))); produces u3_none when no road in the chain
**   has an entry.  `rod_u` must not be null.
*/
u3_weak
_t_jet_label(u3a_road* rod_u, u3_noun bat)
{
  u3a_road* cur_u;

  for ( cur_u = rod_u; ; cur_u = cur_u->par_u ) {
    u3_weak hit = u3h_git(cur_u->jed.har_p, bat);

    if ( u3_none != hit ) {
      return u3h(u3t(u3t(u3h(hit))));
    }
    //  ran out of ancestors without finding the battery
    //
    if ( !cur_u->par_u ) {
      return u3_none;
    }
  }
}
#if 1
/* _t_samp_process(): process raw sample data from live road.
**
**   Walks `rod_u` and then each parent road (par_u), and on each road
**   walks the pro.don list.  Every list head is treated as a battery
**   and resolved to a label with _t_jet_label(); a battery with no
**   label on any road aborts the process.
**
**   While walking, keeps a stack `pef` of [label map] pairs and a map
**   `muf` from label to the depth at which it was first pushed.  When
**   a label recurs, the stack is popped back to that first occurrence,
**   so a recursive segment collapses to a single entry.
**
**   Produces the surviving labels as a list, in the order they were
**   first encountered.
*/
static u3_noun
_t_samp_process(u3_road* rod_u)
{
u3_noun pef = u3_nul; // (list (pair path (map path ,@ud)))
u3_noun muf = u3_nul; // (map path ,@ud)
// len_w: number of entries currently on pef.
c3_w len_w = 0;
// Accumulate a label/map stack which collapses recursive segments.
//
// Outer loop: the given road, then each ancestor via par_u.
while ( rod_u ) {
u3_noun don = rod_u->pro.don;
// Inner loop: each entry of this road's pro.don list, head first.
while ( u3_nul != don ) {
u3_noun bat = u3h(don);
u3_noun lab;
// Find the label from this battery, surface allocated.
//
{
u3_noun laj = _t_jet_label(rod_u, bat);
// No label on this road or any ancestor: treated as fatal.
if ( u3_none == laj ) { abort(); }
// lab = u3nc(u3i_string("foobar"), u3_nul);
// NOTE(review): presumably u3a_take() copies laj onto the current
// road and u3a_wash() clears the copy bookkeeping -- confirm.
lab = u3a_take(laj); u3a_wash(laj);
}
// Add the label to the traced label stack, trimming recursion.
//
{
u3_noun old;
if ( u3_none == (old = u3kdb_get(u3k(muf), u3k(lab))) ) {
// First occurrence: record lab at depth len_w, then push
// [lab muf] (with the updated map) onto pef.
muf = u3kdb_put(muf, u3k(lab), len_w);
pef = u3nc(u3nc(lab, u3k(muf)), pef);
len_w += 1;
}
else {
// Seen before at depth `old`: pop pef until len_w is old + 1,
// i.e. back to the entry that first pushed this label, then
// restore muf from the map saved in that entry.
u3_assure(u3a_is_cat(old));
u3z(muf);
while ( len_w > (old + 1) ) {
u3_noun t_pef = u3k(u3t(pef));
len_w -= 1;
u3z(pef);
pef = t_pef;
}
muf = u3k(u3t(u3h(pef)));
u3z(lab);
}
}
don = u3t(don);
}
rod_u = rod_u->par_u;
}
u3z(muf);
// Lose the maps and save a pure label stack in original order.
//
{
u3_noun pal = u3_nul;
// Prepending each label to pal while consuming pef head-first
// reverses pef, so pal ends up in first-encountered order.
while ( u3_nul != pef ) {
u3_noun h_pef = u3h(pef);
u3_noun t_pef = u3k(u3t(pef));
pal = u3nc(u3k(u3h(h_pef)), pal);
u3z(pef);
pef = t_pef;
}
// fprintf(stderr, "sample: stack length %d\r\n", u3kb_lent(u3k(pal)));
return pal;
}
}
#endif
/* u3t_samp(): sample.
*/
void
u3t_samp(void)
{
if ( 0 == u3R->pro.day ) { u3R->pro.day = u3v_do("doss", 0); }
u3C.wag_w &= ~u3o_debug_cpu;
u3R->pro.day = u3dc("pi-noon", u3k(u3R->pro.don), u3R->pro.day);
// Profile sampling, because it allocates on the home road,
// only works when we're not at home.
//
if ( &(u3H->rod_u) != u3R ) {
c3_l mot_l;
u3a_road* rod_u;
if ( _(u3T.mal_o) ) {
mot_l = c3_s3('m','a','l');
}
else if ( _(u3T.coy_o) ) {
mot_l = c3_s3('c','o','y');
}
else if ( _(u3T.euq_o) ) {
mot_l = c3_s3('e','u','q');
}
else if ( _(u3T.far_o) ) {
mot_l = c3_s3('f','a','r');
}
else if ( _(u3T.noc_o) ) {
c3_assert(!_(u3T.glu_o));
mot_l = c3_s3('n','o','c');
}
else if ( _(u3T.glu_o) ) {
mot_l = c3_s3('g','l','u');
}
else {
mot_l = c3_s3('f','u','n');
}
rod_u = u3R;
u3R = &(u3H->rod_u);
{
u3_noun lab = _t_samp_process(rod_u);
c3_assert(u3R == &u3H->rod_u);
if ( 0 == u3R->pro.day ) {
u3R->pro.day = u3v_do("doss", 0);
}
u3R->pro.day = u3dt("pi-noon", mot_l, lab, u3R->pro.day);
}
u3R = rod_u;
}
u3C.wag_w |= u3o_debug_cpu;
}
/* u3t_come(): push on profile stack.
/* u3t_come(): push on profile stack; return yes if active push. RETAIN.
*/
void
u3t_come(u3_atom cog)
c3_o
u3t_come(u3_noun bat)
{
printf("ct: come %s\r\n", u3r_string(cog));
u3R->pro.don = u3nc(cog, u3R->pro.don);
if ( (u3_nul == u3R->pro.don) || !_(u3r_sing(bat, u3h(u3R->pro.don))) ) {
u3R->pro.don = u3nc(u3k(bat), u3R->pro.don);
return c3y;
}
else return c3n;
}
/* u3t_flee(): pop off profile stack.
@ -92,13 +249,10 @@ u3t_come(u3_atom cog)
void
u3t_flee(void)
{
c3_assert(_(u3du(u3R->pro.don)));
{
u3_noun tax = u3R->bug.tax;
u3_noun t_don = u3k(u3t(u3R->pro.don));
u3R->bug.tax = u3k(u3t(tax));
u3z(tax);
}
u3R->pro.don = t_don;
u3z(u3R->pro.don);
}
/* u3t_damp(): print and clear profile data.
@ -112,52 +266,75 @@ u3t_damp(void)
u3R->pro.day = u3v_do("doss", 0);
}
#if 0
if ( 0 != u3R->pro.nox_d ) {
printf("knox: %llu\r\n", (u3R->pro.nox_d / 1000ULL));
u3R->pro.nox_d = 0;
}
#endif
}
/* _ct_sigaction(): profile sigaction callback.
*/
void _ct_sigaction(c3_i x_i) { u3t_samp(); }
void _ct_sigaction(c3_i x_i)
{
// fprintf(stderr, "itimer!\r\n"); abort();
u3t_samp();
}
/* u3t_init(): initialize tracing layer.
**
**   Sets the six u3T activity flags (noc, glu, mal, far, coy, euq) to
**   c3n.  These are the flags u3t_samp() inspects to classify a
**   sample; with all of them off a sample is classified as 'fun'.
**   Also called from the exception paths of the soft wrappers to
**   discard whatever flags were raised when the computation bailed.
*/
void
u3t_init(void)
{
u3T.noc_o = c3n;
u3T.glu_o = c3n;
u3T.mal_o = c3n;
u3T.far_o = c3n;
u3T.coy_o = c3n;
u3T.euq_o = c3n;
}
/* u3t_boot(): turn sampling on.
*/
void
u3t_boot(void)
{
printf("ct: now profiling.\r\n");
printf("knox: %llu\r\n", (u3R->pro.nox_d / 1000ULL));
u3R->pro.nox_d = 0;
if ( u3C.wag_w & u3o_debug_cpu ) {
#if defined(U3_OS_osx)
#if 0
{
struct itimerval itm_v;
struct sigaction sig_s;
#if 1
{
struct itimerval itm_v;
struct sigaction sig_s;
sigset_t set;
sig_s.__sigaction_u.__sa_handler = _ct_sigaction;
sig_s.sa_mask = 0;
sig_s.sa_flags = 0;
sigaction(SIGPROF, &sig_s, 0);
sig_s.__sigaction_u.__sa_handler = _ct_sigaction;
sig_s.sa_mask = 0;
sig_s.sa_flags = 0;
sigaction(SIGPROF, &sig_s, 0);
itm_v.it_interval.tv_sec = 0;
itm_v.it_interval.tv_usec = 10000;
itm_v.it_value = itm_v.it_interval;
sigemptyset(&set);
sigaddset(&set, SIGPROF);
if ( 0 != pthread_sigmask(SIG_UNBLOCK, &set, NULL) ) {
perror("pthread_sigmask");
}
setitimer(ITIMER_PROF, &itm_v, 0);
}
itm_v.it_interval.tv_sec = 0;
itm_v.it_interval.tv_usec = 10000;
// itm_v.it_interval.tv_usec = 100000;
itm_v.it_value = itm_v.it_interval;
setitimer(ITIMER_PROF, &itm_v, 0);
}
#endif
#elif defined(U3_OS_linux)
// TODO: support profiling on linux
#elif defined(U3_OS_bsd)
// TODO: support profiling on bsd
#else
#error "port: profiling"
# error "port: profiling"
#endif
}
}
/* u3t_boff(): turn profile sampling off.
@ -165,25 +342,31 @@ u3t_boot(void)
void
u3t_boff(void)
{
if ( u3C.wag_w & u3o_debug_cpu ) {
#if defined(U3_OS_osx)
struct sigaction sig_s;
struct itimerval itm_v;
struct sigaction sig_s;
struct itimerval itm_v;
sigset_t set;
printf("ct: end profiling.\r\n");
sigemptyset(&set);
sigaddset(&set, SIGPROF);
if ( 0 != pthread_sigmask(SIG_BLOCK, &set, NULL) ) {
perror("pthread_sigmask");
}
itm_v.it_interval.tv_sec = 0;
itm_v.it_interval.tv_usec = 0;
itm_v.it_value = itm_v.it_interval;
itm_v.it_interval.tv_sec = 0;
itm_v.it_interval.tv_usec = 0;
itm_v.it_value = itm_v.it_interval;
setitimer(ITIMER_PROF, &itm_v, 0);
sigaction(SIGPROF, &sig_s, 0);
setitimer(ITIMER_PROF, &itm_v, 0);
sigaction(SIGPROF, &sig_s, 0);
u3t_damp();
#elif defined(U3_OS_linux)
// TODO: support profiling on linux
#elif defined(U3_OS_bsd)
// TODO: support profiling on bsd
#else
#error "port: profiling"
# error "port: profiling"
#endif
}
}

View File

@ -0,0 +1,23 @@
# editorconfig.org
root = true
[*]
end_of_line = lf
charset = utf-8
insert_final_newline = true
[*.js]
trim_trailing_whitespace = true
indent_style = space
indent_size = 2
[*.{c,h}]
trim_trailing_whitespace = true
indent_style = tab
indent_size = 8
[Makefile]
trim_trailing_whitespace = true
indent_style = tab
indent_size = 8

33
outside/commonmark/.gitignore vendored Normal file
View File

@ -0,0 +1,33 @@
# Object files
*.o
*.ko
*.obj
*.elf
# Libraries
*.lib
*.a
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
*~
*.bak
*.diff
*#
*.zip
bstrlib.txt
build
cmark.dSYM/*
cmark

View File

@ -0,0 +1,10 @@
language: c
compiler:
- clang
- gcc
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq pandoc re2c valgrind
script:
- make testtarball
- PROG=`ls cmark-*.*/build/src/cmark` make leakcheck

View File

@ -0,0 +1,24 @@
project(cmark)
cmake_minimum_required(VERSION 2.8)
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
message(FATAL_ERROR "Do not build in-source.\nPlease remove CMakeCache.txt and the CMakeFiles/ directory.\nThen: mkdir build ; cd build ; cmake .. ; make")
endif()
set(PROJECT_NAME "cmark")
set(PROJECT_VERSION_MAJOR 0)
set(PROJECT_VERSION_MINOR 0)
set(PROJECT_VERSION_PATCH 1)
set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} )
add_subdirectory(src)
#add_subdirectory(api_test)
#add_subdirectory(man)
#enable_testing()
#add_subdirectory(test)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING
"Choose the type of build, options are: Debug Release." FORCE)
endif(NOT CMAKE_BUILD_TYPE)

View File

@ -0,0 +1,70 @@
Copyright (c) 2014, John MacFarlane
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of John MacFarlane nor the names of other
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-----
The polyfill for String.fromCodePoint included in commonmark.js is
Copyright Mathias Bynens <http://mathiasbynens.be/>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-----
The normalization code in runtests.py was derived from the
markdowntest project, Copyright 2013 Karl Dubost:
The MIT License (MIT)
Copyright (c) 2013 Karl Dubost
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

194
outside/commonmark/Makefile Normal file
View File

@ -0,0 +1,194 @@
SRCDIR?=src
DATADIR?=data
BUILDDIR?=build
GENERATOR?=Unix Makefiles
MINGW_BUILDDIR?=build-mingw
MINGW_INSTALLDIR?=windows
SPEC=spec.txt
SITE=_site
SPECVERSION=$(shell perl -ne 'print $$1 if /^version: *([0-9.]+)/' $(SPEC))
PKGDIR?=cmark-$(SPECVERSION)
TARBALL?=cmark-$(SPECVERSION).tar.gz
ZIPARCHIVE?=cmark-$(SPECVERSION).zip
FUZZCHARS?=2000000 # for fuzztest
BENCHDIR=bench
BENCHFILE=$(BENCHDIR)/benchinput.md
ALLTESTS=alltests.md
NUMRUNS?=10
PROG?=$(BUILDDIR)/src/cmark
BENCHINP?=README.md
JSMODULES=$(wildcard js/lib/*.js)
.PHONY: all spec leakcheck clean fuzztest dingus upload jshint test testjs benchjs update-site upload-site check npm debug mingw archive tarball ziparchive testarchive testtarball testziparchive testlib bench apidoc
all: $(BUILDDIR)
@make -C $(BUILDDIR)
@echo "Binaries can be found in $(BUILDDIR)/src"
check:
@cmake --version > /dev/null || (echo "You need cmake to build this program: http://www.cmake.org/download/" && exit 1)
$(BUILDDIR): check $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc man/man1/cmark.1 man/man3/cmark.3
mkdir -p $(BUILDDIR); \
cd $(BUILDDIR); \
cmake .. -G "$(GENERATOR)" -DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
install: $(BUILDDIR)
make -C $(BUILDDIR) install
debug:
mkdir -p $(BUILDDIR); \
cd $(BUILDDIR); \
cmake .. -DCMAKE_BUILD_TYPE=Debug; \
make
mingw:
mkdir -p $(MINGW_BUILDDIR); \
cd $(MINGW_BUILDDIR); \
cmake .. -DCMAKE_TOOLCHAIN_FILE=../toolchain-mingw32.cmake -DCMAKE_INSTALL_PREFIX=$(MINGW_INSTALLDIR) ;\
make && make install
archive: spec.html $(BUILDDIR)
@rm -rf $(PKGDIR); \
mkdir -p $(PKGDIR)/$(SRCDIR)/html; \
mkdir -p $(PKGDIR)/api_test; \
srcfiles=`git ls-tree --full-tree -r HEAD --name-only $(SRCDIR) api_test`; \
for f in $$srcfiles; do cp -a $$f $(PKGDIR)/$$f; done; \
cp -a $(SRCDIR)/scanners.c $(PKGDIR)/$(SRCDIR)/; \
cp -a spec.html $(PKGDIR); \
cp CMakeLists.txt $(PKGDIR); \
perl -ne '$$p++ if /^### JavaScript/; print if (!$$p)' Makefile > $(PKGDIR)/Makefile; \
cp -a Makefile.nmake nmake.bat $(PKGDIR); \
cp -r man $(PKGDIR)/; \
cp -r test $(PKGDIR)/; \
cp -a README.md LICENSE spec.txt $(PKGDIR)/; \
tar czf $(TARBALL) $(PKGDIR); \
zip -q -r $(ZIPARCHIVE) $(PKGDIR); \
rm -rf $(PKGDIR) ; \
echo "Created $(TARBALL) and $(ZIPARCHIVE)."
clean:
rm -rf $(BUILDDIR) $(MINGW_BUILDDIR) $(MINGW_INSTALLDIR) $(TARBALL) $(ZIPARCHIVE) $(PKGDIR)
$(PROG): all
man/man3/cmark.3: src/cmark.h
python man/make_man_page.py $< > $@
# We include html_unescape.h in the repository, so this shouldn't
# normally need to be generated.
$(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf
gperf -L ANSI-C -I -t -N find_entity -H hash_entity -K entity -C -l \
--null-strings -m5 $< > $@
# We include case_fold_switch.inc in the repository, so this shouldn't
# normally need to be generated.
$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
perl mkcasefold.pl < $< > $@
test: $(SPEC) $(BUILDDIR)
make -C $(BUILDDIR) test ARGS="-V"
$(TARBALL): archive
$(ZIPARCHIVE): archive
testarchive: testtarball testziparchive
rm -rf $(PKGDIR)
testtarball: $(TARBALL)
rm -rf $(PKGDIR); \
tar xvzf $(TARBALL); \
cd $(PKGDIR); \
mkdir build && cd build && cmake .. && make && ctest -V
testziparchive: $(ZIPARCHIVE)
rm -rf $(PKGDIR); \
unzip $(ZIPARCHIVE); \
cd $(PKGDIR); \
mkdir build && cd build && cmake .. && make && ctest -V
$(ALLTESTS): spec.txt
python test/spec_tests.py --spec $< --dump-tests | python -c 'import json; import sys; tests = json.loads(sys.stdin.read()); print "\n".join([test["markdown"] for test in tests]).encode("utf-8")' > $@
leakcheck: $(ALLTESTS) $(PROG)
cat $< | valgrind --leak-check=full --dsymutil=yes --error-exitcode=1 $(PROG) >/dev/null
fuzztest:
{ for i in `seq 1 10`; do \
cat /dev/urandom | head -c $(FUZZCHARS) | iconv -f latin1 -t utf-8 | tee fuzz-$$i.txt | \
/usr/bin/env time -p $(PROG) >/dev/null && rm fuzz-$$i.txt ; \
done } 2>&1 | grep 'user\|abnormally'
# for benchmarking
$(BENCHFILE): progit/progit.md
-rm $@; for x in `seq 1 20` ; do cat $< >> $@; done
progit:
git clone https://github.com/progit/progit.git
progit/progit.md: progit
cat progit/en/*/*.markdown > $@
bench: $(BENCHFILE)
{ sudo renice 99 $$$$; \
for x in `seq 1 $(NUMRUNS)` ; do \
/usr/bin/env time -p $(PROG) </dev/null >/dev/null ; \
/usr/bin/env time -p $(PROG) <$< >/dev/null ; \
done \
} 2>&1 | grep 'real' | awk '{print $$2}' | python3 'bench/stats.py'
operf: $(PROG)
operf $(PROG) <$(BENCHINP) >/dev/null
distclean: clean
-rm -f js/commonmark.js
-rm -rf *.dSYM
-rm -f README.html
-rm -f spec.md fuzz.txt spec.html
-rm -rf $(BENCHFILE) $(ALLTESTS) progit
### JavaScript ###
js/commonmark.js: js/lib/index.js ${JSMODULES}
browserify --standalone commonmark $< -o $@
testjs: $(SPEC)
node js/test.js
jshint:
jshint ${JSMODULES}
benchjs:
node js/bench.js ${BENCHINP}
npm:
cd js; npm publish
dingus: js/commonmark.js
echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000
### Spec ###
spec.md: $(SPEC)
perl spec2md.pl < $< > $@
spec.html: spec.md template.html
pandoc --no-highlight --number-sections --template template.html -s --toc -S $< | \
perl -pe 's/a href="@([^"]*)"/a id="\1" href="#\1" class="definition"/g' | \
perl -pe 's/␣/<span class="space"> <\/span>/g' \
> $@
spec.pdf: spec.md template.tex specfilter.hs
pandoc -s $< --template template.tex \
--filter ./specfilter.hs -o $@ --latex-engine=xelatex --toc \
--number-sections -V documentclass=report -V tocdepth=2 \
-V classoption=twosides
### Website ###
update-site: spec.html js/commonmark.js
make -C $(SITE) update
upload-site: spec.html
make -C $(SITE) upload

View File

@ -0,0 +1,57 @@
SRCDIR=src
DATADIR=data
BUILDDIR=build
INSTALLDIR=windows
SPEC=spec.txt
PROG=$(BUILDDIR)\src\cmark.exe
GENERATOR=NMake Makefiles
all: $(BUILDDIR)
@pushd $(BUILDDIR) && $(MAKE) /nologo && popd
$(BUILDDIR):
@cmake --version > nul || (echo "You need cmake to build this program: http://www.cmake.org/download/" && exit 1)
-mkdir $(BUILDDIR) 2> nul
pushd $(BUILDDIR) && \
cmake \
-G "$(GENERATOR)" \
-D CMAKE_BUILD_TYPE=$(BUILD_TYPE) \
-D CMAKE_INSTALL_PREFIX=$(INSTALLDIR) \
.. && \
popd
install: all
@pushd $(BUILDDIR) && $(MAKE) /nologo install && popd
clean:
-rmdir /s /q $(BUILDDIR) $(MINGW_INSTALLDIR) 2> nul
$(SRCDIR)\case_fold_switch.inc: $(DATADIR)\CaseFolding-3.2.0.txt
perl mkcasefold.pl < $? > $@
man\man1\cmark.1: man\cmark.1.md
pandoc $? -o $@ -s -t man
test: $(SPEC) all
@pushd $(BUILDDIR) && $(MAKE) /nologo test ARGS="-V" && popd
distclean: clean
del /q src\scanners.c 2> nul
del /q spec.md spec.html 2> nul
### Spec ###
spec.md: $(SPEC)
perl spec2md.pl < $? > $@
spec.html: spec.md template.html
pandoc --no-highlight --number-sections --template template.html -s --toc -S $? | \
perl -pe "s/a href=\"@([^"]*)\"/a id=\"\\1\" href=\"#\\1\" class=\"definition\"/g" | \
perl -pe "s/\\x{2423}/<span class=\"space\"> <\\/span>/g" \
> $@
spec.pdf: spec.md template.tex specfilter.hs
pandoc -s $? --template template.tex \
--filter specfilter.hs -o $@ --latex-engine=xelatex --toc \
--number-sections -V documentclass=report -V tocdepth=2 \
-V classoption=twosides

View File

@ -0,0 +1,47 @@
PREFIX?=/usr/local
SRCDIR?=src
CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc -Wno-missing-field-initializers -fPIC $(OPTCFLAGS)
LDFLAGS?=-g -O3 -Wall -Werror -fPIC $(OPTLDFLAGS)
HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o
CMARK_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.o
CMARK_HDR = $(SRCDIR)/cmark.h $(SRCDIR)/buffer.h $(SRCDIR)/references.h \
$(SRCDIR)/chunk.h $(SRCDIR)/debug.h $(SRCDIR)/utf8.h \
$(SRCDIR)/scanners.h $(SRCDIR)/inlines.h
HTML_HDR = $(SRCDIR)/html/html_unescape.h $(SRCDIR)/html/houdini.h
$(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(CMARK_OBJ) $(SRCDIR)/main.c
$(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(CMARK_OBJ) $(SRCDIR)/main.c
$(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
re2c --case-insensitive -bis $< > $@ || (rm $@ && false)
$(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf
gperf -I -t -N find_entity -H hash_entity -K entity -C -l
--null-strings -m5 $< > $@
libcommonmark.so: $(HTML_OBJ) $(CMARK_OBJ)
$(CC) $(LDFLAGS) -shared -o $@ $^
install: libcommonmark.so $(cmark_HDR) $(HTML_HDR)
install -d $(PREFIX)/lib $(PREFIX)/include/cmark/html
install libcommonmark.so $(PREFIX)/lib/
install $(cmark_HDR) $(PREFIX)/include/cmark/
install $(HTML_HDR) $(PREFIX)/include/cmark/html/
CMARK_HDR = $(SRCDIR)/cmark.h $(SRCDIR)/buffer.h $(SRCDIR)/references.h \
$(SRCDIR)/chunk.h $(SRCDIR)/debug.h $(SRCDIR)/utf8.h \
$(SRCDIR)/scanners.h $(SRCDIR)/inlines.h
HTML_HDR = $(SRCDIR)/html/html_unescape.h $(SRCDIR)/html/houdini.h
clean:
-rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o libcommonmark.so
-rm -f js/commonmark.js
-rm -rf *.dSYM
-rm -f README.html
-rm -f spec.md fuzz.txt spec.html

View File

@ -0,0 +1,298 @@
CommonMark
==========
CommonMark is a rationalized version of Markdown syntax,
with a [spec][the spec] and BSD3-licensed reference
implementations in C and JavaScript.
[Try it now!](http://spec.commonmark.org/dingus.html)
The implementations
-------------------
The C implementation provides both a shared library (`libcmark`) and a
standalone program `cmark` that converts CommonMark to HTML. It is
written in standard C99 and has no library dependencies. The parser is
very fast (see [benchmarks](benchmarks.md)).
It is easy to use `libcmark` in python, lua, ruby, and other dynamic
languages: see `wrapper.py`, `wrapper.lua`, and `wrapper.rb` in the
repository for simple examples.
The JavaScript implementation is a single JavaScript file, with
no dependencies, that can be linked to in an HTML page. Here
is a simple usage example:
``` javascript
var reader = new commonmark.DocParser();
var writer = new commonmark.HtmlRenderer();
var parsed = reader.parse("Hello *world*");
var result = writer.render(parsed);
```
A node package is also available; it includes a command-line tool called
`commonmark`.
**A note on security:**
Neither implementation attempts to sanitize link attributes or
raw HTML. If you use these libraries in applications that accept
untrusted user input, you must run the output through an HTML
sanitizer to protect against
[XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
Installing (C)
--------------
Building the C program (`cmark`) and shared library (`libcmark`)
requires [cmake]. If you modify `scanners.re`, then you will also
need [re2c], which is used to generate `scanners.c` from
`scanners.re`. We have included a pre-generated `scanners.c` in
the repository to reduce build dependencies.
If you have GNU make, you can simply `make`, `make test`, and `make
install`. This calls [cmake] to create a `Makefile` in the `build`
directory, then uses that `Makefile` to create the executable and
library. The binaries can be found in `build/src`.
For a more portable method, you can use [cmake] manually. [cmake] knows
how to create build environments for many build systems. For example,
on FreeBSD:
mkdir build
cd build
cmake .. # optionally: -DCMAKE_INSTALL_PREFIX=path
make # executable will be created as build/src/cmark
make test
make install
Or, to create Xcode project files on OSX:
mkdir build
cd build
cmake -G Xcode ..
make
make test
make install
The GNU Makefile also provides a few other targets for developers.
To run a "fuzz test" against ten long randomly generated inputs:
make fuzztest
To run a test for memory leaks using valgrind:
make leakcheck
To make a release tarball and zip archive:
make archive
To test the archives:
make testarchive
Compiling for Windows
---------------------
To compile with MSVC and NMAKE:
nmake
You can cross-compile a Windows binary and dll on linux if you have the
`mingw32` compiler:
make mingw
The binaries will be in `build-mingw/windows/bin`.
Installing (JavaScript)
-----------------------
The JavaScript library can be installed through `npm`:
npm install commonmark
To build the JavaScript library as a single standalone file:
browserify --standalone commonmark js/lib/index.js -o js/commonmark.js
Or fetch a pre-built copy from
<http://spec.commonmark.org/js/commonmark.js>.
To run tests for the JavaScript library:
make testjs
or
node js/test.js
The spec
--------
[The spec] contains over 500 embedded examples which serve as conformance
tests. To run the tests using an executable `$PROG`:
python test/spec_tests.py --program $PROG
If you want to extract the raw test data from the spec without
actually running the tests, you can do:
python test/spec_tests.py --dump-tests
and you'll get all the tests in JSON format.
[The spec]: http://jgm.github.io/CommonMark/spec.html
The source of [the spec] is `spec.txt`. This is basically a Markdown
file, with code examples written in a shorthand form:
.
Markdown source
.
expected HTML output
.
To build an HTML version of the spec, do `make spec.html`. To build a
PDF version, do `make spec.pdf`. Both these commands require that
[pandoc] is installed, and creating a PDF requires a latex installation.
The spec is written from the point of view of the human writer, not
the computer reader. It is not an algorithm---an English translation of
a computer program---but a declarative description of what counts as a block
quote, a code block, and each of the other structural elements that can
make up a Markdown document.
Because John Gruber's [canonical syntax
description](http://daringfireball.net/projects/markdown/syntax) leaves
many aspects of the syntax undetermined, writing a precise spec requires
making a large number of decisions, many of them somewhat arbitrary.
In making them, we have appealed to existing conventions and
considerations of simplicity, readability, expressive power, and
consistency. We have tried to ensure that "normal" documents in the many
incompatible existing implementations of Markdown will render, as far as
possible, as their authors intended. And we have tried to make the rules
for different elements work together harmoniously. In places where
different decisions could have been made (for example, the rules
governing list indentation), we have explained the rationale for
our choices. In a few cases, we have departed slightly from the canonical
syntax description, in ways that we think further the goals of Markdown
as stated in that description.
For the most part, we have limited ourselves to the basic elements
described in Gruber's canonical syntax description, eschewing extensions
like footnotes and definition lists. It is important to get the core
right before considering such things. However, we have included a visible
syntax for line breaks and fenced code blocks.
Differences from original Markdown
----------------------------------
There are only a few places where this spec says things that contradict
the canonical syntax description:
- It [allows all punctuation symbols to be
backslash-escaped](http://jgm.github.io/CommonMark/spec.html#backslash-escapes),
not just the symbols with special meanings in Markdown. We found
that it was just too hard to remember which symbols could be
escaped.
- It introduces an [alternative syntax for hard line
breaks](http://jgm.github.io/CommonMark/spec.html#hard-line-breaks), a
backslash at the end of the line, supplementing the
two-spaces-at-the-end-of-line rule. This is motivated by persistent
complaints about the “invisible” nature of the two-space rule.
- Link syntax has been made a bit more predictable (in a
backwards-compatible way). For example, `Markdown.pl` allows single
quotes around a title in inline links, but not in reference links.
This kind of difference is really hard for users to remember, so the
spec [allows single quotes in both
contexts](http://jgm.github.io/CommonMark/spec.html#links).
- The rule for HTML blocks differs, though in most real cases it
shouldn't make a difference. (See
[here](http://jgm.github.io/CommonMark/spec.html#html-blocks) for
details.) The spec's proposal makes it easy to include Markdown
inside HTML block-level tags, if you want to, but also allows you to
exclude this. It also makes parsing much easier, avoiding
expensive backtracking.
- It does not collapse adjacent bird-track blocks into a single
blockquote:
> this is two
> blockquotes
> this is a single
>
> blockquote with two paragraphs
- Rules for content in lists differ in a few respects, though (as with
HTML blocks), most lists in existing documents should render as
intended. There is some discussion of the choice points and
differences [here](http://jgm.github.io/CommonMark/spec.html#motivation).
We think that the spec's proposal does better than any existing
implementation in rendering lists the way a human writer or reader
would intuitively understand them. (We could give numerous examples
of perfectly natural looking lists that nearly every existing
implementation flubs up.)
- The spec stipulates that two blank lines break out of all list
contexts. This is an attempt to deal with issues that often come up
when someone wants to have two adjacent lists, or a list followed by
an indented code block.
- Changing bullet characters, or changing from bullets to numbers or
vice versa, starts a new list. We think that is almost always going
to be the writer's intent.
- The number that begins an ordered list item may be followed by
either `.` or `)`. Changing the delimiter style starts a new
list.
- The start number of an ordered list is significant.
- [Fenced code blocks](http://jgm.github.io/CommonMark/spec.html#fenced-code-blocks) are supported, delimited by either
backticks (```` ``` ````) or tildes (` ~~~ `).
Contributing
------------
There is a [forum for discussing
CommonMark](http://talk.commonmark.org); you should use it instead of
github issues for questions and possibly open-ended discussions.
Use the [github issue tracker](http://github.com/jgm/CommonMark/issues)
only for simple, clear, actionable issues.
Authors
-------
The spec was written by John MacFarlane, drawing on
- his experience writing and maintaining Markdown implementations in several
languages, including the first Markdown parser not based on regular
expression substitutions ([pandoc](http://github.com/jgm/pandoc)) and
the first markdown parsers based on PEG grammars
([peg-markdown](http://github.com/jgm/peg-markdown),
[lunamark](http://github.com/jgm/lunamark))
- a detailed examination of the differences between existing Markdown
implementations using [BabelMark 2](http://johnmacfarlane.net/babelmark2/),
and
- extensive discussions with David Greenspan, Jeff Atwood, Vicent
Marti, Neil Williams, and Benjamin Dumke-von der Ehe.
John MacFarlane was also responsible for the original versions of the
C and JavaScript implementations. The block parsing algorithm was
worked out together with David Greenspan. Vicent Marti
optimized the C implementation for performance, increasing its speed
tenfold. Kārlis Gaņģis helped work out a better parsing algorithm
for links and emphasis, eliminating several worst-case performance
issues. Nick Wellnhofer contributed many improvements, including
most of the C library's API and its test harness.
[cmake]: http://www.cmake.org/download/
[pandoc]: http://johnmacfarlane.net/pandoc/
[re2c]: http://re2c.org

View File

@ -0,0 +1,247 @@
# Appendix B: An alternate spec for HTML blocks {-}
(The following spec departs less from original markdown than the
one described above, but is also less flexible.)
An [HTML block](#html-block) <a id="html-block-tag"/> begins
with an [open tag](#open-tag), [HTML comment](#html-comment),
[processing instruction](#processing-instruction),
[declaration](#declaration), or [CDATA section](#cdata-section).
This opening element may optionally be preceded by 1-3 spaces,
and must not be followed on a line by anything other than white space.
If the opening tag is self-closing, or if it is an [HTML
comment](#html-comment), [processing
instruction](#processing-instruction), [declaration](#declaration), or
[CDATA section](#cdata-section), then the [HTML block](#html-block)
contains just that tag.
If it is an [open tag](#open-tag), then the [HTML block](#html-block)
continues until a matching closing tag is found, or until the end
of the document. Note that the matching closing tag is not necessarily
the first closing tag of the same type that is encountered, since
that tag may close a later open tag of the same type. Open and closing
tags must be balanced.
The contents of the HTML block are interpreted as raw HTML, and will not
be escaped in HTML output.
Some simple examples:
.
<table>
<tr>
<td>
hi
</td>
</tr>
</table>
okay.
.
<table>
<tr>
<td>
hi
</td>
</tr>
</table>
<p>okay.</p>
.
.
<div class="outer">
<div class="inner">
<p>foo&ouml;</p>
</div>
</div>
.
<div class="outer">
<div class="inner">
<p>foo&ouml;</p>
</div>
</div>
.
A self-closing tag:
.
<div />
.
<div />
.
Here we have an unclosed tag, and the block continues to the end of
the document:
.
<div>
<div>
foo
</div>
*bar*
.
<div>
<div>
foo
</div>
*bar*
.
A comment:
.
<!-- Foo
bar
baz -->
.
<!-- Foo
bar
baz -->
.
A processing instruction:
.
<?php
echo 'foo'
?>
.
<?php
echo 'foo'
?>
.
CDATA:
.
<![CDATA[
function matchwo(a,b)
{
if (a < b && a < 0) then
{
return 1;
}
else
{
return 0;
}
}
]]>
.
<![CDATA[
function matchwo(a,b)
{
if (a < b && a < 0) then
{
return 1;
}
else
{
return 0;
}
}
]]>
.
The opening tag can be indented 1-3 spaces, but not 4:
.
<!-- foo -->
<!-- foo -->
.
<!-- foo -->
<pre><code>&lt;!-- foo --&gt;
</code></pre>
.
The opening tag must be on a line (or lines) by itself:
.
<table><tr><td>
foo
</td></tr></table>
.
<p><table><tr><td> foo </td></tr></table></p>
.
.
<!-- foo -->bar
.
<p><!-- foo -->bar</p>
.
The opening tag need not be an HTML block tag or even an HTML tag:
.
<a>
foo
</a>
.
<a>
foo
</a>
.
.
<foo>
bar
</foo>
.
<foo>
bar
</foo>
.
So, note the difference:
.
<del>
bar
</del>
<del>bar</del>
.
<del>
bar
</del>
<p><del>bar</del></p>
.
This rule differs from John Gruber's original markdown syntax
specification, which says:
> The only restrictions are that block-level HTML elements —
> e.g. `<div>`, `<table>`, `<pre>`, `<p>`, etc. — must be separated from
> surrounding content by blank lines, and the start and end tags of the
> block should not be indented with tabs or spaces.
In some ways Gruber's rule is more restrictive than the one given
here:
- It requires that an HTML block be preceded and followed by a blank line.
- It does not allow the start tag to be indented.
- It does not allow the end tag to be indented.
- It requires that the open tag be an HTML block-level tag.
Indeed, most markdown implementations, including some of Gruber's
own perl implementations, do not impose these restrictions.
However, unlike Gruber's rule, this one requires that the open
tag be on a line by itself. It also differs from most markdown
implementations in how it handles the case where there is no matching
closing tag (a case not mentioned in Gruber's rule). In such a case,
the rule stated above includes the whole rest of the document in the
HTML block.

View File

@ -0,0 +1,25 @@
# Build the api_test executable from the test harness, the C test driver
# and one C++ source file.
add_executable(api_test
cplusplus.cpp
harness.c
harness.h
main.c
)
# Headers are searched for in the project's src directory, both in the
# source tree and in the build tree.
include_directories(
${PROJECT_SOURCE_DIR}/src
${PROJECT_BINARY_DIR}/src
)
# Link the test program against the libcmark target.
target_link_libraries(api_test libcmark)
# Compiler flags
# Everything below edits CMAKE_C_FLAGS, so the conditions inspect the C flags
# and the C compiler id rather than their C++ counterparts.
if(MSVC)
  # Force to always compile with W4
  if(CMAKE_C_FLAGS MATCHES "/W[0-4]")
    string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
  else()
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4")
  endif()
  # Disable selected warnings and the CRT "secure" deprecation notices.
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4127 /wd4244 /wd4267 /wd4706 /wd4800 /D_CRT_SECURE_NO_WARNINGS")
  # /TP makes MSVC treat the C sources as C++.
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP")
elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -std=c99 -pedantic")
endif()

View File

@ -0,0 +1,15 @@
#include <cstdlib>
#include "cmark.h"
#include "harness.h"
// Smoke test run from a C++ translation unit: converts a one-paragraph
// document with cmark_markdown_to_html and compares the returned HTML.
extern "C" void
test_cplusplus(test_batch_runner *runner)
{
	static const char markdown[] = "paragraph\n";

	// sizeof counts the terminating NUL, which is not part of the input.
	char *rendered = cmark_markdown_to_html(markdown, sizeof(markdown) - 1);

	STR_EQ(runner, rendered, "<p>paragraph</p>\n", "libcmark works with C++");

	free(rendered);
}

View File

@ -0,0 +1,102 @@
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "harness.h"
/*
 * Allocate a test batch runner whose counters all start at zero.
 * Returns the calloc result unchanged, so the value is NULL when the
 * allocation fails.
 */
test_batch_runner*
test_batch_runner_new()
{
	/* The cast is kept so the file also builds when compiled as C++. */
	test_batch_runner *fresh =
		(test_batch_runner *)calloc(1, sizeof(test_batch_runner));

	return fresh;
}
/*
 * Record the outcome of one test.
 *
 * Advances the running test number, then either counts a pass or prints
 * "FAILED test N: <formatted msg>" to stderr and counts a failure.
 * msg/ap are a printf-style format and its already-started argument list.
 */
static void
test_result(test_batch_runner *runner, int cond, const char *msg, va_list ap)
{
	runner->test_num += 1;

	if (!cond) {
		fprintf(stderr, "FAILED test %d: ", runner->test_num);
		vfprintf(stderr, msg, ap);
		fprintf(stderr, "\n");
		runner->num_failed += 1;
		return;
	}

	runner->num_passed += 1;
}
/*
 * Mark num_tests tests as skipped: both the skipped counter and the
 * running test number move forward by num_tests.
 */
void
SKIP(test_batch_runner *runner, int num_tests)
{
	runner->num_skipped += num_tests;
	runner->test_num += num_tests;
}
/*
 * Record a test that passes when cond is non-zero.
 * msg and the variadic arguments form the printf-style description that
 * is printed if the test fails.
 */
void
OK(test_batch_runner *runner, int cond, const char *msg, ...)
{
	va_list args;

	va_start(args, msg);
	test_result(runner, cond, msg, args);
	va_end(args);
}
/*
 * Record a test that passes when got equals expected.
 * On failure the formatted description is printed first, followed by the
 * two integer values.
 */
void
INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg, ...)
{
	const int matches = (got == expected);
	va_list args;

	va_start(args, msg);
	test_result(runner, matches, msg, args);
	va_end(args);

	if (matches) {
		return;
	}

	fprintf(stderr, " Got: %d\n", got);
	fprintf(stderr, " Expected: %d\n", expected);
}
/*
 * Record a test that passes when got and expected are equal strings.
 *
 * A NULL on either side counts as a failure instead of being handed to
 * strcmp or to a "%s" conversion, both of which are undefined for NULL.
 * This matters because the strings under test are often the result of a
 * conversion call that the caller has not checked.
 *
 * On failure the formatted description is printed first, followed by both
 * strings; a NULL string is shown as (null) without quotes.
 */
void
STR_EQ(test_batch_runner *runner, const char *got, const char *expected,
       const char *msg, ...)
{
	int cond = got != NULL && expected != NULL
		&& strcmp(got, expected) == 0;

	va_list ap;
	va_start(ap, msg);
	test_result(runner, cond, msg, ap);
	va_end(ap);

	if (!cond) {
		if (got != NULL) {
			fprintf(stderr, " Got: \"%s\"\n", got);
		}
		else {
			fprintf(stderr, " Got: (null)\n");
		}

		if (expected != NULL) {
			fprintf(stderr, " Expected: \"%s\"\n", expected);
		}
		else {
			fprintf(stderr, " Expected: (null)\n");
		}
	}
}
/* Return 1 when no test in the batch has failed so far, 0 otherwise. */
int
test_ok(test_batch_runner *runner)
{
	return runner->num_failed ? 0 : 1;
}
/*
 * Print the pass/fail/skip counters to stderr, followed by a final
 * PASS or FAIL line that reflects test_ok().
 */
void
test_print_summary(test_batch_runner *runner)
{
	fprintf(stderr, "%d tests passed, %d failed, %d skipped\n",
		runner->num_passed, runner->num_failed, runner->num_skipped);

	fputs(test_ok(runner) ? "PASS\n" : "FAIL\n", stderr);
}

View File

@ -0,0 +1,42 @@
#ifndef CMARK_API_TEST_HARNESS_H
#define CMARK_API_TEST_HARNESS_H

#ifdef __cplusplus
extern "C" {
#endif

/* Running counters for one batch of tests. */
typedef struct {
	int test_num;    /* number of the most recently recorded test */
	int num_passed;  /* tests recorded as passed */
	int num_failed;  /* tests recorded as failed */
	int num_skipped; /* tests recorded as skipped */
} test_batch_runner;

/*
 * Allocate a runner with every counter set to zero.
 * Returns NULL if the allocation fails.
 * Declared with (void) so that C callers get a real prototype.
 */
test_batch_runner*
test_batch_runner_new(void);

/* Count num_tests tests as skipped and advance the test number to match. */
void
SKIP(test_batch_runner *runner, int num_tests);

/*
 * Record a test that passes when cond is non-zero.  msg is a printf-style
 * format (plus arguments) describing the test; it is printed to stderr
 * when the test fails.
 */
void
OK(test_batch_runner *runner, int cond, const char *msg, ...);

/*
 * Record a test that passes when got == expected.  On failure the
 * description and both values are printed to stderr.
 */
void
INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg, ...);

/*
 * Record a test that passes when got and expected compare equal as
 * strings.  On failure the description and both strings are printed to
 * stderr.
 */
void
STR_EQ(test_batch_runner *runner, const char *got, const char *expected,
       const char *msg, ...);

/* Return non-zero when no test has failed so far. */
int
test_ok(test_batch_runner *runner);

/* Print the counters and a final PASS/FAIL line to stderr. */
void
test_print_summary(test_batch_runner *runner);

#ifdef __cplusplus
}
#endif

#endif

View File

@ -0,0 +1,622 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define CMARK_NO_SHORT_NAMES
#include "cmark.h"
#include "node.h"
#include "harness.h"
#define UTF8_REPL "\xEF\xBF\xBD"
void
test_cplusplus(test_batch_runner *runner);
// Node types that the loops in constructor() and test_content() iterate
// over when creating nodes.
static const cmark_node_type node_types[] = {
CMARK_NODE_DOCUMENT,
CMARK_NODE_BLOCK_QUOTE,
CMARK_NODE_LIST,
CMARK_NODE_LIST_ITEM,
CMARK_NODE_CODE_BLOCK,
CMARK_NODE_HTML,
CMARK_NODE_PARAGRAPH,
CMARK_NODE_HEADER,
CMARK_NODE_HRULE,
CMARK_NODE_REFERENCE_DEF,
CMARK_NODE_TEXT,
CMARK_NODE_SOFTBREAK,
CMARK_NODE_LINEBREAK,
CMARK_NODE_INLINE_CODE,
CMARK_NODE_INLINE_HTML,
CMARK_NODE_EMPH,
CMARK_NODE_STRONG,
CMARK_NODE_LINK,
CMARK_NODE_IMAGE
};
// Number of entries in node_types.
static const int num_node_types = sizeof(node_types) / sizeof(*node_types);
static void
test_md_to_html(test_batch_runner *runner, const char *markdown,
const char *expected_html, const char *msg);
static void
test_content(test_batch_runner *runner, cmark_node_type type,
int allowed_content);
static void
test_char(test_batch_runner *runner, int valid, const char *utf8,
const char *msg);
static void
test_incomplete_char(test_batch_runner *runner, const char *utf8,
const char *msg);
static void
test_continuation_byte(test_batch_runner *runner, const char *utf8);
// Creates one node of every type in node_types and checks that creation
// succeeds, that the reported type matches, and that header and list nodes
// start out with the expected default properties.
static void
constructor(test_batch_runner *runner)
{
	for (int idx = 0; idx < num_node_types; ++idx) {
		cmark_node_type wanted = node_types[idx];
		cmark_node *fresh = cmark_node_new(wanted);

		OK(runner, fresh != NULL, "new type %d", wanted);
		INT_EQ(runner, cmark_node_get_type(fresh), wanted,
		       "get_type %d", wanted);

		if (fresh->type == CMARK_NODE_HEADER) {
			INT_EQ(runner, cmark_node_get_header_level(fresh), 1,
			       "default header level is 1");
			fresh->as.header.level = 1;
		}
		else if (fresh->type == CMARK_NODE_LIST) {
			INT_EQ(runner, cmark_node_get_list_type(fresh),
			       CMARK_BULLET_LIST,
			       "default is list type is bullet");
			INT_EQ(runner, cmark_node_get_list_start(fresh), 1,
			       "default is list start is 1");
			INT_EQ(runner, cmark_node_get_list_tight(fresh), 0,
			       "default is list is loose");
		}

		cmark_node_free(fresh);
	}
}
// Parses a fixed document, walks its top-level nodes in order, and checks
// the node accessors in four stages: getters on matching node types,
// setters (verified by rendering the modified tree to HTML), getters on
// mismatching node types, and setters that are expected to be rejected.
static void
accessors(test_batch_runner *runner)
{
static const char markdown[] =
"## Header\n"
"\n"
"* Item 1\n"
"* Item 2\n"
"\n"
"2. Item 1\n"
"\n"
"3. Item 2\n"
"\n"
"\n"
" code\n"
"\n"
"``` lang\n"
"fenced\n"
"```\n"
"\n"
"<div>html</div>\n"
"\n"
"[link](url 'title')\n";
// sizeof includes the terminating NUL, which is not part of the document.
cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1);
// Getters
cmark_node *header = cmark_node_first_child(doc);
INT_EQ(runner, cmark_node_get_header_level(header), 2,
"get_header_level");
cmark_node *bullet_list = cmark_node_next(header);
INT_EQ(runner, cmark_node_get_list_type(bullet_list),
CMARK_BULLET_LIST, "get_list_type bullet");
INT_EQ(runner, cmark_node_get_list_tight(bullet_list), 1,
"get_list_tight tight");
cmark_node *ordered_list = cmark_node_next(bullet_list);
INT_EQ(runner, cmark_node_get_list_type(ordered_list),
CMARK_ORDERED_LIST, "get_list_type ordered");
INT_EQ(runner, cmark_node_get_list_start(ordered_list), 2,
"get_list_start");
INT_EQ(runner, cmark_node_get_list_tight(ordered_list), 0,
"get_list_tight loose");
cmark_node *code = cmark_node_next(ordered_list);
STR_EQ(runner, cmark_node_get_string_content(code), "code\n",
"get_string_content indented code");
cmark_node *fenced = cmark_node_next(code);
STR_EQ(runner, cmark_node_get_string_content(fenced), "fenced\n",
"get_string_content fenced code");
STR_EQ(runner, cmark_node_get_fence_info(fenced), "lang",
"get_fence_info");
cmark_node *html = cmark_node_next(fenced);
STR_EQ(runner, cmark_node_get_string_content(html),
"<div>html</div>\n", "get_string_content html");
// The link paragraph is the last line (line 19) of the markdown above.
cmark_node *paragraph = cmark_node_next(html);
INT_EQ(runner, cmark_node_get_start_line(paragraph), 19,
"get_start_line");
INT_EQ(runner, cmark_node_get_start_column(paragraph), 1,
"get_start_column");
INT_EQ(runner, cmark_node_get_end_line(paragraph), 19,
"get_end_line");
cmark_node *link = cmark_node_first_child(paragraph);
STR_EQ(runner, cmark_node_get_url(link), "url",
"get_url");
STR_EQ(runner, cmark_node_get_title(link), "title",
"get_title");
cmark_node *string = cmark_node_first_child(link);
STR_EQ(runner, cmark_node_get_string_content(string), "link",
"get_string_content string");
// Setters
// Note that the two lists swap roles here: the bullet list becomes an
// ordered, loose list starting at 3 and the ordered list becomes a tight
// bullet list, which is what expected_html below reflects.
OK(runner, cmark_node_set_header_level(header, 3),
"set_header_level");
OK(runner, cmark_node_set_list_type(bullet_list, CMARK_ORDERED_LIST),
"set_list_type ordered");
OK(runner, cmark_node_set_list_start(bullet_list, 3),
"set_list_start");
OK(runner, cmark_node_set_list_tight(bullet_list, 0),
"set_list_tight loose");
OK(runner, cmark_node_set_list_type(ordered_list, CMARK_BULLET_LIST),
"set_list_type bullet");
OK(runner, cmark_node_set_list_tight(ordered_list, 1),
"set_list_tight tight");
OK(runner, cmark_node_set_string_content(code, "CODE\n"),
"set_string_content indented code");
OK(runner, cmark_node_set_string_content(fenced, "FENCED\n"),
"set_string_content fenced code");
OK(runner, cmark_node_set_fence_info(fenced, "LANG"),
"set_fence_info");
OK(runner, cmark_node_set_string_content(html, "<div>HTML</div>\n"),
"set_string_content html");
OK(runner, cmark_node_set_url(link, "URL"),
"set_url");
OK(runner, cmark_node_set_title(link, "TITLE"),
"set_title");
OK(runner, cmark_node_set_string_content(string, "LINK"),
"set_string_content string");
// Render the modified tree; every setter above must be visible in the HTML.
char *rendered_html = cmark_render_html(doc);
static const char expected_html[] =
"<h3>Header</h3>\n"
"<ol start=\"3\">\n"
"<li>\n"
"<p>Item 1</p>\n"
"</li>\n"
"<li>\n"
"<p>Item 2</p>\n"
"</li>\n"
"</ol>\n"
"<ul>\n"
"<li>Item 1</li>\n"
"<li>Item 2</li>\n"
"</ul>\n"
"<pre><code>CODE\n"
"</code></pre>\n"
"<pre><code class=\"language-LANG\">FENCED\n"
"</code></pre>\n"
"<div>HTML</div>\n"
"<p><a href=\"URL\" title=\"TITLE\">LINK</a></p>\n";
STR_EQ(runner, rendered_html, expected_html, "setters work");
free(rendered_html);
// Getter errors
// Each getter is called on a node type it does not apply to; the tests
// expect 0, CMARK_NO_LIST or NULL as the result.
INT_EQ(runner, cmark_node_get_header_level(bullet_list), 0,
"get_header_level error");
INT_EQ(runner, cmark_node_get_list_type(header), CMARK_NO_LIST,
"get_list_type error");
INT_EQ(runner, cmark_node_get_list_start(code), 0,
"get_list_start error");
INT_EQ(runner, cmark_node_get_list_tight(fenced), 0,
"get_list_tight error");
OK(runner, cmark_node_get_string_content(ordered_list) == NULL,
"get_string_content error");
OK(runner, cmark_node_get_fence_info(paragraph) == NULL,
"get_fence_info error");
OK(runner, cmark_node_get_url(html) == NULL,
"get_url error");
OK(runner, cmark_node_get_title(header) == NULL,
"get_title error");
// Setter errors
// Each setter is called on a node type it does not apply to and is
// expected to return zero.
OK(runner, !cmark_node_set_header_level(bullet_list, 3),
"set_header_level error");
OK(runner, !cmark_node_set_list_type(header, CMARK_ORDERED_LIST),
"set_list_type error");
OK(runner, !cmark_node_set_list_start(code, 3),
"set_list_start error");
OK(runner, !cmark_node_set_list_tight(fenced, 0),
"set_list_tight error");
OK(runner, !cmark_node_set_string_content(ordered_list, "content\n"),
"set_string_content error");
OK(runner, !cmark_node_set_fence_info(paragraph, "lang"),
"set_fence_info error");
OK(runner, !cmark_node_set_url(html, "url"),
"set_url error");
OK(runner, !cmark_node_set_title(header, "title"),
"set_title error");
// Setters on the right node type but with out-of-range values must also
// be rejected.
OK(runner, !cmark_node_set_header_level(header, 0),
"set_header_level too small");
OK(runner, !cmark_node_set_header_level(header, 7),
"set_header_level too large");
OK(runner, !cmark_node_set_list_type(bullet_list, CMARK_NO_LIST),
"set_list_type invalid");
OK(runner, !cmark_node_set_list_start(bullet_list, -1),
"set_list_start negative");
cmark_node_free(doc);
}
// Hand-builds a tree with only some of its links set and checks that
// cmark_node_check reports 4 on the first pass and 0 on the second.
static void
node_check(test_batch_runner *runner) {
	cmark_node *root = cmark_node_new(CMARK_NODE_DOCUMENT);
	cmark_node *first = cmark_node_new(CMARK_NODE_PARAGRAPH);
	cmark_node *second = cmark_node_new(CMARK_NODE_PARAGRAPH);

	// Only first_child and next are set, so the tree is deliberately
	// incomplete.
	first->next = second;
	root->first_child = first;

	INT_EQ(runner, cmark_node_check(root, NULL), 4, "node_check works");
	INT_EQ(runner, cmark_node_check(root, NULL), 0,
	       "node_check fixes tree");

	cmark_node_free(root);
}
// Builds the document "<p>Hello, <em>world</em>!</p>" node by node,
// running cmark_node_check after every mutation, then moves the inline
// children around with insert_before/insert_after and finally unlinks the
// emphasis node.
static void
create_tree(test_batch_runner *runner)
{
char *html;
cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT);
cmark_node *p = cmark_node_new(CMARK_NODE_PARAGRAPH);
// doc is the root here, so the tests expect sibling insertion to fail.
OK(runner, !cmark_node_insert_before(doc, p),
"insert before root fails");
OK(runner, !cmark_node_insert_after(doc, p),
"insert after root fails");
OK(runner, cmark_node_append_child(doc, p), "append1");
INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append1 consistent");
OK(runner, cmark_node_parent(p) == doc, "node_parent");
cmark_node *emph = cmark_node_new(CMARK_NODE_EMPH);
OK(runner, cmark_node_prepend_child(p, emph), "prepend1");
INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend1 consistent");
cmark_node *str1 = cmark_node_new(CMARK_NODE_TEXT);
cmark_node_set_string_content(str1, "Hello, ");
OK(runner, cmark_node_prepend_child(p, str1), "prepend2");
INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend2 consistent");
cmark_node *str3 = cmark_node_new(CMARK_NODE_TEXT);
cmark_node_set_string_content(str3, "!");
OK(runner, cmark_node_append_child(p, str3), "append2");
INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append2 consistent");
cmark_node *str2 = cmark_node_new(CMARK_NODE_TEXT);
cmark_node_set_string_content(str2, "world");
OK(runner, cmark_node_append_child(emph, str2), "append3");
INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append3 consistent");
html = cmark_render_html(doc);
STR_EQ(runner, html, "<p>Hello, <em>world</em>!</p>\n",
"render_html");
free(html);
// The short comments below give the expected order of p's children,
// where 1 = str1, 3 = str3 and e = emph.
OK(runner, cmark_node_insert_before(str1, str3), "ins before1");
INT_EQ(runner, cmark_node_check(doc, NULL), 0,
"ins before1 consistent");
// 31e
OK(runner, cmark_node_first_child(p) == str3, "ins before1 works");
OK(runner, cmark_node_insert_before(str1, emph), "ins before2");
INT_EQ(runner, cmark_node_check(doc, NULL), 0,
"ins before2 consistent");
// 3e1
OK(runner, cmark_node_last_child(p) == str1, "ins before2 works");
OK(runner, cmark_node_insert_after(str1, str3), "ins after1");
INT_EQ(runner, cmark_node_check(doc, NULL), 0,
"ins after1 consistent");
// e13
OK(runner, cmark_node_next(str1) == str3, "ins after1 works");
OK(runner, cmark_node_insert_after(str1, emph), "ins after2");
INT_EQ(runner, cmark_node_check(doc, NULL), 0,
"ins after2 consistent");
// 1e3
OK(runner, cmark_node_previous(emph) == str1, "ins after2 works");
// Detach emph; it no longer appears in the rendered paragraph and has to
// be freed separately below.
cmark_node_unlink(emph);
html = cmark_render_html(doc);
STR_EQ(runner, html, "<p>Hello, !</p>\n",
"render_html after shuffling");
free(html);
cmark_node_free(doc);
// TODO: Test that the contents of an unlinked inline are valid
// after the parent block was destroyed. This doesn't work so far.
cmark_node_free(emph);
}
// Checks that a node cannot become its own descendant, then checks for
// every container type which child types cmark_node_append_child accepts.
void
hierarchy(test_batch_runner *runner)
{
cmark_node *bquote1 = cmark_node_new(CMARK_NODE_BLOCK_QUOTE);
cmark_node *bquote2 = cmark_node_new(CMARK_NODE_BLOCK_QUOTE);
cmark_node *bquote3 = cmark_node_new(CMARK_NODE_BLOCK_QUOTE);
// Build the chain bquote1 > bquote2 > bquote3.
OK(runner, cmark_node_append_child(bquote1, bquote2),
"append bquote2");
OK(runner, cmark_node_append_child(bquote2, bquote3),
"append bquote3");
OK(runner, !cmark_node_append_child(bquote3, bquote3),
"adding a node as child of itself fails");
OK(runner, !cmark_node_append_child(bquote3, bquote1),
"adding a parent as child fails");
cmark_node_free(bquote1);
// The masks below use one bit per node type in an int, which only works
// while every node type value is below 32.
int max_node_type = CMARK_NODE_LAST_BLOCK > CMARK_NODE_LAST_INLINE
? CMARK_NODE_LAST_BLOCK : CMARK_NODE_LAST_INLINE;
OK(runner, max_node_type < 32, "all node types < 32");
// Bit N of a mask is set when node type N is expected to be accepted as
// a child; test_content reads the bit back with a shift and mask.
int list_item_flag = 1 << CMARK_NODE_LIST_ITEM;
int top_level_blocks =
(1 << CMARK_NODE_BLOCK_QUOTE) |
(1 << CMARK_NODE_LIST) |
(1 << CMARK_NODE_CODE_BLOCK) |
(1 << CMARK_NODE_HTML) |
(1 << CMARK_NODE_PARAGRAPH) |
(1 << CMARK_NODE_HEADER) |
(1 << CMARK_NODE_HRULE) |
(1 << CMARK_NODE_REFERENCE_DEF);
int all_inlines =
(1 << CMARK_NODE_TEXT) |
(1 << CMARK_NODE_SOFTBREAK) |
(1 << CMARK_NODE_LINEBREAK) |
(1 << CMARK_NODE_INLINE_CODE) |
(1 << CMARK_NODE_INLINE_HTML) |
(1 << CMARK_NODE_EMPH) |
(1 << CMARK_NODE_STRONG) |
(1 << CMARK_NODE_LINK) |
(1 << CMARK_NODE_IMAGE);
// One call per parent type; a mask of 0 means no child is accepted.
test_content(runner, CMARK_NODE_DOCUMENT, top_level_blocks);
test_content(runner, CMARK_NODE_BLOCK_QUOTE, top_level_blocks);
test_content(runner, CMARK_NODE_LIST, list_item_flag);
test_content(runner, CMARK_NODE_LIST_ITEM, top_level_blocks);
test_content(runner, CMARK_NODE_CODE_BLOCK , 0);
test_content(runner, CMARK_NODE_HTML, 0);
test_content(runner, CMARK_NODE_PARAGRAPH, all_inlines);
test_content(runner, CMARK_NODE_HEADER, all_inlines);
test_content(runner, CMARK_NODE_HRULE, 0);
test_content(runner, CMARK_NODE_REFERENCE_DEF, 0);
test_content(runner, CMARK_NODE_TEXT, 0);
test_content(runner, CMARK_NODE_SOFTBREAK, 0);
test_content(runner, CMARK_NODE_LINEBREAK, 0);
test_content(runner, CMARK_NODE_INLINE_CODE, 0);
test_content(runner, CMARK_NODE_INLINE_HTML, 0);
test_content(runner, CMARK_NODE_EMPH, all_inlines);
test_content(runner, CMARK_NODE_STRONG, all_inlines);
test_content(runner, CMARK_NODE_LINK, all_inlines);
test_content(runner, CMARK_NODE_IMAGE, all_inlines);
}
// For a parent of the given type, tries to append one child of every type
// in node_types and compares the result of cmark_node_append_child with
// the corresponding bit of allowed_content (bit N stands for node type N).
static void
test_content(test_batch_runner *runner, cmark_node_type type,
	     int allowed_content)
{
	cmark_node *parent = cmark_node_new(type);
	int idx = 0;

	while (idx < num_node_types) {
		cmark_node_type kind = node_types[idx];
		cmark_node *candidate = cmark_node_new(kind);

		int got = cmark_node_append_child(parent, candidate);
		int expected = (allowed_content >> kind) & 1;

		INT_EQ(runner, got, expected,
		       "add %d as child of %d", kind, type);

		cmark_node_free(candidate);
		++idx;
	}

	cmark_node_free(parent);
}
// Checks that input without a final newline is still rendered as a
// complete paragraph.
static void
parser(test_batch_runner *runner)
{
	const char *markdown = "No newline";
	const char *expected_html = "<p>No newline</p>\n";

	test_md_to_html(runner, markdown, expected_html,
			"document without trailing newline");
}
// Checks that cmark_render_html can be applied to parts of a tree:
// a single block, a single inline, and an inline that has children.
static void
render_html(test_batch_runner *runner)
{
	static const char markdown[] =
		"foo *bar*\n"
		"\n"
		"paragraph 2\n";

	cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1);

	// First block of the document.
	cmark_node *first_para = cmark_node_first_child(doc);
	char *block_html = cmark_render_html(first_para);
	STR_EQ(runner, block_html, "<p>foo <em>bar</em></p>\n",
	       "render single paragraph");
	free(block_html);

	// First inline of that paragraph.
	cmark_node *text = cmark_node_first_child(first_para);
	char *text_html = cmark_render_html(text);
	STR_EQ(runner, text_html, "foo ", "render single inline");
	free(text_html);

	// The inline following it, which has a child of its own.
	cmark_node *emphasis = cmark_node_next(text);
	char *emph_html = cmark_render_html(emphasis);
	STR_EQ(runner, emph_html, "<em>bar</em>", "render inline with children");
	free(emph_html);

	cmark_node_free(doc);
}
// Feeds valid and invalid UTF-8 byte sequences through the converter.
// Sequences marked valid (second argument 1) are expected to pass through
// unchanged; invalid ones are expected to come out as UTF8_REPL.
static void
utf8(test_batch_runner *runner)
{
// Ranges
// The pairs below probe the first and last byte sequence on either side
// of each boundary between accepted and rejected encodings.
test_char(runner, 1, "\x01", "valid utf8 01");
test_char(runner, 1, "\x7F", "valid utf8 7F");
test_char(runner, 0, "\x80", "invalid utf8 80");
test_char(runner, 0, "\xBF", "invalid utf8 BF");
test_char(runner, 0, "\xC0\x80", "invalid utf8 C080");
test_char(runner, 0, "\xC1\xBF", "invalid utf8 C1BF");
test_char(runner, 1, "\xC2\x80", "valid utf8 C280");
test_char(runner, 1, "\xDF\xBF", "valid utf8 DFBF");
test_char(runner, 0, "\xE0\x80\x80", "invalid utf8 E08080");
test_char(runner, 0, "\xE0\x9F\xBF", "invalid utf8 E09FBF");
test_char(runner, 1, "\xE0\xA0\x80", "valid utf8 E0A080");
test_char(runner, 1, "\xED\x9F\xBF", "valid utf8 ED9FBF");
test_char(runner, 0, "\xED\xA0\x80", "invalid utf8 EDA080");
test_char(runner, 0, "\xED\xBF\xBF", "invalid utf8 EDBFBF");
test_char(runner, 0, "\xF0\x80\x80\x80", "invalid utf8 F0808080");
test_char(runner, 0, "\xF0\x8F\xBF\xBF", "invalid utf8 F08FBFBF");
test_char(runner, 1, "\xF0\x90\x80\x80", "valid utf8 F0908080");
test_char(runner, 1, "\xF4\x8F\xBF\xBF", "valid utf8 F48FBFBF");
test_char(runner, 0, "\xF4\x90\x80\x80", "invalid utf8 F4908080");
test_char(runner, 0, "\xF7\xBF\xBF\xBF", "invalid utf8 F7BFBFBF");
test_char(runner, 0, "\xF8", "invalid utf8 F8");
test_char(runner, 0, "\xFF", "invalid utf8 FF");
// Incomplete byte sequences at end of input
test_incomplete_char(runner, "\xE0\xA0", "invalid utf8 E0A0");
test_incomplete_char(runner, "\xF0\x90\x80", "invalid utf8 F09080");
// Invalid continuation bytes
test_continuation_byte(runner, "\xC2\x80");
test_continuation_byte(runner, "\xE0\xA0\x80");
test_continuation_byte(runner, "\xF0\x90\x80\x80");
// Test string containing null character
// The length is passed explicitly (sizeof - 1) because strlen would stop
// at the embedded NUL.
static const char string_with_null[] = "((((\0))))";
char *html = cmark_markdown_to_html(string_with_null,
sizeof(string_with_null) - 1);
STR_EQ(runner, html, "<p>((((" UTF8_REPL "))))</p>\n",
"utf8 with U+0000");
free(html);
}
/*
 * Convert "((((<utf8>))))" to HTML and check the output.
 *
 * runner: batch runner that records the result.
 * valid:  non-zero if the sequence is expected to pass through unchanged;
 *         zero if it is expected to be replaced by UTF8_REPL.
 * utf8:   NUL-terminated byte sequence under test.
 * msg:    description reported with the test result.
 *
 * The buffers are sized for the short sequences used by the callers in this
 * file; snprintf() keeps a longer argument from writing past them.
 */
static void
test_char(test_batch_runner *runner, int valid, const char *utf8,
	  const char *msg)
{
	char buf[20];
	char expected[30];
	// Expectation for an invalid sequence: the bytes are replaced.
	const char *want = "<p>((((" UTF8_REPL "))))</p>\n";

	snprintf(buf, sizeof(buf), "((((%s))))", utf8);

	if (valid) {
		// A valid sequence must come back byte-for-byte.
		snprintf(expected, sizeof(expected), "<p>((((%s))))</p>\n",
			 utf8);
		want = expected;
	}

	test_md_to_html(runner, buf, want, msg);
}
/*
 * Convert "----<utf8>" to HTML, where <utf8> is a sequence that ends before
 * it is complete, and check that the output is "----" followed by a single
 * UTF8_REPL.
 *
 * runner: batch runner that records the result.
 * utf8:   NUL-terminated truncated byte sequence.
 * msg:    description reported with the test result.
 *
 * snprintf() bounds the write to the local buffer, which is sized for the
 * short sequences passed by the callers in this file.
 */
static void
test_incomplete_char(test_batch_runner *runner, const char *utf8,
		     const char *msg)
{
	char buf[20];

	snprintf(buf, sizeof(buf), "----%s", utf8);
	test_md_to_html(runner, buf, "<p>----" UTF8_REPL "</p>\n", msg);
}
/*
 * For every byte of `utf8` after the first, overwrite that byte with a
 * space inside "((((<utf8>))))" and check the HTML output.
 *
 * The expected output is one UTF8_REPL for the bytes before the space, the
 * space itself, and one UTF8_REPL for each byte after it.
 *
 * The fixed-size buffers assume the short (at most four byte) sequences
 * passed by the callers in this file.
 */
static void
test_continuation_byte(test_batch_runner *runner, const char *utf8)
{
	const int total = strlen(utf8);
	int pos;

	for (pos = 1; pos < total; pos++) {
		char input[20];
		char want[50];
		char *html;
		int trailing = total - (pos + 1);

		// The sequence starts at offset 4, right after "((((".
		sprintf(input, "((((%s))))", utf8);
		input[4 + pos] = '\x20';

		strcpy(want, "<p>((((" UTF8_REPL "\x20");
		while (trailing-- > 0) {
			strcat(want, UTF8_REPL);
		}
		strcat(want, "))))</p>\n");

		html = cmark_markdown_to_html(input, strlen(input));
		STR_EQ(runner, html, want,
		       "invalid utf8 continuation byte %d/%d", pos, total);
		free(html);
	}
}
/*
 * Convert the NUL-terminated `markdown` string to HTML and compare the
 * result with `expected_html`, reporting the outcome under `msg`.
 * The converted string is freed before returning.
 */
static void
test_md_to_html(test_batch_runner *runner, const char *markdown,
		const char *expected_html, const char *msg)
{
	const size_t markdown_len = strlen(markdown);
	char *rendered = cmark_markdown_to_html(markdown, markdown_len);

	STR_EQ(runner, rendered, expected_html, msg);
	free(rendered);
}
/*
 * Run every test group against one batch runner, print the summary, and
 * exit with 0 when test_ok() reports success and 1 otherwise.
 */
int main() {
	test_batch_runner *runner = test_batch_runner_new();

	// Test groups, in the order they are reported.
	constructor(runner);
	accessors(runner);
	node_check(runner);
	create_tree(runner);
	hierarchy(runner);
	parser(runner);
	render_html(runner);
	utf8(runner);
	test_cplusplus(runner);

	test_print_summary(runner);

	// Read the verdict before the runner is released.
	const int failed = !test_ok(runner);
	free(runner);

	return failed ? 1 : 0;
}

View File

@ -0,0 +1,595 @@
## Module statistics.py
##
## Copyright (c) 2013 Steven D'Aprano <steve+python@pearwood.info>.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
"""
Basic statistics module.
This module provides functions for calculating statistics of data, including
averages, variance, and standard deviation.
Calculating averages
--------------------
================== =============================================
Function Description
================== =============================================
mean Arithmetic mean (average) of data.
median Median (middle value) of data.
median_low Low median of data.
median_high High median of data.
median_grouped Median, or 50th percentile, of grouped data.
mode Mode (most common value) of data.
================== =============================================
Calculate the arithmetic mean ("the average") of data:
>>> mean([-1.0, 2.5, 3.25, 5.75])
2.625
Calculate the standard median of discrete data:
>>> median([2, 3, 4, 5])
3.5
Calculate the median, or 50th percentile, of data grouped into class intervals
centred on the data values provided. E.g. if your data points are rounded to
the nearest whole number:
>>> median_grouped([2, 2, 3, 3, 3, 4]) #doctest: +ELLIPSIS
2.8333333333...
This should be interpreted in this way: you have two data points in the class
interval 1.5-2.5, three data points in the class interval 2.5-3.5, and one in
the class interval 3.5-4.5. The median of these data points is 2.8333...
Calculating variability or spread
---------------------------------
================== =============================================
Function Description
================== =============================================
pvariance Population variance of data.
variance Sample variance of data.
pstdev Population standard deviation of data.
stdev Sample standard deviation of data.
================== =============================================
Calculate the standard deviation of sample data:
>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) #doctest: +ELLIPSIS
4.38961843444...
If you have previously calculated the mean, you can pass it as the optional
second argument to the four "spread" functions to avoid recalculating it:
>>> data = [1, 2, 2, 4, 4, 4, 5, 6]
>>> mu = mean(data)
>>> pvariance(data, mu)
2.5
Exceptions
----------
A single exception is defined: StatisticsError is a subclass of ValueError.
"""
__all__ = [ 'StatisticsError',
'pstdev', 'pvariance', 'stdev', 'variance',
'median', 'median_low', 'median_high', 'median_grouped',
'mean', 'mode',
]
import collections
import math
from fractions import Fraction
from decimal import Decimal
# === Exceptions ===
class StatisticsError(ValueError):
    """Raised by the functions in this module for unusable input data.

    Examples are empty data, or data without a unique mode.
    """
# === Private utilities ===
def _sum(data, start=0):
    """_sum(data [, start]) -> value

    Return a high-precision sum of the given numeric data. If optional
    argument ``start`` is given, it is added to the total. If ``data`` is
    empty, ``start`` (defaulting to 0) is returned.

    Examples
    --------

    >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
    11.0

    Some sources of round-off error will be avoided:

    >>> _sum([1e50, 1, -1e50] * 1000)  # Built-in sum returns zero.
    1000.0

    Fractions and Decimals are also supported:

    >>> from fractions import Fraction as F
    >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
    Fraction(63, 20)

    >>> from decimal import Decimal as D
    >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
    >>> _sum(data)
    Decimal('0.6963')

    Booleans are counted as the ints 0 and 1:

    >>> _sum([True, True, False])
    2

    Mixed types are currently treated as an error, except that int is
    allowed.

    Raises TypeError for unsupported mixtures of types, or for values that
    cannot be converted to a numerator/denominator pair.
    """
    # We fail as soon as we reach a value that is not an int or the type of
    # the first value which is not an int. E.g. _sum([int, int, float, int])
    # is okay, but sum([int, int, float, Fraction]) is not.
    #
    # bool is a subclass of int. If it were recorded as the "other" type, the
    # total would be coerced back to bool at the end (bool(2) is True), so it
    # is folded into int before the type bookkeeping.
    start_type = type(start)
    if start_type is bool:
        start_type = int
    allowed_types = set([int, start_type])
    n, d = _exact_ratio(start)
    partials = {d: n}  # map {denominator: sum of numerators}
    # Micro-optimizations.
    exact_ratio = _exact_ratio
    partials_get = partials.get
    # Add numerators for each denominator.
    for x in data:
        typ = type(x)
        if typ is bool:
            typ = int
        _check_type(typ, allowed_types)
        n, d = exact_ratio(x)
        partials[d] = partials_get(d, 0) + n
    # Find the expected result type. If allowed_types has only one item, it
    # will be int; if it has two, use the one which isn't int.
    assert len(allowed_types) in (1, 2)
    if len(allowed_types) == 1:
        # Membership test only: an assert must not mutate the set.
        assert int in allowed_types
        T = int
    else:
        T = (allowed_types - set([int])).pop()
    if None in partials:
        # A None denominator marks a non-finite value (INF or NAN).
        assert issubclass(T, (float, Decimal))
        assert not math.isfinite(partials[None])
        return T(partials[None])
    total = Fraction()
    for d, n in sorted(partials.items()):
        total += Fraction(n, d)
    if issubclass(T, int):
        assert total.denominator == 1
        return T(total.numerator)
    if issubclass(T, Decimal):
        return T(total.numerator)/total.denominator
    return T(total)
def _check_type(T, allowed):
if T not in allowed:
if len(allowed) == 1:
allowed.add(T)
else:
types = ', '.join([t.__name__ for t in allowed] + [T.__name__])
raise TypeError("unsupported mixed types: %s" % types)
def _exact_ratio(x):
"""Convert Real number x exactly to (numerator, denominator) pair.
>>> _exact_ratio(0.25)
(1, 4)
x is expected to be an int, Fraction, Decimal or float.
"""
try:
try:
# int, Fraction
return (x.numerator, x.denominator)
except AttributeError:
# float
try:
return x.as_integer_ratio()
except AttributeError:
# Decimal
try:
return _decimal_to_ratio(x)
except AttributeError:
msg = "can't convert type '{}' to numerator/denominator"
raise TypeError(msg.format(type(x).__name__)) from None
except (OverflowError, ValueError):
# INF or NAN
if __debug__:
# Decimal signalling NANs cannot be converted to float :-(
if isinstance(x, Decimal):
assert not x.is_finite()
else:
assert not math.isfinite(x)
return (x, None)
# FIXME This is faster than Fraction.from_decimal, but still too slow.
def _decimal_to_ratio(d):
"""Convert Decimal d to exact integer ratio (numerator, denominator).
>>> from decimal import Decimal
>>> _decimal_to_ratio(Decimal("2.6"))
(26, 10)
"""
sign, digits, exp = d.as_tuple()
if exp in ('F', 'n', 'N'): # INF, NAN, sNAN
assert not d.is_finite()
raise ValueError
num = 0
for digit in digits:
num = num*10 + digit
if exp < 0:
den = 10**-exp
else:
num *= 10**exp
den = 1
if sign:
num = -num
return (num, den)
def _counts(data):
# Generate a table of sorted (value, frequency) pairs.
table = collections.Counter(iter(data)).most_common()
if not table:
return table
# Extract the values with the highest frequency.
maxfreq = table[0][1]
for i in range(1, len(table)):
if table[i][1] != maxfreq:
table = table[:i]
break
return table
# === Measures of central tendency (averages) ===
def mean(data):
    """Return the sample arithmetic mean of data.

    >>> mean([1, 2, 3, 4, 4])
    2.8

    >>> from fractions import Fraction as F
    >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)])
    Fraction(13, 21)

    >>> from decimal import Decimal as D
    >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")])
    Decimal('0.5625')

    ``data`` may be a sequence or an iterator. StatisticsError is raised if
    it is empty.
    """
    # An iterator has no len(), so materialise it first.
    values = list(data) if iter(data) is data else data
    count = len(values)
    if count < 1:
        raise StatisticsError('mean requires at least one data point')
    return _sum(values)/count
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
def median(data):
    """Return the median (middle value) of numeric data.

    With an odd number of data points the middle one is returned. With an
    even number, the result is the average of the two middle values:

    >>> median([1, 3, 5])
    3
    >>> median([1, 3, 5, 7])
    4.0

    Raises StatisticsError if ``data`` is empty.
    """
    ordered = sorted(data)
    size = len(ordered)
    if size == 0:
        raise StatisticsError("no median for empty data")
    mid, is_odd = divmod(size, 2)
    if is_odd:
        return ordered[mid]
    return (ordered[mid - 1] + ordered[mid])/2
def median_low(data):
    """Return the low median of numeric data.

    With an odd number of data points the middle value is returned. With an
    even number, the smaller of the two middle values is returned.

    >>> median_low([1, 3, 5])
    3
    >>> median_low([1, 3, 5, 7])
    3

    Raises StatisticsError if ``data`` is empty.
    """
    ordered = sorted(data)
    size = len(ordered)
    if size == 0:
        raise StatisticsError("no median for empty data")
    # (size - 1)//2 is the middle index for odd sizes and the lower of the
    # two middle indices for even sizes.
    return ordered[(size - 1)//2]
def median_high(data):
    """Return the high median of data.

    With an odd number of data points the middle value is returned. With an
    even number, the larger of the two middle values is returned.

    >>> median_high([1, 3, 5])
    3
    >>> median_high([1, 3, 5, 7])
    5

    Raises StatisticsError if ``data`` is empty.
    """
    ordered = sorted(data)
    if not ordered:
        raise StatisticsError("no median for empty data")
    upper_middle = len(ordered)//2
    return ordered[upper_middle]
def median_grouped(data, interval=1):
    """Return the 50th percentile (median) of grouped continuous data.

    >>> median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5])
    3.7
    >>> median_grouped([52, 52, 53, 54])
    52.5

    Use this when the data is continuous and grouped: each value stands for
    the midpoint of a class. In the first example the values 1, 2, 3, etc.
    represent the classes 0.5-1.5, 1.5-2.5, 2.5-3.5, etc. The middle value
    falls in class 3.5-4.5, and interpolation is used to estimate it.

    Optional argument ``interval`` is the class width and defaults to 1.
    Changing it changes the interpolated result:

    >>> median_grouped([1, 3, 3, 5, 7], interval=1)
    3.25
    >>> median_grouped([1, 3, 3, 5, 7], interval=2)
    3.5

    This function does not check whether the data points are at least
    ``interval`` apart.

    Raises StatisticsError if ``data`` is empty, and TypeError if the middle
    value or ``interval`` is a str or bytes object. A single data point is
    returned unchanged.
    """
    ordered = sorted(data)
    size = len(ordered)
    if size == 0:
        raise StatisticsError("no median for empty data")
    if size == 1:
        return ordered[0]
    # The value at the midpoint is the centre of the median class.
    middle = ordered[size//2]
    for candidate in (middle, interval):
        if isinstance(candidate, (str, bytes)):
            raise TypeError('expected number but got %r' % candidate)
    try:
        lower = middle - interval/2  # Lower limit of the median class.
    except TypeError:
        # Mixed type. For now we just coerce to float.
        lower = float(middle) - float(interval)/2
    below = ordered.index(middle)   # Values below the median class.
    within = ordered.count(middle)  # Values inside the median class.
    return lower + interval*(size/2 - below)/within
def mode(data):
    """Return the most common data point from discrete or nominal data.

    ``mode`` assumes discrete data and returns a single value:

    >>> mode([1, 1, 2, 3, 3, 3, 3, 4])
    3

    Nominal (non-numeric) data works as well:

    >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
    'red'

    Raises StatisticsError if ``data`` is empty, or if there is not exactly
    one most common value.
    """
    # Only the (value, frequency) pairs tied for the top frequency.
    candidates = _counts(data)
    if not candidates:
        raise StatisticsError('no mode for empty data')
    if len(candidates) != 1:
        raise StatisticsError(
                'no unique mode; found %d equally common values' % len(candidates)
                )
    return candidates[0][0]
# === Measures of spread ===
# See http://mathworld.wolfram.com/Variance.html
# http://mathworld.wolfram.com/SampleVariance.html
# http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
#
# Under no circumstances use the so-called "computational formula for
# variance", as that is only suitable for hand calculations with a small
# amount of low-precision data. It has terrible numeric properties.
#
# See a comparison of three computational methods here:
# http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/
def _ss(data, c=None):
    """Return sum of square deviations of sequence data.

    When ``c`` is None the deviations are taken from the mean of ``data``,
    which is computed first. Otherwise they are taken from ``c`` as given;
    use that form with care, as it can lead to garbage results.

    ``data`` is traversed more than once and passed to len().
    """
    centre = mean(data) if c is None else c
    squares = _sum((x-centre)**2 for x in data)
    # The sum of the plain deviations should mathematically equal zero, but
    # due to rounding error may not; subtract its contribution.
    residual = _sum((x-centre) for x in data)
    squares -= residual**2/len(data)
    assert not squares < 0, 'negative sum of square deviations: %f' % squares
    return squares
def variance(data, xbar=None):
    """Return the sample variance of data.

    ``data`` should be an iterable of Real-valued numbers with at least two
    values. Optional ``xbar``, if given, should be the mean of the data;
    when it is missing or None the mean is calculated automatically.

    Use this function when the data is a sample from a population. For the
    variance of an entire population, see ``pvariance``.

    Examples:

    >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
    >>> variance(data)
    1.3720238095238095

    A previously calculated mean can be passed as ``xbar`` to avoid
    recalculating it:

    >>> m = mean(data)
    >>> variance(data, m)
    1.3720238095238095

    No check is made that ``xbar`` is actually the mean of ``data``;
    arbitrary values may lead to invalid or impossible results.

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('31.01875')

    >>> from fractions import Fraction as F
    >>> variance([F(1, 6), F(1, 2), F(5, 3)])
    Fraction(67, 108)

    Raises StatisticsError if there are fewer than two data points.
    """
    # An iterator has no len() and can only be walked once.
    values = list(data) if iter(data) is data else data
    count = len(values)
    if count < 2:
        raise StatisticsError('variance requires at least two data points')
    return _ss(values, xbar)/(count - 1)
def pvariance(data, mu=None):
    """Return the population variance of ``data``.

    ``data`` should be an iterable of Real-valued numbers with at least one
    value. Optional ``mu``, if given, should be the mean of the data; when
    it is missing or None the mean is calculated automatically.

    Use this function for the variance of an entire population. To estimate
    the variance from a sample, ``variance`` is usually a better choice.

    Examples:

    >>> data = [0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]
    >>> pvariance(data)
    1.25

    A previously calculated mean can be passed as ``mu`` to avoid
    recalculating it:

    >>> mu = mean(data)
    >>> pvariance(data, mu)
    1.25

    No check is made that ``mu`` is actually the mean of ``data``;
    arbitrary values may lead to invalid or impossible results.

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('24.815')

    >>> from fractions import Fraction as F
    >>> pvariance([F(1, 4), F(5, 4), F(1, 2)])
    Fraction(13, 72)

    Raises StatisticsError if ``data`` is empty.
    """
    # An iterator has no len() and can only be walked once.
    values = list(data) if iter(data) is data else data
    count = len(values)
    if count < 1:
        raise StatisticsError('pvariance requires at least one data point')
    return _ss(values, mu)/count
def stdev(data, xbar=None):
    """Return the square root of the sample variance.

    See ``variance`` for arguments and other details.

    >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    1.0810874155219827
    """
    sample_var = variance(data, xbar)
    try:
        # Use the value's own sqrt() method when it has one.
        root = sample_var.sqrt()
    except AttributeError:
        root = math.sqrt(sample_var)
    return root
def pstdev(data, mu=None):
    """Return the square root of the population variance.

    See ``pvariance`` for arguments and other details.

    >>> pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    0.986893273527251
    """
    population_var = pvariance(data, mu)
    try:
        # Use the value's own sqrt() method when it has one.
        root = population_var.sqrt()
    except AttributeError:
        root = math.sqrt(population_var)
    return root

View File

@ -0,0 +1,19 @@
#!/usr/bin/env python3
import sys
import statistics
def pairs(l, n):
    """Group the sequence ``l`` into consecutive tuples of ``n`` items.

    ``l`` must support slicing. Trailing items that do not fill a whole
    tuple are dropped.
    """
    # Column k holds every n-th item starting at offset k; zipping the
    # columns puts neighbouring items back together.
    columns = []
    for offset in range(n):
        columns.append(l[offset::n])
    return zip(*columns)
# Standard input carries the timings as alternating lines:
#   n - time for running the program with no input
#   m - time for running it with the benchmark input
# Each sample is the difference (m - n).
values = [
    float(loaded) - float(idle)
    for (idle, loaded) in pairs(sys.stdin.readlines(), 2)
]

summary = (
    statistics.mean(values),
    statistics.median(values),
    statistics.stdev(values),
)
print("mean = %.4f, median = %.4f, stdev = %.4f" % summary)

View File

@ -0,0 +1,33 @@
# Benchmarks
Some benchmarks, run on an ancient Thinkpad running Intel Core 2 Duo at 2GHz.
|Implementation | Time (sec)| Factor |
|-------------------|-----------:|--------:|
| Markdown.pl | 2921.24 | 14606.2 |
| PHP markdown | 20.85 | 104.3 |
| kramdown | 20.83 | 104.1 |
| lunamark | 6.295 | 31.5 |
| cheapskate | 5.760 | 28.8 |
| peg-markdown | 5.450 | 27.3 |
| **commonmark.js** | 2.675 | 13.4 |
| marked | 1.855 | 9.3 |
| discount | 1.705 | 8.5 |
| **cmark** | 0.295 | 1.5 |
| sundown | 0.200 | 1.0 |
To run these benchmarks, use `make bench PROG=/path/to/program`.
The input text is a 10MB Markdown file built by concatenating 20 copies
of the Markdown source of the first edition of [*Pro
Git*](https://github.com/progit/progit/tree/master/en) by Scott Chacon.
`time` is used to measure execution speed. The reported
time is the *difference* between the time to run the program
with the benchmark input and the time to run it with no input.
(This procedure ensures that implementations in dynamic languages are
not penalized by startup time.) A median of ten runs is taken. The
process is reniced to a high priority so that the system doesn't
interrupt runs.

View File

@ -0,0 +1,10 @@
[since 0.12]
* Updated path of test program.
* Use terminology "plain textual content" instead of "string."
* Added condition that conforming parsers strip or replace NULL characters.
* Changed Example 196 to reflect the spec's rules. It should not be a loose
list as it has no blank lines.
* Adjusted semantically insignificant formatting of HTML output.
* Added example to spec of shortcut link with following space (#214).

View File

@ -0,0 +1,912 @@
# CaseFolding-3.2.0.txt
# Date: 2002-03-22,20:54:33 GMT [MD]
#
# Case Folding Properties
#
# This file is a supplement to the UnicodeData file.
# It provides a case folding mapping generated from the Unicode Character Database.
# If all characters are mapped according to the full mapping below, then
# case differences (according to UnicodeData.txt and SpecialCasing.txt)
# are eliminated.
#
# The data supports both implementations that require simple case foldings
# (where string lengths don't change), and implementations that allow full case folding
# (where string lengths may grow). Note that where they can be supported, the
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
#
# NOTE: case folding does not preserve normalization formats!
#
# For information on case folding, see
# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
#
# ================================================================================
# Format
# ================================================================================
# The entries in this file are in the following machine-readable format:
#
# <code>; <status>; <mapping>; # <name>
#
# The status field is:
# C: common case folding, common mappings shared by both simple and full mappings.
# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
# S: simple case folding, mappings to single characters where different from F.
# T: special case for uppercase I and dotted uppercase I
# - For non-Turkic languages, this mapping is normally not used.
# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
#
# Usage:
# A. To do a simple case folding, use the mappings with status C + S.
# B. To do a full case folding, use the mappings with status C + F.
#
# The mappings with status T can be used or omitted depending on the desired case-folding
# behavior. (The default option is to exclude them.)
#
# =================================================================
0041; C; 0061; # LATIN CAPITAL LETTER A
0042; C; 0062; # LATIN CAPITAL LETTER B
0043; C; 0063; # LATIN CAPITAL LETTER C
0044; C; 0064; # LATIN CAPITAL LETTER D
0045; C; 0065; # LATIN CAPITAL LETTER E
0046; C; 0066; # LATIN CAPITAL LETTER F
0047; C; 0067; # LATIN CAPITAL LETTER G
0048; C; 0068; # LATIN CAPITAL LETTER H
0049; C; 0069; # LATIN CAPITAL LETTER I
0049; T; 0131; # LATIN CAPITAL LETTER I
004A; C; 006A; # LATIN CAPITAL LETTER J
004B; C; 006B; # LATIN CAPITAL LETTER K
004C; C; 006C; # LATIN CAPITAL LETTER L
004D; C; 006D; # LATIN CAPITAL LETTER M
004E; C; 006E; # LATIN CAPITAL LETTER N
004F; C; 006F; # LATIN CAPITAL LETTER O
0050; C; 0070; # LATIN CAPITAL LETTER P
0051; C; 0071; # LATIN CAPITAL LETTER Q
0052; C; 0072; # LATIN CAPITAL LETTER R
0053; C; 0073; # LATIN CAPITAL LETTER S
0054; C; 0074; # LATIN CAPITAL LETTER T
0055; C; 0075; # LATIN CAPITAL LETTER U
0056; C; 0076; # LATIN CAPITAL LETTER V
0057; C; 0077; # LATIN CAPITAL LETTER W
0058; C; 0078; # LATIN CAPITAL LETTER X
0059; C; 0079; # LATIN CAPITAL LETTER Y
005A; C; 007A; # LATIN CAPITAL LETTER Z
00B5; C; 03BC; # MICRO SIGN
00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
00C6; C; 00E6; # LATIN CAPITAL LETTER AE
00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
014A; C; 014B; # LATIN CAPITAL LETTER ENG
014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
0152; C; 0153; # LATIN CAPITAL LIGATURE OE
0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
017F; C; 0073; # LATIN SMALL LETTER LONG S
0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
0196; C; 0269; # LATIN CAPITAL LETTER IOTA
0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
01A2; C; 01A3; # LATIN CAPITAL LETTER OI
01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
01A6; C; 0280; # LATIN LETTER YR
01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
01A9; C; 0283; # LATIN CAPITAL LETTER ESH
01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
01B7; C; 0292; # LATIN CAPITAL LETTER EZH
01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
021C; C; 021D; # LATIN CAPITAL LETTER YOGH
021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
0222; C; 0223; # LATIN CAPITAL LETTER OU
0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
0392; C; 03B2; # GREEK CAPITAL LETTER BETA
0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
0397; C; 03B7; # GREEK CAPITAL LETTER ETA
0398; C; 03B8; # GREEK CAPITAL LETTER THETA
0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
039C; C; 03BC; # GREEK CAPITAL LETTER MU
039D; C; 03BD; # GREEK CAPITAL LETTER NU
039E; C; 03BE; # GREEK CAPITAL LETTER XI
039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
03A0; C; 03C0; # GREEK CAPITAL LETTER PI
03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
03D0; C; 03B2; # GREEK BETA SYMBOL
03D1; C; 03B8; # GREEK THETA SYMBOL
03D5; C; 03C6; # GREEK PHI SYMBOL
03D6; C; 03C0; # GREEK PI SYMBOL
03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
03DA; C; 03DB; # GREEK LETTER STIGMA
03DC; C; 03DD; # GREEK LETTER DIGAMMA
03DE; C; 03DF; # GREEK LETTER KOPPA
03E0; C; 03E1; # GREEK LETTER SAMPI
03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
03F0; C; 03BA; # GREEK KAPPA SYMBOL
03F1; C; 03C1; # GREEK RHO SYMBOL
03F2; C; 03C3; # GREEK LUNATE SIGMA SYMBOL
03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
0410; C; 0430; # CYRILLIC CAPITAL LETTER A
0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
0418; C; 0438; # CYRILLIC CAPITAL LETTER I
0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
041E; C; 043E; # CYRILLIC CAPITAL LETTER O
041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
0423; C; 0443; # CYRILLIC CAPITAL LETTER U
0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
042D; C; 044D; # CYRILLIC CAPITAL LETTER E
042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
2126; C; 03C9; # OHM SIGN
212A; C; 006B; # KELVIN SIGN
212B; C; 00E5; # ANGSTROM SIGN
2160; C; 2170; # ROMAN NUMERAL ONE
2161; C; 2171; # ROMAN NUMERAL TWO
2162; C; 2172; # ROMAN NUMERAL THREE
2163; C; 2173; # ROMAN NUMERAL FOUR
2164; C; 2174; # ROMAN NUMERAL FIVE
2165; C; 2175; # ROMAN NUMERAL SIX
2166; C; 2176; # ROMAN NUMERAL SEVEN
2167; C; 2177; # ROMAN NUMERAL EIGHT
2168; C; 2178; # ROMAN NUMERAL NINE
2169; C; 2179; # ROMAN NUMERAL TEN
216A; C; 217A; # ROMAN NUMERAL ELEVEN
216B; C; 217B; # ROMAN NUMERAL TWELVE
216C; C; 217C; # ROMAN NUMERAL FIFTY
216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
10400; C; 10428; # DESERET CAPITAL LETTER LONG I
10401; C; 10429; # DESERET CAPITAL LETTER LONG E
10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
1040C; C; 10434; # DESERET CAPITAL LETTER AY
1040D; C; 10435; # DESERET CAPITAL LETTER OW
1040E; C; 10436; # DESERET CAPITAL LETTER WU
1040F; C; 10437; # DESERET CAPITAL LETTER YEE
10410; C; 10438; # DESERET CAPITAL LETTER H
10411; C; 10439; # DESERET CAPITAL LETTER PEE
10412; C; 1043A; # DESERET CAPITAL LETTER BEE
10413; C; 1043B; # DESERET CAPITAL LETTER TEE
10414; C; 1043C; # DESERET CAPITAL LETTER DEE
10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
10416; C; 1043E; # DESERET CAPITAL LETTER JEE
10417; C; 1043F; # DESERET CAPITAL LETTER KAY
10418; C; 10440; # DESERET CAPITAL LETTER GAY
10419; C; 10441; # DESERET CAPITAL LETTER EF
1041A; C; 10442; # DESERET CAPITAL LETTER VEE
1041B; C; 10443; # DESERET CAPITAL LETTER ETH
1041C; C; 10444; # DESERET CAPITAL LETTER THEE
1041D; C; 10445; # DESERET CAPITAL LETTER ES
1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
1041F; C; 10447; # DESERET CAPITAL LETTER ESH
10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
10421; C; 10449; # DESERET CAPITAL LETTER ER
10422; C; 1044A; # DESERET CAPITAL LETTER EL
10423; C; 1044B; # DESERET CAPITAL LETTER EM
10424; C; 1044C; # DESERET CAPITAL LETTER EN
10425; C; 1044D; # DESERET CAPITAL LETTER ENG

View File

@ -0,0 +1,151 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>commonmark.js demo</title>
<script src="//code.jquery.com/jquery-1.11.0.min.js"></script>
<script src="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
<link href="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
<script src="js/commonmark.js"></script>
<script type="text/javascript">
var writer = new commonmark.HtmlRenderer();
var reader = new commonmark.DocParser();
// Look up one parameter in the page's query string (window.location.search).
// Returns the URI-decoded value of the first pair whose key equals `variable`,
// an empty string when the key is present without a value, or null when the
// key does not occur at all.
function getQueryVariable(variable)
{
  var query = window.location.search.substring(1);
  var vars = query.split("&");
  for (var i = 0; i < vars.length; i++) {
    // Split on the FIRST "=" only, so a value that itself contains "="
    // is not truncated.
    var eq = vars[i].indexOf("=");
    var key = eq === -1 ? vars[i] : vars[i].slice(0, eq);
    if (key == variable) {
      // A bare key ("?text") used to decode `undefined` into the literal
      // string "undefined"; report it as an empty value instead.
      return eq === -1 ? "" : decodeURIComponent(vars[i].slice(eq + 1));
    }
  }
  return null;
}
// Wire up the dingus once the DOM is ready: re-parse the textarea whenever
// it changes and show the result in the Preview, HTML and AST tabs.
$(document).ready(function() {
// handle of the pending setTimeout scheduled by parseAndRender
var timer;
// NOTE(review): x is never assigned anywhere in this handler, so the
// x.abort() call in parseAndRender never runs.
var x;
// most recent parse result; stays undefined until the first parse completes
var parsed;
// Render the last parse result into all three result panes and report how
// long the HTML rendering took.
var render = function() {
if (parsed === undefined) {
return;
}
var startTime = new Date().getTime();
var result = writer.renderBlock(parsed);
var endTime = new Date().getTime();
var renderTime = endTime - startTime;
// $("#html").text(result);
// .html() injects the rendered markup; .text() below shows it as source
$("#preview").html(result);
$("#html").text(result);
$("#ast").text(commonmark.ASTRenderer(parsed));
$("#rendertime").text(renderTime);
};
// Schedule a parse of the textarea contents followed by a render. Any
// previously scheduled run is cancelled first.
var parseAndRender = function () {
if (x) { x.abort() } // If there is an existing XHR, abort it.
clearTimeout(timer); // Clear the timer so we don't end up with dupes.
timer = setTimeout(function() { // assign timer a new timeout
var startTime = new Date().getTime();
parsed = reader.parse($("#text").val());
var endTime = new Date().getTime();
var parseTime = endTime - startTime;
$("#parsetime").text(parseTime);
// the timing line is hidden by the stylesheet until the first parse
$(".timing").css('visibility','visible');
/*
var warnings = parsed.warnings;
$("#warnings").html('');
for (i=0; i < warnings.length; i++) {
var w = warnings[i];
var warning = $("#warnings").append('<li></li>');
$("#warnings li").last().text('Line ' + w.line + ' column ' + w.column + ': ' + w.message);
}
*/
render();
}, 0); // ms delay
};
// Pre-fill the textarea from a ?text=... permalink, if one was given.
var initial_text = getQueryVariable("text");
if (initial_text) {
$("#text").val(initial_text);
// show HTML tab if text is from query
$('#result-tabs a[href="#result"]').tab('show');
}
// make tab insert a tab in the text box:
$("#text").keydown(function (e) {
if (e.which == 9) {
e.preventDefault();
// the tab is appended to the end of the value, not inserted at the caret
this.value += "\t";
}
});
// initial parse of whatever is in the textarea at load time
parseAndRender();
// "clear" link: empty the textarea and drop the query string.
$("#clear-text-box").click(function(e) {
$("#text").val('');
window.location.search = "";
parseAndRender();
});
// "permalink" link: navigate to /index.html with the current text encoded
// in the query string.
$("#permalink").click(function(e) {
window.location.pathname = "/index.html";
window.location.search = "text=" + encodeURIComponent($("#text").val());
});
// re-parse on every kind of edit to the textarea
$("#text").bind('keyup paste cut mouseup', parseAndRender);
// re-render (without re-parsing) when an element with class "option" changes
$(".option").change(render);
});
</script>
<style type="text/css">
h1.title { font-family: monospace; font-size: 120%; font-weight: bold;
margin-top: 0.5em; margin-bottom: 0; }
textarea#text { height: 400px; width: 95%; font-family: monospace; font-size: 92%; }
pre code#html { font-size: 92%; font-family: monospace; }
pre#htmlpre { height: 400px; overflow: scroll; resize: vertical; width: 95%; }
div#astpre { height: 400px; overflow: scroll; resize: vertical; width: 95%; }
div#preview { height: 400px; overflow: scroll; resize: vertical; width: 95%; }
div.row { margin-top: 1em; }
blockquote { font-size: 100%; }
footer { color: #555; text-align: center; margin: 1em; }
pre { display: block; padding: 0.5em; color: #333; background: #f8f8ff }
#warnings li { color: red; font-weight: bold; }
label { padding-left: 1em; padding-top: 0; padding-bottom: 0; }
div.timing { color: gray; visibility: hidden; height: 2em; }
p#text-controls { height: 1em; margin-top: 1em; }
a#permalink { margin-left: 1em; }
span.timing { font-weight: bold; }
span.timing { font-weight: bold; }
</style>
</head>
<body>
<div class="container">
<div class="row">
<div class="col-md-6">
<h1 class="title">commonmark.js dingus</h1>
</div>
</div>
<div class="row">
<div class="col-md-6">
<p id="text-controls"><a id="clear-text-box">clear</a>&nbsp;<a
id="permalink">permalink</a></p>
<textarea id="text"></textarea>
<ul id="warnings"></ul>
<div class="timing">Parsed in <span class="timing" id="parsetime"></span>
ms. Rendered in <span class="timing" id="rendertime"></span> ms.</div>
</div>
<div class="col-md-6">
<ul id="result-tabs" class="nav nav-tabs" role="tablist">
<li class="active"><a href="#preview" role="tab" data-toggle="tab">Preview</a></li>
<li><a href="#result" role="tab" data-toggle="tab">HTML</a></li>
<li><a href="#result-ast" role="tab" data-toggle="tab">AST</a></li>
</ul>
<div class="tab-content">
<div id="preview" class="tab-pane active">
</div>
<div id="result" class="tab-pane">
<pre id="htmlpre"><code id="html"></code></pre>
</div>
<div id="result-ast" class="tab-pane">
<pre id="astpre"><code id="ast"></code></pre>
</div>
</div>
</div>
</div>
</div>
</body>
</html>

View File

@ -0,0 +1,3 @@
commonmark.js
*.tgz
index.html

View File

@ -0,0 +1,30 @@
Copyright (c) 2014, John MacFarlane
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of John MacFarlane nor the names of other
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,24 @@
CommonMark
==========
CommonMark is a rationalized version of Markdown syntax,
with a [spec][the spec] and BSD3-licensed reference
implementations in C and JavaScript.
For more information, see <http://commonmark.org>.
To play with this library without installing it, see
the live dingus at <http://spec.commonmark.org/dingus.html>.
This package includes the commonmark library and a
command-line executable, `commonmark`.
Basic usage example:
var reader = new commonmark.DocParser();
var writer = new commonmark.HtmlRenderer();
var parsed = reader.parse("Hello *world*");
var result = writer.render(parsed);
[the spec]: http://spec.commonmark.org

View File

@ -0,0 +1,405 @@
/**
* References:
*
* - http://en.wikipedia.org/wiki/ANSI_escape_code
* - http://www.termsys.demon.co.uk/vtansi.htm
*
*/
/**
 * Module dependencies, plus the two fixed fragments that the escape
 * sequences in this file are assembled from.
 */

var emitNewlineEvents = require('./newlines')

// Every escape code written by this module starts with this.
var prefix = '\x1b['

// Appended after color and style codes only.
var suffix = 'm'
/**
 * Final characters of the cursor-control escape sequences, keyed by the
 * name of the Cursor method that emits each one.
 */

var codes = {
  up: 'A',
  down: 'B',
  forward: 'C',
  back: 'D',
  nextLine: 'E',
  previousLine: 'F',
  horizontalAbsolute: 'G',
  eraseData: 'J',
  eraseLine: 'K',
  scrollUp: 'S',
  scrollDown: 'T',
  savePosition: 's',
  restorePosition: 'u',
  queryPosition: '6n',
  hide: '?25l',
  show: '?25h'
}
/**
 * Numeric codes that switch a text rendering mode on.
 */

var styles = {
  bold: 1,
  italic: 3,
  underline: 4,
  inverse: 7
}
/**
 * Numeric codes that switch the corresponding rendering mode in `styles`
 * back off.
 */

var reset = {
  bold: 22,
  italic: 23,
  underline: 24,
  inverse: 27
}
/**
 * Foreground codes of the named colors. A Colorer adds its own `base`
 * to these values before writing them. `grey` and `brightBlack` share
 * the same code.
 */

var colors = {
  white: 37,
  black: 30,
  blue: 34,
  cyan: 36,
  green: 32,
  magenta: 35,
  red: 31,
  yellow: 33,
  grey: 90,
  brightBlack: 90,
  brightRed: 91,
  brightGreen: 92,
  brightYellow: 93,
  brightBlue: 94,
  brightMagenta: 95,
  brightCyan: 96,
  brightWhite: 97
}
/**
 * Returns the Cursor attached to the given writable stream, creating it
 * (and caching it on `stream._ansicursor`) the first time it is requested.
 * `options` is only used when a new Cursor has to be built.
 */

function ansi (stream, options) {
  var cached = stream._ansicursor
  if (cached) return cached

  var cursor = new Cursor(stream, options)
  stream._ansicursor = cursor
  return cursor
}
module.exports = exports = ansi
/**
 * The `Cursor` class.
 *
 * Wraps a writable stream and exposes chainable methods that write ANSI
 * escape codes to it. May be called with or without `new`.
 *
 * @param {Object} stream   object with a `write()` function
 * @param {Object} options  optional; `enabled` forces escape codes on or
 *                          off (defaults to `stream.isTTY`), `buffering`
 *                          starts the cursor in buffered mode
 * @throws {Error} when `stream` is not an object with a `write()` function
 */

function Cursor (stream, options) {
  if (!(this instanceof Cursor)) {
    return new Cursor(stream, options)
  }
  // `typeof null` is 'object', so null has to be rejected explicitly;
  // otherwise reading `stream.write` raises a bare TypeError instead of the
  // intended message.
  if (!stream || typeof stream != 'object' || typeof stream.write != 'function') {
    throw new Error('a valid Stream instance must be passed in')
  }

  // the stream to use
  this.stream = stream

  // when 'enabled' is false then all the functions are no-ops except for write()
  this.enabled = options && options.enabled
  if (typeof this.enabled === 'undefined') {
    this.enabled = stream.isTTY
  }
  this.enabled = !!this.enabled

  // when `buffering` is true, `write()` calls are buffered in
  // memory until `flush()` is invoked
  this.buffering = !!(options && options.buffering)
  this._buffer = []

  // controls the foreground and background colors; the second argument is
  // the offset each Colorer adds to a color code
  this.fg = this.foreground = new Colorer(this, 0)
  this.bg = this.background = new Colorer(this, 10)

  // defaults: no rendering mode is active yet
  this.Bold = false
  this.Italic = false
  this.Underline = false
  this.Inverse = false

  // keep track of the number of "newlines" that get encountered
  this.newlines = 0
  emitNewlineEvents(stream)
  stream.on('newline', function () {
    this.newlines++
  }.bind(this))
}
exports.Cursor = Cursor
/**
 * Forwards all arguments to the underlying stream's `write()`, or, while
 * buffering is on, stores them for a later `flush()`.
 * Always returns the cursor itself (not the stream's return value) so that
 * calls can be chained.
 */

Cursor.prototype.write = function (data) {
  if (!this.buffering) {
    this.stream.write.apply(this.stream, arguments)
    return this
  }
  this._buffer.push(arguments)
  return this
}
/**
 * Switch the cursor into buffered mode: subsequent `write()` calls are
 * collected in memory instead of being sent to the stream.
 * Returns the cursor for chaining.
 *
 * @api public
 */

Cursor.prototype.buffer = function () {
this.buffering = true
return this
}
/**
 * Leave buffered mode and send everything collected so far to the stream
 * as one concatenated `write()`.
 * Throws if any buffered `write()` call was made with a number of
 * arguments other than one. Returns the cursor for chaining.
 *
 * @api public
 */

Cursor.prototype.flush = function () {
  this.buffering = false

  var pieces = []
  for (var i = 0; i < this._buffer.length; i++) {
    var args = this._buffer[i]
    if (args.length != 1) throw new Error('unexpected args length! ' + args.length)
    pieces.push(args[0])
  }

  this._buffer.splice(0) // empty
  this.write(pieces.join(''))
  return this
}
/**
 * The `Colorer` class tracks and writes one color channel of a cursor.
 * `base` is the offset added to every color code; the Cursor constructor
 * passes 0 for its foreground Colorer and 10 for its background Colorer.
 */

function Colorer (cursor, base) {
  this.cursor = cursor
  this.base = base
  // last color code written, as a string; null until one has been set
  this.current = null
}
exports.Colorer = Colorer
/**
 * Write a color code to the cursor unless it is the one already in effect.
 * The code is remembered even when the cursor is disabled and nothing is
 * written. Returns undefined when the code was already current, otherwise
 * the Colorer.
 */

Colorer.prototype._setColorCode = function setColorCode (code) {
  var next = String(code)
  if (next === this.current) return

  if (this.cursor.enabled) {
    this.cursor.write(prefix + next + suffix)
  }
  this.current = next
  return this
}
/**
 * Install one Cursor method per entry in `codes`. Each method writes the
 * prefix, any arguments (rounded and joined with ';') and the entry's
 * code, then returns the cursor. Nothing is written while the cursor is
 * disabled.
 */

Object.keys(codes).forEach(function (name) {
  var code = String(codes[name])

  Cursor.prototype[name] = function () {
    var sequence = arguments.length > 0
      ? toArray(arguments).map(Math.round).join(';') + code
      : code
    if (this.enabled) {
      this.write(prefix + sequence)
    }
    return this
  }
})
/**
 * Install, for every entry in `styles`, a Cursor method that turns the
 * rendering mode on (e.g. `bold()`) and one that turns it off again
 * (e.g. `resetBold()`). The current state is tracked in a capitalized
 * flag on the cursor (e.g. `this.Bold`) so the same code is not written
 * twice in a row.
 *
 * Both methods always return the cursor. They previously returned
 * `undefined` when the mode was already in the requested state, which
 * broke chains such as `cursor.bold().bold().write('x')`.
 */

Object.keys(styles).forEach(function (style) {
  var name = style[0].toUpperCase() + style.substring(1)
    , c = styles[style]
    , r = reset[style]

  Cursor.prototype[style] = function () {
    // already on: nothing to write, but keep the chain alive
    if (this[name]) return this
    this.enabled && this.write(prefix + c + suffix)
    this[name] = true
    return this
  }

  Cursor.prototype['reset' + name] = function () {
    // already off: nothing to write, but keep the chain alive
    if (!this[name]) return this
    this.enabled && this.write(prefix + r + suffix)
    this[name] = false
    return this
  }
})
/**
 * Install one method per entry in `colors` on both classes. The Colorer
 * method sets its channel to that color and returns the owning cursor;
 * the Cursor method is a shortcut for the same call on its foreground
 * Colorer.
 */

Object.keys(colors).forEach(function (color) {
  var offset = colors[color]

  Colorer.prototype[color] = function () {
    this._setColorCode(this.base + offset)
    return this.cursor
  }

  Cursor.prototype[color] = function () {
    var fg = this.foreground
    return fg[color]()
  }
})
/**
 * Writes the BEL character ('\x07') when the cursor is enabled.
 * Returns the cursor for chaining.
 */

Cursor.prototype.beep = function () {
  if (this.enabled) {
    this.write('\x07')
  }
  return this
}
/**
 * Moves the cursor to column `x`, row `y`. Both values are truncated to
 * 32-bit integers; the sequence is written row first. Nothing is written
 * while the cursor is disabled. Returns the cursor for chaining.
 */

Cursor.prototype.goto = function (x, y) {
  var column = x | 0
  var row = y | 0
  if (this.enabled) {
    this.write(prefix + row + ';' + column + 'H')
  }
  return this
}
/**
 * Resets this channel's color by writing code 39 plus the Colorer's base.
 * Returns the owning cursor.
 */

Colorer.prototype.reset = function () {
  var defaultCode = this.base + 39
  this._setColorCode(defaultCode)
  return this.cursor
}
/**
 * Resets all ANSI formatting on the stream (code 0) and clears the
 * cursor's own bookkeeping: every rendering-mode flag goes back to false
 * and both Colorers forget their current color. The bookkeeping is cleared
 * even when the cursor is disabled and nothing is written.
 */

Cursor.prototype.reset = function () {
  if (this.enabled) {
    this.write(prefix + '0' + suffix)
  }
  this.Bold = this.Italic = this.Underline = this.Inverse = false
  this.foreground.current = this.background.current = null
  return this
}
/**
 * Sets this channel's color from the given 0-255 RGB values.
 * The closest match out of the 216 colors is picked.
 * Returns the owning cursor.
 */

Colorer.prototype.rgb = function (r, g, b) {
  var selector = this.base + 38
  this._setColorCode(selector + ';5;' + rgb(r, g, b))
  return this.cursor
}
/**
 * Shortcut for `cursor.fg.rgb(r, g, b)`.
 */

Cursor.prototype.rgb = function (r, g, b) {
  var fg = this.foreground
  return fg.rgb(r, g, b)
}
/**
 * Sets this channel's color from a CSS hex color code.
 * For example: `#FF0000` would be bright red.
 * Returns whatever `rgb()` returns, i.e. the owning cursor.
 */

Colorer.prototype.hex = function (color) {
  var channels = hex(color)
  return this.rgb(channels[0], channels[1], channels[2])
}
/**
 * Shortcut for `cursor.fg.hex(color)`.
 */

Cursor.prototype.hex = function (color) {
  var fg = this.foreground
  return fg.hex(color)
}
// UTIL FUNCTIONS //
/**
 * Scales each 0-255 RGB channel to the 0-5 range and returns the single
 * ANSI color code that `rgb5()` computes for the scaled values.
 */

function rgb (r, g, b) {
  var scale = function (channel) {
    return channel / 255 * 5
  }
  return rgb5(scale(r), scale(g), scale(b))
}
/**
 * Combines three 0-5 channel values (rounded to the nearest integer) into
 * a single ANSI color code: 16 + 36*red + 6*green + blue.
 */

function rgb5 (r, g, b) {
  var code = 16
  code += Math.round(r) * 36
  code += Math.round(g) * 6
  code += Math.round(b)
  return code
}
/**
 * Accepts a hex CSS color code string (# is optional) and
 * translates it into an Array of 3 RGB 0-255 values, which
 * can then be used with rgb().
 *
 * Both the 6-digit form ("#ff8800") and the 3-digit CSS shorthand
 * ("#f80", each digit doubled) are understood. The shorthand previously
 * produced NaN for the blue channel.
 */

function hex (color) {
  var c = color[0] === '#' ? color.substring(1) : color
  if (c.length === 3) {
    // expand "f80" to "ff8800"
    c = c[0] + c[0] + c[1] + c[1] + c[2] + c[2]
  }
  var r = c.substring(0, 2)
    , g = c.substring(2, 4)
    , b = c.substring(4, 6)
  return [parseInt(r, 16), parseInt(g, 16), parseInt(b, 16)]
}
/**
 * Copies the indexed elements (0 .. length-1) of an array-like object,
 * such as `arguments`, into a new real Array.
 */

function toArray (a) {
  var count = a.length
  var copy = []
  var idx = 0
  while (idx < count) {
    copy.push(a[idx])
    idx++
  }
  return copy
}

View File

@ -0,0 +1,71 @@
/**
* Accepts any node Stream instance and hijacks its "write()" function,
* so that it can count any newlines that get written to the output.
*
* When a '\n' byte is encountered, then a "newline" event will be emitted
* on the stream, with no arguments. It is up to the listeners to determine
* any necessary deltas required for their use-case.
*
* Ex:
*
* var cursor = ansi(process.stdout)
* , ln = 0
* process.stdout.on('newline', function () {
* ln++
* })
*/
/**
* Module dependencies.
*/
var assert = require('assert')
var NEWLINE = '\n'.charCodeAt(0)
/**
 * Replaces `stream.write` with a wrapper that performs the original write
 * and then, if anyone is listening for 'newline', inspects every
 * character (string data) or element (any other data) that was written.
 * A stream is only patched once; `stream._emittingNewlines` marks it.
 */
function emitNewlineEvents (stream) {
  // already emitting newline events
  if (stream._emittingNewlines) return

  var originalWrite = stream.write

  stream.write = function (data) {
    // write first, so the original return value can be handed back unchanged
    var result = originalWrite.apply(stream, arguments)

    if (stream.listeners('newline').length > 0) {
      var isString = typeof data == 'string'
      for (var i = 0, len = data.length; i < len; i++) {
        processByte(stream, isString ? data.charCodeAt(i) : data[i])
      }
    }
    return result
  }

  stream._emittingNewlines = true
}
module.exports = emitNewlineEvents
/**
 * Examines one numeric character code that was written to `stream` and
 * emits a 'newline' event on the stream when it equals NEWLINE.
 * Asserts that `b` is a number.
 */

function processByte (stream, b) {
  assert.equal(typeof b, 'number')
  if (b !== NEWLINE) return
  stream.emit('newline')
}

View File

@ -0,0 +1,35 @@
// Benchmark script: times markdown->html conversion of one input file with
// commonmark.js, showdown.js and marked.js, printing one line per finished
// benchmark. The input file is taken from the first command-line argument.

var Benchmark = require('benchmark').Benchmark;
var fs = require('fs');
var sm = require('./lib/index.js');
// https://github.com/coreyti/showdown
var showdown = require('../../showdown/src/showdown');
// https://github.com/chjj/marked
var marked = require('../../marked/marked.min.js');

var benchfile = process.argv[2];
var contents = fs.readFileSync(benchfile, 'utf8');

var suite = new Benchmark.Suite();

// Each case builds its own parser/converter inside the timed function.
suite.add('commonmark.js markdown->html', function() {
  var doc = new sm.DocParser().parse(contents);
  var renderer = new sm.HtmlRenderer();
  renderer.renderBlock(doc);
});

suite.add('showdown.js markdown->html', function() {
  var converter = new showdown.converter();
  converter.makeHtml(contents);
});

suite.add('marked.js markdown->html', function() {
  marked(contents);
});

// report every benchmark as soon as it completes
suite.on('cycle', function(event) {
  console.log(String(event.target));
});

suite.run();

View File

@ -0,0 +1,33 @@
#!/usr/bin/env node

// Command-line front end for commonmark.js.
//
// Usage: commonmark [--ast] [FILE...]
//
// Reads the given files (or /dev/stdin when none are given), joins them
// with newlines, parses the result and writes it to stdout: as HTML by
// default, or as an inspected syntax tree when the first argument is --ast.

var fs = require('fs');
var util = require('util');
var commonmark = require('../lib/index.js');

var parser = new commonmark.DocParser();
var renderer;
var files;

if (process.argv[2] === '--ast') {
  files = process.argv.slice(3);
  // stand-in renderer that dumps the parsed tree at unlimited depth
  renderer = { render: function(x) {
    return util.inspect(x, null, Infinity) + '\n';
  } };
} else {
  files = process.argv.slice(2);
  renderer = new commonmark.HtmlRenderer();
}

if (files.length === 0) {
  files = ['/dev/stdin'];
}

var inps = [];
for (var i = 0; i < files.length; i++) {
  // declared with `var`: this used to be an assignment to an undeclared
  // name, which creates an implicit global (and throws in strict mode)
  var file = files[i];
  inps.push(fs.readFileSync(file, 'utf8'));
}

process.stdout.write(renderer.render(parser.parse(inps.join('\n'))));

View File

@ -0,0 +1,12 @@
<!DOCTYPE html>
<html>
<head>
<title>CommonMark dingus</title>
<meta http-equiv="refresh" content="0;URL='/dingus.html'">
</head>
<body>
<p>The most recent version of the CommonMark dingus can be found
at <a
href="http://try.commonmark.org/dingus.html/">/dingus.html/</a>.</p>
</body>
</html>

View File

@ -0,0 +1,698 @@
var C_GREATERTHAN = 62;
var C_SPACE = 32;
var C_OPEN_BRACKET = 91;
var InlineParser = require('./inlines');
var unescapeString = new InlineParser().unescapeString;
// Returns true if the string is empty or consists solely of whitespace
// characters, i.e. it contains no non-whitespace character at all.
var isBlank = function(s) {
  var hasVisibleChar = /\S/.test(s);
  return !hasVisibleChar;
};
// Convert tabs to spaces on each line using a 4-space tab stop.
// Every tab is replaced by 1 to 4 spaces, enough to reach the next column
// that is a multiple of 4 in the expanded line. Text without tabs is
// returned unchanged.
var detabLine = function(text) {
  var TAB_WIDTH = 4;
  if (text.indexOf('\t') == -1) {
    return text;
  }
  // Offset (in the ORIGINAL text) just past the previous tab. Because each
  // tab ends on a tab stop, the distance from there to the current tab,
  // modulo the tab width, is how far past the last stop we are.
  var lastStop = 0;
  return text.replace(/\t/g, function(match, offset) {
    var fill = TAB_WIDTH - ((offset - lastStop) % TAB_WIDTH);
    lastStop = offset + 1;
    // The padding is built by count rather than sliced from a literal: a
    // one-space literal could never produce more than one space per tab.
    return new Array(fill + 1).join(' ');
  });
};
// Search for regex `re` in string `s`, starting at position `offset`.
// Returns the index (relative to the start of `s`) where the match begins,
// or -1 when there is no match.
var matchAt = function(re, s, offset) {
  var tail = s.slice(offset);
  var res = tail.match(re);
  return res ? offset + res.index : -1;
};
// Alternation of the tag names that may open an HTML block.
// NOTE(review): 'footer' is listed twice; harmless inside an alternation.
var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)';
// Source of the HTML-block opener pattern: '<' followed by either
//  - a block tag name and then whitespace, '/' or '>'   (opening tag),
//  - '/' plus a block tag name and then whitespace or '>' (closing tag), or
//  - '?' or '!'.
var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
"/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
// The opener pattern anchored at the start of the input, case-insensitive.
var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i');
// Horizontal rule: a whole line made of three or more '*', or three or more
// '_', or three or more '-', where each marker may be followed by spaces.
var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
// DOC PARSER
// These are methods of a DocParser object, defined below.
// Create a fresh, open block node of type `tag` that starts (and, for now,
// ends) on `start_line`, at `start_column`. The node has no parent, no
// children and no content yet.
var makeBlock = function(tag, start_line, start_column) {
  var block = {};
  block.t = tag;
  block.open = true;
  block.last_line_blank = false;
  block.start_line = start_line;
  block.start_column = start_column;
  block.end_line = start_line;
  block.children = [];
  block.parent = null;
  // string_content is formed by concatenating strings, in finalize:
  block.string_content = "";
  block.strings = [];
  block.inline_content = [];
  return block;
};
// Returns true if a block of type parent_type may hold a child of type
// child_type. Documents, block quotes and list items accept any child;
// a list accepts list items only; nothing else accepts children.
var canContain = function(parent_type, child_type) {
  if (parent_type == 'List') {
    return child_type == 'ListItem';
  }
  return parent_type == 'Document' ||
         parent_type == 'BlockQuote' ||
         parent_type == 'ListItem';
};
// Returns true if lines of text can be added directly to a block of this
// type: paragraphs and both kinds of code block.
var acceptsLines = function(block_type) {
  if (block_type == 'Paragraph') { return true; }
  if (block_type == 'IndentedCode') { return true; }
  return block_type == 'FencedCode';
};
// Returns true if the block ends with a blank line. For lists and list
// items that are not themselves flagged, the check walks down through the
// last child at each level.
var endsWithBlankLine = function(block) {
  var node = block;
  for (;;) {
    if (node.last_line_blank) {
      return true;
    }
    var isListLike = (node.t == 'List' || node.t == 'ListItem');
    if (!isListLike || node.children.length === 0) {
      return false;
    }
    node = node.children[node.children.length - 1];
  }
};
// Break out of all containing lists: finalize every block from `block` up
// to and including the outermost enclosing list, then reset the tip of the
// document to that list's parent. (This is used to implement the "two
// blank lines break out of all lists" feature.) Does nothing when `block`
// has no list among itself and its ancestors.
var breakOutOfLists = function(block, line_number) {
  // Walk all the way to the root; the last list seen is the outermost one.
  var outermost = null;
  var ancestor = block;
  do {
    if (ancestor.t === 'List') {
      outermost = ancestor;
    }
    ancestor = ancestor.parent;
  } while (ancestor);

  if (!outermost) {
    return;
  }

  for (var cur = block; cur != outermost; cur = cur.parent) {
    this.finalize(cur, line_number);
  }
  this.finalize(outermost, line_number);
  this.tip = outermost.parent;
};
// Append the part of line `ln` that starts at `offset` to the strings of
// the block at the tip. The caller is expected to have checked that the
// tip accepts lines; the only check made here is that the tip is still
// open, and an object with a `msg` property is thrown if it is not.
var addLine = function(ln, offset) {
  var remainder = ln.slice(offset);
  var tip = this.tip;
  if (!tip.open) {
    throw({ msg: "Attempted to add line (" + ln + ") to closed container." });
  }
  tip.strings.push(remainder);
};
// Create a block of type `tag` and attach it as the last child of the tip,
// which then becomes the new tip. While the tip cannot contain such a
// block it is finalized, and the check is repeated against whatever the
// tip is afterwards. Returns the new block.
var addChild = function(tag, line_number, offset) {
  for (;;) {
    if (canContain(this.tip.t, tag)) {
      break;
    }
    this.finalize(this.tip, line_number);
  }

  // offset 0 = column 1
  var child = makeBlock(tag, line_number, offset + 1);
  child.parent = this.tip;
  this.tip.children.push(child);
  this.tip = child;
  return child;
};
// Parse a list marker at position `offset` of line `ln`.
//
// Returns null when the rest of the line is a horizontal rule or does not
// start with a list marker. Otherwise returns an object describing the
// marker:
//   type         'Bullet' or 'Ordered'
//   bullet_char  the bullet character ('*', '+' or '-'); bullets only
//   start        the item number; ordered only
//   delimiter    '.' or ')'; ordered only
//   padding      width of the marker plus the spaces counted with it
var parseListMarker = function(ln, offset) {
  var rest = ln.slice(offset);
  var match;
  var spaces_after_marker;
  var data = {};
  // "* * *" and friends are rules, not list items
  if (rest.match(reHrule)) {
    return null;
  }
  if ((match = rest.match(/^[*+-]( +|$)/))) {
    spaces_after_marker = match[1].length;
    data.type = 'Bullet';
    data.bullet_char = match[0][0];
  } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) {
    spaces_after_marker = match[3].length;
    data.type = 'Ordered';
    // explicit radix: list numbers are decimal even with leading zeros
    // (engines predating ES5 would read "010" as octal without it)
    data.start = parseInt(match[1], 10);
    data.delimiter = match[2];
  } else {
    return null;
  }
  // true when nothing follows the marker and its trailing spaces
  var blank_item = match[0].length === rest.length;
  if (spaces_after_marker >= 5 ||
      spaces_after_marker < 1 ||
      blank_item) {
    // count the marker plus exactly one space
    data.padding = match[0].length - spaces_after_marker + 1;
  } else {
    // count the marker plus all of the spaces that follow it
    data.padding = match[0].length;
  }
  return data;
};
// Returns true if an item described by item_data belongs in the list
// described by list_data: the type, delimiter and bullet character must
// all be strictly equal. This is used in agglomerating list items into
// lists.
var listsMatch = function(list_data, item_data) {
  if (list_data.type !== item_data.type) {
    return false;
  }
  if (list_data.delimiter !== item_data.delimiter) {
    return false;
  }
  return list_data.bullet_char === item_data.bullet_char;
};
// Analyze a line of text and update the document appropriately.
// We parse markdown text by calling this on each line of input,
// then finalizing the document.
var incorporateLine = function(ln, line_number) {
var all_matched = true;
var last_child;
var first_nonspace;
var offset = 0;
var match;
var data;
var blank;
var indent;
var last_matched_container;
var i;
var CODE_INDENT = 4;
var container = this.doc;
var oldtip = this.tip;
// Convert tabs to spaces:
ln = detabLine(ln);
// For each containing block, try to parse the associated line start.
// Bail out on failure: container will point to the last matching block.
// Set all_matched to false if not all containers match.
while (container.children.length > 0) {
last_child = container.children[container.children.length - 1];
if (!last_child.open) {
break;
}
container = last_child;
match = matchAt(/[^ ]/, ln, offset);
if (match === -1) {
first_nonspace = ln.length;
blank = true;
} else {
first_nonspace = match;
blank = false;
}
indent = first_nonspace - offset;
switch (container.t) {
case 'BlockQuote':
if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) {
offset = first_nonspace + 1;
if (ln.charCodeAt(offset) === C_SPACE) {
offset++;
}
} else {
all_matched = false;
}
break;
case 'ListItem':
if (indent >= container.list_data.marker_offset +
container.list_data.padding) {
offset += container.list_data.marker_offset +
container.list_data.padding;
} else if (blank) {
offset = first_nonspace;
} else {
all_matched = false;
}
break;
case 'IndentedCode':
if (indent >= CODE_INDENT) {
offset += CODE_INDENT;
} else if (blank) {
offset = first_nonspace;
} else {
all_matched = false;
}
break;
case 'Header':
case 'HorizontalRule':
// a header can never contain > 1 line, so fail to match:
all_matched = false;
if (blank) {
container.last_line_blank = true;
}
break;
case 'FencedCode':
// skip optional spaces of fence offset
i = container.fence_offset;
while (i > 0 && ln.charCodeAt(offset) === C_SPACE) {
offset++;
i--;
}
break;
case 'HtmlBlock':
if (blank) {
container.last_line_blank = true;
all_matched = false;
}
break;
case 'Paragraph':
if (blank) {
container.last_line_blank = true;
all_matched = false;
}
break;
default:
}
if (!all_matched) {
container = container.parent; // back up to last matching block
break;
}
}
last_matched_container = container;
// This function is used to finalize and close any unmatched
// blocks. We aren't ready to do this now, because we might
// have a lazy paragraph continuation, in which case we don't
// want to close unmatched blocks. So we store this closure for
// use later, when we have more information.
var closeUnmatchedBlocks = function(mythis) {
// finalize any blocks not matched
while (!already_done && oldtip != last_matched_container) {
mythis.finalize(oldtip, line_number);
oldtip = oldtip.parent;
}
var already_done = true;
};
// Check to see if we've hit 2nd blank line; if so break out of list:
if (blank && container.last_line_blank) {
this.breakOutOfLists(container, line_number);
}
// Unless last matched container is a code block, try new container starts,
// adding children to the last matched container:
while (container.t != 'FencedCode' &&
container.t != 'IndentedCode' &&
container.t != 'HtmlBlock' &&
// this is a little performance optimization:
matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== -1) {
match = matchAt(/[^ ]/, ln, offset);
if (match === -1) {
first_nonspace = ln.length;
blank = true;
} else {
first_nonspace = match;
blank = false;
}
indent = first_nonspace - offset;
if (indent >= CODE_INDENT) {
// indented code
if (this.tip.t != 'Paragraph' && !blank) {
offset += CODE_INDENT;
closeUnmatchedBlocks(this);
container = this.addChild('IndentedCode', line_number, offset);
} else { // indent > 4 in a lazy paragraph continuation
break;
}
} else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) {
// blockquote
offset = first_nonspace + 1;
// optional following space
if (ln.charCodeAt(offset) === C_SPACE) {
offset++;
}
closeUnmatchedBlocks(this);
container = this.addChild('BlockQuote', line_number, offset);
} else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) {
// ATX header
offset = first_nonspace + match[0].length;
closeUnmatchedBlocks(this);
container = this.addChild('Header', line_number, first_nonspace);
container.level = match[0].trim().length; // number of #s
// remove trailing ###s:
container.strings =
[ln.slice(offset).replace(/^ *#+ *$/, '').replace(/ +#+ *$/,'')];
break;
} else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) {
// fenced code block
var fence_length = match[0].length;
closeUnmatchedBlocks(this);
container = this.addChild('FencedCode', line_number, first_nonspace);
container.fence_length = fence_length;
container.fence_char = match[0][0];
container.fence_offset = first_nonspace - offset;
offset = first_nonspace + fence_length;
break;
} else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== -1) {
// html block
closeUnmatchedBlocks(this);
container = this.addChild('HtmlBlock', line_number, first_nonspace);
// note, we don't adjust offset because the tag is part of the text
break;
} else if (container.t == 'Paragraph' &&
container.strings.length === 1 &&
((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
// setext header line
closeUnmatchedBlocks(this);
container.t = 'Header'; // convert Paragraph to SetextHeader
container.level = match[0][0] === '=' ? 1 : 2;
offset = ln.length;
} else if (matchAt(reHrule, ln, first_nonspace) !== -1) {
// hrule
closeUnmatchedBlocks(this);
container = this.addChild('HorizontalRule', line_number, first_nonspace);
offset = ln.length - 1;
break;
} else if ((data = parseListMarker(ln, first_nonspace))) {
// list item
closeUnmatchedBlocks(this);
data.marker_offset = indent;
offset = first_nonspace + data.padding;
// add the list if needed
if (container.t !== 'List' ||
!(listsMatch(container.list_data, data))) {
container = this.addChild('List', line_number, first_nonspace);
container.list_data = data;
}
// add the list item
container = this.addChild('ListItem', line_number, first_nonspace);
container.list_data = data;
} else {
break;
}
if (acceptsLines(container.t)) {
// if it's a line container, it can't contain other containers
break;
}
}
// What remains at the offset is a text line. Add the text to the
// appropriate container.
match = matchAt(/[^ ]/, ln, offset);
if (match === -1) {
first_nonspace = ln.length;
blank = true;
} else {
first_nonspace = match;
blank = false;
}
indent = first_nonspace - offset;
// First check for a lazy paragraph continuation:
if (this.tip !== last_matched_container &&
!blank &&
this.tip.t == 'Paragraph' &&
this.tip.strings.length > 0) {
// lazy paragraph continuation
this.last_line_blank = false;
this.addLine(ln, offset);
} else { // not a lazy continuation
// finalize any blocks not matched
closeUnmatchedBlocks(this);
// Block quote lines are never blank as they start with >
// and we don't count blanks in fenced code for purposes of tight/loose
// lists or breaking out of lists. We also don't set last_line_blank
// on an empty list item.
container.last_line_blank = blank &&
!(container.t == 'BlockQuote' ||
container.t == 'Header' ||
container.t == 'FencedCode' ||
(container.t == 'ListItem' &&
container.children.length === 0 &&
container.start_line == line_number));
var cont = container;
while (cont.parent) {
cont.parent.last_line_blank = false;
cont = cont.parent;
}
switch (container.t) {
case 'IndentedCode':
case 'HtmlBlock':
this.addLine(ln, offset);
break;
case 'FencedCode':
// check for closing code fence:
match = (indent <= 3 &&
ln.charAt(first_nonspace) == container.fence_char &&
ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/));
if (match && match[0].length >= container.fence_length) {
// don't add closing fence to container; instead, close it:
this.finalize(container, line_number);
} else {
this.addLine(ln, offset);
}
break;
case 'Header':
case 'HorizontalRule':
// nothing to do; we already added the contents.
break;
default:
if (acceptsLines(container.t)) {
this.addLine(ln, first_nonspace);
} else if (blank) {
// do nothing
} else if (container.t != 'HorizontalRule' &&
container.t != 'Header') {
// create paragraph container for line
container = this.addChild('Paragraph', line_number, first_nonspace);
this.addLine(ln, first_nonspace);
} else {
console.log("Line " + line_number.toString() +
" with container type " + container.t +
" did not match any condition.");
}
}
}
};
// Finalize a block. Close it and do any necessary postprocessing,
// e.g. creating string_content from strings, setting the 'tight'
// or 'loose' status of a list, and parsing the beginnings
// of paragraphs for reference definitions. Reset the tip to the
// parent of the closed block.
var finalize = function(block, line_number) {
// Closes `block`, fills in its end_line and type-specific derived fields,
// then moves this.tip to the block's parent.
// Returns 0 if the block was already closed; otherwise returns nothing.
var pos;
// don't do anything if the block is already closed
if (!block.open) {
return 0;
}
block.open = false;
// A block finalized on a later line than it started ends on the previous
// line; otherwise it ends on line_number itself.
if (line_number > block.start_line) {
block.end_line = line_number - 1;
} else {
block.end_line = line_number;
}
switch (block.t) {
case 'Paragraph':
// Join the collected lines and strip leading spaces. The regex has no
// 'g' flag, so only the first match (at the start of the string) is removed.
block.string_content = block.strings.join('\n').replace(/^ */m,'');
// delete block.strings;
// try parsing the beginning as link reference definitions:
// parseReference's result is used as the number of characters to drop;
// a falsy result ends the loop.
while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET &&
(pos = this.inlineParser.parseReference(block.string_content,
this.refmap))) {
block.string_content = block.string_content.slice(pos);
// Nothing but reference definitions: retag the block.
if (isBlank(block.string_content)) {
block.t = 'ReferenceDef';
break;
}
}
break;
case 'Header':
case 'HtmlBlock':
block.string_content = block.strings.join('\n');
break;
case 'IndentedCode':
// Replace any trailing run of blank (space-only) lines with one newline.
block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n');
block.t = 'CodeBlock';
break;
case 'FencedCode':
// first line becomes info string
block.info = unescapeString(block.strings[0].trim());
if (block.strings.length == 1) {
block.string_content = '';
} else {
block.string_content = block.strings.slice(1).join('\n') + '\n';
}
block.t = 'CodeBlock';
break;
case 'List':
block.tight = true; // tight by default
var numitems = block.children.length;
var i = 0;
while (i < numitems) {
var item = block.children[i];
// check for non-final list item ending with blank line:
var last_item = i == numitems - 1;
if (endsWithBlankLine(item) && !last_item) {
block.tight = false;
break;
}
// recurse into children of list item, to see if there are
// spaces between any of them:
var numsubitems = item.children.length;
var j = 0;
while (j < numsubitems) {
var subitem = item.children[j];
var last_subitem = j == numsubitems - 1;
if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) {
block.tight = false;
// NOTE: this break leaves only the inner loop; the outer loop
// goes on to the next item.
break;
}
j++;
}
i++;
}
break;
default:
break;
}
// NOTE(review): the DocParser object literal visible in this file defines no
// `top` property, so for a parentless block this appears to evaluate to
// undefined; parse() loops `while (this.tip)` and seems to rely on that falsy
// value to terminate. Confirm before changing.
this.tip = block.parent || this.top;
};
// Produce a new tree mirroring `block` and its descendants. Position
// fields are copied for every block; Paragraph and Header string content
// is run through the inline parser; List, CodeBlock and HtmlBlock carry
// over their type-specific fields. The input tree is not modified here.
var processInlines = function(block) {
    var self = this;
    var kind = block.t;
    var copy = {
        t: kind,
        start_line: block.start_line,
        start_column: block.start_column,
        end_line: block.end_line
    };

    if (kind === 'Paragraph' || kind === 'Header') {
        copy.inline_content =
            self.inlineParser.parse(block.string_content.trim(), self.refmap);
        if (kind === 'Header') {
            copy.level = block.level;
        }
    } else if (kind === 'List') {
        copy.list_data = block.list_data;
        copy.tight = block.tight;
    } else if (kind === 'CodeBlock') {
        copy.string_content = block.string_content;
        copy.info = block.info;
    } else if (kind === 'HtmlBlock') {
        copy.string_content = block.string_content;
    }

    // Recurse into children (an empty array still yields an empty array).
    if (block.children) {
        copy.children = block.children.map(function(child) {
            return self.processInlines(child);
        });
    }
    return copy;
};
// Entry point for parsing. Resets the parser state (doc, tip, refmap),
// feeds the input to incorporateLine one line at a time with 1-based line
// numbers, finalizes every block still open, and returns the tree
// produced by processInlines.
var parse = function(input) {
    var self = this;
    self.doc = makeBlock('Document', 1, 1);
    self.tip = self.doc;
    self.refmap = {};

    // Drop a single trailing newline, then split on any line ending.
    var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/);
    var len = lines.length;
    lines.forEach(function(text, idx) {
        self.incorporateLine(text, idx + 1);
    });

    // finalize moves the tip to the parent each time; stop once it is falsy.
    while (self.tip) {
        self.finalize(self.tip, len - 1);
    }
    return self.processInlines(self.doc);
};
// The DocParser object.
// Returns a parser object holding the document root (`doc`), the current
// innermost open block (`tip`), the link reference map, an inline parser,
// and the block-parsing methods defined in this file.
function DocParser(){
    // Create the root block first so `doc` and `tip` share one object.
    // Writing `tip: this.doc` inside the literal would read `doc` from the
    // constructor's `this`, which has no such property, leaving `tip`
    // undefined until parse() is called.
    var root = makeBlock('Document', 1, 1);
    return {
        doc: root,
        tip: root,
        refmap: {},
        inlineParser: new InlineParser(),
        breakOutOfLists: breakOutOfLists,
        addLine: addLine,
        addChild: addChild,
        incorporateLine: incorporateLine,
        finalize: finalize,
        processInlines: processInlines,
        parse: parse
    };
}
module.exports = DocParser;

View File

@ -0,0 +1,58 @@
// derived from https://github.com/mathiasbynens/String.fromCodePoint
/*! http://mths.be/fromcodepoint v0.2.1 by @mathias */
if (String.fromCodePoint) {
module.exports = function (_) {
try {
return String.fromCodePoint(_);
} catch (e) {
if (e instanceof RangeError) {
return String.fromCharCode(0xFFFD);
}
throw e;
}
}
} else {
var stringFromCharCode = String.fromCharCode;
var floor = Math.floor;
var fromCodePoint = function(_) {
var MAX_SIZE = 0x4000;
var codeUnits = [];
var highSurrogate;
var lowSurrogate;
var index = -1;
var length = arguments.length;
if (!length) {
return '';
}
var result = '';
while (++index < length) {
var codePoint = Number(arguments[index]);
if (
!isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
codePoint < 0 || // not a valid Unicode code point
codePoint > 0x10FFFF || // not a valid Unicode code point
floor(codePoint) != codePoint // not an integer
) {
return String.fromCharCode(0xFFFD);
}
if (codePoint <= 0xFFFF) { // BMP code point
codeUnits.push(codePoint);
} else { // Astral code point; split in surrogate halves
// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
codePoint -= 0x10000;
highSurrogate = (codePoint >> 10) + 0xD800;
lowSurrogate = (codePoint % 0x400) + 0xDC00;
codeUnits.push(highSurrogate, lowSurrogate);
}
if (index + 1 == length || codeUnits.length > MAX_SIZE) {
result += stringFromCharCode.apply(null, codeUnits);
codeUnits.length = 0;
}
}
return result;
};
module.exports = fromCodePoint;
}

View File

@ -0,0 +1,168 @@
// Build the HTML for one element. `attribs` is a list of [name, value]
// pairs, read in order until the first undefined entry. Truthy `contents`
// yields <tag ...>contents</tag>; otherwise `selfclosing` picks between
// <tag ... /> and an empty <tag ...></tag> pair.
var inTags = function(tag, attribs, contents, selfclosing) {
    var open = '<' + tag;
    if (attribs) {
        for (var k = 0; attribs[k] !== undefined; k++) {
            var pair = attribs[k];
            open += ' ' + pair[0] + '="' + pair[1] + '"';
        }
    }
    if (contents) {
        return open + '>' + contents + '</' + tag + '>';
    }
    if (selfclosing) {
        return open + ' />';
    }
    return open + '></' + tag + '>';
};
// Convert one inline node to an HTML string, dispatching on its `t` field.
// Unknown types are logged and rendered as the empty string.
var renderInline = function(inline) {
    var kind = inline.t;
    var attrs;

    if (kind === 'Text') {
        return this.escape(inline.c);
    }
    if (kind === 'Softbreak') {
        return this.softbreak;
    }
    if (kind === 'Hardbreak') {
        return inTags('br',[],"",true) + '\n';
    }
    if (kind === 'Emph') {
        return inTags('em', [], this.renderInlines(inline.c));
    }
    if (kind === 'Strong') {
        return inTags('strong', [], this.renderInlines(inline.c));
    }
    if (kind === 'Html') {
        // raw HTML is passed through untouched
        return inline.c;
    }
    if (kind === 'Link') {
        attrs = [['href', this.escape(inline.destination, true)]];
        if (inline.title) {
            attrs.push(['title', this.escape(inline.title, true)]);
        }
        return inTags('a', attrs, this.renderInlines(inline.label));
    }
    if (kind === 'Image') {
        // The alt text is the rendered label with tags removed; a nested
        // tag carrying an alt attribute is replaced by that attribute's value.
        var altText = this.renderInlines(inline.label).
            replace(/\<[^>]*alt="([^"]*)"[^>]*\>/g, '$1').
            replace(/\<[^>]*\>/g,'');
        attrs = [['src', this.escape(inline.destination, true)],
                 ['alt', altText]];
        if (inline.title) {
            attrs.push(['title', this.escape(inline.title, true)]);
        }
        return inTags('img', attrs, "", true);
    }
    if (kind === 'Code') {
        return inTags('code', [], this.escape(inline.c));
    }
    console.log("Unknown inline type " + inline.t);
    return "";
};
// Render each inline in order with this.renderInline and concatenate
// the resulting strings.
var renderInlines = function(inlines) {
    var self = this;
    var html = '';
    inlines.forEach(function(node) {
        html += self.renderInline(node);
    });
    return html;
};
// Convert one block node to an HTML string, dispatching on its `t` field.
// `in_tight_list` makes paragraphs render without <p> tags and is passed
// down through list items. Unknown types are logged and rendered as the
// empty string.
var renderBlock = function(block, in_tight_list) {
    var self = this;
    var inner;
    var tagName;
    var attrs;
    var words;

    switch (block.t) {
    case 'Document':
        inner = self.renderBlocks(block.children);
        return inner === '' ? '' : inner + '\n';

    case 'Paragraph':
        inner = self.renderInlines(block.inline_content);
        return in_tight_list ? inner : inTags('p', [], inner);

    case 'BlockQuote':
        inner = self.renderBlocks(block.children);
        if (inner === '') {
            return inTags('blockquote', [], self.innersep);
        }
        return inTags('blockquote', [],
                      self.innersep + inner + self.innersep);

    case 'ListItem':
        inner = self.renderBlocks(block.children, in_tight_list);
        // put block-level children on their own lines inside the <li>
        if (/^[<]/.test(inner)) {
            inner = '\n' + inner;
        }
        if (/[>]$/.test(inner)) {
            inner = inner + '\n';
        }
        return inTags('li', [], inner, false).trim();

    case 'List':
        tagName = block.list_data.type == 'Bullet' ? 'ul' : 'ol';
        // only emit a start attribute for a start value other than 1
        if (!block.list_data.start || block.list_data.start == 1) {
            attrs = [];
        } else {
            attrs = [['start', block.list_data.start.toString()]];
        }
        return inTags(tagName, attrs, self.innersep +
                      self.renderBlocks(block.children, block.tight) +
                      self.innersep);

    case 'Header':
        tagName = 'h' + block.level;
        return inTags(tagName, [], self.renderInlines(block.inline_content));

    case 'CodeBlock':
        // the first word of the info string becomes a language-* class
        words = block.info ? block.info.split(/ +/) : [];
        if (words.length === 0 || words[0].length === 0) {
            attrs = [];
        } else {
            attrs = [['class','language-' + self.escape(words[0],true)]];
        }
        return inTags('pre', [],
                      inTags('code', attrs, self.escape(block.string_content)));

    case 'HtmlBlock':
        return block.string_content;

    case 'ReferenceDef':
        return "";

    case 'HorizontalRule':
        return inTags('hr',[],"",true);

    default:
        console.log("Unknown block type " + block.t);
        return "";
    }
};
// Render every block except ReferenceDef blocks with this.renderBlock
// (forwarding in_tight_list) and join the pieces with this.blocksep.
var renderBlocks = function(blocks, in_tight_list) {
    var self = this;
    return blocks
        .filter(function(b) {
            return b.t !== 'ReferenceDef';
        })
        .map(function(b) {
            return self.renderBlock(b, in_tight_list);
        })
        .join(self.blocksep);
};
// The HtmlRenderer object.
// Returns a renderer carrying the default separator options, an HTML
// escaping helper, and the render methods defined in this file.
// `render` is an alias for renderBlock.
function HtmlRenderer(){
    // Escape &, <, > and " for HTML output. With preserve_entities set,
    // an ampersand that already starts a numeric or named entity is kept.
    var escapeHtml = function(s, preserve_entities) {
        var ampersand = preserve_entities ?
            /[&](?![#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)/gi :
            /[&]/g;
        return s.replace(ampersand,'&amp;')
            .replace(/[<]/g,'&lt;')
            .replace(/[>]/g,'&gt;')
            .replace(/["]/g,'&quot;');
    };

    return {
        // default options:
        blocksep: '\n',  // space between blocks
        innersep: '\n',  // space between block container tag and contents
        softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML
        // set to "<br />" to make them hard breaks
        // set to " " if you want to ignore line wrapping in source
        escape: escapeHtml,
        renderInline: renderInline,
        renderInlines: renderInlines,
        renderBlock: renderBlock,
        renderBlocks: renderBlocks,
        render: renderBlock
    };
}
module.exports = HtmlRenderer;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,20 @@
// commonmark.js - CommonMark in JavaScript
// Copyright (C) 2014 John MacFarlane
// License: BSD3.
// Basic usage:
//
// var commonmark = require('commonmark');
// var parser = new commonmark.DocParser();
// var renderer = new commonmark.HtmlRenderer();
// console.log(renderer.render(parser.parse('Hello *world*')));
var util = require('util');
// Dump a parsed tree as a string via util.inspect, with no depth limit.
var renderAST = function(tree) {
    var options = { depth: null };
    return util.inspect(tree, options);
};
module.exports.DocParser = require('./blocks');
module.exports.HtmlRenderer = require('./html-renderer');
module.exports.ASTRenderer = renderAST;

View File

@ -0,0 +1,854 @@
var fromCodePoint = require('./from-code-point.js');
var entityToChar = require('./html5-entities.js').entityToChar;
// Constants for character codes:
// (numeric values compared against String.prototype.charCodeAt results)
var C_NEWLINE = 10; // '\n'
var C_SPACE = 32; // ' '
var C_ASTERISK = 42; // '*'
var C_UNDERSCORE = 95; // '_'
var C_BACKTICK = 96; // '`'
var C_OPEN_BRACKET = 91; // '['
var C_CLOSE_BRACKET = 93; // ']'
var C_LESSTHAN = 60; // '<'
var C_GREATERTHAN = 62; // '>'
var C_BANG = 33; // '!'
var C_BACKSLASH = 92; // '\'
var C_AMPERSAND = 38; // '&'
var C_OPEN_PAREN = 40; // '('
var C_COLON = 58; // ':'
// Some regexps used in inline parser:
// The upper-case names below are regex source fragments (plain strings)
// that are concatenated and compiled into the re* RegExp objects further down.

// Character class of the punctuation characters that a backslash may escape.
var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]';
// A backslash followed by one escapable character.
var ESCAPED_CHAR = '\\\\' + ESCAPABLE;
// Quoted / parenthesized runs allowing escaped characters and excluding NUL.
// NOTE(review): these three are not referenced in the visible part of this
// file (reLinkTitle below spells out the same patterns inline).
var IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + '|[^"\\x00])*"';
var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'';
var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)';
// Any character except backslash, parentheses, and \x00-\x20.
var REG_CHAR = '[^\\\\()\\x00-\\x20]';
// One pair of parentheses containing only REG_CHARs or escaped characters.
var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)';
// Building blocks for matching raw HTML tags:
var TAGNAME = '[A-Za-z][A-Za-z0-9]*';
var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*';
var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+";
var SINGLEQUOTEDVALUE = "'[^']*'";
var DOUBLEQUOTEDVALUE = '"[^"]*"';
var ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")";
var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")";
// Whitespace, an attribute name, and an optional "= value" part.
var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)";
var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>";
var CLOSETAG = "</" + TAGNAME + "\\s*[>]";
var HTMLCOMMENT = "<!--([^-]+|[-][^-]+)*-->";
var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]";
var DECLARATION = "<![A-Z]+" + "\\s+[^>]*>";
var CDATA = "<!\\[CDATA\\[([^\\]]+|\\][^\\]]|\\]\\][^>])*\\]\\]>";
// Any one of the six raw-HTML constructs above.
var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" +
PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")";
// Hexadecimal, decimal, or named character reference.
var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});";
// Compiled patterns. Those starting with '^' match only at the start of the
// string they are applied to.
var reHtmlTag = new RegExp('^' + HTMLTAG, 'i');
// A link title delimited by "...", '...' or (...).
var reLinkTitle = new RegExp(
'^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' +
'|' +
'\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' +
'|' +
'\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))');
// A link destination wrapped in <...>.
var reLinkDestinationBraces = new RegExp(
'^(?:[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>])');
// A bare link destination; every part is optional, so this can match the
// empty string.
var reLinkDestination = new RegExp(
'^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*');
// Unanchored and non-global.
var reEscapable = new RegExp(ESCAPABLE);
// Global: every backslash escape in a string, capturing the escaped character.
var reAllEscapedChar = new RegExp('\\\\(' + ESCAPABLE + ')', 'g');
var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')');
var reEntityHere = new RegExp('^' + ENTITY, 'i');
// Global, case-insensitive: every entity in a string.
var reEntity = new RegExp(ENTITY, 'gi');
// Matches a character with a special meaning in markdown,
// or a string of non-special characters. Note: we match
// clumps of _ or * or `, because they need to be handled in groups.
var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m;
// Turn backslash escapes into the characters they escape, then replace
// each entity using entityToChar.
var unescapeString = function(s) {
    var withoutBackslashes = s.replace(reAllEscapedChar, '$1');
    return withoutBackslashes.replace(reEntity, entityToChar);
};
// Normalize reference label: collapse internal whitespace
// to single space, remove leading/trailing whitespace, case fold.
// Labels that differ only in case or in the amount of whitespace map to
// the same key.
var normalizeReference = function(s) {
    // The 'g' flag is required: without it only the first run of
    // whitespace would be collapsed.
    return s.trim()
        .replace(/\s+/g,' ')
        .toUpperCase();
};
// INLINE PARSER
// These are methods of an InlineParser object, defined below.
// An InlineParser keeps track of a subject (a string to be
// parsed) and a position in that subject.
// Run `re` against the rest of the subject (from this.pos onward). On
// success move this.pos just past the match and return the matched text;
// on failure leave this.pos alone and return null.
var match = function(re) {
    var found = re.exec(this.subject.slice(this.pos));
    if (!found) {
        return null;
    }
    // found.index is non-zero when `re` is not anchored with ^
    this.pos += found.index + found[0].length;
    return found[0];
};
// Return the character code at the current subject position, or -1 if
// there are no more characters. Does not advance the position.
var peek = function() {
    return this.pos < this.subject.length ?
        this.subject.charCodeAt(this.pos) :
        -1;
};
// Skip over spaces, crossing at most one newline. Always returns 1, so
// it can be chained inside && conditions.
var spnl = function() {
    var spacesWithOptionalNewline = /^ *(?:\n *)?/;
    this.match(spacesWithOptionalNewline);
    return 1;
};
// All of the parsers below try to match something at the current position
// in the subject. If they succeed in matching anything, they
// return the inline matched, advancing the subject.
// Try to parse a run of backticks at the current position. If a later
// run of the same length exists, push a Code inline holding the text in
// between (whitespace collapsed and trimmed); otherwise push the opening
// run as literal Text. Returns 0 if there are no backticks here, true
// otherwise.
var parseBackticks = function(inlines) {
    var opener = this.match(/^`+/);
    if (!opener) {
        return 0;
    }
    var contentStart = this.pos;
    var run;
    // Scan forward over backtick runs until one has the opener's length.
    while ((run = this.match(/`+/m))) {
        if (run !== opener) {
            continue;
        }
        inlines.push({ t: 'Code',
                       c: this.subject.slice(contentStart,
                                             this.pos - opener.length)
                              .replace(/[ \n]+/g,' ')
                              .trim() });
        return true;
    }
    // No closing run: rewind to just after the opener and emit it literally.
    this.pos = contentStart;
    inlines.push({ t: 'Text', c: opener });
    return true;
};
// Handle a backslash at the current position. Backslash + newline gives
// a Hardbreak; backslash + escapable character gives that character as
// Text; any other backslash is emitted literally. Returns false (and
// consumes nothing) if the current character is not a backslash.
var parseBackslash = function(inlines) {
    var text = this.subject;
    var at = this.pos;
    if (text.charCodeAt(at) !== C_BACKSLASH) {
        return false;
    }
    var following = text.charAt(at + 1);
    if (following === '\n') {
        this.pos = this.pos + 2;
        inlines.push({ t: 'Hardbreak' });
    } else if (reEscapable.test(following)) {
        this.pos = this.pos + 2;
        inlines.push({ t: 'Text', c: following });
    } else {
        this.pos++;
        inlines.push({t: 'Text', c: '\\'});
    }
    return true;
};
// Try to parse an autolink: an email address or an absolute URI with a
// recognized scheme, wrapped in pointy brackets. On success push a Link
// whose label is the bracket contents and return true; otherwise return
// false.
var parseAutolink = function(inlines) {
    var reEmailAutolink = /^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/;
    var reUriAutolink = /^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i;
    var bracketed;
    var target;

    // email autolink
    bracketed = this.match(reEmailAutolink);
    if (bracketed) {
        target = bracketed.slice(1,-1);
        inlines.push(
            {t: 'Link',
             label: [{ t: 'Text', c: target }],
             destination: 'mailto:' + encodeURI(unescape(target)) });
        return true;
    }

    // URI autolink
    bracketed = this.match(reUriAutolink);
    if (bracketed) {
        target = bracketed.slice(1,-1);
        inlines.push({
            t: 'Link',
            label: [{ t: 'Text', c: target }],
            destination: encodeURI(unescape(target)) });
        return true;
    }

    return false;
};
// Try to match a raw HTML tag at the current position. On success push
// it unchanged as an Html inline and return true; otherwise return false.
var parseHtmlTag = function(inlines) {
    var tag = this.match(reHtmlTag);
    if (!tag) {
        return false;
    }
    inlines.push({ t: 'Html', c: tag });
    return true;
};
// Count the run of characters with code `cc` starting at the current
// position and decide, from the characters on either side, whether the
// run may open and/or close emphasis. The position is restored before
// returning. A utility for strong/emph parsing.
var scanDelims = function(cc) {
    var origin = this.pos;
    // The start of the subject counts as a newline.
    var before = origin === 0 ? '\n' : this.subject.charAt(origin - 1);

    var count = 0;
    while (this.peek() === cc) {
        count++;
        this.pos++;
    }

    // So does the end of the subject.
    var nextCode = this.peek();
    var after = nextCode === -1 ? '\n' : fromCodePoint(nextCode);

    var opens = count > 0 && !(/\s/.test(after));
    var closes = count > 0 && !(/\s/.test(before));
    if (cc === C_UNDERSCORE) {
        // underscores adjoining an alphanumeric on the outer side do not count
        opens = opens && !((/[a-z0-9]/i).test(before));
        closes = closes && !((/[a-z0-9]/i).test(after));
    }

    this.pos = origin;
    return { numdelims: count,
             can_open: opens,
             can_close: closes };
};
// Build an emphasis inline wrapping the given list of inlines.
var Emph = function(ils) {
    var node = { t: 'Emph' };
    node.c = ils;
    return node;
};
// Build a strong-emphasis inline wrapping the given list of inlines.
var Strong = function(ils) {
    var node = { t: 'Strong' };
    node.c = ils;
    return node;
};
// Build a Text inline holding the string `s`.
var Str = function(s) {
    var node = { t: 'Text' };
    node.c = s;
    return node;
};
// Consume a run of emphasis delimiters with code `cc`. The run is pushed
// as plain Text and recorded on top of the delimiter stack; it is paired
// up later. Returns false if there is no such run at the current position.
var parseEmphasis = function(cc,inlines) {
    var scan = this.scanDelims(cc);
    var count = scan.numdelims;
    if (count === 0) {
        return false;
    }

    var begin = this.pos;
    this.pos += count;
    inlines.push(Str(this.subject.slice(begin, this.pos)));

    // Link the new entry above the current top of the stack.
    var below = this.delimiters;
    var entry = { cc: cc,
                  numdelims: count,
                  pos: inlines.length - 1,
                  previous: below,
                  next: null,
                  can_open: scan.can_open,
                  can_close: scan.can_close};
    this.delimiters = entry;
    if (below !== null) {
        below.next = entry;
    }
    return true;
};
// Unlink `delim` from the doubly linked delimiter stack. If it was the
// top entry (no `next`), this.delimiters moves down to the entry below.
var removeDelimiter = function(delim) {
    var below = delim.previous;
    var above = delim.next;
    if (below !== null) {
        below.next = above;
    }
    if (above === null) {
        // top of stack
        this.delimiters = below;
    } else {
        above.previous = below;
    }
};
// Compact `inlines` in place by dropping every null entry and keeping
// the order of the rest.
var removeGaps = function(inlines) {
    var kept = 0;
    var scan = 0;
    while (scan < inlines.length) {
        var entry = inlines[scan];
        if (entry !== null) {
            inlines[kept] = entry;
            kept++;
        }
        scan++;
    }
    // cut off the leftover tail
    inlines.splice(kept);
};
// Resolve emphasis among the delimiter-stack entries above `stack_bottom`.
// Each `*` / `_` closer is paired with the nearest matching opener below it,
// the inlines between them are wrapped in an Emph or Strong node, and
// `inlines` is rewritten in place. On return, null gaps have been removed
// from `inlines` and the delimiter stack has been popped to `stack_bottom`.
var processEmphasis = function(inlines, stack_bottom) {
var opener, closer;
var opener_inl, closer_inl;
var nextstack, tempstack;
var use_delims;
var contents;
// NOTE(review): `tmp` and `j` are declared but not used in this function.
var tmp;
var emph;
var i,j;
// find first closer above stack_bottom:
closer = this.delimiters;
while (closer !== null && closer.previous !== stack_bottom) {
closer = closer.previous;
}
// move forward, looking for closers, and handling each
while (closer !== null) {
if (closer.can_close && (closer.cc === C_UNDERSCORE || closer.cc === C_ASTERISK)) {
// found emphasis closer. now look back for first matching opener:
opener = closer.previous;
while (opener !== null && opener !== stack_bottom) {
if (opener.cc === closer.cc && opener.can_open) {
break;
}
opener = opener.previous;
}
if (opener !== null && opener !== stack_bottom) {
// calculate actual number of delimiters used from this closer
// If either run is shorter than 3, use the shorter run's length;
// otherwise use 2 for an even closer count and 1 for an odd one.
if (closer.numdelims < 3 || opener.numdelims < 3) {
use_delims = closer.numdelims <= opener.numdelims ?
closer.numdelims : opener.numdelims;
} else {
use_delims = closer.numdelims % 2 === 0 ? 2 : 1;
}
opener_inl = inlines[opener.pos];
closer_inl = inlines[closer.pos];
// remove used delimiters from stack elts and inlines
opener.numdelims -= use_delims;
closer.numdelims -= use_delims;
opener_inl.c = opener_inl.c.slice(0, opener_inl.c.length - use_delims);
closer_inl.c = closer_inl.c.slice(0, closer_inl.c.length - use_delims);
// build contents for new emph element
contents = inlines.slice(opener.pos + 1, closer.pos);
removeGaps(contents);
// one delimiter => Emph, anything else => Strong
emph = use_delims === 1 ? Emph(contents) : Strong(contents);
// insert into list of inlines
// The new node takes the slot right after the opener; the other slots
// up to (not including) the closer are nulled and compacted at the end.
inlines[opener.pos + 1] = emph;
for (i = opener.pos + 2; i < closer.pos; i++) {
inlines[i] = null;
}
// remove elts btw opener and closer in delimiters stack
tempstack = closer.previous;
while (tempstack !== null && tempstack !== opener) {
nextstack = tempstack.previous;
this.removeDelimiter(tempstack);
tempstack = nextstack;
}
// if opener has 0 delims, remove it and the inline
if (opener.numdelims === 0) {
inlines[opener.pos] = null;
this.removeDelimiter(opener);
}
// A used-up closer is removed and the scan moves to the next entry;
// a closer with delimiters left is examined again on the next pass.
if (closer.numdelims === 0) {
inlines[closer.pos] = null;
tempstack = closer.next;
this.removeDelimiter(closer);
closer = tempstack;
}
} else {
// no matching opener for this closer
closer = closer.next;
}
} else {
// not an emphasis closer
closer = closer.next;
}
}
removeGaps(inlines);
// remove all delimiters
while (this.delimiters != stack_bottom) {
this.removeDelimiter(this.delimiters);
}
};
// Try to parse a link title at the current position. Returns the title
// with its surrounding delimiters removed and escapes/entities resolved,
// or null if there is no title here.
var parseLinkTitle = function() {
    var quoted = this.match(reLinkTitle);
    if (!quoted) {
        return null;
    }
    // drop the first and last character (the delimiters)
    var inner = quoted.substr(1, quoted.length - 2);
    return unescapeString(inner);
};
// Try to parse a link destination, first in the <...> form and then in
// the bare form. Returns the unescaped, URI-encoded destination, or null
// if neither pattern matches.
var parseLinkDestination = function() {
    var raw = this.match(reLinkDestinationBraces);
    if (raw) {
        // chop off surrounding <..>:
        return encodeURI(unescape(unescapeString(raw.substr(1, raw.length - 2))));
    }
    raw = this.match(reLinkDestination);
    return raw !== null ? encodeURI(unescape(unescapeString(raw))) : null;
};
// Try to parse a bracketed link label at the current position. Returns
// the number of characters consumed, brackets included, or 0 if there is
// no label here.
var parseLinkLabel = function() {
    var label = this.match(/^\[(?:[^\\\[\]]|\\[\[\]]){0,1000}\]/);
    if (label === null) {
        return 0;
    }
    return label.length;
};
// Parse the text of a raw link label `s` (given with its surrounding
// brackets) into inlines, using a fresh InlineParser.
// (Note: this is not a method of InlineParser.)
var parseRawLabel = function(s) {
    var parser = new InlineParser();
    // An empty refmap is passed on purpose: links must not resolve
    // inside nested brackets.
    var inner = s.substr(1, s.length - 2);
    return parser.parse(inner, {});
};
// Consume a '[': push it as Text and record it on the delimiter stack as
// a possible link opener. The entry's `index` is the bracket's position
// in the subject. Always returns true.
var parseOpenBracket = function(inlines) {
    var bracketIndex = this.pos;
    this.pos += 1;
    inlines.push(Str("["));

    // Link the new entry above the current top of the stack.
    var below = this.delimiters;
    var entry = { cc: C_OPEN_BRACKET,
                  numdelims: 1,
                  pos: inlines.length - 1,
                  previous: below,
                  next: null,
                  can_open: true,
                  can_close: false,
                  index: bracketIndex };
    this.delimiters = entry;
    if (below !== null) {
        below.next = entry;
    }
    return true;
};
// Consume a '!'. If the next character is '[', consume it too, push "!["
// as Text and add a ! delimiter to the delimiter stack as a possible
// image opener (its `index` is the position of the '['). Otherwise just
// push "!" as Text. Always returns true.
var parseBang = function(inlines) {
    var bangIndex = this.pos;
    this.pos += 1;

    if (this.peek() !== C_OPEN_BRACKET) {
        inlines.push(Str("!"));
        return true;
    }

    this.pos += 1;
    inlines.push(Str("!["));

    // Link the new entry above the current top of the stack.
    var below = this.delimiters;
    var entry = { cc: C_BANG,
                  numdelims: 1,
                  pos: inlines.length - 1,
                  previous: below,
                  next: null,
                  can_open: true,
                  can_close: false,
                  index: bangIndex + 1 };
    this.delimiters = entry;
    if (below !== null) {
        below.next = entry;
    }
    return true;
};
// Try to match close bracket against an opening in the delimiter
// stack. Add either a link or image, or a plain ] character,
// to the inlines stack. If there is a matching delimiter,
// remove it from the delimiter stack.
var parseCloseBracket = function(inlines) {
var startpos;
var is_image;
var dest;
var title;
var matched = false;
var link_text;
var i;
var opener, closer_above, tempstack;
this.pos += 1;
startpos = this.pos;
// look through stack of delimiters for a [ or !
opener = this.delimiters;
while (opener !== null) {
if (opener.cc === C_OPEN_BRACKET || opener.cc === C_BANG) {
break;
}
opener = opener.previous;
}
if (opener === null) {
// no matched opener, just return a literal
inlines.push(Str("]"));
return true;
}
// If we got here, open is a potential opener
is_image = opener.cc === C_BANG;
// instead of copying a slice, we null out the
// parts of inlines that don't correspond to link_text;
// later, we'll collapse them. This is awkward, and could
// be simplified if we made inlines a linked list rather than
// an array:
link_text = inlines.slice(0);
for (i = 0; i < opener.pos + 1; i++) {
link_text[i] = null;
}
// Check to see if we have a link/image
// Inline link?
if (this.peek() === C_OPEN_PAREN) {
this.pos++;
if (this.spnl() &&
((dest = this.parseLinkDestination()) !== null) &&
this.spnl() &&
// make sure there's a space before the title:
(/^\s/.test(this.subject.charAt(this.pos - 1)) &&
(title = this.parseLinkTitle() || '') || true) &&
this.spnl() &&
this.match(/^\)/)) {
matched = true;
}
} else {
// Next, see if there's a link label
var savepos = this.pos;
this.spnl();
var beforelabel = this.pos;
n = this.parseLinkLabel();
if (n === 0 || n === 2) {
// empty or missing second label
reflabel = this.subject.slice(opener.index, startpos);
} else {
reflabel = this.subject.slice(beforelabel, beforelabel + n);
}
if (n === 0) {
// If shortcut reference link, rewind before spaces we skipped.
this.pos = savepos;
}
// lookup rawlabel in refmap
var link = this.refmap[normalizeReference(reflabel)];
if (link) {
dest = link.destination;
title = link.title;
matched = true;
}
}
if (matched) {
this.processEmphasis(link_text, opener.previous);
// remove the part of inlines that became link_text.
// see note above on why we need to do this instead of splice:
for (i = opener.pos; i < inlines.length; i++) {
inlines[i] = null;
}
// processEmphasis will remove this and later delimiters.
// Now, for a link, we also remove earlier link openers.
// (no links in links)
if (!is_image) {
opener = this.delimiters;
closer_above = null;
while (opener !== null) {
if (opener.cc === C_OPEN_BRACKET) {
if (closer_above) {
closer_above.previous = opener.previous;
} else {
this.delimiters = opener.previous;
}
} else {
closer_above = opener;
}
opener = opener.previous;
}
}
inlines.push({t: is_image ? 'Image' : 'Link',
destination: dest,
title: title,
label: link_text});
return true;
} else { // no match
this.removeDelimiter(opener); // remove this opener from stack
this.pos = startpos;
inlines.push(Str("]"));
return true;
}
};
// Attempt to parse an entity at the current position. On success the
// decoded character(s) are pushed to inlines as a Text node and true is
// returned; otherwise nothing is pushed and false is returned.
var parseEntity = function(inlines) {
    var entity = this.match(reEntityHere);
    if (!entity) {
        return false;
    }
    inlines.push({ t: 'Text', c: entityToChar(entity) });
    return true;
};
// Match whatever reMain accepts at the current position and push it to
// inlines as a plain Text node. Returns true on a match, false otherwise.
var parseString = function(inlines) {
    var text = this.match(reMain);
    if (!text) {
        return false;
    }
    inlines.push({ t: 'Text', c: text });
    return true;
};
// Parse optional spaces followed by a newline. If the newline was
// preceded by two or more spaces, push a hard line break; otherwise a
// soft line break. Returns false when no newline is found here.
var parseNewline = function(inlines) {
    var matched = this.match(/^ *\n/);
    if (!matched) {
        return false;
    }
    // matched includes the '\n', so length > 2 means at least two spaces.
    if (matched.length > 2) {
        inlines.push({ t: 'Hardbreak' });
    } else if (matched.length > 0) {
        inlines.push({ t: 'Softbreak' });
    }
    return true;
};
// Attempt to parse an image. If the opening '!' is not followed
// by a link, push a literal '!' instead. Returns false only when the
// current character is not '!'.
// NOTE(review): relies on this.parseLink, which is not among the methods
// listed in the InlineParser object in this file — confirm this function
// is still reachable.
var parseImage = function(inlines) {
    if (!this.match(/^!/)) {
        return false;
    }
    if (this.parseLink(inlines)) {
        // retag the link that was just pushed
        inlines[inlines.length - 1].t = 'Image';
    } else {
        inlines.push({ t: 'Text', c: '!' });
    }
    return true;
};
// Attempt to parse a link reference definition at the start of s.
// On success the definition is stored in refmap (unless that normalized
// label is already present) and the number of characters consumed is
// returned. On failure 0 is returned.
var parseReference = function(s, refmap) {
    this.subject = s;
    this.pos = 0;
    this.label_nest_level = 0;

    var origin = this.pos;

    // label:
    var labelLength = this.parseLinkLabel();
    if (labelLength === 0) {
        return 0;
    }
    var rawlabel = this.subject.substr(0, labelLength);

    // colon:
    if (this.peek() !== C_COLON) {
        this.pos = origin;
        return 0;
    }
    this.pos++;

    // link url
    this.spnl();
    var dest = this.parseLinkDestination();
    if (dest === null || dest.length === 0) {
        this.pos = origin;
        return 0;
    }

    // optional title
    var afterDest = this.pos;
    this.spnl();
    var title = this.parseLinkTitle();
    if (title === null) {
        title = '';
        // rewind before spaces
        this.pos = afterDest;
    }

    // make sure we're at line end:
    if (this.match(/^ *(?:\n|$)/) === null) {
        this.pos = origin;
        return 0;
    }

    // first definition of a label wins
    var key = normalizeReference(rawlabel);
    if (!refmap[key]) {
        refmap[key] = { destination: dest, title: title };
    }
    return this.pos - origin;
};
// Parse the next inline element in subject, advancing the subject
// position and appending the result to inlines.
// Returns false only when the end of the subject has been reached
// (peek() yields -1); otherwise returns true. If the specialised parser
// for the current character declines, the character itself is added as
// a Text node.
var parseInline = function(inlines) {
    var c = this.peek();
    if (c === -1) {
        return false;
    }

    var handled;
    if (c === C_NEWLINE || c === C_SPACE) {
        handled = this.parseNewline(inlines);
    } else if (c === C_BACKSLASH) {
        handled = this.parseBackslash(inlines);
    } else if (c === C_BACKTICK) {
        handled = this.parseBackticks(inlines);
    } else if (c === C_ASTERISK || c === C_UNDERSCORE) {
        handled = this.parseEmphasis(c, inlines);
    } else if (c === C_OPEN_BRACKET) {
        handled = this.parseOpenBracket(inlines);
    } else if (c === C_BANG) {
        handled = this.parseBang(inlines);
    } else if (c === C_CLOSE_BRACKET) {
        handled = this.parseCloseBracket(inlines);
    } else if (c === C_LESSTHAN) {
        handled = this.parseAutolink(inlines) || this.parseHtmlTag(inlines);
    } else if (c === C_AMPERSAND) {
        handled = this.parseEntity(inlines);
    } else {
        handled = this.parseString(inlines);
    }

    if (!handled) {
        // fall back to a literal character
        this.pos += 1;
        inlines.push({t: 'Text', c: fromCodePoint(c)});
    }
    return true;
};
// Parse s into an array of inline nodes, using refmap (or an empty map
// when none is given) to resolve references. Resets the parser position
// and delimiter stack before starting.
var parseInlines = function(s, refmap) {
    this.subject = s;
    this.pos = 0;
    this.refmap = refmap || {};
    this.delimiters = null;

    var result = [];
    var more;
    do {
        more = this.parseInline(result);
    } while (more);

    // resolve any emphasis delimiters still on the stack
    this.processEmphasis(result, null);
    return result;
};
// The InlineParser object: a plain object carrying the parser state
// (subject, pos, refmap, delimiter stack) together with the parsing
// functions defined in this file as its methods.
function InlineParser(){
    var parser = {
        // state
        subject: '',
        label_nest_level: 0, // used by parseLinkLabel method
        delimiters: null,  // used by parseEmphasis method
        pos: 0,
        refmap: {},
        // methods
        match: match,
        peek: peek,
        spnl: spnl,
        unescapeString: unescapeString,
        parseBackticks: parseBackticks,
        parseBackslash: parseBackslash,
        parseAutolink: parseAutolink,
        parseHtmlTag: parseHtmlTag,
        scanDelims: scanDelims,
        parseEmphasis: parseEmphasis,
        parseLinkTitle: parseLinkTitle,
        parseLinkDestination: parseLinkDestination,
        parseLinkLabel: parseLinkLabel,
        parseOpenBracket: parseOpenBracket,
        parseCloseBracket: parseCloseBracket,
        parseBang: parseBang,
        parseEntity: parseEntity,
        parseString: parseString,
        parseNewline: parseNewline,
        parseReference: parseReference,
        parseInline: parseInline,
        processEmphasis: processEmphasis,
        removeDelimiter: removeDelimiter,
        parse: parseInlines
    };
    return parser;
}

module.exports = InlineParser;

View File

@ -0,0 +1,25 @@
{ "name": "commonmark",
"description": "a strongly specified, highly compatible variant of Markdown",
"version": "0.12.0",
"homepage": "http://commonmark.org",
"keywords":
[ "markdown",
"commonmark",
"md",
"stmd" ],
"repository":
{ "type": "git",
"url": "https://github.com/jgm/CommonMark.git" },
"author": "John MacFarlane",
"bugs": { "url": "https://github.com/jgm/CommonMark/issues" },
"license": "BSD-3-Clause",
"main": "./lib/index.js",
"bin": { "commonmark": "./bin/commonmark" },
"scripts": { "test": "node ./test.js" },
"directories": {
"lib": "./lib"
},
"engines": {
"node": "*"
}
}

82
outside/commonmark/js/test.js Executable file
View File

@ -0,0 +1,82 @@
#!/usr/bin/env node
// Spec-conformance runner: extracts every example from spec.txt, renders
// its markdown with the JS implementation and compares the result with
// the expected HTML, printing a tick per pass and a diagnostic per failure.

var fs = require('fs');
var commonmark = require('./lib/index.js');
var ansi = require('./ansi/ansi');

var cursor = ansi(process.stdout);
var writer = new commonmark.HtmlRenderer();
var reader = new commonmark.DocParser();

var passed = 0;
var failed = 0;

// Make whitespace visible in failure reports: tab -> '→', space -> '␣'.
var showSpaces = function(s) {
    return s.replace(/\t/g,'→').replace(/ /g,'␣');
};

// Print the markdown / expected / got triple for a failing example.
var reportFailure = function(example, actual) {
    cursor.write('\n');
    cursor.red().write('✘ Example ' + example.number + '\n');
    cursor.cyan();
    cursor.write('=== markdown ===============\n');
    cursor.write(showSpaces(example.markdown));
    cursor.write('=== expected ===============\n');
    cursor.write(showSpaces(example.html));
    cursor.write('=== got ====================\n');
    cursor.write(showSpaces(actual));
    cursor.reset();
};

fs.readFile('spec.txt', 'utf8', function(err, data) {
    if (err) {
        return console.log(err);
    }

    var examples = [];
    var current_section = "";
    var example_number = 0;

    var tests = data
        .replace(/\r\n?/g, "\n") // Normalize newlines for platform independence
        .replace(/^<!-- END TESTS -->(.|[\n])*/m, '');

    // replace() is used only to iterate over the matches: each match is
    // either a section heading or a "." delimited markdown/html example.
    tests.replace(/^\.\n([\s\S]*?)^\.\n([\s\S]*?)^\.$|^#{1,6} *(.*)$/gm,
        function(_, markdownSubmatch, htmlSubmatch, sectionSubmatch) {
            if (sectionSubmatch) {
                current_section = sectionSubmatch;
                return;
            }
            example_number++;
            examples.push({markdown: markdownSubmatch,
                           html: htmlSubmatch,
                           section: current_section,
                           number: example_number});
        });

    current_section = "";

    console.time("Elapsed time");

    examples.forEach(function(example) {
        // print the section name once, when it changes
        if (example.section !== current_section) {
            if (current_section !== '') {
                cursor.write('\n');
            }
            current_section = example.section;
            cursor.reset().write(current_section).reset().write(' ');
        }

        var markdown = example.markdown.replace(/→/g, '\t');
        var actual = writer.renderBlock(reader.parse(markdown));
        if (actual === example.html) {
            passed++;
            cursor.green().write('✓').reset();
        } else {
            failed++;
            reportFailure(example, actual);
        }
    });

    cursor.write('\n' + passed.toString() + ' tests passed, ' +
                 failed.toString() + ' failed.\n');
    console.timeEnd("Elapsed time");
});

View File

@ -0,0 +1,5 @@
# Install the manual pages: cmark.1 into man section 1 and cmark.3 into
# man section 3, both relative to the install prefix.
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man1/cmark.1
        DESTINATION share/man/man1)

install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man3/cmark.3
        DESTINATION share/man/man3)

View File

@ -0,0 +1,102 @@
#!/usr/bin/env python
# Creates a man page from a C file.
# Comments beginning with `/**` are treated as Groff man, except that
# 'this' is converted to \fIthis\fR, and ''this'' to \fBthis\fR.
# Non-blank lines immediately following a man page comment are treated
# as function signatures or examples and parsed into .Ft, .Fo, .Fa, .Fc. The
# immediately preceding man documentation chunk is printed after the example
# as a comment on it.
# That's about it!
import sys, re, os
from datetime import date

# Regular expressions are raw strings so that backslash escapes such as
# \s, \w and \* reach the regex engine unchanged instead of relying on
# Python passing unknown string escapes through.
comment_start_re = re.compile(r'^\/\*\* ?')
comment_delim_re = re.compile(r'^[/ ]\** ?')
comment_end_re = re.compile(r'^ \**\/')
function_re = re.compile(r'^ *(?:CMARK_EXPORT\s+)?(?P<type>(?:const\s+)?\w+(?:\s*[*])?)\s*(?P<name>\w+)\s*\((?P<args>[^)]*)\)')
blank_re = re.compile(r'^\s*$')
macro_re = re.compile('CMARK_EXPORT *')
typedef_start_re = re.compile('typedef.*{$')
typedef_end_re = re.compile('}')
single_quote_re = re.compile(r"(?<!\w)'([^']+)'(?!\w)")
double_quote_re = re.compile(r"(?<!\w)''([^']+)''(?!\w)")

def handle_quotes(s):
    """Convert 'this' to \\fIthis\\fR and ''this'' to \\fBthis\\fR in s."""
    return re.sub(double_quote_re, r'\\fB\g<1>\\fR', re.sub(single_quote_re, r'\\fI\g<1>\\fR', s))

typedef = False   # truthy while inside a multi-line typedef signature
mdlines = []      # accumulated man-page output
chunk = []        # lines of the man comment currently being collected
sig = []          # lines of the signature/example following a man comment

if len(sys.argv) > 1:
    sourcefile = sys.argv[1]
else:
    print("Usage: make_man_page.py sourcefile")
    # sys.exit rather than the site-provided exit() helper
    sys.exit(1)

with open(sourcefile, 'r') as cmarkh:
    state = 'default'
    for line in cmarkh:
        # state transition
        oldstate = state
        if comment_start_re.match(line):
            state = 'man'
        elif comment_end_re.match(line) and state == 'man':
            continue
        elif comment_delim_re.match(line) and state == 'man':
            state = 'man'
        elif not typedef and blank_re.match(line):
            state = 'default'
        elif typedef and typedef_end_re.match(line):
            typedef = False
        elif state == 'man':
            # first non-comment line after a man comment starts a signature
            state = 'signature'
            typedef = typedef_start_re.match(line)

        # handle line
        if state == 'man':
            chunk.append(handle_quotes(re.sub(comment_delim_re, '', line)))
        elif state == 'signature':
            ln = re.sub(macro_re, '', line)
            if typedef or not re.match(blank_re, ln):
                sig.append(ln)
        elif oldstate == 'signature' and state != 'signature':
            # a signature just ended: emit it, followed by its comment
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            rawsig = ''.join(sig)
            m = function_re.match(rawsig)
            if m:
                mdlines.append('\\fI' + m.group('type') + '\\fR' + ' ')
                mdlines.append('\\fB' + m.group('name') + '\\fR' + '(')
                first = True
                for argument in re.split(',', m.group('args')):
                    if not first:
                        mdlines.append(', ')
                    first = False
                    mdlines.append('\\fI' + argument.strip() + '\\fR')
                mdlines.append(')\n')
            else:
                # not a function prototype: print verbatim as a code block
                mdlines.append('.nf\n\\f[C]\n.RS 0n\n')
                mdlines += sig
                mdlines.append('.RE\n\\f[]\n.fi\n')
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            mdlines.append('.PP\n')
            mdlines += chunk
            chunk = []
            sig = []
        elif oldstate == 'man' and state != 'signature':
            # a man comment ended without a signature after it
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            mdlines += chunk # add man chunk
            chunk = []
            mdlines.append('\n')

sys.stdout.write('.TH ' + os.path.basename(sourcefile).replace('.h','') + ' 3 "' + date.today().strftime('%B %d, %Y') + '" "LOCAL" "Library Functions Manual"\n')
sys.stdout.write(''.join(mdlines))

View File

@ -0,0 +1,31 @@
.TH "cmark" "1" "November 30, 2014" "LOCAL" "General Commands Manual"
.SH "NAME"
\fBcmark\fR
\- convert CommonMark formatted text to HTML
.SH "SYNOPSIS"
.HP 6n
\fBcmark\fR
[\fB\-\-ast\fR]
file*
.SH "DESCRIPTION"
\fBcmark\fR
acts as a pipe, reading from
\fRstdin\fR
or from the specified files and writing to
\fRstdout\fR.
It converts Markdown formatted plain text to HTML, using the conventions
described in the CommonMark spec.
If multiple files are specified, the contents of the files are simply
concatenated before parsing.
.SH "OPTIONS"
.TP 12n
\-\-ast
Print an abstract syntax tree instead of HTML.
.TP 12n
\-\-help
Print usage information.
.TP 12n
\-\-version
Print version.
.SH "AUTHORS"
John MacFarlane

View File

@ -0,0 +1,275 @@
.TH cmark 3 "December 05, 2014" "LOCAL" "Library Functions Manual"
.SH NAME
.B cmark
\- CommonMark parsing, manipulating, and rendering
.SH SIMPLE INTERFACE
.nf
\f[C]
.RS 0n
#define CMARK_VERSION "0.1"
.RE
\f[]
.fi
.PP
Current version of library.
\fIchar *\fR \fBcmark_markdown_to_html\fR(\fIconst char *text\fR, \fIint len\fR)
.PP
Convert \fItext\fR (assumed to be a UTF-8 encoded string with length
\fIlen\fR) from CommonMark Markdown to HTML, returning a null-terminated,
UTF-8-encoded string.
.SH NODE STRUCTURE
.nf
\f[C]
.RS 0n
typedef enum {
/* Block */
CMARK_NODE_DOCUMENT,
CMARK_NODE_BLOCK_QUOTE,
CMARK_NODE_LIST,
CMARK_NODE_LIST_ITEM,
CMARK_NODE_CODE_BLOCK,
CMARK_NODE_HTML,
CMARK_NODE_PARAGRAPH,
CMARK_NODE_HEADER,
CMARK_NODE_HRULE,
CMARK_NODE_REFERENCE_DEF,
CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
CMARK_NODE_LAST_BLOCK = CMARK_NODE_REFERENCE_DEF,
/* Inline */
CMARK_NODE_TEXT,
CMARK_NODE_SOFTBREAK,
CMARK_NODE_LINEBREAK,
CMARK_NODE_INLINE_CODE,
CMARK_NODE_INLINE_HTML,
CMARK_NODE_EMPH,
CMARK_NODE_STRONG,
CMARK_NODE_LINK,
CMARK_NODE_IMAGE,
CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT,
CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE,
} cmark_node_type;
.RE
\f[]
.fi
.PP
.nf
\f[C]
.RS 0n
typedef enum {
CMARK_NO_LIST,
CMARK_BULLET_LIST,
CMARK_ORDERED_LIST
} cmark_list_type;
.RE
\f[]
.fi
.PP
.nf
\f[C]
.RS 0n
typedef enum {
CMARK_PERIOD_DELIM,
CMARK_PAREN_DELIM
} cmark_delim_type;
.RE
\f[]
.fi
.PP
.SH CREATING AND DESTROYING NODES
\fIcmark_node*\fR \fBcmark_node_new\fR(\fIcmark_node_type type\fR)
.PP
\fIvoid\fR \fBcmark_node_free\fR(\fIcmark_node *node\fR)
.PP
\fIcmark_node*\fR \fBcmark_node_next\fR(\fIcmark_node *node\fR)
.PP
.SH TREE TRAVERSAL
\fIcmark_node*\fR \fBcmark_node_previous\fR(\fIcmark_node *node\fR)
.PP
\fIcmark_node*\fR \fBcmark_node_parent\fR(\fIcmark_node *node\fR)
.PP
\fIcmark_node*\fR \fBcmark_node_first_child\fR(\fIcmark_node *node\fR)
.PP
\fIcmark_node*\fR \fBcmark_node_last_child\fR(\fIcmark_node *node\fR)
.PP
.SH ACCESSORS
\fIcmark_node_type\fR \fBcmark_node_get_type\fR(\fIcmark_node *node\fR)
.PP
\fIconst char*\fR \fBcmark_node_get_string_content\fR(\fIcmark_node *node\fR)
.PP
\fIint\fR \fBcmark_node_set_string_content\fR(\fIcmark_node *node\fR, \fIconst char *content\fR)
.PP
\fIint\fR \fBcmark_node_get_header_level\fR(\fIcmark_node *node\fR)
.PP
\fIint\fR \fBcmark_node_set_header_level\fR(\fIcmark_node *node\fR, \fIint level\fR)
.PP
\fIcmark_list_type\fR \fBcmark_node_get_list_type\fR(\fIcmark_node *node\fR)
.PP
\fIint\fR \fBcmark_node_set_list_type\fR(\fIcmark_node *node\fR, \fIcmark_list_type type\fR)
.PP
\fIint\fR \fBcmark_node_get_list_start\fR(\fIcmark_node *node\fR)
.PP
\fIint\fR \fBcmark_node_set_list_start\fR(\fIcmark_node *node\fR, \fIint start\fR)
.PP
\fIint\fR \fBcmark_node_get_list_tight\fR(\fIcmark_node *node\fR)
.PP
\fIint\fR \fBcmark_node_set_list_tight\fR(\fIcmark_node *node\fR, \fIint tight\fR)
.PP
\fIconst char*\fR \fBcmark_node_get_fence_info\fR(\fIcmark_node *node\fR)
.PP
\fIint\fR \fBcmark_node_set_fence_info\fR(\fIcmark_node *node\fR, \fIconst char *info\fR)
.PP
\fIconst char*\fR \fBcmark_node_get_url\fR(\fIcmark_node *node\fR)
.PP
\fIint\fR \fBcmark_node_set_url\fR(\fIcmark_node *node\fR, \fIconst char *url\fR)
.PP
\fIconst char*\fR \fBcmark_node_get_title\fR(\fIcmark_node *node\fR)
.PP
\fIint\fR \fBcmark_node_set_title\fR(\fIcmark_node *node\fR, \fIconst char *title\fR)
.PP
\fIint\fR \fBcmark_node_get_start_line\fR(\fIcmark_node *node\fR)
.PP
\fIint\fR \fBcmark_node_get_start_column\fR(\fIcmark_node *node\fR)
.PP
\fIint\fR \fBcmark_node_get_end_line\fR(\fIcmark_node *node\fR)
.PP
.SH TREE MANIPULATION
\fIvoid\fR \fBcmark_node_unlink\fR(\fIcmark_node *node\fR)
.PP
\fIint\fR \fBcmark_node_insert_before\fR(\fIcmark_node *node\fR, \fIcmark_node *sibling\fR)
.PP
\fIint\fR \fBcmark_node_insert_after\fR(\fIcmark_node *node\fR, \fIcmark_node *sibling\fR)
.PP
\fIint\fR \fBcmark_node_prepend_child\fR(\fIcmark_node *node\fR, \fIcmark_node *child\fR)
.PP
\fIint\fR \fBcmark_node_append_child\fR(\fIcmark_node *node\fR, \fIcmark_node *child\fR)
.PP
.SH PARSING
\fIcmark_parser *\fR \fBcmark_parser_new\fR(\fI\fR)
.PP
\fIvoid\fR \fBcmark_parser_free\fR(\fIcmark_parser *parser\fR)
.PP
\fIcmark_node *\fR \fBcmark_parser_finish\fR(\fIcmark_parser *parser\fR)
.PP
\fIvoid\fR \fBcmark_parser_feed\fR(\fIcmark_parser *parser\fR, \fIconst char *buffer\fR, \fIsize_t len\fR)
.PP
\fIcmark_node *\fR \fBcmark_parse_document\fR(\fIconst char *buffer\fR, \fIsize_t len\fR)
.PP
\fIcmark_node *\fR \fBcmark_parse_file\fR(\fIFILE *f\fR)
.PP
.SH RENDERING
\fIchar *\fR \fBcmark_render_ast\fR(\fIcmark_node *root\fR)
.PP
\fIchar *\fR \fBcmark_render_html\fR(\fIcmark_node *root\fR)
.PP
.SH AUTHORS
John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.

View File

@ -0,0 +1,22 @@
# Reads lines of the form "CODE; STATUS; MAPPING; ..." on STDIN
# (presumably Unicode CaseFolding.txt — confirm) and prints the body of a
# C switch statement that pushes the folded code points for every entry
# whose status is C or F.
binmode STDOUT;
print(" switch (c) {\n");
my $lastchar = "";
while (<STDIN>) {
  if (/^[A-F0-9]/ and / [CF]; /) {
    my ($char, $type, $subst) = m/([A-F0-9]+); ([CF]); ([^;]+)/;
    if ($char eq $lastchar) {
      # Skip a repeated code point so we never emit a duplicate case
      # label. ("break" is not a Perl loop-control statement.)
      next;
    }
    my @subst = $subst =~ m/(\w+)/g;
    printf(" case 0x%s:\n", $char);
    foreach (@subst) {
      printf(" bufpush(0x%s);\n", $_);
    }
    printf(" break;\n");
    $lastchar = $char;
  }
}
printf(" default:\n");
printf(" bufpush(c);\n");
print(" }\n");

View File

@ -0,0 +1 @@
@rem Run nmake on Makefile.nmake, forwarding all command-line arguments.
@nmake.exe /nologo /f Makefile.nmake %*

6971
outside/commonmark/spec.txt Normal file

File diff suppressed because it is too large Load Diff

17
outside/commonmark/spec2js.js Executable file
View File

@ -0,0 +1,17 @@
#!/usr/bin/env node
// Dump every "." delimited example in spec.txt as a JavaScript array of
// {markdown, html} objects on stdout; the example count goes to stderr.

var fs = require('fs');
var util = require('util');

// Collect the markdown/html pairs found in the spec text.
var collectExamples = function(text) {
    var found = [];
    // replace() is used purely to iterate over the matches
    text.replace(/^\.\n([\s\S]*?)^\.\n([\s\S]*?)^\.$/gm,
        function(_, markdown, html) {
            found.push({markdown: markdown, html: html});
        });
    return found;
};

fs.readFile('spec.txt', 'utf8', function(err, data) {
    if (err) {
        return console.log(err);
    }
    var examples = collectExamples(data);
    console.log(util.inspect(examples, { depth: null }));
    console.warn(examples.length + ' examples');
});

View File

@ -0,0 +1,36 @@
#!/usr/bin/env perl
# Filter: copies the spec from STDIN to STDOUT, turning each "." delimited
# example into an HTML <div> wrapping two fenced code blocks (markdown and
# html) and making spaces inside examples visible as '␣'.
use strict;
use warnings;

my $stage = 0;      # 0 = outside an example, 1 = markdown part, 2 = html part
my $example = 0;    # running example number
my $section = "";   # text of the most recent heading seen outside examples

while (my $line = <STDIN>) {
  if ($line =~ /^\.$/) {
    if ($stage == 0) {
      $example++;
      print "\n<div class=\"example\" id=\"example-$example\" data-section=\"$section\">\n";
      print "<div class=\"examplenum\"><a href=\"#example-$example\">Example $example</a>&nbsp;&nbsp;<a class=\"dingus\" title=\"open in interactive dingus\">(interact)</a></div>\n\n";
      print "````````````````````````````````````````````````````````` markdown\n";
    } elsif ($stage == 1) {
      print "`````````````````````````````````````````````````````````\n\n";
      print "````````````````````````````````````````````````````````` html\n";
    } elsif ($stage == 2) {
      print "`````````````````````````````````````````````````````````\n\n";
      print "</div>\n\n";
    } else {
      die "Encountered unknown stage $stage";
    }
    $stage = ($stage + 1) % 3;
    next;
  }

  # remember the current section heading (only outside examples)
  if ($stage == 0 && $line =~ /^#{1,6} *(.*)/) {
    $section = $1;
  }
  # inside an example, show spaces explicitly
  if ($stage != 0) {
    $line =~ s/ /␣/g;
  }
  print $line;
}

View File

@ -0,0 +1,37 @@
#!/usr/bin/env runhaskell
-- Pandoc JSON filter: rewrites "example" Divs and HTML anchors into raw
-- LaTeX. Every Block is passed through exampleDivs and every Inline
-- through anchors.
import Text.Pandoc.JSON
import Text.Pandoc.Walk
main = toJSONFilter go
where go :: Pandoc -> Pandoc
go = walk exampleDivs . walk anchors
-- Rewrites a Div of class "example" that contains exactly an
-- "examplenum" Div, a "markdown" code block and an "html" code block
-- into nested LaTeX minipages: the number on top, the two code blocks
-- side by side. Any other block is returned unchanged.
exampleDivs :: Block -> Block
exampleDivs (Div (ident, ["example"], kvs)
[ d@(Div (_,["examplenum"],_) _),
c1@(CodeBlock (_,["markdown"],_) _),
c2@(CodeBlock (_,["html"],_) _)
]) = Div (ident, ["example"], kvs)
[ rawtex "\\begin{minipage}[t]{\\textwidth}\n{\\scriptsize "
, d
, rawtex "\\vspace{-1em}}"
, rawtex "\\begin{minipage}[t]{0.49\\textwidth}\n\\definecolor{shadecolor}{gray}{0.85}\n"
, addBreaks c1
, rawtex "\\end{minipage}\n\\hfill\n\\begin{minipage}[t]{0.49\\textwidth}\n\\definecolor{shadecolor}{gray}{0.95}\n"
, addBreaks c2
, rawtex "\\end{minipage}\n\\end{minipage}"
]
where rawtex = RawBlock (Format "latex")
-- addBreaks' inserts a newline after every 49 characters of the code
-- text (it counts across the whole string, not per line).
addBreaks (CodeBlock attrs code) = CodeBlock attrs $ addBreaks' code
addBreaks' code =
if length code > 49
then take 49 code ++ ('\n':addBreaks' (drop 49 code))
else code
exampleDivs x = x
-- Rewrites a raw HTML inline beginning with `<a id="` into a LaTeX
-- \hyperdef/\label pair named after the id (the text up to the next
-- double quote). Any other inline is returned unchanged.
anchors :: Inline -> Inline
anchors (RawInline (Format "html") ('<':'a':' ':'i':'d':'=':'"':xs)) =
RawInline (Format "latex") ("\\hyperdef{}{" ++ lab ++ "}{\\label{" ++ lab ++ "}}")
where lab = takeWhile (/='"') xs
anchors x = x

View File

@ -0,0 +1,144 @@
# Source lists and include paths for the cmark library target.
cmake_minimum_required(VERSION 2.8)
# Logical target name; the produced file name is set separately via
# the target's OUTPUT_NAME property.
set(LIBRARY "libcmark")
# Headers are listed so they are attached to the target (they are added
# to LIBRARY_SOURCES below).
set(HEADERS
cmark.h
parser.h
buffer.h
node.h
chunk.h
references.h
debug.h
bench.h
utf8.h
scanners.h
inlines.h
html/html_unescape.h
html/houdini.h
)
# Library sources. scanners.re and html/html_unescape.gperf are generator
# inputs listed alongside the C files.
set(LIBRARY_SOURCES
cmark.c
node.c
blocks.c
inlines.c
print.c
scanners.c
scanners.re
utf8.c
buffer.c
references.c
html/html.c
html/html_unescape.gperf
html/houdini_href_e.c
html/houdini_html_e.c
html/houdini_html_u.c
${HEADERS}
)
# The stand-alone cmark executable is disabled in this copy.
#set(PROGRAM "cmark")
#set(PROGRAM_SOURCES
# ${LIBRARY_SOURCES}
# main.c
# )
# The binary dir is on the include path for headers generated at
# configure time (config.h, export header).
include_directories(. html ${CMAKE_CURRENT_BINARY_DIR})
# NOTE(review): relative path out of this source tree — presumably the
# embedding project's include directory; confirm it is still needed.
include_directories(../../../i)
# Rule to regenerate scanners.c (in the source tree) from scanners.re
# with re2c. Under MSVC the paths handed to re2c are converted to native
# (backslash) form first.
set(RE2C re2c)
if(MSVC)
  file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR} DOS_CURRENT_SOURCE_DIR)
  add_custom_command(
    OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/scanners.c
    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/scanners.re
    COMMAND ${RE2C} --case-insensitive -b -i
            --no-generation-date
            -o ${DOS_CURRENT_SOURCE_DIR}\\scanners.c
            ${DOS_CURRENT_SOURCE_DIR}\\scanners.re)
else()
  add_custom_command(
    OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/scanners.c
    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/scanners.re
    COMMAND ${RE2C} --case-insensitive -b -i
            --no-generation-date
            -o ${CMAKE_CURRENT_SOURCE_DIR}/scanners.c
            ${CMAKE_CURRENT_SOURCE_DIR}/scanners.re)
endif()
# Defines the static library target, its symbol-visibility settings and
# the generated export header.
include (GenerateExportHeader)
#add_executable(${PROGRAM} ${PROGRAM_SOURCES})
#add_compiler_export_flags()
#
## Disable the PUBLIC declarations when compiling the executable:
#set_target_properties(${PROGRAM} PROPERTIES
# COMPILE_FLAGS -DCMARK_STATIC_DEFINE)
# Check integrity of node structure when compiled as debug:
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES")
# Hide symbols by default, via the visibility variables on CMake versions
# matching the test below and via -fvisibility=hidden on GCC/Clang
# otherwise.
# NOTE(review): the test requires major > 1 AND minor > 8, so e.g. 3.0
# does not match and falls through to the elseif — confirm this is intended.
if (${CMAKE_MAJOR_VERSION} GREATER 1 AND ${CMAKE_MINOR_VERSION} GREATER 8)
set(CMAKE_C_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
endif ()
# The library is built static here; the shared variant is commented out.
# SHARED add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES})
add_library(${LIBRARY} STATIC ${LIBRARY_SOURCES})
# Output file base name is "cmark" rather than the target name.
set_target_properties(${LIBRARY}
PROPERTIES OUTPUT_NAME "cmark")
set_property(TARGET ${LIBRARY}
APPEND PROPERTY MACOSX_RPATH true)
# NOTE(review): PROJECT_NAME is not set in this file — presumably it comes
# from a parent CMakeLists.txt; the install rule below expects the
# generated header to be named cmark_export.h.
generate_export_header(${LIBRARY}
BASE_NAME ${PROJECT_NAME})
#if (MSVC)
# set_property(TARGET ${PROGRAM}
# APPEND PROPERTY LINK_FLAGS /INCREMENTAL:NO)
#endif(MSVC)
#install(TARGETS ${PROGRAM} # SHARED ${LIBRARY}
# RUNTIME DESTINATION bin
# LIBRARY DESTINATION lib
# )
# Only the public headers are installed; the TARGETS install rule above
# is commented out.
install(FILES cmark.h ${CMAKE_CURRENT_BINARY_DIR}/cmark_export.h
DESTINATION include
)
# Feature tests: probe for <stdbool.h>, __builtin_expect and
# __attribute__ support, then write the results into config.h in the
# build directory from the config.h.in template.
include(CheckIncludeFile)
include(CheckCSourceCompiles)

check_include_file(stdbool.h HAVE_STDBOOL_H)

check_c_source_compiles(
  "int main() { __builtin_expect(0,0); return 0; }"
  HAVE___BUILTIN_EXPECT)

check_c_source_compiles("
int f(void) __attribute__ (());
int main() { return 0; }
" HAVE___ATTRIBUTE__)

configure_file(
  ${CMAKE_CURRENT_SOURCE_DIR}/config.h.in
  ${CMAKE_CURRENT_BINARY_DIR}/config.h)
# Always compile with warnings
if(MSVC)
  # Force to always compile with W4.
  # The check must look at CMAKE_C_FLAGS, the variable that is actually
  # rewritten; testing CMAKE_CXX_FLAGS could leave an existing /W level in
  # the C flags untouched or skip appending /W4.
  if(CMAKE_C_FLAGS MATCHES "/W[0-4]")
    string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
  else()
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4")
  endif()
  # Silence specific MSVC warnings and the CRT "secure" deprecation notices.
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4127 /wd4244 /wd4267 /wd4706 /wd4800 /D_CRT_SECURE_NO_WARNINGS")
elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -std=c99 -pedantic")
endif()
# Compile as C++ under MSVC (/TP treats every source file as C++).
if(MSVC)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP")
endif()

# Define TIMER=1 for the C sources when the TIMER environment variable
# is set to a true value at configure time.
if($ENV{TIMER})
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DTIMER=1")
endif()

View File

@ -0,0 +1,27 @@
#ifndef CMARK_BENCH_H
#define CMARK_BENCH_H

#include <stdio.h>
#include <time.h>

/*
 * Minimal timing helpers, active only when TIMER is defined.
 *
 *   start_timer()  saves the current start time and records a new one.
 *   end_timer(M)   prints the CPU time elapsed since the matching
 *                  start_timer() to stderr, tagged with the string
 *                  literal M, then restores the saved start time.
 *
 * Without TIMER both macros expand to nothing.
 *
 * NOTE(review): the three variables below are defined (not just declared)
 * in this header, so with TIMER set every translation unit including it
 * defines them — confirm this links cleanly with the toolchain in use.
 */
#ifdef TIMER
float _cmark_start_time;
float _cmark_end_time;
float _cmark_save_time;

#define start_timer() \
	_cmark_save_time = _cmark_start_time; \
	_cmark_start_time = (float)clock()/CLOCKS_PER_SEC

/* clock()/CLOCKS_PER_SEC is in seconds; multiplied by 1000000 the value
 * printed is in microseconds, so the unit label is "us". */
#define end_timer(M) \
	_cmark_end_time = (float)clock()/CLOCKS_PER_SEC; \
	fprintf(stderr, "[TIME] (%s:%d) %4.f us " M "\n", __FILE__, \
		__LINE__, (_cmark_end_time - _cmark_start_time) * 1000000); \
	_cmark_start_time = _cmark_save_time;

#else
#define start_timer()
#define end_timer(M)
#endif

#endif

View File

@ -0,0 +1,897 @@
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <ctype.h>
#include "config.h"
#include "parser.h"
#include "cmark.h"
#include "node.h"
#include "references.h"
#include "utf8.h"
#include "scanners.h"
#include "inlines.h"
#include "html/houdini.h"
#include "buffer.h"
#include "debug.h"
// Number of columns of indentation that make an indented code block.
#define CODE_INDENT 4

// Byte at index n of the chunk pointed to by i.
#define peek_at(i, n) ((i)->data[(n)])
static void
S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
bool eof);
static void
S_process_line(cmark_parser *parser, const unsigned char *buffer,
size_t bytes);
// Allocate a zero-initialised block node of the given type, marked open,
// starting (and, for now, ending) at start_line, with an empty
// string_content buffer. Returns NULL if the allocation fails.
static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column)
{
	cmark_node *node = (cmark_node *)calloc(1, sizeof(*node));

	if (node == NULL)
		return NULL;

	node->type = tag;
	node->open = true;
	node->start_line = start_line;
	node->start_column = start_column;
	node->end_line = start_line;
	strbuf_init(&node->string_content, 32);
	return node;
}
// Create the root document node, positioned at line 1, column 1.
static cmark_node* make_document()
{
	return make_block(NODE_DOCUMENT, 1, 1);
}
// Create a new parser with an empty document root, an empty reference
// map and two line buffers.
// Returns NULL (releasing anything already allocated) if the parser, one
// of its buffers, or the document node cannot be allocated. The result
// is released with cmark_parser_free().
cmark_parser *cmark_parser_new()
{
	cmark_parser *parser = (cmark_parser*)malloc(sizeof(cmark_parser));
	strbuf *line = (strbuf*)malloc(sizeof(strbuf));
	strbuf *buf = (strbuf*)malloc(sizeof(strbuf));
	cmark_node *document;

	if (parser == NULL || line == NULL || buf == NULL) {
		free(buf);
		free(line);
		free(parser);
		return NULL;
	}

	// Allocated last so that no node needs to be torn down on failure.
	document = make_document();
	if (document == NULL) {
		free(buf);
		free(line);
		free(parser);
		return NULL;
	}

	cmark_strbuf_init(line, 256);
	cmark_strbuf_init(buf, 0);

	parser->refmap = cmark_reference_map_new();
	parser->root = document;
	parser->current = document;
	parser->line_number = 0;
	parser->curline = line;
	parser->linebuf = buf;

	return parser;
}
// Release a parser: both line buffers (contents, then the strbuf objects
// themselves), the reference map, and finally the parser struct.
// The document tree (parser->root) is not freed here.
void cmark_parser_free(cmark_parser *parser)
{
	strbuf *cur = parser->curline;
	strbuf *pending = parser->linebuf;

	cmark_strbuf_free(cur);
	free(cur);

	cmark_strbuf_free(pending);
	free(pending);

	cmark_reference_map_free(parser->refmap);
	free(parser);
}
static void finalize(cmark_parser *parser, cmark_node* b, int line_number);
// Returns true if, starting at offset, the buffer holds nothing but
// spaces up to the first newline or the end of the buffer; returns false
// as soon as any other character is seen.
static bool is_blank(strbuf *s, int offset)
{
	for (; offset < s->size; offset++) {
		if (s->ptr[offset] == '\n')
			return true;
		if (s->ptr[offset] != ' ')
			return false;
	}
	return true;
}
// Whether a block of parent_type may directly contain a child of
// child_type: documents, block quotes and list items accept any child;
// a list accepts only list items; nothing else accepts children.
static inline bool can_contain(cmark_node_type parent_type, cmark_node_type child_type)
{
	if (parent_type == NODE_DOCUMENT)
		return true;
	if (parent_type == NODE_BLOCK_QUOTE)
		return true;
	if (parent_type == NODE_LIST_ITEM)
		return true;
	if (parent_type == NODE_LIST)
		return child_type == NODE_LIST_ITEM;
	return false;
}
// Whether a block of this type accumulates raw text lines: true for
// paragraphs, headers and code blocks.
static inline bool accepts_lines(cmark_node_type block_type)
{
	switch (block_type) {
	case NODE_PARAGRAPH:
	case NODE_HEADER:
	case NODE_CODE_BLOCK:
		return true;
	default:
		return false;
	}
}
// Append the tail of chunk ch, from byte offset to its end, to the
// string content of an open node. Asserts that the node is still open.
static void add_line(cmark_node* cmark_node, chunk *ch, int offset)
{
	assert(cmark_node->open);
	strbuf_put(&(cmark_node->string_content),
		   &ch->data[offset],
		   ch->len - offset);
}
// Drop blank lines from the end of ln. If the buffer holds only
// whitespace it is cleared; otherwise it is truncated at the first
// newline found at or after the last non-whitespace character (that
// newline itself is removed as well).
static void remove_trailing_blank_lines(strbuf *ln)
{
	int last = ln->size - 1;

	// scan backwards for the last non-whitespace byte
	while (last >= 0) {
		unsigned char c = ln->ptr[last];
		if (!(c == ' ' || c == '\t' || c == '\r' || c == '\n'))
			break;
		last--;
	}

	if (last < 0) {
		strbuf_clear(ln);
		return;
	}

	int newline = strbuf_strchr(ln, '\n', last);
	if (newline >= 0)
		strbuf_truncate(ln, newline);
}
// Check whether a node ends with a blank line, following the last child
// downwards for as long as the node is a list or list item.
static bool ends_with_blank_line(cmark_node* cmark_node)
{
	for (;;) {
		if (cmark_node->last_line_blank)
			return true;

		bool is_list_like = cmark_node->type == NODE_LIST ||
				    cmark_node->type == NODE_LIST_ITEM;
		if (!is_list_like || !cmark_node->last_child)
			return false;

		cmark_node = cmark_node->last_child;
	}
}
// Break out of all containing lists: locate the first NODE_LIST on the
// chain of last children starting at the root, finalize every block from
// *bptr up to that list (inclusive), and set *bptr to the list's parent.
// Does nothing if no list is found. Always returns 0.
static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr, int line_number)
{
	cmark_node *list = parser->root;
	cmark_node *open_block;

	// descend along last children until a list (or nothing) is reached
	while (list && list->type != NODE_LIST)
		list = list->last_child;

	if (list == NULL)
		return 0;

	for (open_block = *bptr; open_block && open_block != list;
	     open_block = open_block->parent) {
		finalize(parser, open_block, line_number);
	}
	finalize(parser, list, line_number);
	*bptr = list->parent;

	return 0;
}
// Close block b and do the per-type post-processing of its accumulated
// content. line_number is the line at which the block is being closed.
// Does nothing if b is already closed.
static void finalize(cmark_parser *parser, cmark_node* b, int line_number)
{
int firstlinelen;
int pos;
cmark_node* item;
cmark_node* subitem;
if (!b->open)
return; // don't do anything if the cmark_node is already closed
b->open = false;
// The block ends on the line before the closing line, unless it is
// being closed on or before the line it started on.
if (line_number > b->start_line) {
b->end_line = line_number - 1;
} else {
b->end_line = line_number;
}
switch (b->type) {
case NODE_PARAGRAPH:
// Strip leading link reference definitions one at a time, recording
// them in the parser's refmap; pos is the number of bytes consumed.
while (strbuf_at(&b->string_content, 0) == '[' &&
(pos = cmark_parse_reference_inline(&b->string_content, parser->refmap))) {
strbuf_drop(&b->string_content, pos);
}
// A paragraph with nothing left but blanks is retagged as a
// reference definition block.
if (is_blank(&b->string_content, 0)) {
b->type = NODE_REFERENCE_DEF;
}
break;
case NODE_CODE_BLOCK:
if (!b->as.code.fenced) { // indented code
// content ends with exactly one newline after trimming blank lines
remove_trailing_blank_lines(&b->string_content);
strbuf_putc(&b->string_content, '\n');
break;
} else {
// first line of contents becomes info
// NOTE(review): firstlinelen is used without checking for a
// "not found" result from strbuf_strchr — presumably the content of
// a fenced block always contains a newline here; confirm.
firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
houdini_unescape_html_f(
&b->as.code.info,
b->string_content.ptr,
firstlinelen
);
// drop the info line together with its newline
strbuf_drop(&b->string_content, firstlinelen + 1);
strbuf_trim(&b->as.code.info);
strbuf_unescape(&b->as.code.info);
break;
}
case NODE_LIST: // determine tight/loose status
b->as.list.tight = true; // tight by default
item = b->first_child;
while (item) {
// check for non-final non-empty list item ending with blank line:
if (item->last_line_blank && item->next) {
b->as.list.tight = false;
break;
}
// recurse into children of list item, to see if there are
// spaces between them:
subitem = item->first_child;
while (subitem) {
if (ends_with_blank_line(subitem) &&
(item->next || subitem->next)) {
b->as.list.tight = false;
break;
}
subitem = subitem->next;
}
// the inner break only left the subitem loop; leave the item loop too
if (!(b->as.list.tight)) {
break;
}
item = item->next;
}
break;
default:
break;
}
}
// Create a new block of block_type and append it as the last child of
// the nearest node, starting at 'parent' and walking upwards, that can
// contain it. Every node that cannot contain it is finalized on the way
// up. Returns a pointer to the new child.
static cmark_node* add_child(cmark_parser *parser, cmark_node* parent,
		cmark_node_type block_type, int start_line, int start_column)
{
	assert(parent);

	// back up until we hit a node that accepts this kind of child
	for (; !can_contain(parent->type, block_type); parent = parent->parent) {
		finalize(parser, parent, start_line);
	}

	cmark_node *child = make_block(block_type, start_line, start_column);
	cmark_node *previous_last = parent->last_child;

	child->parent = parent;
	child->prev = previous_last;
	if (previous_last) {
		previous_last->next = child;
	} else {
		parent->first_child = child;
	}
	parent->last_child = child;

	return child;
}
// Heap-allocated stack frame used by process_inlines to walk the tree
// without recursion.
typedef struct BlockStack {
// frame that was on top before this one was pushed (NULL at the bottom)
struct BlockStack *previous;
// node to resume from once the subtree below the pushing node is done
cmark_node *next_sibling;
} block_stack;
// Visit `cur`, its following siblings and all their descendants, and parse
// the string content of paragraphs and headers into inline content.
// The traversal is iterative: an explicit stack remembers where to resume
// after each subtree. If a stack frame cannot be allocated the walk stops
// early and whatever is left on the stack is released.
static void process_inlines(cmark_node* cur, cmark_reference_map *refmap)
{
	block_stack *top = NULL;
	block_stack *frame = NULL;

	while (cur != NULL) {
		if (cur->type == NODE_PARAGRAPH || cur->type == NODE_HEADER)
			cmark_parse_inlines(cur, refmap);

		if (cur->first_child != NULL) {
			// descend, remembering the sibling to come back to
			frame = (block_stack*)malloc(sizeof(block_stack));
			if (frame == NULL)
				break;
			frame->previous = top;
			frame->next_sibling = cur->next;
			top = frame;
			cur = cur->first_child;
		} else {
			cur = cur->next;
		}

		// ran off the end of a sibling chain: pop until there is
		// something to resume from
		while (cur == NULL && top != NULL) {
			frame = top;
			cur = frame->next_sibling;
			top = frame->previous;
			free(frame);
		}
	}

	// only non-empty after an allocation failure
	while (top != NULL) {
		frame = top->previous;
		free(top);
		top = frame;
	}
}
// Attempts to parse a list item marker (bullet or enumerated) at `pos`.
// On success, returns the length of the marker and stores a newly
// calloc'ed cmark_list describing it in *dataptr (the caller frees it).
// On failure (no marker, marker not followed by whitespace, or allocation
// failure) returns 0 and leaves *dataptr untouched.
//
// The start number of an ordered list saturates at 999999999 instead of
// overflowing `int` (signed overflow is undefined behaviour) when the
// input contains a very long run of digits.
static int parse_list_marker(chunk *input, int pos, cmark_list **dataptr)
{
	// largest value that can still take another decimal digit, and the
	// value the start number saturates at
	enum { START_PREFIX_MAX = 99999999, START_MAX = 999999999 };

	unsigned char c;
	int startpos = pos;
	int ordered;
	int start = 1;
	cmark_list *data;

	c = peek_at(input, pos);
	if ((c == '*' || c == '-' || c == '+') && !scan_hrule(input, pos)) {
		ordered = 0;
		pos++;
	} else if (isdigit(c)) {
		ordered = 1;
		start = 0;
		do {
			int digit = peek_at(input, pos) - '0';
			if (start <= START_PREFIX_MAX)
				start = (10 * start) + digit;
			else
				start = START_MAX;
			pos++;
		} while (isdigit(peek_at(input, pos)));

		// the number must be followed by '.' or ')'
		c = peek_at(input, pos);
		if (c != '.' && c != ')')
			return 0;
		pos++;
	} else {
		return 0;
	}

	// a marker only counts when whitespace follows it
	if (!isspace(peek_at(input, pos)))
		return 0;

	data = (cmark_list *)calloc(1, sizeof(*data));
	if (data == NULL)
		return 0;

	data->marker_offset = 0; // will be adjusted later
	data->list_type = ordered ? CMARK_ORDERED_LIST : CMARK_BULLET_LIST;
	data->bullet_char = ordered ? 0 : c;
	data->start = start;
	data->delimiter = (ordered && c == ')') ? CMARK_PAREN_DELIM
	                                        : CMARK_PERIOD_DELIM;
	data->tight = false;

	*dataptr = data;
	return (pos - startpos);
}
// Return 1 if a list item described by `item_data` belongs in the list
// described by `list_data`, else 0. Type, delimiter and bullet character
// must all agree; the marker_offset comparison is left disabled.
static int lists_match(cmark_list *list_data, cmark_list *item_data)
{
	if (list_data->list_type != item_data->list_type)
		return 0;
	if (list_data->delimiter != item_data->delimiter)
		return 0;
	// list_data->marker_offset == item_data.marker_offset is not checked
	return list_data->bullet_char == item_data->bullet_char;
}
// Close every block that is still open, from the current one up to and
// including the root, then parse inline content for the whole tree.
// Returns the document root.
static cmark_node *finalize_document(cmark_parser *parser)
{
	for (; parser->current != parser->root;
	     parser->current = parser->current->parent) {
		finalize(parser, parser->current, parser->line_number);
	}

	finalize(parser, parser->root, parser->line_number);
	process_inlines(parser->root, parser->refmap);

	return parser->root;
}
// Parse the whole stream `f` and return the resulting document tree.
// The stream is read in fixed-size pieces; a short read is taken to be the
// final piece of input.
cmark_node *cmark_parse_file(FILE *f)
{
	unsigned char buffer[4096];
	cmark_parser *parser = cmark_parser_new();
	cmark_node *document;
	size_t got;
	bool last_piece;

	for (;;) {
		got = fread(buffer, 1, sizeof(buffer), f);
		if (got == 0)
			break;
		last_piece = got < sizeof(buffer);
		S_parser_feed(parser, buffer, got, last_piece);
		if (last_piece)
			break;
	}

	document = cmark_parser_finish(parser);
	cmark_parser_free(parser);
	return document;
}
// Parse `len` bytes starting at `buffer` as one complete document and
// return the resulting tree. The parser used for the job is created and
// freed here.
cmark_node *cmark_parse_document(const char *buffer, size_t len)
{
	const unsigned char *bytes = (const unsigned char *)buffer;
	cmark_parser *parser = cmark_parser_new();
	cmark_node *document;

	// the whole input is supplied at once, so this is also the end of it
	S_parser_feed(parser, bytes, len, true);

	document = cmark_parser_finish(parser);
	cmark_parser_free(parser);
	return document;
}
// Feed `len` more bytes of input to `parser`. More input may follow, so a
// trailing partial line is buffered rather than processed.
void
cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len)
{
	const unsigned char *bytes = (const unsigned char *)buffer;

	S_parser_feed(parser, bytes, len, false);
}
// Split `len` bytes of input into lines and hand each complete line to
// S_process_line. A trailing piece without a newline is processed as a
// line when `eof` is true, and otherwise appended to parser->linebuf to be
// completed by a later call.
static void
S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
              bool eof)
{
	const unsigned char *const limit = buffer + len;
	const unsigned char *cursor = buffer;

	while (cursor < limit) {
		const size_t remaining = limit - cursor;
		const unsigned char *newline
		    = (const unsigned char *)memchr(cursor, '\n', remaining);
		size_t piece_len;

		if (newline != NULL) {
			// include the newline itself
			piece_len = (newline - cursor) + 1;
		} else if (eof) {
			piece_len = remaining;
		} else {
			// incomplete line: keep it for the next feed
			strbuf_put(parser->linebuf, cursor, remaining);
			break;
		}

		if (parser->linebuf->size > 0) {
			// finish the line that an earlier feed started
			strbuf_put(parser->linebuf, cursor, piece_len);
			S_process_line(parser, parser->linebuf->ptr,
			               parser->linebuf->size);
			strbuf_clear(parser->linebuf);
		} else {
			S_process_line(parser, cursor, piece_len);
		}

		cursor += piece_len;
	}
}
// Trim trailing whitespace from `ch`; then, if the text ends in a run of
// '#' characters that is preceded by a space, cut the run (and the
// whitespace before it) off as well.
static void chop_trailing_hashtags(chunk *ch)
{
	int last;
	int n;

	chunk_rtrim(ch);
	last = ch->len - 1;

	// step backwards over the trailing #s
	for (n = last; n >= 0 && peek_at(ch, n) == '#'; n--)
		;

	// keep the text as it is unless at least one # was skipped and a
	// space comes right before the run
	if (n == last || n < 0 || peek_at(ch, n) != ' ')
		return;

	ch->len = n;
	chunk_rtrim(ch);
}
// Incorporate one line of input (`bytes` bytes at `buffer`) into the block
// tree of `parser`.
//
// The work happens in three phases:
//   1. walk down the chain of open blocks and check that the line still
//      matches each of them;
//   2. look for new block starts in what remains of the line;
//   3. add the remaining text to the appropriate block, closing blocks that
//      were not matched (unless the line is a lazy paragraph continuation).
//
// Differences from the previous revision:
//   - last_line_blank is now suppressed only for *fenced code blocks*, as
//     the accompanying comment states. The old test
//     `type != NODE_CODE_BLOCK && as.code.fenced` consulted the code-block
//     member of `as` on nodes that are not code blocks.
//   - input.alloc is initialised, so no field of `input` is left
//     indeterminate when it is handed to helpers.
static void
S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
{
	cmark_node* last_matched_container;
	int offset = 0;
	int matched = 0;
	int lev = 0;
	int i;
	cmark_list *data = NULL;
	bool all_matched = true;
	cmark_node* container;
	cmark_node* cur = parser->current;
	bool blank = false;
	int first_nonspace;
	int indent;
	chunk input;

	utf8proc_detab(parser->curline, buffer, bytes);

	// Add a newline to the end if not present:
	// TODO this breaks abstraction:
	if (parser->curline->ptr[parser->curline->size - 1] != '\n') {
		strbuf_putc(parser->curline, '\n');
	}

	// `input` is a non-owning view of the current line
	input.data = parser->curline->ptr;
	input.len = parser->curline->size;
	input.alloc = 0;

	// container starts at the document root.
	container = parser->root;

	parser->line_number++;

	// for each containing cmark_node, try to parse the associated line start.
	// bail out on failure: container will point to the last matching cmark_node.
	while (container->last_child && container->last_child->open) {
		container = container->last_child;

		first_nonspace = offset;
		while (peek_at(&input, first_nonspace) == ' ') {
			first_nonspace++;
		}

		indent = first_nonspace - offset;
		blank = peek_at(&input, first_nonspace) == '\n';

		if (container->type == NODE_BLOCK_QUOTE) {
			matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
			if (matched) {
				offset = first_nonspace + 1;
				if (peek_at(&input, offset) == ' ')
					offset++;
			} else {
				all_matched = false;
			}
		} else if (container->type == NODE_LIST_ITEM) {
			if (indent >= container->as.list.marker_offset +
			    container->as.list.padding) {
				offset += container->as.list.marker_offset +
				          container->as.list.padding;
			} else if (blank) {
				offset = first_nonspace;
			} else {
				all_matched = false;
			}
		} else if (container->type == NODE_CODE_BLOCK) {
			if (!container->as.code.fenced) { // indented
				if (indent >= CODE_INDENT) {
					offset += CODE_INDENT;
				} else if (blank) {
					offset = first_nonspace;
				} else {
					all_matched = false;
				}
			} else {
				// skip optional spaces of fence offset
				i = container->as.code.fence_offset;
				while (i > 0 && peek_at(&input, offset) == ' ') {
					offset++;
					i--;
				}
			}
		} else if (container->type == NODE_HEADER) {
			// a header can never contain more than one line
			all_matched = false;
			if (blank) {
				container->last_line_blank = true;
			}
		} else if (container->type == NODE_HTML) {
			if (blank) {
				container->last_line_blank = true;
				all_matched = false;
			}
		} else if (container->type == NODE_PARAGRAPH) {
			if (blank) {
				container->last_line_blank = true;
				all_matched = false;
			}
		}

		if (!all_matched) {
			container = container->parent; // back up to last matching cmark_node
			break;
		}
	}

	last_matched_container = container;

	// check to see if we've hit 2nd blank line, break out of list:
	if (blank && container->last_line_blank) {
		break_out_of_lists(parser, &container, parser->line_number);
	}

	// unless last matched container is code cmark_node, try new container starts:
	while (container->type != NODE_CODE_BLOCK &&
	       container->type != NODE_HTML) {

		first_nonspace = offset;
		while (peek_at(&input, first_nonspace) == ' ')
			first_nonspace++;

		indent = first_nonspace - offset;
		blank = peek_at(&input, first_nonspace) == '\n';

		if (indent >= CODE_INDENT) {
			if (cur->type != NODE_PARAGRAPH && !blank) {
				offset += CODE_INDENT;
				container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, offset + 1);
				container->as.code.fenced = false;
				container->as.code.fence_char = 0;
				container->as.code.fence_length = 0;
				container->as.code.fence_offset = 0;
				strbuf_init(&container->as.code.info, 0);
			} else { // indent > 4 in lazy line
				break;
			}

		} else if (peek_at(&input, first_nonspace) == '>') {

			offset = first_nonspace + 1;
			// optional following character
			if (peek_at(&input, offset) == ' ')
				offset++;
			container = add_child(parser, container, NODE_BLOCK_QUOTE, parser->line_number, offset + 1);

		} else if ((matched = scan_atx_header_start(&input, first_nonspace))) {

			offset = first_nonspace + matched;
			container = add_child(parser, container, NODE_HEADER, parser->line_number, offset + 1);

			// the header level is the number of leading #s
			int hashpos = chunk_strchr(&input, '#', first_nonspace);
			int level = 0;

			while (peek_at(&input, hashpos) == '#') {
				level++;
				hashpos++;
			}
			container->as.header.level = level;
			container->as.header.setext = false;

		} else if ((matched = scan_open_code_fence(&input, first_nonspace))) {

			container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, first_nonspace + 1);
			container->as.code.fenced = true;
			container->as.code.fence_char = peek_at(&input, first_nonspace);
			container->as.code.fence_length = matched;
			container->as.code.fence_offset = first_nonspace - offset;
			strbuf_init(&container->as.code.info, 0);
			offset = first_nonspace + matched;

		} else if ((matched = scan_html_block_tag(&input, first_nonspace))) {

			container = add_child(parser, container, NODE_HTML, parser->line_number, first_nonspace + 1);
			// note, we don't adjust offset because the tag is part of the text

		} else if (container->type == NODE_PARAGRAPH &&
		           (lev = scan_setext_header_line(&input, first_nonspace)) &&
		           // check that there is only one line in the paragraph:
		           strbuf_strrchr(&container->string_content, '\n',
		                          strbuf_len(&container->string_content) - 2) < 0) {

			container->type = NODE_HEADER;
			container->as.header.level = lev;
			container->as.header.setext = true;
			offset = input.len - 1;

		} else if (!(container->type == NODE_PARAGRAPH && !all_matched) &&
		           (matched = scan_hrule(&input, first_nonspace))) {

			// it's only now that we know the line is not part of a setext header:
			container = add_child(parser, container, NODE_HRULE, parser->line_number, first_nonspace + 1);
			finalize(parser, container, parser->line_number);
			container = container->parent;
			offset = input.len - 1;

		} else if ((matched = parse_list_marker(&input, first_nonspace, &data))) {

			// compute padding:
			offset = first_nonspace + matched;
			i = 0;
			while (i <= 5 && peek_at(&input, offset + i) == ' ') {
				i++;
			}
			// i = number of spaces after marker, up to 5
			if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') {
				data->padding = matched + 1;
				if (i > 0) {
					offset += 1;
				}
			} else {
				data->padding = matched + i;
				offset += i;
			}

			// check container; if it's a list, see if this list item
			// can continue the list; otherwise, create a list container.

			data->marker_offset = indent;

			if (container->type != NODE_LIST ||
			    !lists_match(&container->as.list, data)) {
				container = add_child(parser, container, NODE_LIST, parser->line_number,
				                      first_nonspace + 1);

				memcpy(&container->as.list, data, sizeof(*data));
			}

			// add the list item
			container = add_child(parser, container, NODE_LIST_ITEM, parser->line_number,
			                      first_nonspace + 1);
			/* TODO: static */
			memcpy(&container->as.list, data, sizeof(*data));
			free(data);
		} else {
			break;
		}

		if (accepts_lines(container->type)) {
			// if it's a line container, it can't contain other containers
			break;
		}
	}

	// what remains at offset is a text line. add the text to the
	// appropriate container.

	first_nonspace = offset;
	while (peek_at(&input, first_nonspace) == ' ')
		first_nonspace++;

	indent = first_nonspace - offset;
	blank = peek_at(&input, first_nonspace) == '\n';

	// cmark_node quote lines are never blank as they start with >
	// and we don't count blanks in fenced code for purposes of tight/loose
	// lists or breaking out of lists. we also don't set last_line_blank
	// on an empty list item.
	container->last_line_blank = (blank &&
	                              container->type != NODE_BLOCK_QUOTE &&
	                              container->type != NODE_HEADER &&
	                              !(container->type == NODE_CODE_BLOCK &&
	                                container->as.code.fenced) &&
	                              !(container->type == NODE_LIST_ITEM &&
	                                container->first_child == NULL &&
	                                container->start_line == parser->line_number));

	// the flag is only kept on the innermost container
	cmark_node *cont = container;
	while (cont->parent) {
		cont->parent->last_line_blank = false;
		cont = cont->parent;
	}

	if (cur != last_matched_container &&
	    container == last_matched_container &&
	    !blank &&
	    cur->type == NODE_PARAGRAPH &&
	    strbuf_len(&cur->string_content) > 0) {

		// lazy continuation: the line extends the open paragraph
		add_line(cur, &input, offset);

	} else { // not a lazy continuation

		// finalize any blocks that were not matched and set cur to container:
		while (cur != last_matched_container) {
			finalize(parser, cur, parser->line_number);
			cur = cur->parent;
			assert(cur != NULL);
		}

		if (container->type == NODE_CODE_BLOCK &&
		    !container->as.code.fenced) {

			add_line(container, &input, offset);

		} else if (container->type == NODE_CODE_BLOCK &&
		           container->as.code.fenced) {
			matched = 0;

			if (indent <= 3 &&
			    peek_at(&input, first_nonspace) == container->as.code.fence_char) {
				int fence_len = scan_close_code_fence(&input, first_nonspace);
				if (fence_len > container->as.code.fence_length)
					matched = 1;
			}

			if (matched) {
				// if closing fence, don't add line to container; instead, close it:
				finalize(parser, container, parser->line_number);
				container = container->parent; // back up to parent
			} else {
				add_line(container, &input, offset);
			}

		} else if (container->type == NODE_HTML) {

			add_line(container, &input, offset);

		} else if (blank) {

			// ??? do nothing

		} else if (container->type == NODE_HEADER) {

			chop_trailing_hashtags(&input);
			add_line(container, &input, first_nonspace);
			finalize(parser, container, parser->line_number);
			container = container->parent;

		} else if (accepts_lines(container->type)) {

			add_line(container, &input, first_nonspace);

		} else if (container->type != NODE_HRULE &&
		           container->type != NODE_HEADER) {

			// create paragraph container for line
			container = add_child(parser, container, NODE_PARAGRAPH, parser->line_number, first_nonspace + 1);
			add_line(container, &input, first_nonspace);

		} else {
			assert(false);
		}

		parser->current = container;
	}
	strbuf_clear(parser->curline);
}
// Finish parsing: process any buffered partial line, close all open
// blocks, parse inline content, and return the document root. The parser's
// current-line buffer is released; the parser itself is not freed here.
cmark_node *cmark_parser_finish(cmark_parser *parser)
{
	cmark_node *root;

	// a last line that never received its newline is still pending
	if (parser->linebuf->size != 0) {
		S_process_line(parser, parser->linebuf->ptr,
		               parser->linebuf->size);
		strbuf_clear(parser->linebuf);
	}

	// finalize_document hands back parser->root
	root = finalize_document(parser);

	strbuf_free(parser->curline);

#if CMARK_DEBUG_NODES
	if (cmark_node_check(root, stderr)) {
		abort();
	}
#endif

	return root;
}

View File

@ -0,0 +1,375 @@
#include <stdarg.h>
#include <ctype.h>
#include <string.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "buffer.h"
/* Used as default value for strbuf->ptr so that people can always
* assume ptr is non-NULL and zero terminated even for new strbufs.
*/
unsigned char cmark_strbuf__initbuf[1];
/* Sentinel stored in strbuf->ptr once an allocation has failed; a buffer
* pointing here is reported as out-of-memory by cmark_strbuf_oom().
* Like the sentinel above it is never passed to free().
*/
unsigned char cmark_strbuf__oom[1];
/* Make sure strbuf `b` has room for `d` bytes; on allocation failure make
 * the *enclosing function* return -1. Use as a statement, followed by a
 * semicolon. Both arguments may be evaluated more than once.
 *
 * The macro now honours its `b` argument (it used to read a variable that
 * had to be called `buf`) and is wrapped in do/while(0) so that it is a
 * single statement, safe inside unbraced if/else.
 */
#define ENSURE_SIZE(b, d) \
	do { \
		if ((d) > (b)->asize && strbuf_grow((b), (d)) < 0) \
			return -1; \
	} while (0)
#ifndef MIN
/* Smaller of two values. Arguments are fully parenthesised so that
 * expressions with low-precedence operators (e.g. MIN(a | b, c)) are
 * compared as written. Each argument may be evaluated twice, so avoid
 * side effects. */
#define MIN(x,y) (((x) < (y)) ? (x) : (y))
#endif
/* Put `buf` into the empty state (no heap block, ptr at the shared
 * zero-terminated sentinel) and, when `initial_size` is non-zero,
 * pre-allocate that much capacity. Any previous contents are not freed. */
void cmark_strbuf_init(strbuf *buf, int initial_size)
{
	buf->ptr = cmark_strbuf__initbuf;
	buf->size = 0;
	buf->asize = 0;

	if (initial_size != 0)
		cmark_strbuf_grow(buf, initial_size);
}
/* Grow `buf` so that it can hold at least `target_size` bytes.
 *
 * Returns 0 on success (including when the buffer is already big enough)
 * and -1 if the buffer is already marked out-of-memory or the allocation
 * fails. On failure with `mark_oom` false the buffer is left untouched.
 * On failure with `mark_oom` true the buffer is poisoned: its heap block
 * is released (it used to be leaked), ptr is set to the OOM sentinel and
 * size/asize are reset to 0 so that no later operation can write into the
 * one-byte sentinel on the strength of a stale capacity.
 */
int cmark_strbuf_try_grow(strbuf *buf, int target_size, bool mark_oom)
{
	unsigned char *new_ptr;
	int new_size;

	if (buf->ptr == cmark_strbuf__oom)
		return -1;

	if (target_size <= buf->asize)
		return 0;

	if (buf->asize == 0) {
		/* ptr is a static sentinel: allocate a fresh block */
		new_size = target_size;
		new_ptr = NULL;
	} else {
		new_size = buf->asize;
		new_ptr = buf->ptr;
	}

	/* grow the buffer size by 1.5, until it's big enough
	 * to fit our target size */
	while (new_size < target_size)
		new_size = (new_size << 1) - (new_size >> 1);

	/* round allocation up to multiple of 8 */
	new_size = (new_size + 7) & ~7;

	new_ptr = (unsigned char *)realloc(new_ptr, new_size);

	if (!new_ptr) {
		if (mark_oom) {
			/* realloc failed, so the old block (if any) is
			 * still ours to release */
			if (buf->asize > 0)
				free(buf->ptr);
			buf->ptr = cmark_strbuf__oom;
			buf->size = 0;
			buf->asize = 0;
		}
		return -1;
	}

	buf->asize = new_size;
	buf->ptr = new_ptr;

	/* truncate the existing buffer size if necessary */
	if (buf->size >= buf->asize)
		buf->size = buf->asize - 1;
	buf->ptr[buf->size] = '\0';

	return 0;
}
/* Grow `buf` to hold at least `target_size` bytes; on allocation failure
 * the buffer is marked out-of-memory. Returns 0 on success, -1 on failure. */
int cmark_strbuf_grow(cmark_strbuf *buf, int target_size)
{
	const bool poison_on_failure = true;

	return cmark_strbuf_try_grow(buf, target_size, poison_on_failure);
}
/* True when `buf` has been marked out-of-memory, i.e. its ptr is the OOM
 * sentinel. */
bool cmark_strbuf_oom(const cmark_strbuf *buf)
{
	if (buf->ptr == cmark_strbuf__oom)
		return true;
	return false;
}
/* Number of content bytes in `buf`, not counting the terminating zero. */
size_t cmark_strbuf_len(const cmark_strbuf *buf)
{
	const size_t used = buf->size;

	return used;
}
/* Release the storage held by `buf` and return it to the empty state.
 * A NULL `buf` is ignored; the static sentinels are never freed. */
void cmark_strbuf_free(strbuf *buf)
{
	bool is_sentinel;

	if (buf == NULL)
		return;

	is_sentinel = (buf->ptr == cmark_strbuf__initbuf ||
	               buf->ptr == cmark_strbuf__oom);
	if (!is_sentinel)
		free(buf->ptr);

	cmark_strbuf_init(buf, 0);
}
/* Make `buf` empty while keeping its allocation. The terminator is only
 * written when the buffer owns storage (asize > 0). */
void cmark_strbuf_clear(strbuf *buf)
{
	if (buf->asize > 0)
		buf->ptr[0] = '\0';

	buf->size = 0;
}
/* Replace the contents of `buf` with `len` bytes from `data`.
 * A NULL `data` or non-positive `len` clears the buffer. When `data` is
 * the buffer's own storage no copy is made and only the length is
 * updated. Returns 0 on success, -1 if growing the buffer fails. */
int cmark_strbuf_set(strbuf *buf, const unsigned char *data, int len)
{
	if (data == NULL || len <= 0) {
		cmark_strbuf_clear(buf);
		return 0;
	}

	if (buf->ptr != data) {
		ENSURE_SIZE(buf, len + 1);
		memmove(buf->ptr, data, len);
	}

	buf->size = len;
	buf->ptr[len] = '\0';
	return 0;
}
/* Replace the contents of `buf` with the C string `string`; a NULL string
 * clears the buffer. Returns 0 on success, -1 on allocation failure. */
int cmark_strbuf_sets(strbuf *buf, const char *string)
{
	const unsigned char *bytes = (const unsigned char *)string;
	int len = 0;

	if (string != NULL)
		len = strlen(string);

	return cmark_strbuf_set(buf, bytes, len);
}
/* Append the single byte `c` to `buf`, keeping it zero-terminated.
 * Returns 0 on success, -1 if growing the buffer fails. */
int cmark_strbuf_putc(strbuf *buf, int c)
{
	/* room for the new byte plus the terminator */
	ENSURE_SIZE(buf, buf->size + 2);

	buf->ptr[buf->size] = c;
	buf->size += 1;
	buf->ptr[buf->size] = '\0';
	return 0;
}
/* Append `len` bytes from `data` to `buf`, keeping it zero-terminated.
 * A non-positive `len` is a no-op. Returns 0 on success, -1 if growing
 * the buffer fails. */
int cmark_strbuf_put(strbuf *buf, const unsigned char *data, int len)
{
	if (len > 0) {
		ENSURE_SIZE(buf, buf->size + len + 1);
		memmove(buf->ptr + buf->size, data, len);
		buf->size += len;
		buf->ptr[buf->size] = '\0';
	}
	return 0;
}
/* Append the C string `string` (which must not be NULL) to `buf`.
 * Returns 0 on success, -1 on allocation failure. */
int cmark_strbuf_puts(strbuf *buf, const char *string)
{
	const size_t n = strlen(string);

	return cmark_strbuf_put(buf, (const unsigned char *)string, n);
}
/* Append printf-style formatted output to `buf`.
 *
 * The buffer is first grown to a guess based on the format length; if the
 * output does not fit, it is grown to the exact size reported by
 * vsnprintf and formatted again. Returns 0 on success. Returns -1 if the
 * buffer is already marked out-of-memory, if growing fails, or if
 * vsnprintf reports an error; in the last case the buffer is marked
 * out-of-memory and its contents are discarded.
 *
 * `ap` is not consumed: every formatting attempt works on its own
 * va_copy, because a va_list may not be reused after it has been passed
 * to vsnprintf.
 */
int cmark_strbuf_vprintf(strbuf *buf, const char *format, va_list ap)
{
	const int expected_size = buf->size + (strlen(format) * 2);
	int len;

	if (buf->ptr == cmark_strbuf__oom)
		return -1;

	ENSURE_SIZE(buf, expected_size);

	while (1) {
		va_list args;

		va_copy(args, ap);
		len = vsnprintf(
		          (char *)buf->ptr + buf->size,
		          buf->asize - buf->size,
		          format, args
		      );
		va_end(args);

		if (len < 0) {
			/* never hand a static sentinel to free() */
			if (buf->ptr != cmark_strbuf__initbuf &&
			    buf->ptr != cmark_strbuf__oom)
				free(buf->ptr);
			buf->ptr = cmark_strbuf__oom;
			/* no capacity may be claimed for the sentinel */
			buf->size = 0;
			buf->asize = 0;
			return -1;
		}

		if (len + 1 <= buf->asize - buf->size) {
			/* output and terminator fit */
			buf->size += len;
			break;
		}

		/* too small: grow to the exact size and format again */
		ENSURE_SIZE(buf, buf->size + len + 1);
	}

	return 0;
}
/* Append printf-style formatted output to `buf`; variadic front end to
 * cmark_strbuf_vprintf, whose result it returns. */
int cmark_strbuf_printf(strbuf *buf, const char *format, ...)
{
	va_list args;
	int status;

	va_start(args, format);
	status = cmark_strbuf_vprintf(buf, format, args);
	va_end(args);

	return status;
}
/* Copy the contents of `buf` into the caller's array `data` of
 * `datasize` bytes, truncating if necessary; the result is always
 * zero-terminated. All three arguments must be non-NULL / non-zero. */
void cmark_strbuf_copy_cstr(char *data, int datasize, const strbuf *buf)
{
	int room;
	int count;

	assert(data && datasize && buf);

	data[0] = '\0';

	/* nothing to copy from an empty or unallocated buffer */
	if (buf->size == 0 || buf->asize <= 0)
		return;

	room = datasize - 1;
	count = (buf->size > room) ? room : buf->size;

	memmove(data, buf->ptr, count);
	data[count] = '\0';
}
/* Exchange the complete state (pointer, size, capacity) of two buffers. */
void cmark_strbuf_swap(strbuf *buf_a, strbuf *buf_b)
{
	strbuf saved_b = *buf_b;

	*buf_b = *buf_a;
	*buf_a = saved_b;
}
/* Hand the buffer's storage over to the caller and reset `buf` to the
 * empty state. When the buffer owns no storage (or is marked
 * out-of-memory) a newly allocated empty string is returned instead and
 * `buf` is left as it is. The result comes from the C allocator and may
 * be NULL if that allocation fails. */
unsigned char *cmark_strbuf_detach(strbuf *buf)
{
	unsigned char *owned;

	if (buf->asize == 0 || buf->ptr == cmark_strbuf__oom) {
		/* return an empty string */
		return (unsigned char *)calloc(1, 1);
	}

	owned = buf->ptr;
	cmark_strbuf_init(buf, 0);
	return owned;
}
/* Free the current contents of `buf` and make it take over the
 * zero-terminated string `ptr`. `asize` is the capacity of `ptr`; when it
 * is 0 or smaller than the string length, strlen + 1 is used instead.
 * With a NULL `ptr` the buffer is simply grown to `asize`. */
void cmark_strbuf_attach(strbuf *buf, unsigned char *ptr, int asize)
{
	cmark_strbuf_free(buf);

	if (ptr == NULL) {
		cmark_strbuf_grow(buf, asize);
		return;
	}

	buf->ptr = ptr;
	buf->size = strlen((char *)ptr);

	/* pass 0 to fall back on strlen + 1 */
	if (asize == 0 || asize < buf->size)
		buf->asize = buf->size + 1;
	else
		buf->asize = asize;
}
/* Compare two buffers byte-wise over their common length; on a tie the
 * shorter buffer sorts first. Returns the memcmp result when the common
 * prefix differs, otherwise -1, 0 or 1. */
int cmark_strbuf_cmp(const strbuf *a, const strbuf *b)
{
	const int shared = (a->size < b->size) ? a->size : b->size;
	const int order = memcmp(a->ptr, b->ptr, shared);

	if (order != 0)
		return order;
	if (a->size < b->size)
		return -1;
	return (a->size > b->size) ? 1 : 0;
}
/* Find the first occurrence of byte `c` in `buf` at or after index `pos`.
 * Returns the index of the match, or -1 when there is none. A `pos`
 * outside [0, size] also yields -1; previously it produced a negative
 * length that memchr interpreted as a huge unsigned count. */
int cmark_strbuf_strchr(const strbuf *buf, int c, int pos)
{
	const unsigned char *p;

	if (pos < 0 || pos > buf->size)
		return -1;

	p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos);
	if (!p)
		return -1;

	return (int)(p - (const unsigned char *)buf->ptr);
}
int cmark_strbuf_strrchr(const strbuf *buf, int c, int pos)
{
int i;
for (i = pos; i >= 0; i--) {
if (buf->ptr[i] == (unsigned char) c)
return i;
}
return -1;
}
/* Shorten `buf` to `len` bytes; does nothing when the buffer is already
 * that short. A negative `len` is treated as 0 (it used to write the
 * terminator in front of the buffer). */
void cmark_strbuf_truncate(strbuf *buf, int len)
{
	if (len < 0)
		len = 0;

	if (len < buf->size) {
		buf->size = len;
		buf->ptr[buf->size] = '\0';
	}
}
/* Remove the first `n` bytes of `buf`, shifting the rest to the front.
 * A non-positive `n` is a no-op; an `n` larger than the contents empties
 * the buffer (it used to leave a negative size and call memmove with a
 * negative length). */
void cmark_strbuf_drop(strbuf *buf, int n)
{
	if (n > 0) {
		if (n > buf->size)
			n = buf->size;
		buf->size = buf->size - n;
		if (buf->size)
			memmove(buf->ptr, buf->ptr + n, buf->size);

		buf->ptr[buf->size] = '\0';
	}
}
/* Remove trailing whitespace (as classified by isspace) from `buf`. */
void cmark_strbuf_rtrim(strbuf *buf)
{
	if (buf->size == 0)
		return;

	while (buf->size > 0 && isspace(buf->ptr[buf->size - 1]))
		buf->size--;

	buf->ptr[buf->size] = '\0';
}
/* Remove leading and trailing whitespace (as classified by isspace) from
 * `buf`. */
void cmark_strbuf_trim(strbuf *buf)
{
	int lead;

	if (buf->size == 0)
		return;

	/* count the whitespace at the front */
	for (lead = 0; lead < buf->size && isspace(buf->ptr[lead]); lead++)
		;

	cmark_strbuf_drop(buf, lead);
	cmark_strbuf_rtrim(buf);
}
// Collapse, in place, every run of space and newline characters in `s`
// into a single space, and shorten the buffer accordingly.
void cmark_strbuf_normalize_whitespace(strbuf *s)
{
	bool prev_was_ws = false;
	int src;
	int dst = 0;

	for (src = 0; src < s->size; ++src) {
		const unsigned char ch = s->ptr[src];
		const bool is_ws = (ch == ' ' || ch == '\n');

		// only the first character of a whitespace run is kept
		if (is_ws && prev_was_ws)
			continue;

		s->ptr[dst++] = is_ws ? ' ' : ch;
		prev_was_ws = is_ws;
	}

	cmark_strbuf_truncate(s, dst);
}
// Destructively unescape a string: remove backslashes before punctuation chars.
//
// A backslash that escapes a punctuation character is dropped and the
// escaped character is copied verbatim. The escaped character is consumed
// together with its backslash, so an escaped backslash cannot act as an
// escaper itself: `\\*` becomes `\*` (it used to become `*`).
extern void cmark_strbuf_unescape(strbuf *buf)
{
	int r, w;

	for (r = 0, w = 0; r < buf->size; ++r) {
		// ptr[size] is the zero terminator, which is not punctuation,
		// so looking one byte ahead is safe and r stays below size
		if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1]))
			r++;

		buf->ptr[w++] = buf->ptr[r];
	}

	cmark_strbuf_truncate(buf, w);
}

View File

@ -0,0 +1,177 @@
#ifndef CMARK_BUFFER_H
#define CMARK_BUFFER_H
#include <stddef.h>
#include <stdarg.h>
#include "config.h"
#include "cmark_export.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Growable, zero-terminated byte buffer. */
typedef struct {
/* contents; points at a static sentinel while nothing is allocated */
unsigned char *ptr;
/* asize: allocated capacity in bytes (0 = none); size: bytes in use,
 * not counting the terminating zero */
int asize, size;
} cmark_strbuf;
/* Sentinel used as ptr of an empty buffer that owns no storage. */
CMARK_EXPORT
extern unsigned char cmark_strbuf__initbuf[];
/* Sentinel used as ptr of a buffer whose allocation has failed. */
CMARK_EXPORT
extern unsigned char cmark_strbuf__oom[];
/* Static initializer for an empty cmark_strbuf (ptr, asize, size). */
#define CMARK_GH_BUF_INIT { cmark_strbuf__initbuf, 0, 0 }
/**
* Initialize a strbuf structure.
*
* For the cases where CMARK_GH_BUF_INIT cannot be used to do static
* initialization. If `initial_size` is non-zero the buffer is grown to
* that capacity right away.
*/
CMARK_EXPORT
void cmark_strbuf_init(cmark_strbuf *buf, int initial_size);
/**
* Attempt to grow the buffer to hold at least `target_size` bytes.
*
* If the allocation fails, this will return an error. If mark_oom is true,
* this will mark the buffer as invalid for future operations; if false,
* existing buffer content will be preserved, but calling code must handle
* the fact that the buffer was not expanded.
*
* @return 0 on success or -1 on failure
*/
CMARK_EXPORT
int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom);
/**
* Grow the buffer to hold at least `target_size` bytes.
*
* If the allocation fails, this will return an error and the buffer will be
* marked as invalid for future operations, invalidating its contents.
*
* @return 0 on success or -1 on failure
*/
CMARK_EXPORT
int cmark_strbuf_grow(cmark_strbuf *buf, int target_size);
/** Release the buffer's storage and reset it to the empty state. */
CMARK_EXPORT
void cmark_strbuf_free(cmark_strbuf *buf);
/** Exchange the complete contents of two buffers. */
CMARK_EXPORT
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
/**
* Test if there have been any reallocation failures with this strbuf.
*
* Any function that writes to a strbuf can fail due to memory allocation
* issues. If one fails, the strbuf will be marked with an OOM error and
* further calls to modify the buffer will fail. Check strbuf_oom() at the
* end of your sequence and it will be true if you ran out of memory at any
* point with that buffer.
*
* @return false if no error, true if allocation error
*/
CMARK_EXPORT
bool cmark_strbuf_oom(const cmark_strbuf *buf);
/** Number of bytes in use, not counting the terminating zero. */
CMARK_EXPORT
size_t cmark_strbuf_len(const cmark_strbuf *buf);
/** Byte-wise comparison; on an equal common prefix the shorter buffer
* sorts first. */
CMARK_EXPORT
int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b);
/** Free the buffer's contents and make it take over the zero-terminated
* string `ptr` with capacity `asize` (0 means strlen + 1). */
CMARK_EXPORT
void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize);
/** Hand the buffer's storage to the caller and reset the buffer; a buffer
* without storage yields a newly allocated empty string. */
CMARK_EXPORT
unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
/** Copy the contents into `data` (capacity `datasize`), truncating if
* needed and always zero-terminating. */
CMARK_EXPORT
void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf);
/* Read-only view of the buffer contents as a C string. The pointer refers
 * to the buffer's own storage and is not a copy. */
static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf)
{
	const unsigned char *raw = buf->ptr;

	return (const char *)raw;
}
/* Byte at index `n` of `buf`; no bounds checking is performed. */
#define cmark_strbuf_at(buf, n) ((buf)->ptr[n])
/*
* Functions below that return int value error codes will return 0 on
* success or -1 on failure (which generally means an allocation failed).
* Using a strbuf where the allocation has failed will result in -1 from
* all further calls using that buffer. As a result, you can ignore the
* return code of these functions and call them in a series then just call
* strbuf_oom at the end.
*/
/* Replace the contents with `len` bytes from `data`. */
CMARK_EXPORT
int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len);
/* Replace the contents with the C string `string` (NULL clears). */
CMARK_EXPORT
int cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
/* Append one byte. */
CMARK_EXPORT
int cmark_strbuf_putc(cmark_strbuf *buf, int c);
/* Append `len` bytes from `data`. */
CMARK_EXPORT
int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len);
/* Append the C string `string`. */
CMARK_EXPORT
int cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
/* Append printf-style formatted output. */
CMARK_EXPORT
int cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...)
CMARK_ATTRIBUTE((format (printf, 2, 3)));
/* Same as cmark_strbuf_printf, taking a va_list. */
CMARK_EXPORT
int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap);
/* Make the buffer empty without releasing its storage. */
CMARK_EXPORT
void cmark_strbuf_clear(cmark_strbuf *buf);
/* Index of the first `c` at or after `pos`, or -1. */
CMARK_EXPORT
int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos);
/* Index of the last `c` at or before `pos`, or -1. */
CMARK_EXPORT
int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos);
/* Remove the first `n` bytes. */
CMARK_EXPORT
void cmark_strbuf_drop(cmark_strbuf *buf, int n);
/* Shorten the contents to `len` bytes if they are longer. */
CMARK_EXPORT
void cmark_strbuf_truncate(cmark_strbuf *buf, int len);
/* Remove trailing whitespace. */
CMARK_EXPORT
void cmark_strbuf_rtrim(cmark_strbuf *buf);
/* Remove leading and trailing whitespace. */
CMARK_EXPORT
void cmark_strbuf_trim(cmark_strbuf *buf);
/* Collapse runs of spaces and newlines into a single space. */
CMARK_EXPORT
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
/* Remove backslashes that precede punctuation characters. */
CMARK_EXPORT
void cmark_strbuf_unescape(cmark_strbuf *s);
// Convenience macros: short, unprefixed aliases for the cmark_strbuf type,
// its sentinels and its functions. Every alias maps one-to-one onto the
// cmark_-prefixed name.
#define strbuf cmark_strbuf
#define strbuf__initbuf cmark_strbuf__initbuf
#define strbuf__oom cmark_strbuf__oom
#define GH_BUF_INIT CMARK_GH_BUF_INIT
#define strbuf_init cmark_strbuf_init
#define strbuf_try_grow cmark_strbuf_try_grow
#define strbuf_grow cmark_strbuf_grow
#define strbuf_free cmark_strbuf_free
#define strbuf_swap cmark_strbuf_swap
#define strbuf_oom cmark_strbuf_oom
#define strbuf_len cmark_strbuf_len
#define strbuf_cmp cmark_strbuf_cmp
#define strbuf_attach cmark_strbuf_attach
#define strbuf_detach cmark_strbuf_detach
#define strbuf_copy_cstr cmark_strbuf_copy_cstr
#define strbuf_at cmark_strbuf_at
#define strbuf_set cmark_strbuf_set
#define strbuf_sets cmark_strbuf_sets
#define strbuf_putc cmark_strbuf_putc
#define strbuf_put cmark_strbuf_put
#define strbuf_puts cmark_strbuf_puts
#define strbuf_printf cmark_strbuf_printf
#define strbuf_vprintf cmark_strbuf_vprintf
#define strbuf_clear cmark_strbuf_clear
#define strbuf_strchr cmark_strbuf_strchr
#define strbuf_strrchr cmark_strbuf_strrchr
#define strbuf_drop cmark_strbuf_drop
#define strbuf_truncate cmark_strbuf_truncate
#define strbuf_rtrim cmark_strbuf_rtrim
#define strbuf_trim cmark_strbuf_trim
#define strbuf_normalize_whitespace cmark_strbuf_normalize_whitespace
#define strbuf_unescape cmark_strbuf_unescape
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,122 @@
#ifndef CMARK_CHUNK_H
#define CMARK_CHUNK_H
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <assert.h>
#include "buffer.h"
// A run of bytes that is either a view into memory owned elsewhere
// (alloc == 0) or a heap block owned by the chunk (alloc != 0).
typedef struct {
unsigned char *data; // first byte of the run
int len; // number of bytes in the run
int alloc; // also implies a NUL-terminated string
} cmark_chunk;
// Release the chunk's data if the chunk owns it, and reset the chunk to
// an empty, non-owning state.
static inline void cmark_chunk_free(cmark_chunk *c)
{
	if (c->alloc)
		free(c->data);

	c->len = 0;
	c->alloc = 0;
	c->data = NULL;
}
// Skip leading whitespace by advancing the start of the view. Only valid
// for chunks that do not own their data, since the data pointer moves.
static inline void cmark_chunk_ltrim(cmark_chunk *c)
{
	assert(!c->alloc);

	for (; c->len && isspace(c->data[0]); c->len--)
		c->data++;
}
// Drop trailing whitespace by shortening the chunk; the bytes themselves
// are not modified.
static inline void cmark_chunk_rtrim(cmark_chunk *c)
{
	while (c->len > 0 && isspace(c->data[c->len - 1]))
		c->len--;
}
// Strip whitespace from both ends of a non-owning chunk: first the
// leading run, then the trailing one.
static inline void cmark_chunk_trim(cmark_chunk *c)
{
	cmark_chunk_ltrim(c);   // asserts that the chunk is a view
	cmark_chunk_rtrim(c);
}
// Index of the first byte `c` in `ch` at or after `offset`. When there is
// no such byte the chunk length is returned (not -1).
static inline int cmark_chunk_strchr(cmark_chunk *ch, int c, int offset)
{
	const unsigned char *hit =
	    (unsigned char *)memchr(ch->data + offset, c, ch->len - offset);

	if (hit == NULL)
		return ch->len;
	return (int)(hit - ch->data);
}
// Return the chunk's contents as a NUL-terminated string. A chunk that
// already owns its data returns it directly; otherwise the bytes are
// copied into a new heap block which the chunk then owns. If that
// allocation fails the chunk's data becomes NULL and NULL is returned.
static inline const char *cmark_chunk_to_cstr(cmark_chunk *c)
{
	unsigned char *copy;

	if (c->alloc)
		return (char *)c->data;

	copy = (unsigned char *)malloc(c->len + 1);
	if (copy != NULL) {
		memcpy(copy, c->data, c->len);
		copy[c->len] = 0;
	}

	// from here on the chunk owns `copy`
	c->data = copy;
	c->alloc = 1;

	return (char *)copy;
}
// Make the chunk own a private copy of the C string `str`, releasing any
// data it owned before.
//
// The copy is made before the old data is released, so `str` may point
// into the chunk's current data. If the copy cannot be allocated the
// chunk becomes empty and non-owning (data == NULL, len == 0) instead of
// copying into a NULL pointer.
static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str)
{
	const size_t n = strlen(str);
	unsigned char *copy = (unsigned char *)malloc(n + 1);

	if (copy != NULL)
		memcpy(copy, str, n + 1);   // includes the terminator

	if (c->alloc) {
		free(c->data);
	}

	c->data = copy;
	c->len = (copy != NULL) ? (int)n : 0;
	c->alloc = (copy != NULL) ? 1 : 0;
}
static inline cmark_chunk cmark_chunk_literal(const char *data)
{
cmark_chunk c = {(unsigned char *)data, data ? strlen(data) : 0, 0};
return c;
}
static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, int pos, int len)
{
cmark_chunk c = {ch->data + pos, len, 0};
return c;
}
// Move the contents of `buf` into a new owning chunk, using
// cmark_strbuf_detach to take the storage out of the buffer.
static inline cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf)
{
	// the length is read before detaching
	const int used = buf->size;
	cmark_chunk c;

	c.data = cmark_strbuf_detach(buf);
	c.len = used;
	c.alloc = 1;
	return c;
}
// Convenience macros: short, unprefixed aliases for the cmark_chunk type
// and its helpers (cmark_chunk_set_cstr has no alias).
#define chunk cmark_chunk
#define chunk_free cmark_chunk_free
#define chunk_ltrim cmark_chunk_ltrim
#define chunk_rtrim cmark_chunk_rtrim
#define chunk_trim cmark_chunk_trim
#define chunk_strchr cmark_chunk_strchr
#define chunk_to_cstr cmark_chunk_to_cstr
#define chunk_literal cmark_chunk_literal
#define chunk_dup cmark_chunk_dup
#define chunk_buf_detach cmark_chunk_buf_detach
#endif

View File

@ -0,0 +1,21 @@
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include "node.h"
#include "html/houdini.h"
#include "cmark.h"
#include "buffer.h"
// Convert `len` bytes of CommonMark text to HTML in one step: parse the
// text into a document tree, render the tree, free the tree, and return
// the rendered string.
char *cmark_markdown_to_html(const char *text, int len)
{
	cmark_node *doc = cmark_parse_document(text, len);
	char *html = cmark_render_html(doc);

	// the tree is only needed while rendering
	cmark_node_free(doc);

	return html;
}

View File

@ -0,0 +1,343 @@
#ifndef CMARK_H
#define CMARK_H
#include <stdio.h>
#include "cmark_export.h"
#ifdef __cplusplus
extern "C" {
#endif
/** .SH NAME
*
* .B cmark
* \- CommonMark parsing, manipulating, and rendering
*/
/** .SH SIMPLE INTERFACE
*/
/** Current version of library.
*/
#define CMARK_VERSION "0.1"
/** Convert 'text' (assumed to be a UTF-8 encoded string with length
* 'len') from CommonMark Markdown to HTML, returning a null-terminated,
* UTF-8-encoded string.
*/
CMARK_EXPORT
char *cmark_markdown_to_html(const char *text, int len);
/** .SH NODE STRUCTURE
*/
/**
*/
typedef enum {
/* Block */
/* Block-level node kinds. The enumerators below are consecutive, starting
 * at CMARK_NODE_DOCUMENT and ending at CMARK_NODE_REFERENCE_DEF. */
CMARK_NODE_DOCUMENT,
CMARK_NODE_BLOCK_QUOTE,
CMARK_NODE_LIST,
CMARK_NODE_LIST_ITEM,
CMARK_NODE_CODE_BLOCK,
CMARK_NODE_HTML,
CMARK_NODE_PARAGRAPH,
CMARK_NODE_HEADER,
CMARK_NODE_HRULE,
CMARK_NODE_REFERENCE_DEF,
/* Range markers: aliases for the first and last block kind, so a type t is
 * a block kind when FIRST_BLOCK <= t <= LAST_BLOCK. They add no new value. */
CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
CMARK_NODE_LAST_BLOCK = CMARK_NODE_REFERENCE_DEF,
/* Inline */
/* Inline node kinds. Numbering continues from the last block kind because
 * the preceding alias was assigned CMARK_NODE_REFERENCE_DEF. */
CMARK_NODE_TEXT,
CMARK_NODE_SOFTBREAK,
CMARK_NODE_LINEBREAK,
CMARK_NODE_INLINE_CODE,
CMARK_NODE_INLINE_HTML,
CMARK_NODE_EMPH,
CMARK_NODE_STRONG,
CMARK_NODE_LINK,
CMARK_NODE_IMAGE,
/* Range markers for the inline kinds, analogous to the block ones above. */
CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT,
CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE,
} cmark_node_type;
/**
*/
/* Kind of a list node. CMARK_NO_LIST is the zero value.
 * NOTE(review): presumably CMARK_NO_LIST is what list accessors report for
 * nodes that are not lists -- confirm against the node implementation. */
typedef enum {
CMARK_NO_LIST,
CMARK_BULLET_LIST,
CMARK_ORDERED_LIST
} cmark_list_type;
/**
*/
/* Delimiter kinds.
 * NOTE(review): presumably the punctuation following an ordered-list number
 * ("1." versus "1)") -- no use of this enum is visible here, confirm. */
typedef enum {
CMARK_PERIOD_DELIM,
CMARK_PAREN_DELIM
} cmark_delim_type;
typedef struct cmark_node cmark_node;
typedef struct cmark_parser cmark_parser;
/**
* .SH CREATING AND DESTROYING NODES
*/
/**
*/
CMARK_EXPORT cmark_node*
cmark_node_new(cmark_node_type type);
/**
*/
CMARK_EXPORT void
cmark_node_free(cmark_node *node);
/**
* .SH TREE TRAVERSAL
*/
CMARK_EXPORT cmark_node*
cmark_node_next(cmark_node *node);
/**
*/
CMARK_EXPORT cmark_node*
cmark_node_previous(cmark_node *node);
/**
*/
CMARK_EXPORT cmark_node*
cmark_node_parent(cmark_node *node);
/**
*/
CMARK_EXPORT cmark_node*
cmark_node_first_child(cmark_node *node);
/**
*/
CMARK_EXPORT cmark_node*
cmark_node_last_child(cmark_node *node);
/**
* .SH ACCESSORS
*/
/**
*/
CMARK_EXPORT cmark_node_type
cmark_node_get_type(cmark_node *node);
/**
*/
CMARK_EXPORT const char*
cmark_node_get_string_content(cmark_node *node);
/**
*/
CMARK_EXPORT int
cmark_node_set_string_content(cmark_node *node, const char *content);
/**
*/
CMARK_EXPORT int
cmark_node_get_header_level(cmark_node *node);
/**
*/
CMARK_EXPORT int
cmark_node_set_header_level(cmark_node *node, int level);
/**
*/
CMARK_EXPORT cmark_list_type
cmark_node_get_list_type(cmark_node *node);
/**
*/
CMARK_EXPORT int
cmark_node_set_list_type(cmark_node *node, cmark_list_type type);
/**
*/
CMARK_EXPORT int
cmark_node_get_list_start(cmark_node *node);
/**
*/
CMARK_EXPORT int
cmark_node_set_list_start(cmark_node *node, int start);
/**
*/
CMARK_EXPORT int
cmark_node_get_list_tight(cmark_node *node);
/**
*/
CMARK_EXPORT int
cmark_node_set_list_tight(cmark_node *node, int tight);
/**
*/
CMARK_EXPORT const char*
cmark_node_get_fence_info(cmark_node *node);
/**
*/
CMARK_EXPORT int
cmark_node_set_fence_info(cmark_node *node, const char *info);
/**
*/
CMARK_EXPORT const char*
cmark_node_get_url(cmark_node *node);
/**
*/
CMARK_EXPORT int
cmark_node_set_url(cmark_node *node, const char *url);
/**
*/
CMARK_EXPORT const char*
cmark_node_get_title(cmark_node *node);
/**
*/
CMARK_EXPORT int
cmark_node_set_title(cmark_node *node, const char *title);
/**
*/
CMARK_EXPORT int
cmark_node_get_start_line(cmark_node *node);
/**
*/
CMARK_EXPORT int
cmark_node_get_start_column(cmark_node *node);
/**
*/
CMARK_EXPORT int
cmark_node_get_end_line(cmark_node *node);
/**
* .SH TREE MANIPULATION
*/
/**
*/
CMARK_EXPORT void
cmark_node_unlink(cmark_node *node);
/**
*/
CMARK_EXPORT int
cmark_node_insert_before(cmark_node *node, cmark_node *sibling);
/**
*/
CMARK_EXPORT int
cmark_node_insert_after(cmark_node *node, cmark_node *sibling);
/**
*/
CMARK_EXPORT int
cmark_node_prepend_child(cmark_node *node, cmark_node *child);
/**
*/
CMARK_EXPORT int
cmark_node_append_child(cmark_node *node, cmark_node *child);
/**
* .SH PARSING
*/
/** Takes no arguments and returns a 'cmark_parser' pointer.
 * (The parameter list is spelled '(void)' so that C compilers treat this
 * declaration as a prototype and reject calls that pass arguments.)
 */
CMARK_EXPORT
cmark_parser *cmark_parser_new(void);
/**
*/
CMARK_EXPORT
void cmark_parser_free(cmark_parser *parser);
/**
*/
CMARK_EXPORT
cmark_node *cmark_parser_finish(cmark_parser *parser);
/**
*/
CMARK_EXPORT
void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
/**
*/
CMARK_EXPORT
cmark_node *cmark_parse_document(const char *buffer, size_t len);
/**
*/
CMARK_EXPORT
cmark_node *cmark_parse_file(FILE *f);
/**
* .SH RENDERING
*/
/**
*/
CMARK_EXPORT
char *cmark_render_ast(cmark_node *root);
/**
*/
CMARK_EXPORT
char *cmark_render_html(cmark_node *root);
/** .SH AUTHORS
*
* John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
*/
/* Short aliases without the CMARK_ prefix for the node-type, list-type and
 * delimiter enumerators. Define CMARK_NO_SHORT_NAMES before including this
 * header to suppress them (e.g. to avoid clashes with other identifiers). */
#ifndef CMARK_NO_SHORT_NAMES
#define NODE_DOCUMENT CMARK_NODE_DOCUMENT
#define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE
#define NODE_LIST CMARK_NODE_LIST
#define NODE_LIST_ITEM CMARK_NODE_LIST_ITEM
#define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK
#define NODE_HTML CMARK_NODE_HTML
#define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH
#define NODE_HEADER CMARK_NODE_HEADER
#define NODE_HRULE CMARK_NODE_HRULE
#define NODE_REFERENCE_DEF CMARK_NODE_REFERENCE_DEF
#define NODE_TEXT CMARK_NODE_TEXT
#define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK
#define NODE_LINEBREAK CMARK_NODE_LINEBREAK
#define NODE_INLINE_CODE CMARK_NODE_INLINE_CODE
#define NODE_INLINE_HTML CMARK_NODE_INLINE_HTML
#define NODE_EMPH CMARK_NODE_EMPH
#define NODE_STRONG CMARK_NODE_STRONG
#define NODE_LINK CMARK_NODE_LINK
#define NODE_IMAGE CMARK_NODE_IMAGE
/* NOTE(review): CMARK_NODE_LINK_LABEL is not an enumerator of
 * cmark_node_type as declared in this header, so any use of NODE_LINK_LABEL
 * would expand to an undeclared identifier. Looks like a leftover -- confirm
 * and remove, or add the enumerator. */
#define NODE_LINK_LABEL CMARK_NODE_LINK_LABEL
#define BULLET_LIST CMARK_BULLET_LIST
#define ORDERED_LIST CMARK_ORDERED_LIST
#define PERIOD_DELIM CMARK_PERIOD_DELIM
#define PAREN_DELIM CMARK_PAREN_DELIM
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,49 @@
#cmakedefine HAVE_STDBOOL_H
#ifdef HAVE_STDBOOL_H
#include <stdbool.h>
#elif !defined(__cplusplus)
typedef char bool;
#endif
#cmakedefine HAVE___BUILTIN_EXPECT
#cmakedefine HAVE___ATTRIBUTE__
#ifdef HAVE___ATTRIBUTE__
#define CMARK_ATTRIBUTE(list) __attribute__ (list)
#else
#define CMARK_ATTRIBUTE(list)
#endif
/* Allocator redirection. Inside this library every call spelled malloc /
 * calloc / realloc / free is rewritten by the macros below into the
 * corresponding u3a_* function, whose prototypes are repeated here.
 * The '#if 1' makes the block unconditional; the U3_ALL guard keeps it from
 * being processed twice.
 * NOTE(review): U3_ALL is presumably also the include guard of the host
 * project's umbrella header, which would make this block a no-op when that
 * header has already been seen -- confirm.
 * NOTE(review): the prototypes use size_t, but nothing visible in this file
 * includes a header that defines it; this relies on the including source
 * having pulled in <stddef.h>/<stdlib.h> first. Because the macros rename
 * malloc & co., any system header declaring those functions must also be
 * included BEFORE this one.
 */
#if 1
#ifndef U3_ALL
#define U3_ALL
#define malloc(a) u3a_malloc(a)
#define calloc(a,b) u3a_calloc(a,b)
#define realloc(a,b) u3a_realloc(a,b)
#define free(a) u3a_free(a)
/* From i/n/a.h
*/
/* u3a_malloc(): aligned storage measured in bytes.
*/
void*
u3a_malloc(size_t len_i);
/* u3a_calloc(): aligned storage measured in bytes.
*/
void*
u3a_calloc(size_t num_i, size_t len_i);
/* u3a_realloc(): aligned realloc in bytes.
*/
void*
u3a_realloc(void* lag_v, size_t len_i);
/* u3a_free(): free for aligned malloc.
*/
void
u3a_free(void* tox_v);
#endif
#endif

View File

@ -0,0 +1,36 @@
#ifndef CMARK_DEBUG_H
#define CMARK_DEBUG_H
#include <stdio.h>
#include <errno.h>
#include <string.h>
#ifdef NDEBUG
#define debug(M, ...)
#else
#define debug(M, ...) \
fprintf(stderr, "DEBUG %s:%d: " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
#endif
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
#define log_err(M, ...) \
fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
clean_errno(), ##__VA_ARGS__)
#define log_warn(M, ...) \
fprintf(stderr, "[WARN] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
clean_errno(), ##__VA_ARGS__)
#define log_info(M, ...) fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, \
__LINE__, ##__VA_ARGS__)
#define check(A, M, ...) \
if(!(A)) { log_err(M, ##__VA_ARGS__); errno=0; goto error; }
#define sentinel(M, ...) \
{ log_err(M, ##__VA_ARGS__); errno=0; goto error; }
#define check_debug(A, M, ...) \
if(!(A)) { debug(M, ##__VA_ARGS__); errno=0; goto error; }
#endif

View File

@ -0,0 +1,52 @@
#ifndef CMARK_HOUDINI_H
#define CMARK_HOUDINI_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include "config.h"
#include "buffer.h"
#ifdef HAVE___BUILTIN_EXPECT
# define likely(x) __builtin_expect((x),1)
# define unlikely(x) __builtin_expect((x),0)
#else
# define likely(x) (x)
# define unlikely(x) (x)
#endif
#ifdef HOUDINI_USE_LOCALE
# define _isxdigit(c) isxdigit(c)
# define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale.
 *
 * _isxdigit(c): non-zero iff c is one of 0-9, A-F, a-f.
 * _isdigit(c):  non-zero iff c is one of 0-9 (evaluates c twice).
 *
 * _isxdigit searches the digit string with memchr() and an explicit length
 * that excludes the terminating NUL. A strchr()-based search would also
 * match c == 0, because strchr treats the terminator as part of the string,
 * and would therefore classify a NUL byte as a hexadecimal digit.
 * */
# define _isxdigit(c) \
    (memchr("0123456789ABCDEFabcdef", (c), \
            sizeof("0123456789ABCDEFabcdef") - 1) != NULL)
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
extern size_t houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure);
extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size);
extern void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_unescape_uri(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_unescape_url(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_js(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_unescape_js(strbuf *ob, const uint8_t *src, size_t size);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,107 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "html/houdini.h"
/*
* The following characters will not be escaped:
*
* -_.+!*'(),%#@?=;:/,+&$ alphanum
*
* Note that this character set is the addition of:
*
* - The characters which are safe to be in an URL
* - The characters which are *not* safe to be in
* an URL because they are RESERVED characters.
*
* We assume (lazily) that any RESERVED char that
* appears inside an URL is actually meant to
* have its native function (i.e. as an URL
* component/separator) and hence needs no escaping.
*
* There are two exceptions: the characters & (amp)
* and ' (single quote) do not appear in the table.
* They are meant to appear in the URL as components,
* yet they require special HTML-entity escaping
* to generate valid HTML markup.
*
* All other characters will be escaped to %XX.
*
*/
/*
 * Lookup table indexed by byte value: 256 entries, 16 per row, so row r
 * covers bytes 0x(r)0 .. 0x(r)F. A non-zero entry means
 * houdini_escape_href() copies that byte to the output unchanged; a zero
 * entry means the byte is handled by the escaping switch ('&' and '\''
 * become HTML entities, everything else becomes %XX).
 * All bytes >= 0x80 and all control characters are zero.
 */
static const char HREF_SAFE[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
 * Append 'size' bytes of 'src' to 'ob', escaped for use inside an HTML
 * href/src attribute.
 *
 * Runs of bytes marked safe in HREF_SAFE are copied verbatim. Of the
 * remaining bytes, '&' is written as "&amp;", the single quote as "&#x27;",
 * and every other byte (including the space, which is NOT turned into '+')
 * as an upper-case %XX escape. Always returns 1.
 */
int
houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size)
{
    static const uint8_t hex_digits[] = "0123456789ABCDEF";
    uint8_t pct[3];
    size_t pos = 0;

    pct[0] = '%';

    while (pos < size) {
        const size_t run_start = pos;
        uint8_t ch;

        /* Swallow the longest run of bytes that need no escaping. */
        for (; pos < size; pos++) {
            if (HREF_SAFE[src[pos]] == 0)
                break;
        }

        if (likely(pos > run_start))
            strbuf_put(ob, src + run_start, pos - run_start);

        if (pos >= size)
            break;

        /* src[pos] needs escaping; consume it. */
        ch = src[pos];
        pos++;

        if (ch == '&') {
            /* valid in URLs, but must be an entity inside an attribute */
            strbuf_puts(ob, "&amp;");
        } else if (ch == '\'') {
            /* likewise a legal URL character that HTML needs escaped */
            strbuf_puts(ob, "&#x27;");
        } else {
            /* generic %XX escape */
            pct[1] = hex_digits[(ch >> 4) & 0xF];
            pct[2] = hex_digits[ch & 0xF];
            strbuf_put(ob, pct, 3);
        }
    }

    return 1;
}

View File

@ -0,0 +1,81 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "html/houdini.h"
/**
* According to the OWASP rules:
*
* & --> &amp;
* < --> &lt;
* > --> &gt;
* " --> &quot;
* ' --> &#x27; &apos; is not recommended
* / --> &#x2F; forward slash is included as it helps end an HTML entity
*
*/
/*
 * Lookup table indexed by byte value: 256 entries, 16 per row. A zero entry
 * means the byte is copied unchanged; a non-zero entry is the index into
 * HTML_ESCAPES of the replacement text. The non-zero entries are:
 *   '"' (0x22) -> 1,  '&' (0x26) -> 2,  '\'' (0x27) -> 3,
 *   '/' (0x2F) -> 4,  '<' (0x3C) -> 5,  '>' (0x3E) -> 6.
 */
static const char HTML_ESCAPE_TABLE[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
 * Replacement strings, indexed by the values stored in HTML_ESCAPE_TABLE.
 * Slot 0 is a placeholder (table value 0 means "no escape"). The single
 * quote and the slash use decimal character references (&#39; / &#47;),
 * which denote the same characters as the hexadecimal &#x27; / &#x2F;.
 */
static const char *HTML_ESCAPES[] = {
"",
"&quot;",
"&amp;",
"&#39;",
"&#47;",
"&lt;",
"&gt;"
};
/*
 * Append 'size' bytes of 'src' to 'ob' with HTML-special characters
 * replaced by entities.
 *
 * Bytes whose HTML_ESCAPE_TABLE entry is zero are copied in runs. Any other
 * byte is replaced by the matching HTML_ESCAPES string, with one exception:
 * when 'secure' is zero, the forward slash and the single quote are copied
 * through unescaped. Always returns 1.
 */
int
houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure)
{
    size_t pos = 0;

    while (pos < size) {
        const size_t run_start = pos;
        size_t code = 0;

        /* Advance to the next byte that has an escape code, if any. */
        while (pos < size) {
            code = HTML_ESCAPE_TABLE[src[pos]];
            if (code != 0)
                break;
            pos++;
        }

        if (pos > run_start)
            strbuf_put(ob, src + run_start, pos - run_start);

        if (unlikely(pos >= size))
            break;

        /* '/' and '\'' are only escaped in secure mode. */
        if (!secure && (src[pos] == '/' || src[pos] == '\'')) {
            strbuf_putc(ob, src[pos]);
        } else {
            strbuf_puts(ob, HTML_ESCAPES[code]);
        }

        pos++;
    }

    return 1;
}
/*
 * HTML-escape 'size' bytes of 'src' into 'ob' in secure mode, i.e. with the
 * forward slash and the single quote escaped as well. Returns whatever
 * houdini_escape_html0() returns.
 */
int
houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size)
{
    const int secure = 1;

    return houdini_escape_html0(ob, src, size, secure);
}

View File

@ -0,0 +1,112 @@
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include "buffer.h"
#include "houdini.h"
#include "utf8.h"
#include "html_unescape.h"
/*
 * Try to decode one HTML entity whose text starts at 'src' (the byte right
 * AFTER the introducing '&') and append its UTF-8 expansion to 'ob'.
 *
 * Recognised forms:
 *   #DDD;     decimal character reference
 *   #xHHH;    hexadecimal character reference ('x' or 'X')
 *   name;     named entity, looked up with find_entity()
 *
 * Returns the number of bytes of 'src' consumed, including the closing ';',
 * or 0 if the text is not an entity; in that case nothing is written.
 *
 * Notes:
 * - A numeric reference needs at least three bytes ("#9;"), so the length
 *   test is 'size >= 3'; a stricter test would reject a minimal reference
 *   sitting at the very end of the input.
 * - The code point is accumulated in an int. Each step is range-checked
 *   BEFORE multiplying, so the arithmetic can never overflow (signed
 *   overflow is undefined behaviour); a value too large for int is reported
 *   as "not an entity".
 * - A numeric reference whose value is 0 is not decoded.
 */
size_t
houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size)
{
    size_t i = 0;

    if (size >= 3 && src[0] == '#') {
        int codepoint = 0;

        if (_isdigit(src[1])) {
            for (i = 1; i < size && _isdigit(src[i]); ++i) {
                int digit = src[i] - '0';

                if (codepoint > (INT_MAX - digit) / 10)
                    return 0;
                codepoint = codepoint * 10 + digit;
            }
        }
        else if (src[1] == 'x' || src[1] == 'X') {
            for (i = 2; i < size && _isxdigit(src[i]); ++i) {
                /* Maps '0'-'9' to 0-9 and 'a'-'f' / 'A'-'F' to 10-15. */
                int digit = (src[i] | 32) % 39 - 9;

                if (codepoint > (INT_MAX - digit) / 16)
                    return 0;
                codepoint = codepoint * 16 + digit;
            }
        }

        if (i < size && src[i] == ';' && codepoint) {
            utf8proc_encode_char(codepoint, ob);
            return i + 1;
        }
    }
    else {
        /* Named entity: only look as far as the longest known name. */
        if (size > MAX_WORD_LENGTH)
            size = MAX_WORD_LENGTH;

        for (i = MIN_WORD_LENGTH; i < size; ++i) {
            if (src[i] == ' ')
                break;

            if (src[i] == ';') {
                const struct html_ent *entity = find_entity((char *)src, i);

                if (entity != NULL) {
                    strbuf_put(ob, entity->utf8, entity->utf8_len);
                    return i + 1;
                }

                break;
            }
        }
    }

    return 0;
}
/*
 * Copy 'size' bytes of 'src' to 'ob', replacing every HTML entity that
 * houdini_unescape_ent() recognises with its expansion. An '&' that does
 * not start an entity is copied as a literal '&'.
 *
 * Returns 0, without writing anything, when the input is non-empty and
 * contains no '&' at all (nothing to unescape); returns 1 otherwise,
 * including for empty input. The output buffer is pre-grown the first time
 * a leading run of plain text is about to be copied.
 */
int
houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size)
{
    size_t pos = 0;

    while (pos < size) {
        const size_t start = pos;
        size_t consumed;

        /* Find the next ampersand (or the end of input). */
        for (; pos < size; pos++) {
            if (src[pos] == '&')
                break;
        }

        if (likely(pos > start)) {
            if (unlikely(start == 0)) {
                /* First run reached the end: no '&' anywhere. */
                if (pos >= size)
                    return 0;

                strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
            }

            strbuf_put(ob, src + start, pos - start);
        }

        if (pos >= size)
            break;

        /* Step over the '&' and try to decode what follows. */
        pos++;
        consumed = houdini_unescape_ent(ob, src + pos, size - pos);

        /* not really an entity */
        if (consumed == 0)
            strbuf_putc(ob, '&');

        pos += consumed;
    }

    return 1;
}
/*
 * "Forced" variant of houdini_unescape_html(): always leaves the text in
 * 'ob'. When houdini_unescape_html() reports 0 (it wrote nothing), the
 * input is appended to 'ob' unchanged.
 */
void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size)
{
    int unescaped = houdini_unescape_html(ob, src, size);

    if (unescaped == 0) {
        strbuf_put(ob, src, size);
    }
}

View File

@ -0,0 +1,357 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "config.h"
#include "cmark.h"
#include "node.h"
#include "buffer.h"
#include "html/houdini.h"
// Functions to convert cmark_nodes to HTML strings.
static bool
finish_node(strbuf *html, cmark_node *node, bool tight);
// Append 'source' to 'dest' with HTML special characters escaped, in
// non-secure mode ('/' and '\'' are left alone). A negative 'length' means
// 'source' is NUL-terminated and its length is taken with strlen().
static void escape_html(strbuf *dest, const unsigned char *source, int length)
{
    int count = length;

    if (count < 0) {
        count = strlen((char *)source);
    }

    houdini_escape_html0(dest, source, (size_t)count, 0);
}
// Append 'source' to 'dest' escaped for use as a URL inside an HTML
// attribute. A negative 'length' means 'source' is NUL-terminated and its
// length is taken with strlen().
static void escape_href(strbuf *dest, const unsigned char *source, int length)
{
    int count = length;

    if (count < 0) {
        count = strlen((char *)source);
    }

    houdini_escape_href(dest, source, (size_t)count);
}
// Make sure the output is at the start of a line: append '\n' unless the
// buffer is empty or already ends with a newline.
static inline void cr(strbuf *html)
{
    if (!html->size) {
        return;
    }
    if (html->ptr[html->size - 1] == '\n') {
        return;
    }
    strbuf_putc(html, '\n');
}
// Flatten the descendants of 'node' to plain (tag-free) text appended to
// 'html'. Text, inline code and inline HTML contribute their literal
// content, HTML-escaped; hard and soft line breaks contribute one space;
// every other node kind contributes nothing itself, but its children are
// still visited. 'node' itself is not emitted.
static void inlines_to_plain_html(strbuf *html, cmark_node* node)
{
    cmark_node *cur = node->first_child;

    if (cur == NULL) {
        return;
    }

    // Iterative pre-order walk of the subtree below 'node'.
    for (;;) {
        switch (cur->type) {
        case NODE_TEXT:
        case NODE_INLINE_CODE:
        case NODE_INLINE_HTML:
            escape_html(html, cur->as.literal.data, cur->as.literal.len);
            break;

        case NODE_LINEBREAK:
        case NODE_SOFTBREAK:
            strbuf_putc(html, ' ');
            break;

        default:
            break;
        }

        // Descend first ...
        if (cur->first_child) {
            cur = cur->first_child;
            continue;
        }

        // ... otherwise climb until a node with a following sibling is
        // found, stopping once we are back at the starting node.
        while (cur->next == NULL) {
            cur = cur->parent;
            if (cur == node) {
                return;
            }
        }
        cur = cur->next;
    }
}
// Convert a cmark_node to HTML.
//
// Renders 'node' and its whole subtree into 'html' using an iterative
// pre-order walk: the switch below emits the opening markup of the current
// node, and finish_node() emits the closing markup once the node's children
// (if any) have been visited. A NULL 'node' renders nothing.
//
// 'tight' tracks whether paragraphs are currently rendered without <p> tags.
// The value in effect outside a list or block quote is parked in that node's
// 'as.list.tight' field while its children are rendered, and finish_node()
// restores it.
static void node_to_html(strbuf *html, cmark_node *node)
{
    cmark_node *cur;
    char start_header[] = "<h0>";
    bool tight = false;
    bool visit_children;
    strbuf *info;

    if (node == NULL) {
        return;
    }

    cur = node;
    while (true) {
        // Only NODE_IMAGE wants to skip its children.
        visit_children = true;

        switch(cur->type) {
        case NODE_DOCUMENT:
            break;

        case NODE_PARAGRAPH:
            if (!tight) {
                cr(html);
                strbuf_puts(html, "<p>");
            }
            break;

        case NODE_BLOCK_QUOTE:
            cr(html);
            strbuf_puts(html, "<blockquote>\n");
            // BLOCK_QUOTE doesn't use any of the 'as' structs,
            // so the 'list' member can be used to store the
            // current value of 'tight'.
            cur->as.list.tight = tight;
            tight = false;
            break;

        case NODE_LIST_ITEM:
            cr(html);
            strbuf_puts(html, "<li>");
            break;

        case NODE_LIST: {
            cmark_list *list = &cur->as.list;
            bool tmp;

            // make sure a list starts at the beginning of the line:
            cr(html);

            if (list->list_type == CMARK_BULLET_LIST) {
                strbuf_puts(html, "<ul>\n");
            }
            else if (list->start == 1) {
                strbuf_puts(html, "<ol>\n");
            }
            else {
                strbuf_printf(html, "<ol start=\"%d\">\n",
                              list->start);
            }

            // Store the current value of 'tight' by swapping.
            tmp = list->tight;
            list->tight = tight;
            tight = tmp;
            break;
        }

        case NODE_HEADER:
            cr(html);
            // Patch the digit of "<h0>" with the header level.
            start_header[2] = '0' + cur->as.header.level;
            strbuf_puts(html, start_header);
            break;

        case NODE_CODE_BLOCK:
            info = &cur->as.code.info;
            cr(html);

            // Compare the fence length itself with 0. Taking its
            // address here would make the test always false, because
            // the address of a struct member is never null.
            if (cur->as.code.fence_length == 0
                || strbuf_len(info) == 0) {
                strbuf_puts(html, "<pre><code>");
            }
            else {
                // The language is the info string up to the first
                // space (or all of it when there is no space).
                int first_tag = strbuf_strchr(info, ' ', 0);
                if (first_tag < 0)
                    first_tag = strbuf_len(info);

                strbuf_puts(html,
                            "<pre><code class=\"language-");
                escape_html(html, info->ptr, first_tag);
                strbuf_puts(html, "\">");
            }

            escape_html(html, cur->string_content.ptr, cur->string_content.size);
            break;

        case NODE_HTML:
            // Raw HTML blocks are passed through unescaped.
            cr(html);
            strbuf_put(html, cur->string_content.ptr, cur->string_content.size);
            break;

        case NODE_HRULE:
            cr(html);
            strbuf_puts(html, "<hr />\n");
            break;

        case NODE_REFERENCE_DEF:
            break;

        case NODE_TEXT:
            escape_html(html, cur->as.literal.data, cur->as.literal.len);
            break;

        case NODE_LINEBREAK:
            strbuf_puts(html, "<br />\n");
            break;

        case NODE_SOFTBREAK:
            strbuf_putc(html, '\n');
            break;

        case NODE_INLINE_CODE:
            strbuf_puts(html, "<code>");
            escape_html(html, cur->as.literal.data, cur->as.literal.len);
            break;

        case NODE_INLINE_HTML:
            // Raw inline HTML is passed through unescaped.
            strbuf_put(html,
                       cur->as.literal.data,
                       cur->as.literal.len);
            break;

        case NODE_LINK:
            strbuf_puts(html, "<a href=\"");
            if (cur->as.link.url)
                escape_href(html, cur->as.link.url, -1);

            if (cur->as.link.title) {
                strbuf_puts(html, "\" title=\"");
                escape_html(html, cur->as.link.title, -1);
            }

            strbuf_puts(html, "\">");
            break;

        case NODE_IMAGE:
            strbuf_puts(html, "<img src=\"");
            if (cur->as.link.url)
                escape_href(html, cur->as.link.url, -1);

            // The image's children become the plain-text alt attribute
            // instead of being rendered as markup.
            strbuf_puts(html, "\" alt=\"");
            inlines_to_plain_html(html, cur);

            if (cur->as.link.title) {
                strbuf_puts(html, "\" title=\"");
                escape_html(html, cur->as.link.title, -1);
            }

            strbuf_puts(html, "\" />");
            visit_children = false;
            break;

        case NODE_STRONG:
            strbuf_puts(html, "<strong>");
            break;

        case NODE_EMPH:
            strbuf_puts(html, "<em>");
            break;

        default:
            assert(false);
        }

        if (visit_children && cur->first_child) {
            cur = cur->first_child;
            continue;
        }

    next_sibling:
        // All of cur's children are done (or were skipped): close it.
        tight = finish_node(html, cur, tight);

        if (cur == node) {
            break;
        }

        if (cur->next) {
            cur = cur->next;
            continue;
        }

        // No more siblings: go up and close the parent as well.
        cur = cur->parent;
        goto next_sibling;
    }
}
// Emit the closing markup for 'node' into 'html'.
//
// 'tight' is the value in effect while the node's children were rendered.
// For lists and block quotes the value that was parked in the node's
// 'as.list.tight' field when the node was opened is taken back out.
// Returns the restored value of 'tight' (unchanged for all other nodes).
static bool
finish_node(strbuf *html, cmark_node *node, bool tight)
{
    bool restored = tight;

    switch (node->type) {
    case NODE_PARAGRAPH:
        if (tight) {
            break;
        }
        strbuf_puts(html, "</p>\n");
        break;

    case NODE_BLOCK_QUOTE:
        strbuf_puts(html, "</blockquote>\n");
        // Take back the parked value and clear the slot.
        restored = node->as.list.tight;
        node->as.list.tight = false;
        break;

    case NODE_LIST_ITEM:
        strbuf_puts(html, "</li>\n");
        break;

    case NODE_LIST: {
        const char *closer;

        if (node->as.list.list_type == CMARK_BULLET_LIST) {
            closer = "</ul>\n";
        } else {
            closer = "</ol>\n";
        }
        strbuf_puts(html, closer);

        // Swap back: the list regains its own tightness, the caller
        // regains the outer one.
        restored = node->as.list.tight;
        node->as.list.tight = tight;
        break;
    }

    case NODE_HEADER: {
        char closer[] = "</h0>\n";

        closer[3] = '0' + node->as.header.level;
        strbuf_puts(html, closer);
        break;
    }

    case NODE_CODE_BLOCK:
        strbuf_puts(html, "</code></pre>\n");
        break;

    case NODE_INLINE_CODE:
        strbuf_puts(html, "</code>");
        break;

    case NODE_LINK:
        strbuf_puts(html, "</a>");
        break;

    case NODE_STRONG:
        strbuf_puts(html, "</strong>");
        break;

    case NODE_EMPH:
        strbuf_puts(html, "</em>");
        break;

    default:
        break;
    }

    return restored;
}
// Render the tree rooted at 'root' to HTML and return the text as a string
// detached from the temporary output buffer.
// NOTE(review): the returned pointer presumably has to be released by the
// caller -- confirm against strbuf_detach().
char *cmark_render_html(cmark_node *root)
{
    strbuf out = GH_BUF_INIT;
    char *rendered;

    node_to_html(&out, root);

    rendered = (char *)strbuf_detach(&out);
    strbuf_free(&out);

    return rendered;
}

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More