mirror of
https://github.com/ilyakooo0/urbit.git
synced 2024-12-01 20:04:09 +03:00
commit
87e28fb787
1
.gitignore
vendored
1
.gitignore
vendored
@ -30,3 +30,4 @@ node_modules/
|
||||
# ??
|
||||
/inst
|
||||
cscope.*
|
||||
build/
|
||||
|
21
.gitmodules
vendored
Normal file
21
.gitmodules
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
[submodule "subprojects/softfloat3"]
|
||||
path = subprojects/softfloat3
|
||||
url = https://github.com/urbit/berkeley-softfloat-3.git
|
||||
[submodule "subprojects/commonmark-legacy"]
|
||||
path = subprojects/commonmark-legacy
|
||||
url = https://github.com/urbit/commonmark-legacy.git
|
||||
[submodule "subprojects/http-parser-legacy"]
|
||||
path = subprojects/http-parser-legacy
|
||||
url = https://github.com/urbit/http-parser-legacy.git
|
||||
[submodule "subprojects/ed25519"]
|
||||
path = subprojects/ed25519
|
||||
url = https://github.com/urbit/ed25519.git
|
||||
[submodule "subprojects/libscrypt"]
|
||||
path = subprojects/libscrypt
|
||||
url = https://github.com/urbit/libscrypt.git
|
||||
[submodule "subprojects/murmur3"]
|
||||
path = subprojects/murmur3
|
||||
url = https://github.com/urbit/murmur3.git
|
||||
[submodule "subprojects/libuv"]
|
||||
path = subprojects/libuv
|
||||
url = https://github.com/urbit/libuv.git
|
24
.travis.yml
24
.travis.yml
@ -1,30 +1,36 @@
|
||||
language: c
|
||||
script: make && make test # no ./configure
|
||||
script: meson build && cd ./build && ninja
|
||||
|
||||
# Uncomment me if this gets annoying
|
||||
#
|
||||
# notifications:
|
||||
# email: false
|
||||
# notifications:
|
||||
# email: false
|
||||
|
||||
before_install:
|
||||
- wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip
|
||||
- unzip ninja-linux.zip
|
||||
- sudo mv ninja /usr/bin/
|
||||
install:
|
||||
- pip3 install --user -I meson==0.44.1
|
||||
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- python3
|
||||
- python3-pip
|
||||
- libgmp3-dev
|
||||
- libsigsegv-dev
|
||||
- openssl
|
||||
- libssl-dev
|
||||
- libncurses5-dev
|
||||
- make
|
||||
- exuberant-ctags
|
||||
- automake
|
||||
- autoconf
|
||||
- make
|
||||
- libtool
|
||||
- g++
|
||||
- ragel
|
||||
- cmake
|
||||
- re2c
|
||||
- libcurl4-gnutls-dev
|
||||
- python
|
||||
- unzip
|
||||
# before_deploy: "make deb" # TODO
|
||||
deploy:
|
||||
skip_cleanup: true
|
||||
@ -32,7 +38,7 @@ deploy:
|
||||
prerelease: true # turn this off for official releases
|
||||
api_key:
|
||||
secure: V4E7784ECSS3MO6ZIRtang9XwibDyvDYGb0MoSaP2CTlmzIAhdokr4KJFM0qM4KRaaajCdQuqi0lojgOjwdxs7e0GkAwScb33LFxQ7Chj/QkFOY7V1AnSRLR5OsXnazB0nur5aSwvcvnggQ2XW3OeF7zIvGfs9aR97SEz/xCrVE=
|
||||
file: bin/urbit # TODO upload package from before_deploy
|
||||
file: ./build/urbit # TODO upload package from before_deploy
|
||||
on:
|
||||
repo: urbit/urbit
|
||||
tags: true
|
||||
|
552
Makefile
552
Makefile
@ -1,552 +0,0 @@
|
||||
# A simple makefile.
|
||||
#
|
||||
|
||||
default: all
|
||||
-include .make.conf
|
||||
|
||||
CORE=.MAKEFILE-VERSION
|
||||
|
||||
# Pick one of:
|
||||
# linux
|
||||
# osx
|
||||
|
||||
UNAME=$(shell uname)
|
||||
ifeq ($(UNAME),Darwin)
|
||||
OS=osx
|
||||
else ifeq ($(UNAME),Linux)
|
||||
OS=linux
|
||||
else ifeq ($(UNAME),FreeBSD)
|
||||
OS=bsd
|
||||
else ifeq ($(UNAME),OpenBSD)
|
||||
OS=bsd
|
||||
else
|
||||
$(error unknown unix)
|
||||
endif
|
||||
|
||||
# Pick one of:
|
||||
# little
|
||||
# big
|
||||
#
|
||||
ENDIAN=little
|
||||
|
||||
# Binary directory - not in quotes.
|
||||
#
|
||||
BIN=bin
|
||||
|
||||
# Only include/link with this if it exists.
|
||||
# (Mac OS X El Capitan clean install does not have /opt)
|
||||
ifneq (,$(wildcard /opt/local/.))
|
||||
OPTLOCALINC?=/opt/local/include
|
||||
OPTLOCALLIB?=/opt/local/lib
|
||||
endif
|
||||
|
||||
# Only include/link with this if it exists.
|
||||
# (`brew install openssl` on Mac OS X El Capitan puts openssl here)
|
||||
ifneq (,$(wildcard /usr/local/opt/openssl/.))
|
||||
OPENSSLINC?=/usr/local/opt/openssl/include
|
||||
OPENSSLLIB?=/usr/local/opt/openssl/lib
|
||||
endif
|
||||
|
||||
# can't have empty -I or -L options due to whitespace sensitivity
|
||||
ifdef OPTLOCALINC
|
||||
OPTLOCALIFLAGS=-I$(OPTLOCALINC)
|
||||
endif
|
||||
ifdef OPTLOCALLIB
|
||||
OPTLOCALLFLAGS=-L$(OPTLOCALLIB)
|
||||
endif
|
||||
ifdef OPENSSLINC
|
||||
OPENSSLIFLAGS=-I$(OPENSSLINC)
|
||||
endif
|
||||
ifdef OPENSSLLIB
|
||||
OPENSSLLFLAGS=-L$(OPENSSLLIB)
|
||||
endif
|
||||
|
||||
CURLINC=$(shell curl-config --cflags)
|
||||
CURLLIB=$(shell curl-config --libs)
|
||||
|
||||
RM=rm -f
|
||||
CC=cc
|
||||
CXX=c++
|
||||
CXXFLAGS=$(CFLAGS)
|
||||
CLD=c++ $(CFLAGS) -L/usr/local/lib $(OPTLOCALLFLAGS) $(OPENSSLLFLAGS)
|
||||
|
||||
ifeq ($(OS),osx)
|
||||
CLDOSFLAGS=-bind_at_load
|
||||
OSLIBS=-framework CoreServices -framework CoreFoundation
|
||||
endif
|
||||
ifeq ($(OS),linux)
|
||||
OSLIBS=-lpthread -lrt -lcurses
|
||||
DEFINES=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE
|
||||
endif
|
||||
ifeq ($(OS),bsd)
|
||||
OSLIBS=-lpthread -lncurses -lkvm
|
||||
endif
|
||||
|
||||
ifeq ($(STATIC),yes)
|
||||
LIBS=-lssl -lcrypto -lncurses /usr/local/lib/libsigsegv.a /usr/local/lib/libgmp.a $(CURLLIB) $(OSLIBS)
|
||||
else
|
||||
LIBS=-lssl -lcrypto -lgmp -lncurses -lsigsegv $(CURLLIB) $(OSLIBS)
|
||||
endif
|
||||
|
||||
INCLUDE=include
|
||||
MDEFINES=-DU3_OS_$(OS) -DU3_OS_ENDIAN_$(ENDIAN)
|
||||
|
||||
DEBUG=no
|
||||
|
||||
ifeq ($(DEBUG),yes)
|
||||
CFLAGS=-g
|
||||
else
|
||||
CFLAGS?=-O3
|
||||
endif
|
||||
|
||||
LIBUV_VER=libuv-v1.7.5
|
||||
|
||||
LIBUV_CONFIGURE_OPTIONS=CC=$(CC)
|
||||
|
||||
# NOTFORCHECKIN - restore -O3
|
||||
# -DGHETTO \
|
||||
# -DHUSH
|
||||
CFLAGS+= $(COSFLAGS) -ffast-math \
|
||||
-funsigned-char \
|
||||
-I/usr/local/include \
|
||||
$(OPTLOCALIFLAGS) \
|
||||
$(OPENSSLIFLAGS) \
|
||||
$(CURLINC) \
|
||||
-I$(INCLUDE) \
|
||||
-Ioutside/$(LIBUV_VER)/include \
|
||||
-Ioutside/anachronism/include \
|
||||
-Ioutside/ed25519/src \
|
||||
-Ioutside/commonmark/src \
|
||||
-Ioutside/commonmark/build/src \
|
||||
-Ioutside/scrypt \
|
||||
-Ioutside/softfloat-3/source/include \
|
||||
-Ioutside/murmur3 \
|
||||
$(DEFINES) \
|
||||
$(MDEFINES)
|
||||
|
||||
# TODO remove -Wno-*
|
||||
CWFLAGS=-Wall \
|
||||
-Wextra \
|
||||
-Wno-sign-compare \
|
||||
-Wno-unused-parameter \
|
||||
-Wno-missing-field-initializers \
|
||||
-Wno-strict-aliasing \
|
||||
-Wno-error
|
||||
ifneq ($(OS),bsd)
|
||||
CWFLAGS+=-Wno-error=unused-result
|
||||
endif
|
||||
|
||||
# glibc 2.24 deprecates readdir_r; iff glibc >=2.24,
|
||||
# don't upgrade 'deprecated declarations' warnings to errors
|
||||
# dependency: `getconf`, which comes w/glibc
|
||||
GLIBC := $(lastword $(shell getconf GNU_LIBC_VERSION 2>/dev/null))
|
||||
# dependency: none, uses make's native functions
|
||||
GLIBC_MAJ := $(word 1, $(subst ., ,$(GLIBC)))
|
||||
GLIBC_MIN := $(word 2, $(subst ., ,$(GLIBC)))
|
||||
# dependency: `expr` shell built-in
|
||||
GLIBC_GE_2_24 := $(shell expr $(GLIBC_MAJ) ">" 2 "|" \
|
||||
$(GLIBC_MAJ) "=" 2 "&" $(GLIBC_MIN) ">=" 24 2>/dev/null)
|
||||
ifeq (1,$(GLIBC_GE_2_24))
|
||||
CWFLAGS+=-Wno-error=deprecated-declarations
|
||||
endif
|
||||
|
||||
ifdef NO_SILENT_RULES
|
||||
%.o: %.c $(CORE)
|
||||
$(CC) -c $(CWFLAGS) $(CFLAGS) -o $@ $<
|
||||
@$(CC) -MM -MP $(CWFLAGS) $(CFLAGS) -MT $@ $< -MF .d/$*.d
|
||||
else
|
||||
%.o: %.c $(CORE)
|
||||
@echo " CC $@"
|
||||
@$(CC) -c $(CWFLAGS) $(CFLAGS) -o $@ $<
|
||||
@$(CC) -MM -MP $(CWFLAGS) $(CFLAGS) -MT $@ $< -MF .d/$*.d
|
||||
endif
|
||||
|
||||
N_OFILES=\
|
||||
noun/allocate.o \
|
||||
noun/events.o \
|
||||
noun/hashtable.o \
|
||||
noun/imprison.o \
|
||||
noun/jets.o \
|
||||
noun/manage.o \
|
||||
noun/nock.o \
|
||||
noun/retrieve.o \
|
||||
noun/trace.o \
|
||||
noun/xtract.o \
|
||||
noun/vortex.o \
|
||||
noun/zave.o
|
||||
|
||||
J_A_OFILES=\
|
||||
jets/a/add.o \
|
||||
jets/a/dec.o \
|
||||
jets/a/div.o \
|
||||
jets/a/gte.o \
|
||||
jets/a/gth.o \
|
||||
jets/a/lte.o \
|
||||
jets/a/lth.o \
|
||||
jets/a/mod.o \
|
||||
jets/a/mul.o \
|
||||
jets/a/sub.o
|
||||
|
||||
J_B_OFILES=\
|
||||
jets/b/bind.o \
|
||||
jets/b/clap.o \
|
||||
jets/b/drop.o \
|
||||
jets/b/flop.o \
|
||||
jets/b/lent.o \
|
||||
jets/b/levy.o \
|
||||
jets/b/lien.o \
|
||||
jets/b/murn.o \
|
||||
jets/b/need.o \
|
||||
jets/b/reap.o \
|
||||
jets/b/reel.o \
|
||||
jets/b/roll.o \
|
||||
jets/b/skid.o \
|
||||
jets/b/skim.o \
|
||||
jets/b/skip.o \
|
||||
jets/b/scag.o \
|
||||
jets/b/slag.o \
|
||||
jets/b/snag.o \
|
||||
jets/b/sort.o \
|
||||
jets/b/turn.o \
|
||||
jets/b/weld.o
|
||||
|
||||
J_C_OFILES=\
|
||||
jets/c/bex.o \
|
||||
jets/c/xeb.o \
|
||||
jets/c/can.o \
|
||||
jets/c/cap.o \
|
||||
jets/c/cat.o \
|
||||
jets/c/con.o \
|
||||
jets/c/cut.o \
|
||||
jets/c/dor.o \
|
||||
jets/c/dvr.o \
|
||||
jets/c/dis.o \
|
||||
jets/c/end.o \
|
||||
jets/c/gor.o \
|
||||
jets/c/hor.o \
|
||||
jets/c/lsh.o \
|
||||
jets/c/mas.o \
|
||||
jets/c/met.o \
|
||||
jets/c/mix.o \
|
||||
jets/c/mug.o \
|
||||
jets/c/muk.o \
|
||||
jets/c/peg.o \
|
||||
jets/c/po.o \
|
||||
jets/c/pow.o \
|
||||
jets/c/rap.o \
|
||||
jets/c/rep.o \
|
||||
jets/c/rip.o \
|
||||
jets/c/rsh.o \
|
||||
jets/c/sqt.o \
|
||||
jets/c/vor.o
|
||||
|
||||
J_D_OFILES=\
|
||||
jets/d/in_has.o \
|
||||
jets/d/in_int.o \
|
||||
jets/d/in_gas.o \
|
||||
jets/d/in_mer.o \
|
||||
jets/d/in_put.o \
|
||||
jets/d/in_tap.o \
|
||||
jets/d/in_uni.o \
|
||||
jets/d/in_wyt.o \
|
||||
jets/d/in_bif.o \
|
||||
jets/d/in_dif.o \
|
||||
jets/d/by_gas.o \
|
||||
jets/d/by_get.o \
|
||||
jets/d/by_has.o \
|
||||
jets/d/by_int.o \
|
||||
jets/d/by_put.o \
|
||||
jets/d/by_uni.o \
|
||||
jets/d/by_bif.o \
|
||||
jets/d/by_dif.o
|
||||
|
||||
J_E_OFILES=\
|
||||
jets/e/aes_ecb.o \
|
||||
jets/e/aes_cbc.o \
|
||||
jets/e/aesc.o \
|
||||
jets/e/cue.o \
|
||||
jets/e/fl.o \
|
||||
jets/e/jam.o \
|
||||
jets/e/mat.o \
|
||||
jets/e/mink.o \
|
||||
jets/e/mule.o \
|
||||
jets/e/parse.o \
|
||||
jets/e/rd.o \
|
||||
jets/e/rq.o \
|
||||
jets/e/rs.o \
|
||||
jets/e/rh.o \
|
||||
jets/e/rub.o \
|
||||
jets/e/scr.o \
|
||||
jets/e/shax.o \
|
||||
jets/e/lore.o \
|
||||
jets/e/loss.o \
|
||||
jets/e/lune.o \
|
||||
jets/e/trip.o
|
||||
|
||||
J_E_OFILES_ED=\
|
||||
jets/e/ed_puck.o \
|
||||
jets/e/ed_sign.o \
|
||||
jets/e/ed_veri.o \
|
||||
jets/e/ed_shar.o
|
||||
|
||||
J_F_OFILES=\
|
||||
jets/f/ap.o \
|
||||
jets/f/cell.o \
|
||||
jets/f/comb.o \
|
||||
jets/f/cons.o \
|
||||
jets/f/core.o \
|
||||
jets/f/face.o \
|
||||
jets/f/fitz.o \
|
||||
jets/f/flan.o \
|
||||
jets/f/flip.o \
|
||||
jets/f/flor.o \
|
||||
jets/f/fork.o \
|
||||
jets/f/help.o \
|
||||
jets/f/hike.o \
|
||||
jets/f/look.o \
|
||||
jets/f/loot.o
|
||||
|
||||
J_F_OFILES_UT=\
|
||||
jets/f/ut.o \
|
||||
jets/f/ut_burn.o \
|
||||
jets/f/ut_buss.o \
|
||||
jets/f/ut_conk.o \
|
||||
jets/f/ut_crop.o \
|
||||
jets/f/ut_find.o \
|
||||
jets/f/ut_fire.o \
|
||||
jets/f/ut_fish.o \
|
||||
jets/f/ut_fuse.o \
|
||||
jets/f/ut_gain.o \
|
||||
jets/f/ut_lose.o \
|
||||
jets/f/ut_mint.o \
|
||||
jets/f/ut_mull.o \
|
||||
jets/f/ut_nest.o \
|
||||
jets/f/ut_peek.o \
|
||||
jets/f/ut_peel.o \
|
||||
jets/f/ut_play.o \
|
||||
jets/f/ut_repo.o \
|
||||
jets/f/ut_rest.o \
|
||||
jets/f/ut_tack.o \
|
||||
jets/f/ut_toss.o \
|
||||
jets/f/ut_wrap.o
|
||||
|
||||
J_G_OFILES=\
|
||||
jets/g/down.o
|
||||
|
||||
J_OFILES=\
|
||||
$(J_A_OFILES) \
|
||||
$(J_B_OFILES) \
|
||||
$(J_C_OFILES) \
|
||||
$(J_D_OFILES) \
|
||||
$(J_E_OFILES) \
|
||||
$(J_E_OFILES_ED) \
|
||||
$(J_F_OFILES) \
|
||||
$(J_F_OFILES_UT) \
|
||||
$(J_G_OFILES) \
|
||||
jets/tree.o
|
||||
|
||||
BASE_OFILES=$(N_OFILES) $(J_OFILES)
|
||||
|
||||
OUT_OFILES=\
|
||||
outside/jhttp/http_parser.o \
|
||||
outside/murmur3/MurmurHash3.o
|
||||
|
||||
V_OFILES=\
|
||||
vere/ames.o \
|
||||
vere/behn.o \
|
||||
vere/cttp.o \
|
||||
vere/http.o \
|
||||
vere/loop.o \
|
||||
vere/raft.o \
|
||||
vere/reck.o \
|
||||
vere/sist.o \
|
||||
vere/term.o \
|
||||
vere/time.o \
|
||||
vere/unix.o \
|
||||
vere/save.o \
|
||||
vere/walk.o
|
||||
|
||||
MAIN_FILE =\
|
||||
vere/main.o
|
||||
|
||||
VERE_OFILES=\
|
||||
$(OUT_OFILES) \
|
||||
$(BASE_OFILES) \
|
||||
$(MAIN_FILE) \
|
||||
$(V_OFILES)
|
||||
|
||||
VERE_DFILES=$(VERE_OFILES:%.o=.d/%.d)
|
||||
|
||||
-include $(VERE_DFILES)
|
||||
|
||||
TEST_HASH_MAIN_FILE =\
|
||||
tests/hashtable_tests.o
|
||||
|
||||
TEST_HASH_OFILES=\
|
||||
$(OUT_OFILES) \
|
||||
$(BASE_OFILES) \
|
||||
$(TEST_HASH_MAIN_FILE) \
|
||||
$(V_OFILES)
|
||||
|
||||
TEST_HASH_DFILES=$(TEST_HASH_OFILES:%.o=.d/%.d)
|
||||
|
||||
-include $(TEST_HASH_DFILES)
|
||||
|
||||
# This is a silly hack necessitated by the fact that libuv uses configure
|
||||
#
|
||||
# * Making 'all' obviously requires outside/libuv,
|
||||
# which requires the libuv Makefile to be created.
|
||||
# * Making distclean on outside/libuv destroys the makefile.
|
||||
# * ...so configuring outside/libuv is paradoxically required
|
||||
# in order to distclean it!
|
||||
# * But what if developer types 'make distclean all' ?
|
||||
# * first target makes libuv Makefile, then destroys it...and
|
||||
# second target knows that it was made.
|
||||
# * ...so second target borks.
|
||||
# * Solution: make libuv not only depend on its own Makefile,
|
||||
# but on a side effect of creating its own makefile.
|
||||
#
|
||||
LIBUV_MAKEFILE=outside/$(LIBUV_VER)/Makefile
|
||||
LIBUV_MAKEFILE2=outside/$(LIBUV_VER)/config.log
|
||||
|
||||
LIBUV=outside/$(LIBUV_VER)/.libs/libuv.a
|
||||
|
||||
LIBED25519=outside/ed25519/ed25519.a
|
||||
|
||||
LIBANACHRONISM=outside/anachronism/build/libanachronism.a
|
||||
|
||||
LIBCOMMONMARK=outside/commonmark/build/src/libcmark.a
|
||||
|
||||
LIBSCRYPT=outside/scrypt/scrypt.a
|
||||
|
||||
LIBSOFTFLOAT=outside/softfloat-3/build/Linux-x86_64-GCC/softfloat.a
|
||||
|
||||
TAGS=\
|
||||
.tags \
|
||||
.etags \
|
||||
GPATH GTAGS GRTAGS \
|
||||
cscope.in.out cscope.po.out cscope.out
|
||||
|
||||
all: urbit
|
||||
|
||||
.MAKEFILE-VERSION: Makefile .make.conf
|
||||
@echo "Makefile update."
|
||||
@touch .MAKEFILE-VERSION
|
||||
|
||||
.make.conf:
|
||||
@echo "# Set custom configuration here, please!" > ".make.conf"
|
||||
|
||||
urbit: $(BIN)/urbit
|
||||
|
||||
$(LIBUV_MAKEFILE) $(LIBUV_MAKEFILE2):
|
||||
cd outside/$(LIBUV_VER) ; sh autogen.sh ; ./configure $(LIBUV_CONFIGURE_OPTIONS)
|
||||
|
||||
# [h]act II: the plot thickens
|
||||
#
|
||||
# * Specifying two targets that each configure libuv works
|
||||
# when the rules are executed sequentially,
|
||||
# * but when attempting a parallel build, it is likely Make
|
||||
# will try to configure libuv simultaneously.
|
||||
# * We can specify a dependency between the two targets so
|
||||
# that execution of their rule(s) is serialized.
|
||||
# * Further, libuv does not seem to be friendly towards
|
||||
# parallel builds either. A true fix is out of scope here
|
||||
# * ...so we must instruct Make to only use one job when it
|
||||
# attempts to build libuv.
|
||||
#
|
||||
$(LIBUV_MAKEFILE2): $(LIBUV_MAKEFILE)
|
||||
|
||||
$(LIBUV): $(LIBUV_MAKEFILE) $(LIBUV_MAKEFILE2)
|
||||
$(MAKE) -C outside/$(LIBUV_VER) all-am -j1
|
||||
|
||||
$(LIBED25519):
|
||||
$(MAKE) -C outside/ed25519
|
||||
|
||||
$(LIBANACHRONISM):
|
||||
$(MAKE) -C outside/anachronism static
|
||||
|
||||
$(LIBCOMMONMARK):
|
||||
$(MAKE) -C outside/commonmark
|
||||
|
||||
$(LIBSCRYPT):
|
||||
$(MAKE) -C outside/scrypt MDEFINES="$(MDEFINES)"
|
||||
|
||||
$(LIBSOFTFLOAT):
|
||||
$(MAKE) -C outside/softfloat-3/build/Linux-x86_64-GCC
|
||||
|
||||
$(V_OFILES): include/vere/vere.h
|
||||
|
||||
ifdef NO_SILENT_RULES
|
||||
$(BIN)/urbit: $(LIBCOMMONMARK) $(VERE_OFILES) $(LIBUV) $(LIBED25519) $(LIBANACHRONISM) $(LIBSCRYPT) $(LIBSOFTFLOAT)
|
||||
mkdir -p $(BIN)
|
||||
$(CLD) $(CLDOSFLAGS) -o $(BIN)/urbit $(VERE_OFILES) $(LIBUV) $(LIBED25519) $(LIBANACHRONISM) $(LIBS) $(LIBCOMMONMARK) $(LIBSCRYPT) $(LIBSOFTFLOAT)
|
||||
else
|
||||
$(BIN)/urbit: $(LIBCOMMONMARK) $(VERE_OFILES) $(LIBUV) $(LIBED25519) $(LIBANACHRONISM) $(LIBSCRYPT) $(LIBSOFTFLOAT)
|
||||
@echo " CCLD $(BIN)/urbit"
|
||||
@mkdir -p $(BIN)
|
||||
@$(CLD) $(CLDOSFLAGS) -o $(BIN)/urbit $(VERE_OFILES) $(LIBUV) $(LIBED25519) $(LIBANACHRONISM) $(LIBS) $(LIBCOMMONMARK) $(LIBSCRYPT) $(LIBSOFTFLOAT)
|
||||
endif
|
||||
|
||||
# This should start a comet or something
|
||||
test:
|
||||
@echo "FIXME no tests defined"
|
||||
|
||||
test_hash: $(BIN)/test_hash
|
||||
|
||||
ifdef NO_SILENT_RULES
|
||||
$(BIN)/test_hash: $(LIBCOMMONMARK) $(TEST_HASH_OFILES) $(LIBUV) $(LIBED25519) $(LIBANACHRONISM) $(LIBSCRYPT) $(LIBSOFTFLOAT)
|
||||
mkdir -p $(BIN)
|
||||
$(CLD) $(CLDOSFLAGS) -o $(BIN)/test_hash $(TEST_HASH_OFILES) $(LIBUV) $(LIBED25519) $(LIBANACHRONISM) $(LIBS) $(LIBCOMMONMARK) $(LIBSCRYPT) $(LIBSOFTFLOAT)
|
||||
else
|
||||
$(BIN)/test_hash: $(LIBCOMMONMARK) $(TEST_HASH_OFILES) $(LIBUV) $(LIBED25519) $(LIBANACHRONISM) $(LIBSCRYPT) $(LIBSOFTFLOAT)
|
||||
@echo "VERE_DFILES=$(VERE_DFILES)"
|
||||
@echo " CCLD $(BIN)/test_hash"
|
||||
@mkdir -p $(BIN)
|
||||
@$(CLD) $(CLDOSFLAGS) -o $(BIN)/test_hash $(TEST_HASH_OFILES) $(LIBUV) $(LIBED25519) $(LIBANACHRONISM) $(LIBS) $(LIBCOMMONMARK) $(LIBSCRYPT) $(LIBSOFTFLOAT)
|
||||
endif
|
||||
|
||||
tags: ctags etags gtags cscope
|
||||
|
||||
ctags:
|
||||
@ctags -R -f .tags --exclude=root || true
|
||||
|
||||
etags:
|
||||
@etags -f .etags $$(find . -name '*.c' -or -name '*.h') || true
|
||||
|
||||
gtags:
|
||||
@gtags || true
|
||||
|
||||
cscope:
|
||||
@cscope -b -q -R || true
|
||||
|
||||
osxpackage:
|
||||
$(RM) -r inst
|
||||
$(MAKE) distclean
|
||||
$(MAKE) $(BIN)/urbit LIB=/usr/local/lib/urb STATIC=yes
|
||||
mkdir -p inst/usr/local/lib/urb inst/usr/local/bin
|
||||
cp $(BIN)/urbit inst/usr/local/bin
|
||||
cp urb/urbit.pill inst/usr/local/lib/urb
|
||||
pkgbuild --root inst --identifier org.urbit.urbit --version 0.2 urbit.pkg
|
||||
|
||||
debbuild:
|
||||
$(MAKE) $(BIN)/urbit LIB=/usr/share/urb
|
||||
|
||||
debinstall:
|
||||
mkdir -p $(DESTDIR)/usr/bin $(DESTDIR)/usr/share/urb
|
||||
install -m755 $(BIN)/urbit $(DESTDIR)/usr/bin
|
||||
cp urb/urbit.pill $(DESTDIR)/usr/share/urb
|
||||
|
||||
clean:
|
||||
$(RM) $(VERE_OFILES) $(BIN)/urbit urbit.pkg $(VERE_DFILES) $(TAGS)
|
||||
$(RM) -r debian/files debian/urbit*
|
||||
|
||||
# 'make distclean all -jn' ∀ n>1 still does not work because it is possible
|
||||
# Make will attempt to build urbit while it is also cleaning urbit..
|
||||
distclean: clean $(LIBUV_MAKEFILE)
|
||||
$(MAKE) -C outside/$(LIBUV_VER) distclean
|
||||
$(MAKE) -C outside/ed25519 clean
|
||||
$(MAKE) -C outside/anachronism clean
|
||||
$(MAKE) -C outside/scrypt clean
|
||||
$(MAKE) -C outside/softfloat-3/build/Linux-x86_64-GCC clean
|
||||
|
||||
# Targets that name commands rather than files, so a stray file with the same
# name can never make them look "up to date". `debinstall` and `distclean`
# must be separate words; fused together, neither one is declared phony.
.PHONY: all default clean debbuild debinstall distclean ctags etags gtags cscope osxpackage tags test
|
50
README.md
50
README.md
@ -16,15 +16,13 @@ If you're doing development on Urbit, keep reading.
|
||||
`vere`, the Urbit virtual machine, depends on the following:
|
||||
|
||||
- C compiler ([gcc](https://gcc.gnu.org) or [clang](http://clang.llvm.org))
|
||||
- [GNU Make](https://www.gnu.org/software/make/)
|
||||
- [Meson](http://mesonbuild.com/)
|
||||
- [GMP](https://gmplib.org)
|
||||
- [CMake](https://cmake.org)
|
||||
- automake, autoconf, and libtool
|
||||
- [OpenSSL](https://www.openssl.org)
|
||||
- [libsigsegv](https://www.gnu.org/software/libsigsegv/)
|
||||
- [libcurl](https://curl.haxx.se/libcurl/)
|
||||
- [libuv](http://libuv.org)
|
||||
- curses implementation (ncurses on Linux distributions, OS curses otherwise)
|
||||
- [Ragel](https://www.colm.net/open-source/ragel/)
|
||||
- [re2c](http://re2c.org)
|
||||
|
||||
Most of these dependencies are unfortunate; we aim to drastically shrink the
|
||||
@ -34,16 +32,44 @@ for future unbundling or removal wherever possible.
|
||||
|
||||
## Building
|
||||
|
||||
Our Makefile should handle the build smoothly on all supported platforms. It's
|
||||
just a simple Makefile, written by hand for GNU Make, and the most complicated
|
||||
parts of its internal machinery have to do with the varied build systems of the
|
||||
bundled libraries.
|
||||
Urbit uses the Meson build system.
|
||||
|
||||
Useful targets are the default `all`, `clean`, and `distclean`. The last may not
|
||||
produce an entirely clean distribution directory, and runs a bundled library's
|
||||
configure script; `git clean` may be a better option.
|
||||
Some libraries which are not found in major distributions:
|
||||
- ed25519
|
||||
- http-parser legacy version 0.1.0
|
||||
- murmur3
|
||||
- softfloat3
|
||||
- urbit-scrypt
|
||||
- commonmark legacy version 0.12.0
|
||||
|
||||
The `vere` binary is produced in `bin/urbit`.
|
||||
are included as git submodules. To build urbit from source, perform the following steps:
|
||||
|
||||
## Configuration & compilation
|
||||
(For legacy Meson instructions, see below.)
|
||||
|
||||
1. Install all required dependencies.
|
||||
2. Run `./scripts/bootstrap`
|
||||
3. Run `./scripts/build`
|
||||
4. The executable should appear in the `./build` directory.
|
||||
|
||||
### Using meson & ninja
|
||||
To configure the project, enter the build directory and run
|
||||
`meson configure`. Without any arguments this command will display available
|
||||
options. For example, to compile a debug build of urbit, use
|
||||
`meson configure -Ddebug=true`.
|
||||
To set the prefix for installation use
|
||||
`meson configure -Dprefix=/usr`, and so on.
|
||||
|
||||
## Configuration & compilation for legacy meson
|
||||
|
||||
The syntax for legacy meson (Version `0.29`) is a bit different.
|
||||
1. Manually create the `build` directory and invoke meson as `meson . ./build`
|
||||
2. If you want to set options, this is done in one step.
|
||||
Use `meson -D [options] . ./build` to prepare a customized build.
|
||||
|
||||
Once the project is configured, use `ninja` to build it.
|
||||
To install it into the default prefix, use `ninja install`.
|
||||
If you want to specify a custom `DESTDIR`, use `DESTDIR=... ninja install`.
|
||||
|
||||
## Building the Debian Package
|
||||
|
||||
|
2
debian/control
vendored
2
debian/control
vendored
@ -2,7 +2,7 @@ Source: urbit
|
||||
Section: net
|
||||
Priority: extra
|
||||
Maintainer: Ted Blackman <ted@tlon.io>
|
||||
Build-Depends: debhelper (>= 9), libgmp3-dev, libsigsegv-dev, openssl, libssl-dev, automake, autoconf, libtool, g++, ragel, cmake, re2c, libcurl4-gnutls-dev
|
||||
Build-Depends: debhelper (>= 9), libgmp3-dev, libsigsegv-dev, openssl, libssl-dev,libtool, meson, re2c, libcurl4-gnutls-dev
|
||||
Standards-Version: 3.9.5
|
||||
Homepage: http://urbit.org
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
**
|
||||
** This file is in the public domain.
|
||||
*/
|
||||
# include "version.h"
|
||||
# include "config.h"
|
||||
/** c3: C environment.
|
||||
**/
|
||||
# include "c/portable.h" // C and OS portability
|
||||
@ -12,7 +12,7 @@
|
||||
|
||||
/** u3: noun environment.
|
||||
**/
|
||||
# include "noun/aliases.h" // general u3
|
||||
# include "noun/aliases.h" // general u3
|
||||
|
||||
# include "noun/allocate.h" // u3a: allocation
|
||||
# include "noun/events.h" // u3e: persistence
|
||||
@ -24,7 +24,7 @@
|
||||
# include "noun/options.h" // u3o: config options
|
||||
# include "noun/retrieve.h" // u3r: noun access (error returns)
|
||||
# include "noun/trace.h" // u3t: profiling / tracing
|
||||
# include "noun/xtract.h" // u3x: noun access (error crashes)
|
||||
# include "noun/xtract.h" // u3x: noun access (error crashes)
|
||||
# include "noun/vortex.h" // u3v: arvo kernel
|
||||
# include "noun/zave.h" // u3z: memoization
|
||||
|
||||
@ -52,4 +52,3 @@
|
||||
*/
|
||||
# define uH u3_term_io_hija()
|
||||
# define uL(x) u3_term_io_loja(x)
|
||||
|
||||
|
@ -4,10 +4,15 @@
|
||||
*/
|
||||
/** Must be compiled on gcc with C99 support.
|
||||
**/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
# ifndef __GNUC__
|
||||
# error "port me"
|
||||
# endif
|
||||
# ifndef _GNU_SOURCE
|
||||
# define _GNU_SOURCE
|
||||
# endif
|
||||
|
||||
|
||||
/** System include files.
|
||||
|
13
include/config.h.in
Normal file
13
include/config.h.in
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef CONFIG_H
|
||||
#define CONFIG_H
|
||||
|
||||
#mesondefine URBIT_VERSION
|
||||
|
||||
#mesondefine U3_OS_linux
|
||||
#mesondefine U3_OS_bsd
|
||||
#mesondefine U3_OS_osx
|
||||
|
||||
#mesondefine U3_OS_ENDIAN_little
|
||||
#mesondefine U3_OS_ENDIAN_big
|
||||
|
||||
#endif /*CONFIG_H*/
|
@ -1 +0,0 @@
|
||||
#define URBIT_VERSION "0.5.1"
|
@ -2,7 +2,7 @@
|
||||
**
|
||||
*/
|
||||
#include "all.h"
|
||||
#include <MurmurHash3.h>
|
||||
#include <murmur3.h>
|
||||
|
||||
/* functions
|
||||
*/
|
||||
|
@ -2,7 +2,7 @@
|
||||
**
|
||||
*/
|
||||
#include "all.h"
|
||||
#include "softfloat.h"
|
||||
#include <softfloat.h>
|
||||
|
||||
#define DOUBNAN 0x7ff8000000000000
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
**
|
||||
*/
|
||||
#include "all.h"
|
||||
#include "softfloat.h"
|
||||
#include <softfloat.h>
|
||||
|
||||
#define HALFNAN 0x7e00
|
||||
|
||||
@ -151,8 +151,8 @@
|
||||
/* div
|
||||
*/
|
||||
u3_noun
|
||||
u3qes_div(u3_atom a,
|
||||
u3_atom b,
|
||||
u3qes_div(u3_atom a,
|
||||
u3_atom b,
|
||||
u3_atom r)
|
||||
{
|
||||
union half c, d, e;
|
||||
@ -183,7 +183,7 @@
|
||||
/* sqt
|
||||
*/
|
||||
u3_noun
|
||||
u3qes_sqt(u3_atom a,
|
||||
u3qes_sqt(u3_atom a,
|
||||
u3_atom r)
|
||||
{
|
||||
union half c, d;
|
||||
|
@ -2,7 +2,7 @@
|
||||
**
|
||||
*/
|
||||
#include "all.h"
|
||||
#include "softfloat.h"
|
||||
#include <softfloat.h>
|
||||
|
||||
#define QUADNAN 0x7fff800000000000
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
**
|
||||
*/
|
||||
#include "all.h"
|
||||
#include "softfloat.h"
|
||||
#include <softfloat.h>
|
||||
|
||||
#define SINGNAN 0x7fc00000
|
||||
|
||||
@ -151,8 +151,8 @@
|
||||
/* div
|
||||
*/
|
||||
u3_noun
|
||||
u3qet_div(u3_atom a,
|
||||
u3_atom b,
|
||||
u3qet_div(u3_atom a,
|
||||
u3_atom b,
|
||||
u3_atom r)
|
||||
{
|
||||
union sing c, d, e;
|
||||
@ -183,7 +183,7 @@
|
||||
/* sqt
|
||||
*/
|
||||
u3_noun
|
||||
u3qet_sqt(u3_atom a,
|
||||
u3qet_sqt(u3_atom a,
|
||||
u3_atom r)
|
||||
{
|
||||
union sing c, d;
|
||||
|
123
jets/e/scr.c
123
jets/e/scr.c
@ -5,7 +5,9 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <errno.h>
|
||||
#include <crypto_scrypt.h>
|
||||
|
||||
#include <libscrypt.h>
|
||||
#include <sha256.h>
|
||||
|
||||
static int _crypto_scrypt(const uint8_t *, size_t, const uint8_t *, size_t,
|
||||
uint64_t, uint32_t, uint32_t, uint8_t *, size_t);
|
||||
@ -17,8 +19,8 @@ static int _crypto_scrypt(const uint8_t *, size_t, const uint8_t *, size_t,
|
||||
u3qes_hsl(u3_atom p, u3_atom pl,
|
||||
u3_atom s, u3_atom sl,
|
||||
u3_atom n,
|
||||
u3_atom r,
|
||||
u3_atom z,
|
||||
u3_atom r,
|
||||
u3_atom z,
|
||||
u3_atom d)
|
||||
{
|
||||
// asserting that n is power of 2 in _crypto_scrypt
|
||||
@ -30,7 +32,7 @@ static int _crypto_scrypt(const uint8_t *, size_t, const uint8_t *, size_t,
|
||||
(((c3_d)r * 128 * ((c3_d)n + z - 1)) <= (1 << 30))))
|
||||
return u3m_bail(c3__exit);
|
||||
|
||||
c3_y* b_p = u3a_malloc(pl + 1); c3_y* b_s= u3a_malloc(sl + 1);
|
||||
c3_y* b_p = u3a_malloc(pl + 1); c3_y* b_s= u3a_malloc(sl + 1);
|
||||
u3r_bytes(0, pl, b_p, p); u3r_bytes(0, sl, b_s, s);
|
||||
b_p[pl] = 0; b_s[sl]=0;
|
||||
c3_y* buf = u3a_malloc(d);
|
||||
@ -73,7 +75,7 @@ static int _crypto_scrypt(const uint8_t *, size_t, const uint8_t *, size_t,
|
||||
return u3m_bail(c3__exit);
|
||||
|
||||
c3_w pl = u3r_met(3, p); c3_w sl = u3r_met(3, s);
|
||||
c3_y* b_p = u3a_malloc(pl + 1); c3_y* b_s= u3a_malloc(sl + 1);
|
||||
c3_y* b_p = u3a_malloc(pl + 1); c3_y* b_s= u3a_malloc(sl + 1);
|
||||
u3r_bytes(0, pl, b_p, p); u3r_bytes(0, sl, b_s, s);
|
||||
b_p[pl] = 0; b_s[sl]=0;
|
||||
c3_y* buf = u3a_malloc(d);
|
||||
@ -112,12 +114,12 @@ static int _crypto_scrypt(const uint8_t *, size_t, const uint8_t *, size_t,
|
||||
(c != 0)))
|
||||
return u3m_bail(c3__exit);
|
||||
|
||||
c3_y* b_p = u3a_malloc(pl + 1); c3_y* b_s= u3a_malloc(pl + 1);
|
||||
c3_y* b_p = u3a_malloc(pl + 1); c3_y* b_s= u3a_malloc(pl + 1);
|
||||
u3r_bytes(0, pl, b_p, p); u3r_bytes(0, sl, b_s, s);
|
||||
b_p[pl] = 0; b_s[sl]=0;
|
||||
c3_y* buf = u3a_malloc(d);
|
||||
|
||||
PBKDF2_SHA256(b_p, pl, b_s, sl, c, buf, d);
|
||||
libscrypt_PBKDF2_SHA256(b_p, pl, b_s, sl, c, buf, d);
|
||||
|
||||
u3_noun res = u3i_bytes(d, buf);
|
||||
u3a_free(b_p); u3a_free(b_s); u3a_free(buf);
|
||||
@ -147,12 +149,12 @@ static int _crypto_scrypt(const uint8_t *, size_t, const uint8_t *, size_t,
|
||||
return u3m_bail(c3__exit);
|
||||
|
||||
c3_w pl = u3r_met(3, p); c3_w sl = u3r_met(3, s);
|
||||
c3_y* b_p = u3a_malloc(pl + 1); c3_y* b_s= u3a_malloc(pl + 1);
|
||||
c3_y* b_p = u3a_malloc(pl + 1); c3_y* b_s= u3a_malloc(pl + 1);
|
||||
u3r_bytes(0, pl, b_p, p); u3r_bytes(0, sl, b_s, s);
|
||||
b_p[pl] = 0; b_s[sl]=0;
|
||||
c3_y* buf = u3a_malloc(d);
|
||||
|
||||
PBKDF2_SHA256(b_p, pl, b_s, sl, c, buf, d);
|
||||
libscrypt_PBKDF2_SHA256(b_p, pl, b_s, sl, c, buf, d);
|
||||
|
||||
u3_noun res = u3i_bytes(d, buf);
|
||||
u3a_free(b_p); u3a_free(b_s); u3a_free(buf);
|
||||
@ -170,35 +172,6 @@ static int _crypto_scrypt(const uint8_t *, size_t, const uint8_t *, size_t,
|
||||
return u3qes_pbk(p, s, c, d);
|
||||
}
|
||||
|
||||
/*-
|
||||
* Copyright 2009 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* This file was originally written by Colin Percival as part of the Tarsnap
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
/**
|
||||
* crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
|
||||
* Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
|
||||
@ -213,77 +186,5 @@ _crypto_scrypt(const uint8_t * passwd, size_t passwdlen,
|
||||
const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p,
|
||||
uint8_t * buf, size_t buflen)
|
||||
{
|
||||
void * B0, * V0, * XY0;
|
||||
uint8_t * B;
|
||||
uint32_t * V;
|
||||
uint32_t * XY;
|
||||
uint32_t i;
|
||||
|
||||
if (((N & (N-1)) != 0) || N == 0)
|
||||
goto err0;
|
||||
|
||||
/* Sanity-check parameters. */
|
||||
#if SIZE_MAX > UINT32_MAX
|
||||
if (buflen > (((uint64_t)(1) << 32) - 1) * 32) {
|
||||
errno = EFBIG;
|
||||
goto err0;
|
||||
}
|
||||
#endif
|
||||
if ((uint64_t)(r) * (uint64_t)(p) >= (1 << 30)) {
|
||||
errno = EFBIG;
|
||||
goto err0;
|
||||
}
|
||||
if (((N & (N - 1)) != 0) || (N == 0)) {
|
||||
errno = EINVAL;
|
||||
goto err0;
|
||||
}
|
||||
int test_size_max = (r > SIZE_MAX / 128 / p) || (N > SIZE_MAX / 128 / r);
|
||||
|
||||
#if SIZE_MAX / 256 <= UINT32_MAX
|
||||
test_size_max = (r > (SIZE_MAX - 64) / 256) || test_size_max;
|
||||
#endif
|
||||
if(test_size_max) {
|
||||
errno = ENOMEM;
|
||||
goto err0;
|
||||
}
|
||||
|
||||
/* Allocate memory. */
|
||||
if ((B0 = u3a_malloc(128 * r * p + 63)) == NULL)
|
||||
goto err0;
|
||||
B = (uint8_t *)(((uintptr_t)(B0) + 63) & ~ (uintptr_t)(63));
|
||||
if ((XY0 = u3a_malloc(256 * r + 64 + 63)) == NULL)
|
||||
goto err1;
|
||||
XY = (uint32_t *)(((uintptr_t)(XY0) + 63) & ~ (uintptr_t)(63));
|
||||
if ((V0 = u3a_malloc(128 * r * N + 63)) == NULL)
|
||||
goto err2;
|
||||
V = (uint32_t *)(((uintptr_t)(V0) + 63) & ~ (uintptr_t)(63));
|
||||
|
||||
/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
|
||||
PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, B, p * 128 * r);
|
||||
|
||||
/* 2: for i = 0 to p - 1 do */
|
||||
for (i = 0; i < p; i++) {
|
||||
/* 3: B_i <-- MF(B_i, N) */
|
||||
smix(&B[i * 128 * r], r, N, V, XY);
|
||||
}
|
||||
|
||||
/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
|
||||
PBKDF2_SHA256(passwd, passwdlen, B, p * 128 * r, 1, buf, buflen);
|
||||
|
||||
/* Free memory. */
|
||||
|
||||
u3a_free(V0);
|
||||
u3a_free(XY0);
|
||||
u3a_free(B0);
|
||||
|
||||
/* Success! */
|
||||
return (0);
|
||||
|
||||
err2:
|
||||
u3a_free(XY0);
|
||||
err1:
|
||||
u3a_free(B0);
|
||||
err0:
|
||||
/* Failure! */
|
||||
return (-1);
|
||||
return libscrypt_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen);
|
||||
}
|
||||
|
332
meson.build
Normal file
332
meson.build
Normal file
@ -0,0 +1,332 @@
|
||||
project('urbit', 'c', meson_version: '>=0.29.0')
|
||||
|
||||
legacy_meson = false
|
||||
|
||||
detect_meson_version = run_command('meson', '--version')
|
||||
meson_ver = detect_meson_version.stdout()
|
||||
|
||||
if(meson_ver == '0.29.0\n')
|
||||
legacy_meson = true
|
||||
elif(not meson.version().version_compare('>=0.40.0'))
|
||||
error('Meson 0.29.0 is last legacy version supported. Otherwise please upgrade to 0.40.0 or higher.')
|
||||
endif
|
||||
|
||||
jets_a_src = [
|
||||
'jets/a/add.c',
|
||||
'jets/a/dec.c',
|
||||
'jets/a/div.c',
|
||||
'jets/a/gte.c',
|
||||
'jets/a/gth.c',
|
||||
'jets/a/lte.c',
|
||||
'jets/a/lth.c',
|
||||
'jets/a/mod.c',
|
||||
'jets/a/mul.c',
|
||||
'jets/a/sub.c',]
|
||||
|
||||
jets_b_src = [
|
||||
'jets/b/bind.c',
|
||||
'jets/b/clap.c',
|
||||
'jets/b/drop.c',
|
||||
'jets/b/flop.c',
|
||||
'jets/b/lent.c',
|
||||
'jets/b/levy.c',
|
||||
'jets/b/lien.c',
|
||||
'jets/b/murn.c',
|
||||
'jets/b/need.c',
|
||||
'jets/b/reap.c',
|
||||
'jets/b/reel.c',
|
||||
'jets/b/roll.c',
|
||||
'jets/b/skid.c',
|
||||
'jets/b/skim.c',
|
||||
'jets/b/skip.c',
|
||||
'jets/b/scag.c',
|
||||
'jets/b/slag.c',
|
||||
'jets/b/snag.c',
|
||||
'jets/b/sort.c',
|
||||
'jets/b/turn.c',
|
||||
'jets/b/weld.c' ]
|
||||
|
||||
jets_c_src = [
|
||||
'jets/c/bex.c',
|
||||
'jets/c/xeb.c',
|
||||
'jets/c/can.c',
|
||||
'jets/c/cap.c',
|
||||
'jets/c/cat.c',
|
||||
'jets/c/con.c',
|
||||
'jets/c/cut.c',
|
||||
'jets/c/dor.c',
|
||||
'jets/c/dvr.c',
|
||||
'jets/c/dis.c',
|
||||
'jets/c/end.c',
|
||||
'jets/c/gor.c',
|
||||
'jets/c/hor.c',
|
||||
'jets/c/lsh.c',
|
||||
'jets/c/mas.c',
|
||||
'jets/c/met.c',
|
||||
'jets/c/mix.c',
|
||||
'jets/c/mug.c',
|
||||
'jets/c/muk.c',
|
||||
'jets/c/peg.c',
|
||||
'jets/c/po.c',
|
||||
'jets/c/pow.c',
|
||||
'jets/c/rap.c',
|
||||
'jets/c/rep.c',
|
||||
'jets/c/rip.c',
|
||||
'jets/c/rsh.c',
|
||||
'jets/c/sqt.c',
|
||||
'jets/c/vor.c',
|
||||
]
|
||||
|
||||
jets_d_src = [
|
||||
'jets/d/in_has.c',
|
||||
'jets/d/in_int.c',
|
||||
'jets/d/in_gas.c',
|
||||
'jets/d/in_mer.c',
|
||||
'jets/d/in_put.c',
|
||||
'jets/d/in_tap.c',
|
||||
'jets/d/in_uni.c',
|
||||
'jets/d/in_wyt.c',
|
||||
'jets/d/in_bif.c',
|
||||
'jets/d/in_dif.c',
|
||||
'jets/d/by_gas.c',
|
||||
'jets/d/by_get.c',
|
||||
'jets/d/by_has.c',
|
||||
'jets/d/by_int.c',
|
||||
'jets/d/by_put.c',
|
||||
'jets/d/by_uni.c',
|
||||
'jets/d/by_bif.c',
|
||||
'jets/d/by_dif.c'
|
||||
]
|
||||
|
||||
jets_e_src = [
|
||||
'jets/e/aes_ecb.c',
|
||||
'jets/e/aes_cbc.c',
|
||||
'jets/e/aesc.c',
|
||||
'jets/e/cue.c',
|
||||
'jets/e/fl.c',
|
||||
'jets/e/jam.c',
|
||||
'jets/e/mat.c',
|
||||
'jets/e/mink.c',
|
||||
'jets/e/mule.c',
|
||||
'jets/e/parse.c',
|
||||
'jets/e/rd.c',
|
||||
'jets/e/rq.c',
|
||||
'jets/e/rs.c',
|
||||
'jets/e/rh.c',
|
||||
'jets/e/rub.c',
|
||||
'jets/e/scr.c',
|
||||
'jets/e/shax.c',
|
||||
'jets/e/lore.c',
|
||||
'jets/e/loss.c',
|
||||
'jets/e/lune.c',
|
||||
'jets/e/trip.c'
|
||||
]
|
||||
|
||||
jets_e_ed_src = [
|
||||
'jets/e/ed_puck.c',
|
||||
'jets/e/ed_sign.c',
|
||||
'jets/e/ed_veri.c',
|
||||
'jets/e/ed_shar.c'
|
||||
|
||||
]
|
||||
jets_f_src = [
|
||||
'jets/f/ap.c',
|
||||
'jets/f/cell.c',
|
||||
'jets/f/comb.c',
|
||||
'jets/f/cons.c',
|
||||
'jets/f/core.c',
|
||||
'jets/f/face.c',
|
||||
'jets/f/fitz.c',
|
||||
'jets/f/flan.c',
|
||||
'jets/f/flip.c',
|
||||
'jets/f/flor.c',
|
||||
'jets/f/fork.c',
|
||||
'jets/f/help.c',
|
||||
'jets/f/hike.c',
|
||||
'jets/f/look.c',
|
||||
'jets/f/loot.c'
|
||||
]
|
||||
|
||||
jets_f_ut_src = [
|
||||
'jets/f/ut.c',
|
||||
'jets/f/ut_burn.c',
|
||||
'jets/f/ut_buss.c',
|
||||
'jets/f/ut_conk.c',
|
||||
'jets/f/ut_crop.c',
|
||||
'jets/f/ut_find.c',
|
||||
'jets/f/ut_fire.c',
|
||||
'jets/f/ut_fish.c',
|
||||
'jets/f/ut_fuse.c',
|
||||
'jets/f/ut_gain.c',
|
||||
'jets/f/ut_lose.c',
|
||||
'jets/f/ut_mint.c',
|
||||
'jets/f/ut_mull.c',
|
||||
'jets/f/ut_nest.c',
|
||||
'jets/f/ut_peek.c',
|
||||
'jets/f/ut_peel.c',
|
||||
'jets/f/ut_play.c',
|
||||
'jets/f/ut_repo.c',
|
||||
'jets/f/ut_rest.c',
|
||||
'jets/f/ut_tack.c',
|
||||
'jets/f/ut_toss.c',
|
||||
'jets/f/ut_wrap.c'
|
||||
]
|
||||
|
||||
jets_g_src = [
|
||||
'jets/g/down.c'
|
||||
]
|
||||
|
||||
jets_src = [
|
||||
'jets/tree.c'
|
||||
]
|
||||
noun_src = ['noun/allocate.c',
|
||||
'noun/events.c',
|
||||
'noun/hashtable.c',
|
||||
'noun/imprison.c',
|
||||
'noun/jets.c',
|
||||
'noun/manage.c',
|
||||
'noun/nock.c',
|
||||
'noun/retrieve.c',
|
||||
'noun/trace.c',
|
||||
'noun/vortex.c',
|
||||
'noun/xtract.c',
|
||||
'noun/zave.c']
|
||||
|
||||
vere_src = ['vere/ames.c',
|
||||
'vere/behn.c',
|
||||
'vere/cttp.c',
|
||||
'vere/http.c',
|
||||
'vere/loop.c',
|
||||
'vere/main.c',
|
||||
'vere/raft.c',
|
||||
'vere/reck.c',
|
||||
'vere/save.c',
|
||||
'vere/sist.c',
|
||||
'vere/term.c',
|
||||
'vere/time.c',
|
||||
'vere/unix.c',
|
||||
'vere/walk.c']
|
||||
|
||||
src_list = [
|
||||
vere_src, noun_src,
|
||||
jets_a_src, jets_b_src,
|
||||
jets_c_src, jets_d_src,
|
||||
jets_e_src, jets_e_ed_src, jets_f_src, jets_f_ut_src,
|
||||
jets_g_src, jets_src]
|
||||
|
||||
sources = []
|
||||
foreach s : src_list
|
||||
sources += s
|
||||
endforeach
|
||||
|
||||
incdir = include_directories('include/')
|
||||
|
||||
conf_data = configuration_data()
|
||||
conf_data.set('URBIT_VERSION', '"0.5.1"')
|
||||
|
||||
osdet = build_machine.system()
|
||||
os_c_flags = ['-funsigned-char','-ffast-math']
|
||||
os_deps = []
|
||||
os_link_flags = []
|
||||
|
||||
if osdet == 'linux'
|
||||
conf_data.set('U3_OS_linux', true)
|
||||
|
||||
if(legacy_meson)
|
||||
pthread_dep = find_library('pthread')
|
||||
else
|
||||
pthread_dep = meson.get_compiler('c').find_library('pthread')
|
||||
endif
|
||||
|
||||
ncurses_dep = dependency('ncurses')
|
||||
os_deps = os_deps + [pthread_dep, ncurses_dep]
|
||||
|
||||
elif osdet == 'darwin'
|
||||
conf_data.set('U3_OS_osx', true)
|
||||
|
||||
os_c_flags = os_c_flags + ['-bind_at_load']
|
||||
# os_link_flags = ['-framework CoreServices', '-framework CoreFoundation']
|
||||
if(legacy_meson)
|
||||
ncurses_dep = find_library('ncurses')
|
||||
else
|
||||
ncurses_dep = meson.get_compiler('c').find_library('ncurses')
|
||||
endif
|
||||
|
||||
os_deps = os_deps + [ncurses_dep]
|
||||
|
||||
# Meson reports BSD hosts by their concrete system name ('freebsd', 'openbsd'), never plain 'bsd'; keep 'bsd' for backward compatibility.
elif osdet == 'bsd' or osdet == 'freebsd' or osdet == 'openbsd'
|
||||
conf_data.set('U3_OS_bsd', true)
|
||||
|
||||
pthread_dep = meson.get_compiler('c').find_library('pthread')
|
||||
kvm_dep = meson.get_compiler('c').find_library('kvm')
|
||||
ncurses_dep = dependency('ncurses')
|
||||
os_deps = os_deps + [kvm_dep, pthread_dep, ncurses_dep]
|
||||
else
|
||||
# Abort configuration on any OS without a dedicated branch above; the trailing space keeps the OS name readable in the message.
error('Unsupported OS detected: ' + osdet)
|
||||
endif
|
||||
|
||||
endian = build_machine.endian()
|
||||
|
||||
if endian == 'little'
|
||||
conf_data.set('U3_OS_ENDIAN_little', true)
|
||||
else
|
||||
error('Little endian encoding required')
|
||||
endif
|
||||
|
||||
configure_file(input : 'include/config.h.in',
|
||||
output : 'config.h',
|
||||
configuration : conf_data)
|
||||
|
||||
# We expect these libs to be supplied with the distribution
|
||||
curl_dep = dependency('libcurl', version: '>=7.35.0')
|
||||
|
||||
if osdet == 'darwin'
|
||||
libcrypto = meson.get_compiler('c').find_library('crypto', dirs: [ '/usr/local/opt/openssl/lib/' ])
|
||||
libssl = meson.get_compiler('c').find_library('ssl', dirs: [ '/usr/local/opt/openssl/lib/' ])
|
||||
openssl_dep = declare_dependency(dependencies: [libcrypto, libssl], include_directories: include_directories('/usr/local/opt/openssl/include'))
|
||||
else
|
||||
openssl_dep = dependency('openssl', version: '>=1.0.0')
|
||||
endif
|
||||
|
||||
if(legacy_meson)
|
||||
gmp_dep = find_library('gmp')
|
||||
sigsegv_dep = find_library('sigsegv')
|
||||
else
|
||||
gmp_dep = meson.get_compiler('c').find_library('gmp')
|
||||
sigsegv_dep = meson.get_compiler('c').find_library('sigsegv')
|
||||
endif
|
||||
|
||||
# For these libs we provide fallback bundle
|
||||
cmark_dep = dependency('libcmark', version: '0.12.0', fallback: ['commonmark-legacy', 'cmark_dep'])
|
||||
urbitscrypt_dep = dependency('libscrypt', version: '>=0.1.21', fallback: ['libscrypt', 'libscrypt_dep'])
|
||||
|
||||
ed25519_dep = dependency('ed25519', version: '>=0.1.0', fallback: ['ed25519', 'ed25519_dep'])
|
||||
murmur3_dep = dependency('murmur3', version: '>=0.1.0', fallback: ['murmur3', 'murmur3_dep'])
|
||||
http_parser_dep = dependency('http-parser', version: '0.1.0', fallback: ['http-parser-legacy', 'http_parser_dep'])
|
||||
softfloat3_dep = dependency('softfloat3', version: '>=3.0.0', fallback: ['softfloat3', 'softfloat3_dep'])
|
||||
libuv_dep = dependency('libuv', version: '>=1.8.0', fallback:['libuv', 'libuv_dep'])
|
||||
|
||||
opt_flags = []
|
||||
if get_option('debug')
|
||||
opt_flags = ['-g']
|
||||
else
|
||||
opt_flags = ['-O3']
|
||||
endif
|
||||
|
||||
executable('urbit',
|
||||
sources : sources,
|
||||
include_directories : incdir,
|
||||
c_args : opt_flags + os_c_flags,
|
||||
link_args: os_link_flags,
|
||||
dependencies: [openssl_dep,
|
||||
curl_dep,
|
||||
libuv_dep,
|
||||
cmark_dep,
|
||||
gmp_dep,
|
||||
sigsegv_dep,
|
||||
urbitscrypt_dep,
|
||||
ed25519_dep,
|
||||
murmur3_dep,
|
||||
http_parser_dep,
|
||||
softfloat3_dep] + os_deps,
|
||||
install: true)
|
1
meson_options.txt
Normal file
1
meson_options.txt
Normal file
@ -0,0 +1 @@
|
||||
option('debug', type:'boolean', value: false)
|
1
outside/anachronism/.gitignore
vendored
1
outside/anachronism/.gitignore
vendored
@ -1 +0,0 @@
|
||||
build/
|
@ -1,19 +0,0 @@
|
||||
Copyright (c) 2010 Jonathan Castello
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
@ -1,70 +0,0 @@
|
||||
SHELL = sh
|
||||
UNAME = $(shell uname)
|
||||
|
||||
ifneq ($(UNAME),FreeBSD)
|
||||
CC = gcc
|
||||
else
|
||||
CC = cc
|
||||
endif
|
||||
FLAGS = -c -fPIC -Iinclude/
|
||||
CFLAGS = --pedantic -Wall -Wextra -march=native -std=gnu99
|
||||
INCLUDE = include/anachronism
|
||||
|
||||
VERSION_MAJOR = 0
|
||||
VERSION = $(VERSION_MAJOR).3.1
|
||||
|
||||
SO = libanachronism.so
|
||||
SOFILE = $(SO).$(VERSION)
|
||||
SONAME = $(SO).$(VERSION_MAJOR)
|
||||
|
||||
|
||||
all: static shared
|
||||
shared: build/ build/$(SOFILE)
|
||||
static: build/ build/libanachronism.a
|
||||
|
||||
build/:
|
||||
mkdir build
|
||||
|
||||
build/$(SOFILE): build/nvt.o build/parser.o
|
||||
$(CC) -shared -Wl,-soname,$(SONAME) -o build/$(SOFILE) build/nvt.o build/parser.o
|
||||
|
||||
build/libanachronism.a: build/nvt.o build/parser.o
|
||||
ar rcs build/libanachronism.a build/nvt.o build/parser.o
|
||||
|
||||
build/nvt.o: src/nvt.c $(INCLUDE)/nvt.h $(INCLUDE)/common.h
|
||||
$(CC) $(FLAGS) $(CFLAGS) src/nvt.c -o build/nvt.o
|
||||
|
||||
build/parser.o: src/parser.c $(INCLUDE)/parser.h $(INCLUDE)/common.h
|
||||
$(CC) $(FLAGS) $(CFLAGS) src/parser.c -o build/parser.o
|
||||
|
||||
src/parser.c: src/parser.rl src/parser_common.rl
|
||||
ragel -C -G2 src/parser.rl -o src/parser.c
|
||||
|
||||
|
||||
graph: doc/parser.png
|
||||
|
||||
doc/parser.png: src/parser.rl src/parser_common.rl
|
||||
ragel -V -p src/parser.rl | dot -Tpng > doc/parser.png
|
||||
|
||||
install: all
|
||||
install -D -d /usr/local/include/anachronism/ /usr/local/lib
|
||||
install -D include/anachronism/* /usr/local/include/anachronism/
|
||||
install -D build/$(SOFILE) /usr/local/lib/$(SOFILE)
|
||||
install -D build/libanachronism.a /usr/local/lib/libanachronism.a
|
||||
ln -s -f /usr/local/lib/$(SOFILE) /usr/local/lib/$(SONAME)
|
||||
ln -s -f /usr/local/lib/$(SOFILE) /usr/local/lib/$(SO)
|
||||
|
||||
uninstall:
|
||||
-rm -rf /usr/local/include/anachronism
|
||||
-rm /usr/local/lib/libanachronism.a
|
||||
-rm /usr/local/lib/$(SOFILE)
|
||||
-rm /usr/local/lib/$(SONAME)
|
||||
-rm /usr/local/lib/$(SO)
|
||||
|
||||
clean:
|
||||
-rm -f build/nvt.o build/router.o build/parser.o
|
||||
|
||||
distclean: clean
|
||||
-rm -f build/libanachronism.a build/$(SOFILE)
|
||||
|
||||
.PHONY: all static shared clean distclean install uninstall
|
@ -1,158 +0,0 @@
|
||||
# Anachronism
|
||||
Anachronism is a fully-compliant implementation of [the Telnet protocol][wiki-telnet]. Fallen
|
||||
out of favor in this day and age, most people only know it as a command-line
|
||||
tool for debugging HTTP. Today, Telnet is most commonly used in the realm of
|
||||
[MUDs][wiki-muds], though there are still a few other niches filled by Telnet.
|
||||
|
||||
Anachronism offers a simple API for translating between streams of data and
|
||||
events, and is completely network-agnostic. Anachronism also offers **channels**, an
|
||||
abstraction layer which treats Telnet as a data multiplexer. Channels make it
|
||||
extremely easy to build reusable modules for Telnet sub-protocols such
|
||||
as MCCP (MUD Client Compression Protocol), which can be written once and plugged
|
||||
into any application that wants to include support.
|
||||
|
||||
[wiki-telnet]: http://en.wikipedia.org/wiki/Telnet (Telnet at Wikipedia)
|
||||
[wiki-muds]: http://en.wikipedia.org/wiki/MUD (MUDs at Wikipedia)
|
||||
|
||||
## Installation
|
||||
While Anachronism has no dependencies and is theoretically cross-platform, I've
|
||||
only written a Makefile for Linux. Help would be appreciated for making this
|
||||
work across more platforms.
|
||||
|
||||
make
|
||||
sudo make install
|
||||
|
||||
This will install Anachronism's shared and static libraries to /usr/local/lib,
|
||||
and its header files to /usr/local/include/anachronism/. You may also need to
|
||||
run `ldconfig` to make Anachronism available to your project's compiler/linker.
|
||||
|
||||
## Usage
|
||||
The anachronism/nvt.h header can be consulted for more complete documentation.
|
||||
|
||||
### Basic usage
|
||||
The core type exposed by Anachronism is the telnet\_nvt, which represents the
|
||||
Telnet RFC's "Network Virtual Terminal". An NVT is created using
|
||||
telnet\_nvt\_new(). When creating an NVT, you must provide it with a set of
|
||||
callbacks to send events to, and an optional void\* to store as the event
|
||||
handler's context. You can use telnet\_receive() to process incoming data, and
|
||||
the telnet\_send\_\*() set of functions to emit outgoing data.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <anachronism/nvt.h>
|
||||
|
||||
void on_event(telnet_nvt* nvt, telnet_event* event)
|
||||
{
|
||||
switch (event->type)
|
||||
{
|
||||
// A data event (normal text received)
|
||||
case TELNET_EV_DATA:
|
||||
{
|
||||
telnet_data_event* ev = (telnet_data_event*)event;
|
||||
printf("[IN]: %.*s\n", ev->length, ev->data);
|
||||
break;
|
||||
}
|
||||
|
||||
// Outgoing data emitted by the NVT
|
||||
case TELNET_EV_SEND:
|
||||
{
|
||||
telnet_send_event* ev = (telnet_send_event*)event;
|
||||
printf("[OUT]: %.*s\n", ev->length, ev->data);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// Create an NVT
|
||||
telnet_nvt* nvt = telnet_nvt_new(NULL, &on_event, NULL, NULL);
|
||||
|
||||
// Process some incoming data
|
||||
const char* data = "foo bar baz";
|
||||
telnet_receive(nvt, (const telnet_byte*)data, strlen(data), NULL);
|
||||
|
||||
// Free the NVT
|
||||
telnet_nvt_free(nvt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
### Telopts
|
||||
Anachronism provides an easy-to-use interface to Telnet's "telopt" functionality
|
||||
via the telnet\_telopt\_*() set of functions. As telopts are negotiated and
|
||||
utilized, events are sent to the telopt callback provided to telnet_nvt_new().
|
||||
|
||||
#include <stdio.h>
|
||||
#include <anachronism/nvt.h>
|
||||
|
||||
void on_event(telnet_nvt* nvt, telnet_event* event)
|
||||
{
|
||||
switch (event->type)
|
||||
{
|
||||
// Outgoing data emitted by the NVT
|
||||
case TELNET_EV_SEND:
|
||||
{
|
||||
telnet_send_event* ev = (telnet_send_event*)event;
|
||||
printf("[OUT]: %.*s\n", ev->length, ev->data);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void on_telopt_event(telnet_nvt* nvt, telnet_byte telopt, telnet_telopt_event* event)
|
||||
{
|
||||
// telopt is the telopt this event was triggered for
|
||||
|
||||
switch (event->type)
|
||||
{
|
||||
case TELNET_EV_TELOPT_TOGGLE:
|
||||
telnet_telopt_toggle_event* ev = (telnet_telopt_toggle_event*)event;
|
||||
// ev->where is TELNET_LOCAL or TELNET_REMOTE,
|
||||
// corresponding to Telnet's WILL/WONT and DO/DONT commands.
|
||||
// ev->status is TELNET_TELOPT_ON or TELNET_TELOPT_OFF.
|
||||
break;
|
||||
case TELNET_EV_TELOPT_FOCUS:
|
||||
telnet_telopt_focus_event* ev = (telnet_telopt_focus_event*)event;
|
||||
// ev->focus is 1 or 0 depending on if a subnegotiation packet has
|
||||
// begun or ended.
|
||||
break;
|
||||
case TELNET_EV_TELOPT_DATA:
|
||||
telnet_telopt_data_event* ev = (telnet_telopt_data_event*)event;
|
||||
// ev->data is a pointer to the received data.
|
||||
// ev->length is the length of the data buffer.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// Create an NVT
|
||||
telnet_nvt* nvt = telnet_nvt_new(NULL, &on_event, &on_telopt_event, NULL);
|
||||
|
||||
// Ask to enable a telopt locally (a WILL command)
|
||||
telnet_telopt_enable(nvt, 230, TELNET_LOCAL);
|
||||
|
||||
// Process some incoming data
|
||||
const char* data = "\xFF\xFD\xE6" // IAC DO 230 (turn channel on)
|
||||
"\xFF\xFA\xE6" // IAC SB 230 (switch to channel)
|
||||
"foo bar baz" (send data)
|
||||
"\xFF\xF0"; // IAC SE (switch to main)
|
||||
telnet_receive(nvt, (const telnet_byte*)data, strlen(data), NULL);
|
||||
|
||||
// Free the NVT
|
||||
telnet_nvt_free(nvt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
### Interrupting
|
||||
TODO: Explain how to interrupt the parser.
|
||||
|
||||
## Alternatives
|
||||
* [libtelnet][github-libtelnet], by Elanthis<br>
|
||||
It incorporates a number of (rather MUD-specific) protocols by default,
|
||||
though its API is quite different.
|
||||
|
||||
[github-libtelnet]: https://github.com/elanthis/libtelnet (libtelnet on GitHub)
|
||||
|
||||
## Credits
|
||||
Someone from #startups on Freenode IRC suggested the name (I'm sure as a joke).
|
||||
If you read this, remind me who you are so I can credit you properly!
|
@ -1,50 +0,0 @@
|
||||
# Telnet
|
||||
|
||||
## Channels
|
||||
Telnet supports data multiplexing by way of 256 built-in sub-channels, each
|
||||
identified by a byte in the interval [\x00-\xFF]. By switching between
|
||||
channels, you can send completely separate streams of data through the same
|
||||
connection.
|
||||
|
||||
All channels start out closed by default. To open a channel, one host must
|
||||
request or offer a channel using IAC WILL <id> or IAC DO <id>. The remote host
|
||||
then responds with IAC DO <id> or IAC WILL <id>, respectively. Alternatively,
|
||||
the request may be denied using IAC DONT <id> or IAC WONT <id>, respectively.
|
||||
|
||||
In order to switch to a specific channel, the IAC SB <id> sequence must
|
||||
be used. All data sent afterwards will be routed through that specific channel.
|
||||
To switch back to the main channel, IAC SE must be used. Note that subchannels
|
||||
do not support any IAC sequences except IAC IAC (an escaped \xFF byte) and
|
||||
IAC SE (return to the main channel). In particular, you cannot switch directly
|
||||
from one subchannel to another: you must revert to the main channel first.
|
||||
|
||||
Due to the unbiased nature of Telnet, neither side of the connection is
|
||||
automatically recognized as the server or the client. However, a host may either
|
||||
request a channel (as a client) or offer a channel (as a server). The WILL/WONT
|
||||
commands are used in the role of server ("I will", "I wont"), while DO/DONT
|
||||
are used in the role of client ("You do", "You do not"). As such, a channel
|
||||
may be opened twice (even simultaneously).
|
||||
|
||||
As an example, let's assume a terminal is connected to a server using Telnet. The
|
||||
server offers MCCP (data compression), but wants to know what the terminal's
|
||||
window size is. The following communication might occur:
|
||||
|
||||
<server> IAC DO NAWS
|
||||
<server> IAC WILL MCCP
|
||||
<client> IAC WILL NAWS
|
||||
<client> IAC SB NAWS \x50 \x00 \x50 \x00 IAC SE
|
||||
<client> IAC DO MCCP
|
||||
<server> IAC SB MCCP IAC SE
|
||||
<server> (compressed data)
|
||||
|
||||
Notice that MCCP was negotiated such that the server offers the compression.
|
||||
Only the server-to-client flow of data is compressed; the client would not
|
||||
compress its data unless the channel was negotiated in the other direction as
|
||||
well.
|
||||
|
||||
In general, a specific subchannel is tied to a specific Telnet subprotocol. For
|
||||
example, the EXOPL subprotocol is assigned to channel 255, so that channel
|
||||
should be avoided for any other purpose. A full list of registered subprotocols
|
||||
can be found on the [IANA website][1].
|
||||
|
||||
[1]: http://www.iana.org/assignments/telnet-options
|
Binary file not shown.
Before Width: | Height: | Size: 148 KiB |
@ -1,24 +0,0 @@
|
||||
#ifndef ANACHRONISM_COMMON_H
|
||||
#define ANACHRONISM_COMMON_H
|
||||
|
||||
#include <stdlib.h> /* for size_t */
|
||||
|
||||
// Telnet bytes must be unsigned
|
||||
typedef unsigned char telnet_byte;
|
||||
|
||||
// Error codes returned from API functions
|
||||
// Positive codes are success/notice codes.
|
||||
// Nonpositive codes are errors.
|
||||
// ALLOC is 0 for parity with the NULL result from malloc().
|
||||
typedef enum telnet_error
|
||||
{
|
||||
TELNET_E_NOT_SUBNEGOTIABLE = -4, // The telopt is not open for subnegotiation.
|
||||
TELNET_E_BAD_PARSER = -3, // The telnet_parser* passed is NULL
|
||||
TELNET_E_BAD_NVT = -2, // The telnet_nvt* passed is NULL
|
||||
TELNET_E_INVALID_COMMAND = -1, // The telnet_byte passed is not an allowed command in this API method
|
||||
TELNET_E_ALLOC = 0, // Not enough memory to allocate essential library structures
|
||||
TELNET_E_OK = 1, // Huge Success!
|
||||
TELNET_E_INTERRUPT = 2, // Parser interrupted by user code.
|
||||
} telnet_error;
|
||||
|
||||
#endif // ANACHRONISM_COMMON_H
|
@ -1,214 +0,0 @@
|
||||
#ifndef ANACHRONISM_ANACHRONISM_H
|
||||
#define ANACHRONISM_ANACHRONISM_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <anachronism/common.h>
|
||||
|
||||
// predefined Telnet commands from 240-255
|
||||
enum
|
||||
{
|
||||
IAC_SE = 240,
|
||||
IAC_NOP,
|
||||
IAC_DM,
|
||||
IAC_BRK,
|
||||
IAC_IP,
|
||||
IAC_AO,
|
||||
IAC_AYT,
|
||||
IAC_EC,
|
||||
IAC_EL,
|
||||
IAC_GA,
|
||||
IAC_SB,
|
||||
IAC_WILL,
|
||||
IAC_WONT,
|
||||
IAC_DO,
|
||||
IAC_DONT,
|
||||
IAC_IAC,
|
||||
};
|
||||
|
||||
typedef enum telnet_telopt_location
|
||||
{
|
||||
TELNET_LOCAL,
|
||||
TELNET_REMOTE,
|
||||
} telnet_telopt_location;
|
||||
|
||||
|
||||
/**
|
||||
* NVT Events
|
||||
*/
|
||||
|
||||
typedef enum telnet_event_type
|
||||
{
|
||||
TELNET_EV_DATA, /* A stretch of plain data was received. (data, length) */
|
||||
TELNET_EV_COMMAND, /* A simple IAC command was received. (command) */
|
||||
TELNET_EV_WARNING, /* A non-fatal invalid sequence was received. (message, position) */
|
||||
TELNET_EV_SEND, /* Outgoing data to be sent. (data, length) */
|
||||
} telnet_event_type;
|
||||
|
||||
typedef struct telnet_event
|
||||
{
|
||||
telnet_event_type type;
|
||||
} telnet_event;
|
||||
|
||||
typedef struct telnet_data_event
|
||||
{
|
||||
telnet_event SUPER_;
|
||||
const telnet_byte* data;
|
||||
size_t length;
|
||||
} telnet_data_event;
|
||||
|
||||
typedef struct telnet_command_event
|
||||
{
|
||||
telnet_event SUPER_;
|
||||
telnet_byte command;
|
||||
} telnet_command_event;
|
||||
|
||||
typedef struct telnet_warning_event
|
||||
{
|
||||
telnet_event SUPER_;
|
||||
const char* message;
|
||||
size_t position;
|
||||
} telnet_warning_event;
|
||||
|
||||
typedef struct telnet_send_event
|
||||
{
|
||||
telnet_event SUPER_;
|
||||
const telnet_byte* data;
|
||||
size_t length;
|
||||
} telnet_send_event;
|
||||
|
||||
|
||||
/**
|
||||
* Telopt Events
|
||||
*/
|
||||
|
||||
typedef enum telnet_telopt_event_type
|
||||
{
|
||||
TELNET_EV_TELOPT_TOGGLE,
|
||||
TELNET_EV_TELOPT_FOCUS,
|
||||
TELNET_EV_TELOPT_DATA,
|
||||
} telnet_telopt_event_type;
|
||||
|
||||
typedef struct telnet_telopt_event
|
||||
{
|
||||
telnet_telopt_event_type type;
|
||||
} telnet_telopt_event;
|
||||
|
||||
typedef struct telnet_telopt_toggle_event
|
||||
{
|
||||
telnet_telopt_event SUPER_;
|
||||
telnet_telopt_location where;
|
||||
unsigned char status;
|
||||
} telnet_telopt_toggle_event;
|
||||
|
||||
typedef struct telnet_telopt_focus_event
|
||||
{
|
||||
telnet_telopt_event SUPER_;
|
||||
unsigned char focus;
|
||||
} telnet_telopt_focus_event;
|
||||
|
||||
typedef struct telnet_telopt_data_event
|
||||
{
|
||||
telnet_telopt_event SUPER_;
|
||||
const telnet_byte* data;
|
||||
size_t length;
|
||||
} telnet_telopt_data_event;
|
||||
|
||||
|
||||
|
||||
typedef struct telnet_nvt telnet_nvt;
|
||||
|
||||
|
||||
typedef void (*telnet_nvt_event_callback)(telnet_nvt* nvt, telnet_event* event);
|
||||
typedef void (*telnet_telopt_event_callback)(telnet_nvt* nvt, telnet_byte telopt, telnet_telopt_event* event);
|
||||
typedef unsigned char (*telnet_negotiate_event_callback)(telnet_nvt* nvt, telnet_byte telopt, telnet_telopt_location where);
|
||||
|
||||
/**
|
||||
Creates a new Telnet NVT.
|
||||
|
||||
Errors:
|
||||
TELNET_E_ALLOC - Unable to allocate enough memory for the NVT.
|
||||
*/
|
||||
telnet_nvt* telnet_nvt_new(void* userdata,
|
||||
telnet_nvt_event_callback nvt_callback,
|
||||
telnet_telopt_event_callback telopt_callback,
|
||||
telnet_negotiate_event_callback negotiate_callback);
|
||||
|
||||
void telnet_nvt_free(telnet_nvt* nvt);
|
||||
|
||||
/**
|
||||
Every NVT can have some user-specific data attached, such as a user-defined struct.
|
||||
This can be accessed (primarily by event callbacks) to differentiate between NVTs.
|
||||
|
||||
Errors:
|
||||
TELNET_E_BAD_NVT - Invalid telnet_nvt* parameter.
|
||||
|
||||
Example:
|
||||
// assuming a FILE was passed to telnet_nvt_new():
|
||||
FILE* out = NULL;
|
||||
telnet_get_userdata(nvt, (void**)&out);
|
||||
*/
|
||||
telnet_error telnet_get_userdata(telnet_nvt* nvt, void** udata);
|
||||
|
||||
/**
|
||||
Processes incoming data.
|
||||
If `bytes_used` is non-NULL, it will be set to the length of the string that
|
||||
was read. This is generally only useful if you use telnet_interrupt() in a callback.
|
||||
|
||||
Errors:
|
||||
TELNET_E_BAD_NVT - Invalid telnet_nvt* parameter.
|
||||
TELNET_E_ALLOC - Unable to allocate destination buffer for incoming text.
|
||||
TELNET_E_INTERRUPT - User code interrupted the parser.
|
||||
*/
|
||||
telnet_error telnet_receive(telnet_nvt* nvt, const telnet_byte* data, size_t length, size_t* bytes_used);
|
||||
|
||||
/**
|
||||
If currently parsing (i.e. telnet_receive() is running), interrupts the parser.
|
||||
This is useful for things such as MCCP, where a Telnet sequence hails the start of
|
||||
data that must be decompressed before being parsed.
|
||||
|
||||
Errors:
|
||||
TELNET_E_BAD_NVT - Invalid telnet_nvt* parameter.
|
||||
*/
|
||||
telnet_error telnet_interrupt(telnet_nvt* nvt);
|
||||
|
||||
|
||||
/**
|
||||
Sends a string as a stream of escaped Telnet data.
|
||||
|
||||
Errors:
|
||||
TELNET_E_BAD_NVT - Invalid telnet_nvt* parameter.
|
||||
TELNET_E_ALLOC - Unable to allocate destination buffer for outgoing text.
|
||||
*/
|
||||
telnet_error telnet_send_data(telnet_nvt* nvt, const telnet_byte* data, const size_t length);
|
||||
|
||||
/**
|
||||
Sends a Telnet command.
|
||||
|
||||
Errors:
|
||||
TELNET_E_BAD_NVT - Invalid telnet_nvt* parameter.
|
||||
TELNET_E_INVALID_COMMAND - The command cannot be WILL, WONT, DO, DONT, SB, or SE.
|
||||
*/
|
||||
telnet_error telnet_send_command(telnet_nvt* nvt, const telnet_byte command);
|
||||
|
||||
/**
|
||||
Sends a subnegotiation packet.
|
||||
|
||||
Errors:
|
||||
TELNET_E_BAD_NVT - Invalid telnet_nvt* parameter.
|
||||
TELNET_E_ALLOC - Unable to allocate destination buffer for outgoing text.
|
||||
*/
|
||||
telnet_error telnet_send_subnegotiation(telnet_nvt* nvt, const telnet_byte option, const telnet_byte* data, const size_t length);
|
||||
|
||||
|
||||
telnet_error telnet_telopt_enable(telnet_nvt* nvt, const telnet_byte telopt, telnet_telopt_location where);
|
||||
telnet_error telnet_telopt_disable(telnet_nvt* nvt, const telnet_byte telopt, telnet_telopt_location where);
|
||||
telnet_error telnet_telopt_status(telnet_nvt* nvt, const telnet_byte telopt, telnet_telopt_location where, unsigned char* status);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // ANACHRONISM_ANACHRONISM_H
|
@ -1,73 +0,0 @@
|
||||
#ifndef ANACHRONISM_PARSER_H
|
||||
#define ANACHRONISM_PARSER_H
|
||||
|
||||
#include <anachronism/common.h>
|
||||
|
||||
typedef enum telnet_parser_event_type
|
||||
{
|
||||
TELNET_EV_PARSER_DATA,
|
||||
TELNET_EV_PARSER_COMMAND,
|
||||
TELNET_EV_PARSER_OPTION,
|
||||
TELNET_EV_PARSER_SUBNEGOTIATION,
|
||||
TELNET_EV_PARSER_WARNING,
|
||||
} telnet_parser_event_type;
|
||||
|
||||
typedef struct telnet_parser_event
|
||||
{
|
||||
telnet_parser_event_type type;
|
||||
} telnet_parser_event;
|
||||
|
||||
typedef struct telnet_parser_data_event
|
||||
{
|
||||
telnet_parser_event SUPER_;
|
||||
const telnet_byte* data;
|
||||
size_t length;
|
||||
} telnet_parser_data_event;
|
||||
|
||||
typedef struct telnet_parser_command_event
|
||||
{
|
||||
telnet_parser_event SUPER_;
|
||||
telnet_byte command;
|
||||
} telnet_parser_command_event;
|
||||
|
||||
typedef struct telnet_parser_option_event
|
||||
{
|
||||
telnet_parser_event SUPER_;
|
||||
telnet_byte command;
|
||||
telnet_byte option;
|
||||
} telnet_parser_option_event;
|
||||
|
||||
typedef struct telnet_parser_subnegotiation_event
|
||||
{
|
||||
telnet_parser_event SUPER_;
|
||||
int active;
|
||||
telnet_byte option;
|
||||
} telnet_parser_subnegotiation_event;
|
||||
|
||||
typedef struct telnet_parser_warning_event
|
||||
{
|
||||
telnet_parser_event SUPER_;
|
||||
const char* message;
|
||||
size_t position;
|
||||
} telnet_parser_warning_event;
|
||||
|
||||
|
||||
|
||||
typedef struct telnet_parser telnet_parser;
|
||||
|
||||
typedef void (*telnet_parser_callback)(telnet_parser* parser, telnet_parser_event* event);
|
||||
|
||||
|
||||
telnet_parser* telnet_parser_new(void* userdata, telnet_parser_callback callback);
|
||||
void telnet_parser_free(telnet_parser* parser);
|
||||
|
||||
telnet_error telnet_parser_get_userdata(telnet_parser* parser, void** userdata);
|
||||
|
||||
telnet_error telnet_parser_parse(telnet_parser* parser,
|
||||
const telnet_byte* data,
|
||||
size_t length,
|
||||
size_t* bytes_used);
|
||||
|
||||
telnet_error telnet_parser_interrupt(telnet_parser* parser);
|
||||
|
||||
#endif // ANACHRONISM_PARSER_H
|
@ -1,6 +0,0 @@
|
||||
* parser_common.rl
|
||||
<br>The language-agnostic Ragel grammar for the Telnet protocol.
|
||||
* parser.rl
|
||||
<br>The C implementation of the Ragel grammar. Compiled to parser.c by Ragel.
|
||||
* nvt.c
|
||||
<br>The core implementation of Anachronism's NVT and Channel constructs.
|
@ -1,631 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <anachronism/nvt.h>
|
||||
#include <anachronism/parser.h>
|
||||
|
||||
|
||||
// Reports that an option was switched on or off.  When the NVT has a telopt
// callback installed, builds a TELNET_EV_TELOPT_TOGGLE event carrying
// `where_` and `status_` and delivers it for `telopt`; otherwise a no-op.
#define TELOPT_TOGGLE_CALLBACK(nvt, telopt, where_, status_) \
  do { \
    if ((nvt)->telopt_callback) { \
      telnet_telopt_toggle_event toggle_ev; \
      toggle_ev.SUPER_.type = TELNET_EV_TELOPT_TOGGLE; \
      toggle_ev.where = (where_); \
      toggle_ev.status = (status_); \
      (nvt)->telopt_callback((nvt), (telopt), (telnet_telopt_event*)&toggle_ev); \
    } \
  } while (0)
|
||||
|
||||
// Reports that a subnegotiation for `telopt` gained or lost focus.  When the
// NVT has a telopt callback installed, builds a TELNET_EV_TELOPT_FOCUS event
// and delivers it; otherwise a no-op.
//
// The value is stored in the event's `focus` member, the same member
// process_subnegotiation_event() fills in when it builds this event by hand.
// NOTE(review): telnet_telopt_focus_event is declared in the public header,
// which is not visible here -- confirm the member name against it.
#define TELOPT_FOCUS_CALLBACK(nvt, telopt, status_) do { \
  if ((nvt)->telopt_callback) { \
    telnet_telopt_focus_event ev; \
    ev.SUPER_.type = TELNET_EV_TELOPT_FOCUS; \
    ev.focus = (status_); \
    \
    (nvt)->telopt_callback((nvt), (telopt), (telnet_telopt_event*)&ev); \
  } \
} while (0)
|
||||
|
||||
// Delivers a chunk of subnegotiation payload for `telopt`.  When the NVT has a
// telopt callback installed, builds a TELNET_EV_TELOPT_DATA event pointing at
// `data_` / `length_` and hands it over; otherwise a no-op.
#define TELOPT_DATA_CALLBACK(nvt, telopt, data_, length_) \
  do { \
    if ((nvt)->telopt_callback) { \
      telnet_telopt_data_event data_ev; \
      data_ev.SUPER_.type = TELNET_EV_TELOPT_DATA; \
      data_ev.data = (data_); \
      data_ev.length = (length_); \
      (nvt)->telopt_callback((nvt), (telopt), (telnet_telopt_event*)&data_ev); \
    } \
  } while (0)
|
||||
|
||||
// Hands `length_` bytes at `data_` to the application for transmission.  When
// the NVT has a general event callback installed, wraps the bytes in a
// TELNET_EV_SEND event and delivers it; otherwise the bytes are dropped.
#define SEND_CALLBACK(nvt, data_, length_) \
  do { \
    if ((nvt)->callback) { \
      telnet_send_event send_ev; \
      send_ev.SUPER_.type = TELNET_EV_SEND; \
      send_ev.data = (data_); \
      send_ev.length = (length_); \
      (nvt)->callback((nvt), (telnet_event*)&send_ev); \
    } \
  } while (0)
|
||||
|
||||
|
||||
// Negotiation states of the "Q Method of Implementing TELNET Option
// Negotiation":
// ftp://ftp.rfc-editor.org/in-notes/rfc1143.txt
//
// Q_NO must stay 0: option tables are zero-filled on creation and rely on
// that meaning "disabled".  All values fit in the 3-bit fields of
// telnet_qstate.
typedef enum qstate {
  Q_NO = 0,
  Q_WANTYES,
  Q_WANTYESNO,
  Q_YES,
  Q_WANTNO,
  Q_WANTNOYES,
} qstate;
|
||||
|
||||
// Negotiation state of one option, kept separately for each side of the
// connection.  Each field holds a qstate value.
typedef struct telnet_qstate
{
  unsigned remote : 3; // state of the option on the peer's side
  unsigned local : 3;  // state of the option on our side
} telnet_qstate;
|
||||
|
||||
struct telnet_nvt
|
||||
{
|
||||
telnet_parser* parser;
|
||||
telnet_qstate options[256]; // track the state of each subnegotiation option
|
||||
short current_remote;
|
||||
|
||||
telnet_nvt_event_callback callback;
|
||||
telnet_telopt_event_callback telopt_callback;
|
||||
telnet_negotiate_event_callback negotiate_callback;
|
||||
|
||||
void* userdata;
|
||||
};
|
||||
|
||||
static unsigned char telopt_status(telnet_nvt* nvt,
|
||||
telnet_byte telopt,
|
||||
telnet_telopt_location where)
|
||||
{
|
||||
unsigned int qval = (where == TELNET_LOCAL) ?
|
||||
nvt->options[telopt].local :
|
||||
nvt->options[telopt].remote;
|
||||
|
||||
switch (qval) {
|
||||
case Q_YES: case Q_WANTNO: case Q_WANTNOYES:
|
||||
return 1;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
// Non-zero when `telopt` is enabled on at least one side (remote is checked
// first), i.e. when subnegotiation for it is allowed.
#define telopt_subnegotiable(nvt, telopt) \
  (telopt_status((nvt), (telopt), TELNET_REMOTE) || \
   telopt_status((nvt), (telopt), TELNET_LOCAL))
|
||||
|
||||
|
||||
// Emits the three-byte negotiation sequence IAC <command> <telopt> through
// the NVT's send callback.
static void send_option(telnet_nvt* nvt, telnet_byte command, telnet_byte telopt)
{
  const telnet_byte packet[3] = {IAC_IAC, command, telopt};
  SEND_CALLBACK(nvt, packet, 3);
}
|
||||
|
||||
static void process_option_event(telnet_nvt* nvt,
|
||||
telnet_byte command,
|
||||
telnet_byte telopt)
|
||||
{
|
||||
telnet_qstate* q = &nvt->options[telopt];
|
||||
// Every qstate begins zeroed-out, and Q_NO is 0.
|
||||
|
||||
switch (command)
|
||||
{
|
||||
case IAC_WILL:
|
||||
switch (q->remote)
|
||||
{
|
||||
case Q_NO:
|
||||
if (nvt->negotiate_callback && nvt->negotiate_callback(nvt, telopt, TELNET_REMOTE)) {
|
||||
send_option(nvt, IAC_DO, telopt);
|
||||
q->remote = Q_YES;
|
||||
TELOPT_TOGGLE_CALLBACK(nvt, telopt, TELNET_REMOTE, 1);
|
||||
} else {
|
||||
send_option(nvt, IAC_DONT, telopt);
|
||||
}
|
||||
break;
|
||||
case Q_WANTNO:
|
||||
// error
|
||||
q->remote = Q_NO;
|
||||
break;
|
||||
case Q_WANTNOYES:
|
||||
// error
|
||||
q->remote = Q_YES;
|
||||
TELOPT_TOGGLE_CALLBACK(nvt, telopt, TELNET_REMOTE, 1);
|
||||
break;
|
||||
case Q_WANTYES:
|
||||
q->remote = Q_YES;
|
||||
TELOPT_TOGGLE_CALLBACK(nvt, telopt, TELNET_REMOTE, 1);
|
||||
break;
|
||||
case Q_WANTYESNO:
|
||||
send_option(nvt, IAC_DONT, telopt);
|
||||
q->remote = Q_WANTNO;
|
||||
TELOPT_TOGGLE_CALLBACK(nvt, telopt, TELNET_REMOTE, 1);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case IAC_WONT:
|
||||
switch (q->remote)
|
||||
{
|
||||
case Q_YES:
|
||||
send_option(nvt, IAC_DONT, telopt);
|
||||
q->remote = Q_NO;
|
||||
TELOPT_TOGGLE_CALLBACK(nvt, telopt, TELNET_REMOTE, 0);
|
||||
break;
|
||||
case Q_WANTNO:
|
||||
q->remote = Q_NO;
|
||||
break;
|
||||
case Q_WANTNOYES:
|
||||
send_option(nvt, IAC_DO, telopt);
|
||||
q->remote = Q_WANTYES;
|
||||
break;
|
||||
case Q_WANTYES:
|
||||
q->remote = Q_NO;
|
||||
break;
|
||||
case Q_WANTYESNO:
|
||||
q->remote = Q_NO;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case IAC_DO:
|
||||
switch (q->local)
|
||||
{
|
||||
case Q_NO:
|
||||
if (nvt->negotiate_callback && nvt->negotiate_callback(nvt, telopt, TELNET_LOCAL)) {
|
||||
send_option(nvt, IAC_WILL, telopt);
|
||||
q->local = Q_YES;
|
||||
TELOPT_TOGGLE_CALLBACK(nvt, telopt, TELNET_LOCAL, 1);
|
||||
} else {
|
||||
send_option(nvt, IAC_WONT, telopt);
|
||||
}
|
||||
break;
|
||||
case Q_WANTNO:
|
||||
// error
|
||||
q->local = Q_NO;
|
||||
break;
|
||||
case Q_WANTNOYES:
|
||||
// error
|
||||
q->local = Q_YES;
|
||||
TELOPT_TOGGLE_CALLBACK(nvt, telopt, TELNET_LOCAL, 1);
|
||||
break;
|
||||
case Q_WANTYES:
|
||||
q->local = Q_YES;
|
||||
TELOPT_TOGGLE_CALLBACK(nvt, telopt, TELNET_LOCAL, 1);
|
||||
break;
|
||||
case Q_WANTYESNO:
|
||||
send_option(nvt, IAC_WONT, telopt);
|
||||
q->local = Q_WANTNO;
|
||||
TELOPT_TOGGLE_CALLBACK(nvt, telopt, TELNET_LOCAL, 1);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case IAC_DONT:
|
||||
switch (q->local)
|
||||
{
|
||||
case Q_YES:
|
||||
send_option(nvt, IAC_DONT, telopt);
|
||||
q->local = Q_NO;
|
||||
TELOPT_TOGGLE_CALLBACK(nvt, telopt, TELNET_LOCAL, 0);
|
||||
break;
|
||||
case Q_WANTNO:
|
||||
q->local = Q_NO;
|
||||
break;
|
||||
case Q_WANTNOYES:
|
||||
send_option(nvt, IAC_WILL, telopt);
|
||||
q->local = Q_WANTYES;
|
||||
break;
|
||||
case Q_WANTYES:
|
||||
q->local = Q_NO;
|
||||
break;
|
||||
case Q_WANTYESNO:
|
||||
q->local = Q_NO;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void process_data_event(telnet_nvt* nvt,
|
||||
const telnet_byte* data,
|
||||
size_t length)
|
||||
{
|
||||
if (nvt->current_remote == -1) {
|
||||
// Main-line data
|
||||
if (nvt->callback) {
|
||||
telnet_data_event ev;
|
||||
ev.SUPER_.type = TELNET_EV_DATA;
|
||||
ev.data = data;
|
||||
ev.length = length;
|
||||
nvt->callback(nvt, (telnet_event*)&ev);
|
||||
}
|
||||
} else {
|
||||
// Telopt data
|
||||
telnet_byte telopt = (telnet_byte)nvt->current_remote;
|
||||
|
||||
if (nvt->telopt_callback) {
|
||||
// Make sure the telopt is enabled
|
||||
if (telopt_subnegotiable(nvt, telopt)) {
|
||||
telnet_telopt_data_event ev;
|
||||
ev.SUPER_.type = TELNET_EV_TELOPT_DATA;
|
||||
ev.data = data;
|
||||
ev.length = length;
|
||||
nvt->telopt_callback(nvt, telopt, (telnet_telopt_event*)&ev);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void process_subnegotiation_event(telnet_nvt* nvt,
|
||||
int open,
|
||||
telnet_byte telopt)
|
||||
{
|
||||
if (open) {
|
||||
nvt->current_remote = telopt;
|
||||
} else {
|
||||
nvt->current_remote = -1;
|
||||
}
|
||||
|
||||
if (nvt->telopt_callback) {
|
||||
// Make sure the telopt is enabled
|
||||
if (telopt_subnegotiable(nvt, telopt)) {
|
||||
telnet_telopt_focus_event ev;
|
||||
ev.SUPER_.type = TELNET_EV_TELOPT_FOCUS;
|
||||
ev.focus = open;
|
||||
nvt->telopt_callback(nvt, telopt, (telnet_telopt_event*)&ev);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parser callback: translates low-level parser events into NVT behaviour.
// The owning NVT is recovered from the parser's userdata.  Data, option and
// subnegotiation events go to their dedicated handlers; command and warning
// events are re-wrapped as NVT events and forwarded to the general callback
// when one is installed.  Unknown event types are ignored.
static void process_event(telnet_parser* parser, telnet_parser_event* event)
{
  telnet_nvt* nvt = NULL;
  telnet_parser_get_userdata(parser, (void*)&nvt);

  const telnet_parser_event_type kind = event->type;

  if (kind == TELNET_EV_PARSER_DATA) {
    telnet_parser_data_event* in = (telnet_parser_data_event*)event;
    process_data_event(nvt, in->data, in->length);
  }
  else if (kind == TELNET_EV_PARSER_OPTION) {
    telnet_parser_option_event* in = (telnet_parser_option_event*)event;
    process_option_event(nvt, in->command, in->option);
  }
  else if (kind == TELNET_EV_PARSER_SUBNEGOTIATION) {
    telnet_parser_subnegotiation_event* in = (telnet_parser_subnegotiation_event*)event;
    process_subnegotiation_event(nvt, in->active, in->option);
  }
  else if (kind == TELNET_EV_PARSER_COMMAND) {
    if (nvt->callback) {
      telnet_parser_command_event* in = (telnet_parser_command_event*)event;

      telnet_command_event out;
      out.SUPER_.type = TELNET_EV_COMMAND;
      out.command = in->command;
      nvt->callback(nvt, (telnet_event*)&out);
    }
  }
  else if (kind == TELNET_EV_PARSER_WARNING) {
    if (nvt->callback) {
      telnet_parser_warning_event* in = (telnet_parser_warning_event*)event;

      telnet_warning_event out;
      out.SUPER_.type = TELNET_EV_WARNING;
      out.message = in->message;
      out.position = in->position;
      nvt->callback(nvt, (telnet_event*)&out);
    }
  }
}
|
||||
|
||||
|
||||
telnet_nvt* telnet_nvt_new(void* userdata,
|
||||
telnet_nvt_event_callback nvt_callback,
|
||||
telnet_telopt_event_callback telopt_callback,
|
||||
telnet_negotiate_event_callback negotiate_callback)
|
||||
{
|
||||
telnet_nvt* nvt = malloc(sizeof(telnet_nvt));
|
||||
if (nvt)
|
||||
{
|
||||
telnet_parser* parser = telnet_parser_new((void*)nvt, &process_event);
|
||||
if (parser)
|
||||
{
|
||||
memset(nvt, 0, sizeof(*nvt));
|
||||
nvt->parser = parser;
|
||||
nvt->callback = nvt_callback;
|
||||
nvt->telopt_callback = telopt_callback;
|
||||
nvt->negotiate_callback = negotiate_callback;
|
||||
nvt->userdata = userdata;
|
||||
nvt->current_remote = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
free(nvt);
|
||||
nvt = NULL;
|
||||
}
|
||||
}
|
||||
return nvt;
|
||||
}
|
||||
|
||||
// Releases an NVT and the parser it owns.  Passing NULL is a no-op.
void telnet_nvt_free(telnet_nvt* nvt)
{
  if (!nvt)
    return;

  telnet_parser_free(nvt->parser);
  free(nvt);
}
|
||||
|
||||
// Stores the pointer given to telnet_nvt_new() in *userdata.
// Returns TELNET_E_BAD_NVT (leaving *userdata untouched) when nvt is NULL,
// TELNET_E_OK otherwise.  `userdata` itself is not checked for NULL.
telnet_error telnet_get_userdata(telnet_nvt* nvt, void** userdata)
{
  telnet_error result = TELNET_E_BAD_NVT;

  if (nvt)
  {
    *userdata = nvt->userdata;
    result = TELNET_E_OK;
  }
  return result;
}
|
||||
|
||||
// Feeds `length` incoming bytes to the NVT's parser; events are raised through
// the NVT's callbacks while parsing.  Returns TELNET_E_BAD_NVT when nvt is
// NULL, otherwise whatever telnet_parser_parse() returns.
telnet_error telnet_receive(telnet_nvt* nvt, const telnet_byte* data, size_t length, size_t* bytes_used)
{
  return nvt ? telnet_parser_parse(nvt->parser, data, length, bytes_used)
             : TELNET_E_BAD_NVT;
}
|
||||
|
||||
// Asks the NVT's parser to stop processing the buffer it is working on.
// Returns TELNET_E_BAD_NVT when nvt is NULL, otherwise whatever
// telnet_parser_interrupt() returns.
telnet_error telnet_interrupt(telnet_nvt* nvt)
{
  return nvt ? telnet_parser_interrupt(nvt->parser) : TELNET_E_BAD_NVT;
}
|
||||
|
||||
|
||||
// Copies `in` into `out`, truncating to the `outlen` bytes of room available.
// Returns 1 when all `inlen` bytes fit, 0 when the copy had to be truncated.
static int safe_concat(const telnet_byte* in, size_t inlen, telnet_byte* out, size_t outlen)
{
  const int fits = (inlen <= outlen);

  // Copy as much as possible into the buffer.
  memcpy(out, in, fits ? inlen : outlen);

  return fits;
}
|
||||
|
||||
// Escapes any special characters in data, writing the result data to out.
|
||||
// Returns -1 if not everything could be copied (and out is full).
|
||||
// Otherwise returns the length of the data in out.
|
||||
//
|
||||
// To avoid potential -1 return values, pass in an out buffer double the length of the data buffer.
|
||||
static size_t telnet_escape(const telnet_byte* data, size_t length, telnet_byte* out, size_t outsize)
|
||||
{
|
||||
if (data == NULL || out == NULL)
|
||||
return 0;
|
||||
|
||||
size_t outlen = 0;
|
||||
size_t left = 0;
|
||||
size_t right = 0;
|
||||
const char* seq = NULL;
|
||||
for (; right < length; ++right)
|
||||
{
|
||||
switch (data[right])
|
||||
{
|
||||
case IAC_IAC:
|
||||
seq = "\xFF\xFF";
|
||||
break;
|
||||
case '\r':
|
||||
// Only escape \r if it doesn't immediately precede \n.
|
||||
if (right + 1 >= length || data[right+1] != '\n')
|
||||
{
|
||||
seq = "\r\0";
|
||||
break;
|
||||
}
|
||||
// !!FALLTHROUGH!!
|
||||
default:
|
||||
continue; // Move to the next character
|
||||
}
|
||||
|
||||
// Add any normal data that hasn't been added yet.
|
||||
if (safe_concat(data+left, right-left, out+outlen, outsize-outlen) == 0)
|
||||
return -1;
|
||||
outlen += right - left;
|
||||
left = right + 1;
|
||||
|
||||
// Add the escape sequence.
|
||||
if (safe_concat((const telnet_byte*)seq, 2, out+outlen, outsize-outlen) == 0)
|
||||
return -1;
|
||||
outlen += 2;
|
||||
}
|
||||
|
||||
// Add any leftover normal data.
|
||||
if (left < right)
|
||||
{
|
||||
if (safe_concat(data+left, right-left, out+outlen, outsize-outlen) == 0)
|
||||
return -1;
|
||||
outlen += right - left;
|
||||
}
|
||||
|
||||
return outlen;
|
||||
}
|
||||
|
||||
// Escapes `length` bytes of application text and hands the result to the
// NVT's send callback as a single TELNET_EV_SEND event.
//
// Returns:
//   TELNET_E_BAD_NVT - nvt is NULL.
//   TELNET_E_ALLOC   - the escape buffer could not be obtained (allocation
//                      failure, or `length` too large to size a buffer for).
//   TELNET_E_OK      - otherwise, including when no callback is installed.
telnet_error telnet_send_data(telnet_nvt* nvt, const telnet_byte* data, const size_t length)
{
  if (!nvt)
    return TELNET_E_BAD_NVT;
  else if (!nvt->callback)
    return TELNET_E_OK; // immediate success since they apparently don't want the data to go anywhere

  // Due to the nature of the protocol, the most any one byte can be encoded as is two bytes.
  // Hence, the smallest buffer guaranteed to contain any input is double the length of the source.
  // Refuse lengths whose doubled size would wrap around size_t.
  if (length > ((size_t)-1) / 2 / sizeof(telnet_byte))
    return TELNET_E_ALLOC;
  size_t bufsize = sizeof(telnet_byte) * length * 2;

  // malloc(0) is allowed to return NULL, which would make an empty send look
  // like an allocation failure; always ask for at least one byte.
  telnet_byte* buf = malloc(bufsize ? bufsize : 1);
  if (!buf)
    return TELNET_E_ALLOC;

  size_t escaped_length = telnet_escape(data, length, buf, bufsize);
  if (escaped_length == (size_t)-1)
  {
    // Cannot happen with a double-size buffer; guard anyway so a failure value
    // is never passed to the callback as a length.
    free(buf);
    return TELNET_E_ALLOC;
  }

  SEND_CALLBACK(nvt, buf, escaped_length);

  free(buf);
  buf = NULL;

  return TELNET_E_OK;
}
|
||||
|
||||
// Sends the two-byte sequence IAC <command> through the NVT's send callback.
//
// Returns:
//   TELNET_E_BAD_NVT         - nvt is NULL.
//   TELNET_E_INVALID_COMMAND - command is SE, or is SB or any higher value;
//                              those need dedicated framing.
//   TELNET_E_OK              - otherwise.
telnet_error telnet_send_command(telnet_nvt* nvt, const telnet_byte command)
{
  if (!nvt)
    return TELNET_E_BAD_NVT;

  const int needs_framing = (command >= IAC_SB) || (command == IAC_SE);
  if (needs_framing)
    return TELNET_E_INVALID_COMMAND; // Invalid command

  const telnet_byte packet[2] = {IAC_IAC, command};
  SEND_CALLBACK(nvt, packet, 2);

  return TELNET_E_OK;
}
|
||||
|
||||
// Sends one complete subnegotiation packet, IAC SB <option> <escaped data>
// IAC SE, through the NVT's send callback as a single TELNET_EV_SEND event.
//
// Returns:
//   TELNET_E_BAD_NVT           - nvt is NULL.
//   TELNET_E_NOT_SUBNEGOTIABLE - the option is enabled on neither side.
//   TELNET_E_ALLOC             - the packet buffer could not be obtained
//                                (allocation failure, or `length` too large).
//   TELNET_E_OK                - otherwise, including when no callback is
//                                installed.
telnet_error telnet_send_subnegotiation(telnet_nvt* nvt, const telnet_byte option, const telnet_byte* data, const size_t length)
{
  if (!nvt)
    return TELNET_E_BAD_NVT;
  else if (!telopt_subnegotiable(nvt, option))
    return TELNET_E_NOT_SUBNEGOTIABLE;
  else if (!nvt->callback)
    return TELNET_E_OK;

  // length*2 is the maximum buffer size needed for an escaped string.
  // The extra five bytes are for the IAC, SB, <option>, IAC, and SE frame around the data.
  // Refuse lengths for which that size would wrap around size_t.
  if (length > (((size_t)-1) - 5) / 2 / sizeof(telnet_byte))
    return TELNET_E_ALLOC;
  size_t bufsize = (sizeof(telnet_byte) * length * 2) + 5;
  telnet_byte* buf = malloc(bufsize);
  if (!buf)
    return TELNET_E_ALLOC;

  // Begin with IAC SB <option>
  telnet_byte iac[] = {IAC_IAC, IAC_SB, option};
  memcpy(buf, iac, 3);

  // Add the subnegotiation body
  size_t body_length = telnet_escape(data, length, buf+3, bufsize-3);
  if (body_length == (size_t)-1)
  {
    // Cannot happen with a buffer sized as above; guard anyway so the trailer
    // is never written at a wrapped-around offset.
    free(buf);
    return TELNET_E_ALLOC;
  }
  size_t escaped_length = body_length + 3;

  // End with IAC SE
  iac[1] = IAC_SE;
  memcpy(buf+escaped_length, iac, 2);
  escaped_length += 2;

  SEND_CALLBACK(nvt, buf, escaped_length);

  free(buf);
  buf = NULL;

  return TELNET_E_OK;
}
|
||||
|
||||
|
||||
telnet_error telnet_telopt_enable(telnet_nvt* nvt,
|
||||
telnet_byte telopt,
|
||||
telnet_telopt_location where)
|
||||
{
|
||||
if (!nvt)
|
||||
return TELNET_E_BAD_NVT;
|
||||
|
||||
telnet_qstate* q = &nvt->options[telopt];
|
||||
if (where == TELNET_LOCAL) {
|
||||
switch (q->local)
|
||||
{
|
||||
case Q_NO:
|
||||
q->local = Q_WANTYES;
|
||||
send_option(nvt, IAC_WILL, telopt);
|
||||
break;
|
||||
case Q_WANTNO:
|
||||
q->local = Q_WANTNOYES;
|
||||
break;
|
||||
case Q_WANTYESNO:
|
||||
q->local = Q_WANTYES;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (q->remote)
|
||||
{
|
||||
case Q_NO:
|
||||
q->remote = Q_WANTYES;
|
||||
send_option(nvt, IAC_DO, telopt);
|
||||
break;
|
||||
case Q_WANTNO:
|
||||
q->remote = Q_WANTNOYES;
|
||||
break;
|
||||
case Q_WANTYESNO:
|
||||
q->remote = Q_WANTYES;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return TELNET_E_OK;
|
||||
}
|
||||
|
||||
telnet_error telnet_telopt_disable(telnet_nvt* nvt,
|
||||
telnet_byte telopt,
|
||||
telnet_telopt_location where)
|
||||
{
|
||||
if (!nvt)
|
||||
return TELNET_E_BAD_NVT;
|
||||
|
||||
telnet_qstate* q = &nvt->options[telopt];
|
||||
if (where == TELNET_LOCAL) {
|
||||
switch (q->local)
|
||||
{
|
||||
case Q_YES:
|
||||
send_option(nvt, IAC_WONT, telopt);
|
||||
q->local = Q_WANTNO;
|
||||
TELOPT_TOGGLE_CALLBACK(nvt, telopt, TELNET_LOCAL, 0);
|
||||
break;
|
||||
case Q_WANTNOYES:
|
||||
q->local = Q_WANTNO;
|
||||
break;
|
||||
case Q_WANTYES:
|
||||
q->local = Q_WANTYESNO;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (q->remote)
|
||||
{
|
||||
case Q_YES:
|
||||
send_option(nvt, IAC_DONT, telopt);
|
||||
q->remote = Q_WANTNO;
|
||||
TELOPT_TOGGLE_CALLBACK(nvt, telopt, TELNET_REMOTE, 0);
|
||||
break;
|
||||
case Q_WANTNOYES:
|
||||
q->remote = Q_WANTNO;
|
||||
break;
|
||||
case Q_WANTYES:
|
||||
q->remote = Q_WANTYESNO;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return TELNET_E_OK;
|
||||
}
|
||||
|
||||
telnet_error telnet_telopt_status(telnet_nvt* nvt,
|
||||
telnet_byte telopt,
|
||||
telnet_telopt_location where,
|
||||
unsigned char* status)
|
||||
{
|
||||
if (!nvt)
|
||||
return TELNET_E_BAD_NVT;
|
||||
|
||||
*status = telopt_status(nvt, telopt, where);
|
||||
return TELNET_E_OK;
|
||||
}
|
@ -1,455 +0,0 @@
|
||||
|
||||
#line 1 "src/parser.rl"
|
||||
#include <string.h>
|
||||
#include <anachronism/parser.h>
|
||||
|
||||
/* Event initialisers.  Each EV_* macro fills in a caller-declared event
   struct of the matching type: BASE_EV stamps the shared header with
   TELNET_EV_PARSER_<t>, the rest of the macro copies the payload. */
#define BASE_EV(ev, t) \
  ((ev).SUPER_.type = TELNET_EV_PARSER_##t)

/* telnet_parser_data_event: `len` bytes of text starting at `text`. */
#define EV_DATA(ev, text, len) \
  do { \
    BASE_EV(ev, DATA); \
    (ev).data = (text); \
    (ev).length = (len); \
  } while (0)

/* telnet_parser_command_event: a single command byte. */
#define EV_COMMAND(ev, cmd) \
  do { \
    BASE_EV(ev, COMMAND); \
    (ev).command = (cmd); \
  } while (0)

/* telnet_parser_option_event: negotiation command plus option byte. */
#define EV_OPTION(ev, cmd, opt) \
  do { \
    BASE_EV(ev, OPTION); \
    (ev).command = (cmd); \
    (ev).option = (opt); \
  } while (0)

/* telnet_parser_subnegotiation_event: `act` is 1 on open, 0 on close. */
#define EV_SUBNEGOTIATION(ev, act, opt) \
  do { \
    BASE_EV(ev, SUBNEGOTIATION); \
    (ev).active = (act); \
    (ev).option = (opt); \
  } while (0)

/* telnet_parser_warning_event: message text and offset into the input. */
#define EV_WARNING(ev, msg, pos) \
  do { \
    BASE_EV(ev, WARNING); \
    (ev).message = (msg); \
    (ev).position = (pos); \
  } while (0)
|
||||
|
||||
struct telnet_parser {
|
||||
int cs; /* current Ragel state */
|
||||
const telnet_byte* p; /* current position */
|
||||
const telnet_byte* pe; /* end of current packet */
|
||||
const telnet_byte* eof; /* end-of-file marker */
|
||||
|
||||
telnet_byte option_mark; /* temporary storage for a command byte */
|
||||
unsigned char interrupted; /* Flag for interrupts */
|
||||
|
||||
telnet_parser_callback callback; /* Receiver of Telnet events*/
|
||||
void* userdata; /* Context for parser callback */
|
||||
};
|
||||
|
||||
|
||||
#line 53 "src/parser.c"
|
||||
static const int telnet_parser_start = 7;
|
||||
|
||||
|
||||
#line 130 "src/parser.rl"
|
||||
|
||||
|
||||
telnet_parser* telnet_parser_new(void* userdata,
|
||||
telnet_parser_callback callback)
|
||||
{
|
||||
telnet_parser* parser = malloc(sizeof(telnet_parser));
|
||||
if (parser)
|
||||
{
|
||||
memset(parser, 0, sizeof(*parser));
|
||||
|
||||
#line 72 "src/parser.c"
|
||||
{
|
||||
parser->cs = telnet_parser_start;
|
||||
}
|
||||
|
||||
#line 140 "src/parser.rl"
|
||||
parser->callback = callback;
|
||||
parser->userdata = userdata;
|
||||
}
|
||||
return parser;
|
||||
}
|
||||
|
||||
/* Releases a parser created by telnet_parser_new().  The parser owns no other
   allocations, so one free() is enough; NULL is accepted. */
void telnet_parser_free(telnet_parser* parser)
{
  free(parser);
}
|
||||
|
||||
/* Stores the context pointer given to telnet_parser_new() in *userdata.
   Returns TELNET_E_BAD_PARSER (leaving *userdata untouched) when parser is
   NULL, TELNET_E_OK otherwise.  `userdata` itself is not checked for NULL. */
telnet_error telnet_parser_get_userdata(telnet_parser* parser, void** userdata)
{
  telnet_error result = TELNET_E_BAD_PARSER;

  if (parser)
  {
    *userdata = parser->userdata;
    result = TELNET_E_OK;
  }
  return result;
}
|
||||
|
||||
telnet_error telnet_parser_parse(telnet_parser* parser,
|
||||
const telnet_byte* data,
|
||||
size_t length,
|
||||
size_t* bytes_used)
|
||||
{
|
||||
if (!parser)
|
||||
return TELNET_E_BAD_PARSER;
|
||||
|
||||
// Reset the interrupt flag
|
||||
parser->interrupted = 0;
|
||||
|
||||
// Only bother saving text if it'll be used
|
||||
telnet_byte* buf = NULL;
|
||||
size_t buflen = 0;
|
||||
if (parser->callback)
|
||||
{
|
||||
// Because of how the parser translates data, a run of text is guaranteed to
|
||||
// be at most 'length' characters long. In practice it's usually less, due to
|
||||
// escaped characters (IAC IAC -> IAC) and text separated by commands.
|
||||
buf = malloc(length * sizeof(*buf));
|
||||
if (!buf)
|
||||
return TELNET_E_ALLOC;
|
||||
}
|
||||
|
||||
parser->p = data;
|
||||
parser->pe = data + length;
|
||||
parser->eof = parser->pe;
|
||||
|
||||
|
||||
#line 127 "src/parser.c"
|
||||
{
|
||||
if ( ( parser->p) == ( parser->pe) )
|
||||
goto _test_eof;
|
||||
switch ( parser->cs )
|
||||
{
|
||||
tr1:
|
||||
#line 6 "src/parser_common.rl"
|
||||
{( parser->p)--;}
|
||||
#line 111 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buf != NULL)
|
||||
{
|
||||
telnet_parser_warning_event ev;
|
||||
EV_WARNING(ev, "Invalid \\r: not followed by \\n or \\0.", ( parser->p)-data);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
#line 57 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buflen > 0)
|
||||
{
|
||||
telnet_parser_data_event ev;
|
||||
EV_DATA(ev, buf, buflen);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
buflen = 0;
|
||||
}
|
||||
}
|
||||
goto st7;
|
||||
tr2:
|
||||
#line 67 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buf)
|
||||
buf[buflen++] = (*( parser->p));
|
||||
}
|
||||
goto st7;
|
||||
tr3:
|
||||
#line 57 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buflen > 0)
|
||||
{
|
||||
telnet_parser_data_event ev;
|
||||
EV_DATA(ev, buf, buflen);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
buflen = 0;
|
||||
}
|
||||
}
|
||||
#line 72 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buf)
|
||||
{
|
||||
telnet_parser_command_event ev;
|
||||
EV_COMMAND(ev, (*( parser->p)));
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
goto st7;
|
||||
tr12:
|
||||
#line 57 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buflen > 0)
|
||||
{
|
||||
telnet_parser_data_event ev;
|
||||
EV_DATA(ev, buf, buflen);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
buflen = 0;
|
||||
}
|
||||
}
|
||||
#line 6 "src/parser_common.rl"
|
||||
{( parser->p)--;}
|
||||
#line 119 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buf != NULL)
|
||||
{
|
||||
telnet_parser_warning_event ev;
|
||||
EV_WARNING(ev, "IAC followed by invalid command.", ( parser->p)-data);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
#line 102 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buf != NULL)
|
||||
{
|
||||
telnet_parser_subnegotiation_event ev;
|
||||
EV_SUBNEGOTIATION(ev, 0, parser->option_mark);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
goto st7;
|
||||
tr13:
|
||||
#line 57 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buflen > 0)
|
||||
{
|
||||
telnet_parser_data_event ev;
|
||||
EV_DATA(ev, buf, buflen);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
buflen = 0;
|
||||
}
|
||||
}
|
||||
#line 102 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buf != NULL)
|
||||
{
|
||||
telnet_parser_subnegotiation_event ev;
|
||||
EV_SUBNEGOTIATION(ev, 0, parser->option_mark);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
goto st7;
|
||||
tr14:
|
||||
#line 84 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buf)
|
||||
{
|
||||
telnet_parser_option_event ev;
|
||||
EV_OPTION(ev, parser->option_mark, (*( parser->p)));
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
goto st7;
|
||||
st7:
|
||||
if ( ++( parser->p) == ( parser->pe) )
|
||||
goto _test_eof7;
|
||||
case 7:
|
||||
#line 252 "src/parser.c"
|
||||
switch( (*( parser->p)) ) {
|
||||
case 13u: goto tr15;
|
||||
case 255u: goto st1;
|
||||
}
|
||||
goto tr2;
|
||||
tr15:
|
||||
#line 67 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buf)
|
||||
buf[buflen++] = (*( parser->p));
|
||||
}
|
||||
goto st0;
|
||||
st0:
|
||||
if ( ++( parser->p) == ( parser->pe) )
|
||||
goto _test_eof0;
|
||||
case 0:
|
||||
#line 269 "src/parser.c"
|
||||
switch( (*( parser->p)) ) {
|
||||
case 0u: goto st7;
|
||||
case 10u: goto tr2;
|
||||
}
|
||||
goto tr1;
|
||||
st1:
|
||||
if ( ++( parser->p) == ( parser->pe) )
|
||||
goto _test_eof1;
|
||||
case 1:
|
||||
switch( (*( parser->p)) ) {
|
||||
case 250u: goto tr4;
|
||||
case 255u: goto tr2;
|
||||
}
|
||||
if ( 251u <= (*( parser->p)) && (*( parser->p)) <= 254u )
|
||||
goto tr5;
|
||||
goto tr3;
|
||||
tr4:
|
||||
#line 57 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buflen > 0)
|
||||
{
|
||||
telnet_parser_data_event ev;
|
||||
EV_DATA(ev, buf, buflen);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
buflen = 0;
|
||||
}
|
||||
}
|
||||
goto st2;
|
||||
st2:
|
||||
if ( ++( parser->p) == ( parser->pe) )
|
||||
goto _test_eof2;
|
||||
case 2:
|
||||
#line 302 "src/parser.c"
|
||||
goto tr6;
|
||||
tr11:
|
||||
#line 6 "src/parser_common.rl"
|
||||
{( parser->p)--;}
|
||||
#line 111 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buf != NULL)
|
||||
{
|
||||
telnet_parser_warning_event ev;
|
||||
EV_WARNING(ev, "Invalid \\r: not followed by \\n or \\0.", ( parser->p)-data);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
#line 57 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buflen > 0)
|
||||
{
|
||||
telnet_parser_data_event ev;
|
||||
EV_DATA(ev, buf, buflen);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
buflen = 0;
|
||||
}
|
||||
}
|
||||
goto st3;
|
||||
tr7:
|
||||
#line 67 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buf)
|
||||
buf[buflen++] = (*( parser->p));
|
||||
}
|
||||
goto st3;
|
||||
tr6:
|
||||
#line 93 "src/parser.rl"
|
||||
{
|
||||
parser->option_mark = (*( parser->p));
|
||||
if (parser->callback && buf != NULL)
|
||||
{
|
||||
telnet_parser_subnegotiation_event ev;
|
||||
EV_SUBNEGOTIATION(ev, 1, parser->option_mark);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
goto st3;
|
||||
st3:
|
||||
if ( ++( parser->p) == ( parser->pe) )
|
||||
goto _test_eof3;
|
||||
case 3:
|
||||
#line 350 "src/parser.c"
|
||||
switch( (*( parser->p)) ) {
|
||||
case 13u: goto tr8;
|
||||
case 255u: goto st5;
|
||||
}
|
||||
goto tr7;
|
||||
tr8:
|
||||
#line 67 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buf)
|
||||
buf[buflen++] = (*( parser->p));
|
||||
}
|
||||
goto st4;
|
||||
st4:
|
||||
if ( ++( parser->p) == ( parser->pe) )
|
||||
goto _test_eof4;
|
||||
case 4:
|
||||
#line 367 "src/parser.c"
|
||||
switch( (*( parser->p)) ) {
|
||||
case 0u: goto st3;
|
||||
case 10u: goto tr7;
|
||||
}
|
||||
goto tr11;
|
||||
st5:
|
||||
if ( ++( parser->p) == ( parser->pe) )
|
||||
goto _test_eof5;
|
||||
case 5:
|
||||
switch( (*( parser->p)) ) {
|
||||
case 240u: goto tr13;
|
||||
case 255u: goto tr7;
|
||||
}
|
||||
goto tr12;
|
||||
tr5:
|
||||
#line 57 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buflen > 0)
|
||||
{
|
||||
telnet_parser_data_event ev;
|
||||
EV_DATA(ev, buf, buflen);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
buflen = 0;
|
||||
}
|
||||
}
|
||||
#line 81 "src/parser.rl"
|
||||
{
|
||||
parser->option_mark = (*( parser->p));
|
||||
}
|
||||
goto st6;
|
||||
st6:
|
||||
if ( ++( parser->p) == ( parser->pe) )
|
||||
goto _test_eof6;
|
||||
case 6:
|
||||
#line 402 "src/parser.c"
|
||||
goto tr14;
|
||||
}
|
||||
_test_eof7: parser->cs = 7; goto _test_eof;
|
||||
_test_eof0: parser->cs = 0; goto _test_eof;
|
||||
_test_eof1: parser->cs = 1; goto _test_eof;
|
||||
_test_eof2: parser->cs = 2; goto _test_eof;
|
||||
_test_eof3: parser->cs = 3; goto _test_eof;
|
||||
_test_eof4: parser->cs = 4; goto _test_eof;
|
||||
_test_eof5: parser->cs = 5; goto _test_eof;
|
||||
_test_eof6: parser->cs = 6; goto _test_eof;
|
||||
|
||||
_test_eof: {}
|
||||
if ( ( parser->p) == ( parser->eof) )
|
||||
{
|
||||
switch ( parser->cs ) {
|
||||
case 3:
|
||||
case 7:
|
||||
#line 57 "src/parser.rl"
|
||||
{
|
||||
if (parser->callback && buflen > 0)
|
||||
{
|
||||
telnet_parser_data_event ev;
|
||||
EV_DATA(ev, buf, buflen);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
buflen = 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
#line 431 "src/parser.c"
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#line 189 "src/parser.rl"
|
||||
|
||||
if (bytes_used != NULL)
|
||||
*bytes_used = parser->p - data;
|
||||
|
||||
free(buf);
|
||||
buf = NULL;
|
||||
parser->p = parser->pe = parser->eof = NULL;
|
||||
|
||||
return (parser->interrupted) ? TELNET_E_INTERRUPT : TELNET_E_OK;
|
||||
}
|
||||
|
||||
/* Requests that the parse in progress stop after the byte currently being
   processed, and raises the `interrupted` flag that telnet_parser_parse()
   reports as TELNET_E_INTERRUPT.  When no buffer is being parsed (p is NULL)
   only the flag is set.
   Returns TELNET_E_BAD_PARSER when parser is NULL, TELNET_E_OK otherwise. */
telnet_error telnet_parser_interrupt(telnet_parser* parser)
{
  if (!parser)
    return TELNET_E_BAD_PARSER;

  /* Force the parser to stop where it's at: pull both end markers in to just
     past the current byte so the state machine sees end-of-input next. */
  const telnet_byte* cursor = parser->p;
  if (cursor)
  {
    const telnet_byte* stop = cursor + 1;
    parser->pe = stop;
    parser->eof = stop;
  }

  parser->interrupted = 1;
  return TELNET_E_OK;
}
|
@ -1,211 +0,0 @@
|
||||
#include <string.h>
|
||||
#include <anachronism/parser.h>
|
||||
|
||||
#define BASE_EV(ev, t) \
|
||||
(ev).SUPER_.type = TELNET_EV_PARSER_##t
|
||||
|
||||
#define EV_DATA(ev, text, len) do {\
|
||||
BASE_EV(ev, DATA);\
|
||||
(ev).data = (text);\
|
||||
(ev).length = (len);\
|
||||
} while (0)
|
||||
|
||||
#define EV_COMMAND(ev, cmd) do {\
|
||||
BASE_EV(ev, COMMAND);\
|
||||
(ev).command = (cmd);\
|
||||
} while (0)
|
||||
|
||||
#define EV_OPTION(ev, cmd, opt) do {\
|
||||
BASE_EV(ev, OPTION);\
|
||||
(ev).command = (cmd);\
|
||||
(ev).option = (opt);\
|
||||
} while (0)
|
||||
|
||||
#define EV_SUBNEGOTIATION(ev, act, opt) do {\
|
||||
BASE_EV(ev, SUBNEGOTIATION);\
|
||||
(ev).active = (act);\
|
||||
(ev).option = (opt);\
|
||||
} while (0)
|
||||
|
||||
#define EV_WARNING(ev, msg, pos) do {\
|
||||
BASE_EV(ev, WARNING);\
|
||||
(ev).message = (msg);\
|
||||
(ev).position = (pos);\
|
||||
} while (0)
|
||||
|
||||
struct telnet_parser {
|
||||
int cs; /* current Ragel state */
|
||||
const telnet_byte* p; /* current position */
|
||||
const telnet_byte* pe; /* end of current packet */
|
||||
const telnet_byte* eof; /* end-of-file marker */
|
||||
|
||||
telnet_byte option_mark; /* temporary storage for a command byte */
|
||||
unsigned char interrupted; /* Flag for interrupts */
|
||||
|
||||
telnet_parser_callback callback; /* Receiver of Telnet events*/
|
||||
void* userdata; /* Context for parser callback */
|
||||
};
|
||||
|
||||
%%{
|
||||
machine telnet_parser;
|
||||
|
||||
access parser->;
|
||||
variable p parser->p;
|
||||
variable pe parser->pe;
|
||||
variable eof parser->eof;
|
||||
|
||||
action flush_text {
|
||||
if (parser->callback && buflen > 0)
|
||||
{
|
||||
telnet_parser_data_event ev;
|
||||
EV_DATA(ev, buf, buflen);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
buflen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
action char {
|
||||
if (parser->callback && buf)
|
||||
buf[buflen++] = fc;
|
||||
}
|
||||
|
||||
action basic_command {
|
||||
if (parser->callback && buf)
|
||||
{
|
||||
telnet_parser_command_event ev;
|
||||
EV_COMMAND(ev, fc);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
|
||||
action option_mark {
|
||||
parser->option_mark = fc;
|
||||
}
|
||||
action option_command {
|
||||
if (parser->callback && buf)
|
||||
{
|
||||
telnet_parser_option_event ev;
|
||||
EV_OPTION(ev, parser->option_mark, fc);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
|
||||
action subneg_command {
|
||||
parser->option_mark = fc;
|
||||
if (parser->callback && buf != NULL)
|
||||
{
|
||||
telnet_parser_subnegotiation_event ev;
|
||||
EV_SUBNEGOTIATION(ev, 1, parser->option_mark);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
action subneg_command_end {
|
||||
if (parser->callback && buf != NULL)
|
||||
{
|
||||
telnet_parser_subnegotiation_event ev;
|
||||
EV_SUBNEGOTIATION(ev, 0, parser->option_mark);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
|
||||
action warning_cr {
|
||||
if (parser->callback && buf != NULL)
|
||||
{
|
||||
telnet_parser_warning_event ev;
|
||||
EV_WARNING(ev, "Invalid \\r: not followed by \\n or \\0.", fpc-data);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
action warning_iac {
|
||||
if (parser->callback && buf != NULL)
|
||||
{
|
||||
telnet_parser_warning_event ev;
|
||||
EV_WARNING(ev, "IAC followed by invalid command.", fpc-data);
|
||||
parser->callback(parser, (telnet_parser_event*)&ev);
|
||||
}
|
||||
}
|
||||
|
||||
include telnet_parser_common "parser_common.rl";
|
||||
write data;
|
||||
}%%
|
||||
|
||||
telnet_parser* telnet_parser_new(void* userdata,
|
||||
telnet_parser_callback callback)
|
||||
{
|
||||
telnet_parser* parser = malloc(sizeof(telnet_parser));
|
||||
if (parser)
|
||||
{
|
||||
memset(parser, 0, sizeof(*parser));
|
||||
%% write init;
|
||||
parser->callback = callback;
|
||||
parser->userdata = userdata;
|
||||
}
|
||||
return parser;
|
||||
}
|
||||
|
||||
void telnet_parser_free(telnet_parser* parser)
|
||||
{
|
||||
free(parser);
|
||||
}
|
||||
|
||||
telnet_error telnet_parser_get_userdata(telnet_parser* parser, void** userdata)
|
||||
{
|
||||
if (!parser)
|
||||
return TELNET_E_BAD_PARSER;
|
||||
|
||||
*userdata = parser->userdata;
|
||||
return TELNET_E_OK;
|
||||
}
|
||||
|
||||
telnet_error telnet_parser_parse(telnet_parser* parser,
|
||||
const telnet_byte* data,
|
||||
size_t length,
|
||||
size_t* bytes_used)
|
||||
{
|
||||
if (!parser)
|
||||
return TELNET_E_BAD_PARSER;
|
||||
|
||||
// Reset the interrupt flag
|
||||
parser->interrupted = 0;
|
||||
|
||||
// Only bother saving text if it'll be used
|
||||
telnet_byte* buf = NULL;
|
||||
size_t buflen = 0;
|
||||
if (parser->callback)
|
||||
{
|
||||
// Because of how the parser translates data, a run of text is guaranteed to
|
||||
// be at most 'length' characters long. In practice it's usually less, due to
|
||||
// escaped characters (IAC IAC -> IAC) and text separated by commands.
|
||||
buf = malloc(length * sizeof(*buf));
|
||||
if (!buf)
|
||||
return TELNET_E_ALLOC;
|
||||
}
|
||||
|
||||
parser->p = data;
|
||||
parser->pe = data + length;
|
||||
parser->eof = parser->pe;
|
||||
|
||||
%% write exec;
|
||||
|
||||
if (bytes_used != NULL)
|
||||
*bytes_used = parser->p - data;
|
||||
|
||||
free(buf);
|
||||
buf = NULL;
|
||||
parser->p = parser->pe = parser->eof = NULL;
|
||||
|
||||
return (parser->interrupted) ? TELNET_E_INTERRUPT : TELNET_E_OK;
|
||||
}
|
||||
|
||||
telnet_error telnet_parser_interrupt(telnet_parser* parser)
|
||||
{
|
||||
if (!parser)
|
||||
return TELNET_E_BAD_PARSER;
|
||||
|
||||
// Force the parser to stop where it's at.
|
||||
if (parser->p)
|
||||
parser->eof = parser->pe = parser->p + 1;
|
||||
|
||||
parser->interrupted = 1;
|
||||
return TELNET_E_OK;
|
||||
}
|
@ -1,85 +0,0 @@
|
||||
%%{
|
||||
machine telnet_parser_common;
|
||||
alphtype unsigned char;
|
||||
|
||||
# Shorthand for tidiness.
|
||||
action fhold {fhold;}
|
||||
|
||||
# Special bytes that must be handled differently from normal text:
|
||||
CR = "\r"; # Only \0 or \n may follow
|
||||
IAC = 255; # Telnet command marker
|
||||
special_byte = CR | IAC;
|
||||
|
||||
# The only bytes that may follow a CR:
|
||||
NL = "\n";
|
||||
NUL = "\0";
|
||||
|
||||
# The only bytes that may follow an IAC:
|
||||
SE = 240;
|
||||
NOP = 241;
|
||||
DM = 242;
|
||||
BRK = 243;
|
||||
IP = 244;
|
||||
AO = 245;
|
||||
AYT = 246;
|
||||
EC = 247;
|
||||
EL = 248;
|
||||
GA = 249;
|
||||
SB = 250;
|
||||
WILL = 251;
|
||||
WONT = 252;
|
||||
DO = 253;
|
||||
DONT = 254;
|
||||
# IAC IAC is interpreted as a plain-text IAC byte.
|
||||
|
||||
# Sorting the above IAC commands by type:
|
||||
iac_option_type = WILL | WONT | DO | DONT;
|
||||
iac_subneg_type = SB;
|
||||
iac_command_type = ^(iac_option_type | iac_subneg_type | IAC);
|
||||
|
||||
###
|
||||
# Plain text
|
||||
###
|
||||
plain_text = (^special_byte) @char;
|
||||
cr_seq = CR @char
|
||||
( NUL
|
||||
| NL @char
|
||||
| ^(NUL|NL) @fhold @warning_cr @flush_text
|
||||
);
|
||||
|
||||
###
|
||||
# IAC sequence
|
||||
###
|
||||
iac_command = iac_command_type @basic_command;
|
||||
|
||||
iac_option = iac_option_type @option_mark
|
||||
any @option_command;
|
||||
|
||||
iac_subneg = iac_subneg_type any @subneg_command
|
||||
( plain_text
|
||||
| cr_seq
|
||||
| IAC IAC @char
|
||||
)** %/flush_text
|
||||
IAC
|
||||
( SE
|
||||
| ^(IAC|SE) @fhold @warning_iac
|
||||
) >flush_text @subneg_command_end;
|
||||
|
||||
iac_seq = ( iac_command
|
||||
| iac_option
|
||||
| iac_subneg
|
||||
);
|
||||
|
||||
###
|
||||
# Telnet stream
|
||||
###
|
||||
telnet_stream = ( plain_text
|
||||
| cr_seq
|
||||
| IAC
|
||||
( IAC @char
|
||||
| iac_seq >flush_text
|
||||
)
|
||||
)** %/flush_text;
|
||||
|
||||
main := telnet_stream;
|
||||
}%%
|
@ -1,23 +0,0 @@
|
||||
# editorconfig.org
|
||||
|
||||
root = true
|
||||
|
||||
[*]
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
insert_final_newline = true
|
||||
|
||||
[*.js]
|
||||
trim_trailing_whitespace = true
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
[*.{c,h}]
|
||||
trim_trailing_whitespace = true
|
||||
indent_style = tab
|
||||
indent_size = 8
|
||||
|
||||
[Makefile]
|
||||
trim_trailing_whitespace = true
|
||||
indent_style = tab
|
||||
indent_size = 8
|
33
outside/commonmark/.gitignore
vendored
33
outside/commonmark/.gitignore
vendored
@ -1,33 +0,0 @@
|
||||
# Object files
|
||||
*.o
|
||||
*.ko
|
||||
*.obj
|
||||
*.elf
|
||||
|
||||
# Libraries
|
||||
*.lib
|
||||
*.a
|
||||
|
||||
# Shared objects (inc. Windows DLLs)
|
||||
*.dll
|
||||
*.so
|
||||
*.so.*
|
||||
*.dylib
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
||||
*.i*86
|
||||
*.x86_64
|
||||
*.hex
|
||||
|
||||
*~
|
||||
*.bak
|
||||
*.diff
|
||||
*#
|
||||
*.zip
|
||||
bstrlib.txt
|
||||
build
|
||||
cmark.dSYM/*
|
||||
cmark
|
@ -1,10 +0,0 @@
|
||||
language: c
|
||||
compiler:
|
||||
- clang
|
||||
- gcc
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq pandoc re2c valgrind
|
||||
script:
|
||||
- make testtarball
|
||||
- PROG=`ls cmark-*.*/build/src/cmark` make leakcheck
|
@ -1,24 +0,0 @@
|
||||
project(cmark)
|
||||
cmake_minimum_required(VERSION 2.8)
|
||||
|
||||
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
|
||||
message(FATAL_ERROR "Do not build in-source.\nPlease remove CMakeCache.txt and the CMakeFiles/ directory.\nThen: mkdir build ; cd build ; cmake .. ; make")
|
||||
endif()
|
||||
|
||||
set(PROJECT_NAME "cmark")
|
||||
|
||||
set(PROJECT_VERSION_MAJOR 0)
|
||||
set(PROJECT_VERSION_MINOR 0)
|
||||
set(PROJECT_VERSION_PATCH 1)
|
||||
set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} )
|
||||
|
||||
add_subdirectory(src)
|
||||
#add_subdirectory(api_test)
|
||||
#add_subdirectory(man)
|
||||
#enable_testing()
|
||||
#add_subdirectory(test)
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE "Release" CACHE STRING
|
||||
"Choose the type of build, options are: Debug Release." FORCE)
|
||||
endif(NOT CMAKE_BUILD_TYPE)
|
@ -1,70 +0,0 @@
|
||||
Copyright (c) 2014, John MacFarlane
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
|
||||
* Neither the name of John MacFarlane nor the names of other
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
-----
|
||||
|
||||
The polyfill for String.fromCodePoint included in commonmark.js is
|
||||
Copyright Mathias Bynens <http://mathiasbynens.be/>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
-----
|
||||
|
||||
The normalization code in runtests.py was derived from the
|
||||
markdowntest project, Copyright 2013 Karl Dubost:
|
||||
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2013 Karl Dubost
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
@ -1,194 +0,0 @@
|
||||
SRCDIR?=src
|
||||
DATADIR?=data
|
||||
BUILDDIR?=build
|
||||
GENERATOR?=Unix Makefiles
|
||||
MINGW_BUILDDIR?=build-mingw
|
||||
MINGW_INSTALLDIR?=windows
|
||||
SPEC=spec.txt
|
||||
SITE=_site
|
||||
SPECVERSION=$(shell perl -ne 'print $$1 if /^version: *([0-9.]+)/' $(SPEC))
|
||||
PKGDIR?=cmark-$(SPECVERSION)
|
||||
TARBALL?=cmark-$(SPECVERSION).tar.gz
|
||||
ZIPARCHIVE?=cmark-$(SPECVERSION).zip
|
||||
FUZZCHARS?=2000000 # for fuzztest
|
||||
BENCHDIR=bench
|
||||
BENCHFILE=$(BENCHDIR)/benchinput.md
|
||||
ALLTESTS=alltests.md
|
||||
NUMRUNS?=10
|
||||
PROG?=$(BUILDDIR)/src/cmark
|
||||
BENCHINP?=README.md
|
||||
JSMODULES=$(wildcard js/lib/*.js)
|
||||
|
||||
# Command-style targets that never produce a file of the same name.
# install, distclean and operf are included so that a stray file with one of
# those names cannot make the target look up to date.
.PHONY: all spec leakcheck clean distclean install operf fuzztest dingus upload jshint test testjs benchjs update-site upload-site check npm debug mingw archive tarball ziparchive testarchive testtarball testziparchive testlib bench apidoc
|
||||
|
||||
all: $(BUILDDIR)
|
||||
@$(MAKE) -C $(BUILDDIR)
|
||||
@echo "Binaries can be found in $(BUILDDIR)/src"
|
||||
|
||||
check:
|
||||
@cmake --version > /dev/null || (echo "You need cmake to build this program: http://www.cmake.org/download/" && exit 1)
|
||||
|
||||
$(BUILDDIR): check $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc man/man1/cmark.1 man/man3/cmark.3
|
||||
mkdir -p $(BUILDDIR); \
|
||||
cd $(BUILDDIR); \
|
||||
cmake .. -G "$(GENERATOR)" -DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
||||
|
||||
install: $(BUILDDIR)
|
||||
$(MAKE) -C $(BUILDDIR) install
|
||||
|
||||
debug:
|
||||
mkdir -p $(BUILDDIR); \
|
||||
cd $(BUILDDIR); \
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Debug; \
|
||||
$(MAKE)
|
||||
|
||||
mingw:
|
||||
mkdir -p $(MINGW_BUILDDIR); \
|
||||
cd $(MINGW_BUILDDIR); \
|
||||
cmake .. -DCMAKE_TOOLCHAIN_FILE=../toolchain-mingw32.cmake -DCMAKE_INSTALL_PREFIX=$(MINGW_INSTALLDIR) ;\
|
||||
$(MAKE) && $(MAKE) install
|
||||
|
||||
archive: spec.html $(BUILDDIR)
|
||||
@rm -rf $(PKGDIR); \
|
||||
mkdir -p $(PKGDIR)/$(SRCDIR)/html; \
|
||||
mkdir -p $(PKGDIR)/api_test; \
|
||||
srcfiles=`git ls-tree --full-tree -r HEAD --name-only $(SRCDIR) api_test`; \
|
||||
for f in $$srcfiles; do cp -a $$f $(PKGDIR)/$$f; done; \
|
||||
cp -a $(SRCDIR)/scanners.c $(PKGDIR)/$(SRCDIR)/; \
|
||||
cp -a spec.html $(PKGDIR); \
|
||||
cp CMakeLists.txt $(PKGDIR); \
|
||||
perl -ne '$$p++ if /^### JavaScript/; print if (!$$p)' Makefile > $(PKGDIR)/Makefile; \
|
||||
cp -a Makefile.nmake nmake.bat $(PKGDIR); \
|
||||
cp -r man $(PKGDIR)/; \
|
||||
cp -r test $(PKGDIR)/; \
|
||||
cp -a README.md LICENSE spec.txt $(PKGDIR)/; \
|
||||
tar czf $(TARBALL) $(PKGDIR); \
|
||||
zip -q -r $(ZIPARCHIVE) $(PKGDIR); \
|
||||
rm -rf $(PKGDIR) ; \
|
||||
echo "Created $(TARBALL) and $(ZIPARCHIVE)."
|
||||
|
||||
clean:
|
||||
rm -rf $(BUILDDIR) $(MINGW_BUILDDIR) $(MINGW_INSTALLDIR) $(TARBALL) $(ZIPARCHIVE) $(PKGDIR)
|
||||
|
||||
$(PROG): all
|
||||
|
||||
man/man3/cmark.3: src/cmark.h
|
||||
python man/make_man_page.py $< > $@
|
||||
|
||||
# We include html_unescape.h in the repository, so this shouldn't
|
||||
# normally need to be generated.
|
||||
$(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf
|
||||
gperf -L ANSI-C -I -t -N find_entity -H hash_entity -K entity -C -l \
|
||||
--null-strings -m5 $< > $@
|
||||
|
||||
# We include case_fold_switch.inc in the repository, so this shouldn't
|
||||
# normally need to be generated.
|
||||
$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
|
||||
perl mkcasefold.pl < $< > $@
|
||||
|
||||
test: $(SPEC) $(BUILDDIR)
|
||||
$(MAKE) -C $(BUILDDIR) test ARGS="-V"
|
||||
|
||||
$(TARBALL): archive
|
||||
|
||||
$(ZIPARCHIVE): archive
|
||||
|
||||
testarchive: testtarball testziparchive
|
||||
rm -rf $(PKGDIR)
|
||||
|
||||
testtarball: $(TARBALL)
|
||||
rm -rf $(PKGDIR); \
|
||||
tar xvzf $(TARBALL); \
|
||||
cd $(PKGDIR); \
|
||||
mkdir build && cd build && cmake .. && $(MAKE) && ctest -V
|
||||
|
||||
testziparchive: $(ZIPARCHIVE)
|
||||
rm -rf $(PKGDIR); \
|
||||
unzip $(ZIPARCHIVE); \
|
||||
cd $(PKGDIR); \
|
||||
mkdir build && cd build && cmake .. && $(MAKE) && ctest -V
|
||||
|
||||
$(ALLTESTS): spec.txt
|
||||
python test/spec_tests.py --spec $< --dump-tests | python -c 'import json; import sys; tests = json.loads(sys.stdin.read()); print "\n".join([test["markdown"] for test in tests]).encode("utf-8")' > $@
|
||||
|
||||
leakcheck: $(ALLTESTS) $(PROG)
|
||||
cat $< | valgrind --leak-check=full --dsymutil=yes --error-exitcode=1 $(PROG) >/dev/null
|
||||
|
||||
fuzztest:
|
||||
{ for i in `seq 1 10`; do \
|
||||
cat /dev/urandom | head -c $(FUZZCHARS) | iconv -f latin1 -t utf-8 | tee fuzz-$$i.txt | \
|
||||
/usr/bin/env time -p $(PROG) >/dev/null && rm fuzz-$$i.txt ; \
|
||||
done } 2>&1 | grep 'user\|abnormally'
|
||||
|
||||
# for benchmarking
|
||||
$(BENCHFILE): progit/progit.md
|
||||
-rm $@; for x in `seq 1 20` ; do cat $< >> $@; done
|
||||
|
||||
progit:
|
||||
git clone https://github.com/progit/progit.git
|
||||
|
||||
progit/progit.md: progit
|
||||
cat progit/en/*/*.markdown > $@
|
||||
|
||||
bench: $(BENCHFILE)
|
||||
{ sudo renice 99 $$$$; \
|
||||
for x in `seq 1 $(NUMRUNS)` ; do \
|
||||
/usr/bin/env time -p $(PROG) </dev/null >/dev/null ; \
|
||||
/usr/bin/env time -p $(PROG) <$< >/dev/null ; \
|
||||
done \
|
||||
} 2>&1 | grep 'real' | awk '{print $$2}' | python3 'bench/stats.py'
|
||||
|
||||
operf: $(PROG)
|
||||
operf $(PROG) <$(BENCHINP) >/dev/null
|
||||
|
||||
distclean: clean
|
||||
-rm -f js/commonmark.js
|
||||
-rm -rf *.dSYM
|
||||
-rm -f README.html
|
||||
-rm -f spec.md fuzz.txt spec.html
|
||||
-rm -rf $(BENCHFILE) $(ALLTESTS) progit
|
||||
|
||||
### JavaScript ###
|
||||
|
||||
js/commonmark.js: js/lib/index.js ${JSMODULES}
|
||||
browserify --standalone commonmark $< -o $@
|
||||
|
||||
testjs: $(SPEC)
|
||||
node js/test.js
|
||||
|
||||
jshint:
|
||||
jshint ${JSMODULES}
|
||||
|
||||
benchjs:
|
||||
node js/bench.js ${BENCHINP}
|
||||
|
||||
npm:
|
||||
cd js; npm publish
|
||||
|
||||
dingus: js/commonmark.js
|
||||
echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000
|
||||
|
||||
### Spec ###
|
||||
|
||||
spec.md: $(SPEC)
|
||||
perl spec2md.pl < $< > $@
|
||||
|
||||
spec.html: spec.md template.html
|
||||
pandoc --no-highlight --number-sections --template template.html -s --toc -S $< | \
|
||||
perl -pe 's/a href="@([^"]*)"/a id="\1" href="#\1" class="definition"/g' | \
|
||||
perl -pe 's/␣/<span class="space"> <\/span>/g' \
|
||||
> $@
|
||||
|
||||
spec.pdf: spec.md template.tex specfilter.hs
|
||||
pandoc -s $< --template template.tex \
|
||||
--filter ./specfilter.hs -o $@ --latex-engine=xelatex --toc \
|
||||
--number-sections -V documentclass=report -V tocdepth=2 \
|
||||
-V classoption=twosides
|
||||
|
||||
### Website ###
|
||||
|
||||
update-site: spec.html js/commonmark.js
|
||||
$(MAKE) -C $(SITE) update
|
||||
|
||||
upload-site: spec.html
|
||||
$(MAKE) -C $(SITE) upload
|
@ -1,57 +0,0 @@
|
||||
SRCDIR=src
|
||||
DATADIR=data
|
||||
BUILDDIR=build
|
||||
INSTALLDIR=windows
|
||||
SPEC=spec.txt
|
||||
PROG=$(BUILDDIR)\src\cmark.exe
|
||||
GENERATOR=NMake Makefiles
|
||||
|
||||
all: $(BUILDDIR)
|
||||
@pushd $(BUILDDIR) && $(MAKE) /nologo && popd
|
||||
|
||||
$(BUILDDIR):
|
||||
@cmake --version > nul || (echo "You need cmake to build this program: http://www.cmake.org/download/" && exit 1)
|
||||
-mkdir $(BUILDDIR) 2> nul
|
||||
pushd $(BUILDDIR) && \
|
||||
cmake \
|
||||
-G "$(GENERATOR)" \
|
||||
-D CMAKE_BUILD_TYPE=$(BUILD_TYPE) \
|
||||
-D CMAKE_INSTALL_PREFIX=$(INSTALLDIR) \
|
||||
.. && \
|
||||
popd
|
||||
|
||||
install: all
|
||||
@pushd $(BUILDDIR) && $(MAKE) /nologo install && popd
|
||||
|
||||
clean:
|
||||
-rmdir /s /q $(BUILDDIR) $(MINGW_INSTALLDIR) 2> nul
|
||||
|
||||
$(SRCDIR)\case_fold_switch.inc: $(DATADIR)\CaseFolding-3.2.0.txt
|
||||
perl mkcasefold.pl < $? > $@
|
||||
|
||||
man\man1\cmark.1: man\cmark.1.md
|
||||
pandoc $? -o $@ -s -t man
|
||||
|
||||
test: $(SPEC) all
|
||||
@pushd $(BUILDDIR) && $(MAKE) /nologo test ARGS="-V" && popd
|
||||
|
||||
distclean: clean
|
||||
del /q src\scanners.c 2> nul
|
||||
del /q spec.md spec.html 2> nul
|
||||
|
||||
### Spec ###
|
||||
|
||||
spec.md: $(SPEC)
|
||||
perl spec2md.pl < $? > $@
|
||||
|
||||
spec.html: spec.md template.html
|
||||
pandoc --no-highlight --number-sections --template template.html -s --toc -S $? | \
|
||||
perl -pe "s/a href=\"@([^"]*)\"/a id=\"\\1\" href=\"#\\1\" class=\"definition\"/g" | \
|
||||
perl -pe "s/\\x{2423}/<span class=\"space\"> <\\/span>/g" \
|
||||
> $@
|
||||
|
||||
spec.pdf: spec.md template.tex specfilter.hs
|
||||
pandoc -s $? --template template.tex \
|
||||
--filter specfilter.hs -o $@ --latex-engine=xelatex --toc \
|
||||
--number-sections -V documentclass=report -V tocdepth=2 \
|
||||
-V classoption=twosides
|
@ -1,47 +0,0 @@
|
||||
PREFIX?=/usr/local
|
||||
SRCDIR?=src
|
||||
CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc -Wno-missing-field-initializers -fPIC $(OPTCFLAGS)
|
||||
LDFLAGS?=-g -O3 -Wall -Werror -fPIC $(OPTLDFLAGS)
|
||||
|
||||
HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o
|
||||
|
||||
CMARK_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.o
|
||||
|
||||
CMARK_HDR = $(SRCDIR)/cmark.h $(SRCDIR)/buffer.h $(SRCDIR)/references.h \
|
||||
$(SRCDIR)/chunk.h $(SRCDIR)/debug.h $(SRCDIR)/utf8.h \
|
||||
$(SRCDIR)/scanners.h $(SRCDIR)/inlines.h
|
||||
|
||||
HTML_HDR = $(SRCDIR)/html/html_unescape.h $(SRCDIR)/html/houdini.h
|
||||
|
||||
$(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(CMARK_OBJ) $(SRCDIR)/main.c
|
||||
$(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(CMARK_OBJ) $(SRCDIR)/main.c
|
||||
|
||||
$(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
|
||||
re2c --case-insensitive -bis $< > $@ || (rm $@ && false)
|
||||
|
||||
|
||||
# Regenerate the HTML entity lookup table from its gperf description.
# The trailing backslash is required: without it the second line runs as a
# separate shell command and gperf gets neither its input nor the redirect.
$(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf
	gperf -I -t -N find_entity -H hash_entity -K entity -C -l \
		--null-strings -m5 $< > $@
|
||||
|
||||
libcommonmark.so: $(HTML_OBJ) $(CMARK_OBJ)
|
||||
$(CC) $(LDFLAGS) -shared -o $@ $^
|
||||
|
||||
# Install the shared library and its public headers under $(PREFIX).
# Make variable names are case-sensitive, so the header list must be spelled
# CMARK_HDR exactly as it is defined; $(cmark_HDR) expands to nothing.
install: libcommonmark.so $(CMARK_HDR) $(HTML_HDR)
	install -d $(PREFIX)/lib $(PREFIX)/include/cmark/html
	install libcommonmark.so $(PREFIX)/lib/
	install $(CMARK_HDR) $(PREFIX)/include/cmark/
	install $(HTML_HDR) $(PREFIX)/include/cmark/html/
|
||||
|
||||
CMARK_HDR = $(SRCDIR)/cmark.h $(SRCDIR)/buffer.h $(SRCDIR)/references.h \
|
||||
$(SRCDIR)/chunk.h $(SRCDIR)/debug.h $(SRCDIR)/utf8.h \
|
||||
$(SRCDIR)/scanners.h $(SRCDIR)/inlines.h
|
||||
|
||||
HTML_HDR = $(SRCDIR)/html/html_unescape.h $(SRCDIR)/html/houdini.h
|
||||
|
||||
clean:
|
||||
-rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o libcommonmark.so
|
||||
-rm -f js/commonmark.js
|
||||
-rm -rf *.dSYM
|
||||
-rm -f README.html
|
||||
-rm -f spec.md fuzz.txt spec.html
|
@ -1,298 +0,0 @@
|
||||
CommonMark
|
||||
==========
|
||||
|
||||
CommonMark is a rationalized version of Markdown syntax,
|
||||
with a [spec][the spec] and BSD3-licensed reference
|
||||
implementations in C and JavaScript.
|
||||
|
||||
[Try it now!](http://spec.commonmark.org/dingus.html)
|
||||
|
||||
The implementations
|
||||
-------------------
|
||||
|
||||
The C implementation provides both a shared library (`libcmark`) and a
|
||||
standalone program `cmark` that converts CommonMark to HTML. It is
|
||||
written in standard C99 and has no library dependencies. The parser is
|
||||
very fast (see [benchmarks](benchmarks.md)).
|
||||
|
||||
It is easy to use `libcmark` in python, lua, ruby, and other dynamic
|
||||
languages: see `wrapper.py`, `wrapper.lua`, and `wrapper.rb` in the
|
||||
repository for simple examples.
|
||||
|
||||
The JavaScript implementation is a single JavaScript file, with
|
||||
no dependencies, that can be linked to in an HTML page. Here
|
||||
is a simple usage example:
|
||||
|
||||
``` javascript
|
||||
var reader = new commonmark.DocParser();
|
||||
var writer = new commonmark.HtmlRenderer();
|
||||
var parsed = reader.parse("Hello *world*");
|
||||
var result = writer.render(parsed);
|
||||
```
|
||||
|
||||
A node package is also available; it includes a command-line tool called
|
||||
`commonmark`.
|
||||
|
||||
**A note on security:**
|
||||
Neither implementation attempts to sanitize link attributes or
|
||||
raw HTML. If you use these libraries in applications that accept
|
||||
untrusted user input, you must run the output through an HTML
|
||||
sanitizer to protect against
|
||||
[XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
|
||||
|
||||
Installing (C)
|
||||
--------------
|
||||
|
||||
Building the C program (`cmark`) and shared library (`libcmark`)
|
||||
requires [cmake]. If you modify `scanners.re`, then you will also
|
||||
need [re2c], which is used to generate `scanners.c` from
|
||||
`scanners.re`. We have included a pre-generated `scanners.c` in
|
||||
the repository to reduce build dependencies.
|
||||
|
||||
If you have GNU make, you can simply `make`, `make test`, and `make
|
||||
install`. This calls [cmake] to create a `Makefile` in the `build`
|
||||
directory, then uses that `Makefile` to create the executable and
|
||||
library. The binaries can be found in `build/src`.
|
||||
|
||||
For a more portable method, you can use [cmake] manually. [cmake] knows
|
||||
how to create build environments for many build systems. For example,
|
||||
on FreeBSD:
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. # optionally: -DCMAKE_INSTALL_PREFIX=path
|
||||
make # executable will be created as build/src/cmark
|
||||
make test
|
||||
make install
|
||||
|
||||
Or, to create Xcode project files on OSX:
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -G Xcode ..
|
||||
make
|
||||
make test
|
||||
make install
|
||||
|
||||
The GNU Makefile also provides a few other targets for developers.
|
||||
To run a "fuzz test" against ten long randomly generated inputs:
|
||||
|
||||
make fuzztest
|
||||
|
||||
To run a test for memory leaks using valgrind:
|
||||
|
||||
make leakcheck
|
||||
|
||||
To make a release tarball and zip archive:
|
||||
|
||||
make archive
|
||||
|
||||
To test the archives:
|
||||
|
||||
make testarchive
|
||||
|
||||
Compiling for Windows
|
||||
---------------------
|
||||
|
||||
To compile with MSVC and NMAKE:
|
||||
|
||||
nmake
|
||||
|
||||
You can cross-compile a Windows binary and dll on linux if you have the
|
||||
`mingw32` compiler:
|
||||
|
||||
make mingw
|
||||
|
||||
The binaries will be in `build-mingw/windows/bin`.
|
||||
|
||||
Installing (JavaScript)
|
||||
-----------------------
|
||||
|
||||
The JavaScript library can be installed through `npm`:
|
||||
|
||||
npm install commonmark
|
||||
|
||||
To build the JavaScript library as a single standalone file:
|
||||
|
||||
browserify --standalone commonmark js/lib/index.js -o js/commonmark.js
|
||||
|
||||
Or fetch a pre-built copy from
|
||||
<http://spec.commonmark.org/js/commonmark.js>.
|
||||
|
||||
To run tests for the JavaScript library:
|
||||
|
||||
make testjs
|
||||
|
||||
or
|
||||
|
||||
node js/test.js
|
||||
|
||||
The spec
|
||||
--------
|
||||
|
||||
[The spec] contains over 500 embedded examples which serve as conformance
|
||||
tests. To run the tests using an executable `$PROG`:
|
||||
|
||||
python test/spec_tests.py --program $PROG
|
||||
|
||||
If you want to extract the raw test data from the spec without
|
||||
actually running the tests, you can do:
|
||||
|
||||
python test/spec_tests.py --dump-tests
|
||||
|
||||
and you'll get all the tests in JSON format.
|
||||
|
||||
[The spec]: http://jgm.github.io/CommonMark/spec.html
|
||||
|
||||
The source of [the spec] is `spec.txt`. This is basically a Markdown
|
||||
file, with code examples written in a shorthand form:
|
||||
|
||||
.
|
||||
Markdown source
|
||||
.
|
||||
expected HTML output
|
||||
.
|
||||
|
||||
To build an HTML version of the spec, do `make spec.html`. To build a
|
||||
PDF version, do `make spec.pdf`. Both these commands require that
|
||||
[pandoc] is installed, and creating a PDF requires a latex installation.
|
||||
|
||||
The spec is written from the point of view of the human writer, not
|
||||
the computer reader. It is not an algorithm---an English translation of
|
||||
a computer program---but a declarative description of what counts as a block
|
||||
quote, a code block, and each of the other structural elements that can
|
||||
make up a Markdown document.
|
||||
|
||||
Because John Gruber's [canonical syntax
|
||||
description](http://daringfireball.net/projects/markdown/syntax) leaves
|
||||
many aspects of the syntax undetermined, writing a precise spec requires
|
||||
making a large number of decisions, many of them somewhat arbitrary.
|
||||
In making them, we have appealed to existing conventions and
|
||||
considerations of simplicity, readability, expressive power, and
|
||||
consistency. We have tried to ensure that "normal" documents in the many
|
||||
incompatible existing implementations of Markdown will render, as far as
|
||||
possible, as their authors intended. And we have tried to make the rules
|
||||
for different elements work together harmoniously. In places where
|
||||
different decisions could have been made (for example, the rules
|
||||
governing list indentation), we have explained the rationale for
|
||||
our choices. In a few cases, we have departed slightly from the canonical
|
||||
syntax description, in ways that we think further the goals of Markdown
|
||||
as stated in that description.
|
||||
|
||||
For the most part, we have limited ourselves to the basic elements
|
||||
described in Gruber's canonical syntax description, eschewing extensions
|
||||
like footnotes and definition lists. It is important to get the core
|
||||
right before considering such things. However, we have included a visible
|
||||
syntax for line breaks and fenced code blocks.
|
||||
|
||||
Differences from original Markdown
|
||||
----------------------------------
|
||||
|
||||
There are only a few places where this spec says things that contradict
|
||||
the canonical syntax description:
|
||||
|
||||
- It [allows all punctuation symbols to be
|
||||
backslash-escaped](http://jgm.github.io/CommonMark/spec.html#backslash-escapes),
|
||||
not just the symbols with special meanings in Markdown. We found
|
||||
that it was just too hard to remember which symbols could be
|
||||
escaped.
|
||||
|
||||
- It introduces an [alternative syntax for hard line
|
||||
breaks](http://jgm.github.io/CommonMark/spec.html#hard-line-breaks), a
|
||||
backslash at the end of the line, supplementing the
|
||||
two-spaces-at-the-end-of-line rule. This is motivated by persistent
|
||||
complaints about the “invisible” nature of the two-space rule.
|
||||
|
||||
- Link syntax has been made a bit more predictable (in a
|
||||
backwards-compatible way). For example, `Markdown.pl` allows single
|
||||
quotes around a title in inline links, but not in reference links.
|
||||
This kind of difference is really hard for users to remember, so the
|
||||
spec [allows single quotes in both
|
||||
contexts](http://jgm.github.io/CommonMark/spec.html#links).
|
||||
|
||||
- The rule for HTML blocks differs, though in most real cases it
|
||||
shouldn't make a difference. (See
|
||||
[here](http://jgm.github.io/CommonMark/spec.html#html-blocks) for
|
||||
details.) The spec's proposal makes it easy to include Markdown
|
||||
inside HTML block-level tags, if you want to, but also allows you to
|
||||
exclude this. It also makes parsing much easier, avoiding
|
||||
expensive backtracking.
|
||||
|
||||
- It does not collapse adjacent bird-track blocks into a single
|
||||
blockquote:
|
||||
|
||||
> this is two
|
||||
|
||||
> blockquotes
|
||||
|
||||
> this is a single
|
||||
>
|
||||
> blockquote with two paragraphs
|
||||
|
||||
- Rules for content in lists differ in a few respects, though (as with
|
||||
HTML blocks), most lists in existing documents should render as
|
||||
intended. There is some discussion of the choice points and
|
||||
differences [here](http://jgm.github.io/CommonMark/spec.html#motivation).
|
||||
We think that the spec's proposal does better than any existing
|
||||
implementation in rendering lists the way a human writer or reader
|
||||
would intuitively understand them. (We could give numerous examples
|
||||
of perfectly natural looking lists that nearly every existing
|
||||
implementation flubs up.)
|
||||
|
||||
- The spec stipulates that two blank lines break out of all list
|
||||
contexts. This is an attempt to deal with issues that often come up
|
||||
when someone wants to have two adjacent lists, or a list followed by
|
||||
an indented code block.
|
||||
|
||||
- Changing bullet characters, or changing from bullets to numbers or
|
||||
vice versa, starts a new list. We think that is almost always going
|
||||
to be the writer's intent.
|
||||
|
||||
- The number that begins an ordered list item may be followed by
|
||||
either `.` or `)`. Changing the delimiter style starts a new
|
||||
list.
|
||||
|
||||
- The start number of an ordered list is significant.
|
||||
|
||||
- [Fenced code blocks](http://jgm.github.io/CommonMark/spec.html#fenced-code-blocks) are supported, delimited by either
|
||||
backticks (```` ``` ````) or tildes (` ~~~ `).
|
||||
|
||||
Contributing
|
||||
------------
|
||||
|
||||
There is a [forum for discussing
|
||||
CommonMark](http://talk.commonmark.org); you should use it instead of
|
||||
github issues for questions and possibly open-ended discussions.
|
||||
Use the [github issue tracker](http://github.com/jgm/CommonMark/issues)
|
||||
only for simple, clear, actionable issues.
|
||||
|
||||
Authors
|
||||
-------
|
||||
|
||||
The spec was written by John MacFarlane, drawing on
|
||||
|
||||
- his experience writing and maintaining Markdown implementations in several
|
||||
languages, including the first Markdown parser not based on regular
|
||||
expression substitutions ([pandoc](http://github.com/jgm/pandoc)) and
|
||||
the first markdown parsers based on PEG grammars
|
||||
([peg-markdown](http://github.com/jgm/peg-markdown),
|
||||
[lunamark](http://github.com/jgm/lunamark))
|
||||
- a detailed examination of the differences between existing Markdown
|
||||
implementations using [BabelMark 2](http://johnmacfarlane.net/babelmark2/),
|
||||
and
|
||||
- extensive discussions with David Greenspan, Jeff Atwood, Vicent
|
||||
Marti, Neil Williams, and Benjamin Dumke-von der Ehe.
|
||||
|
||||
John MacFarlane was also responsible for the original versions of the
|
||||
C and JavaScript implementations. The block parsing algorithm was
|
||||
worked out together with David Greenspan. Vicent Marti
|
||||
optimized the C implementation for performance, increasing its speed
|
||||
tenfold. Kārlis Gaņģis helped work out a better parsing algorithm
|
||||
for links and emphasis, eliminating several worst-case performance
|
||||
issues. Nick Wellnhofer contributed many improvements, including
|
||||
most of the C library's API and its test harness.
|
||||
|
||||
[cmake]: http://www.cmake.org/download/
|
||||
[pandoc]: http://johnmacfarlane.net/pandoc/
|
||||
[re2c]: http://re2c.org
|
||||
|
@ -1,247 +0,0 @@
|
||||
# Appendix B: An alternate spec for HTML blocks {-}
|
||||
|
||||
(The following spec departs less from original markdown than the
|
||||
one described above, but is also less flexible.)
|
||||
|
||||
An [HTML block](#html-block) <a id="html-block-tag"/> begins
|
||||
with an [open tag](#open-tag), [HTML comment](#html-comment),
|
||||
[processing instruction](#processing-instruction),
|
||||
[declaration](#declaration), or [CDATA section](#cdata-section).
|
||||
This opening element may optionally be preceded by 1-3 spaces,
|
||||
and must not be followed on a line by anything other than white space.
|
||||
|
||||
If the opening tag is self-closing, or if it is an [HTML
|
||||
comment](#html-comment), [processing
|
||||
instruction](#processing-instruction), [declaration](#declaration), or
|
||||
[CDATA section](#cdata-section), then the [HTML block](#html-block)
|
||||
contains just that tag.
|
||||
|
||||
If it is an [open tag](#open-tag), then the [HTML block](#html-block)
|
||||
continues until a matching closing tag is found, or until the end
|
||||
of the document. Note that the matching closing tag is not necessarily
|
||||
the first closing tag of the same type that is encountered, since
|
||||
that tag may close a later open tag of the same type. Open and closing
|
||||
tags must be balanced.
|
||||
|
||||
The contents of the HTML block are interpreted as raw HTML, and will not
|
||||
be escaped in HTML output.
|
||||
|
||||
Some simple examples:
|
||||
|
||||
.
|
||||
<table>
|
||||
<tr>
|
||||
<td>
|
||||
hi
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
okay.
|
||||
.
|
||||
<table>
|
||||
<tr>
|
||||
<td>
|
||||
hi
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>okay.</p>
|
||||
.
|
||||
|
||||
|
||||
.
|
||||
<div class="outer">
|
||||
|
||||
<div class="inner">
|
||||
|
||||
<p>fooö</p>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
.
|
||||
<div class="outer">
|
||||
|
||||
<div class="inner">
|
||||
|
||||
<p>fooö</p>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
.
|
||||
|
||||
A self-closing tag:
|
||||
|
||||
.
|
||||
<div />
|
||||
.
|
||||
<div />
|
||||
.
|
||||
|
||||
Here we have an unclosed tag, and the block continues to the end of
|
||||
the document:
|
||||
|
||||
.
|
||||
<div>
|
||||
<div>
|
||||
foo
|
||||
</div>
|
||||
|
||||
*bar*
|
||||
.
|
||||
<div>
|
||||
<div>
|
||||
foo
|
||||
</div>
|
||||
|
||||
*bar*
|
||||
.
|
||||
|
||||
A comment:
|
||||
|
||||
.
|
||||
<!-- Foo
|
||||
bar
|
||||
baz -->
|
||||
.
|
||||
<!-- Foo
|
||||
bar
|
||||
baz -->
|
||||
.
|
||||
|
||||
A processing instruction:
|
||||
|
||||
.
|
||||
<?php
|
||||
echo 'foo'
|
||||
?>
|
||||
.
|
||||
<?php
|
||||
echo 'foo'
|
||||
?>
|
||||
.
|
||||
|
||||
CDATA:
|
||||
|
||||
.
|
||||
<![CDATA[
|
||||
function matchwo(a,b)
|
||||
{
|
||||
if (a < b && a < 0) then
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
]]>
|
||||
.
|
||||
<![CDATA[
|
||||
function matchwo(a,b)
|
||||
{
|
||||
if (a < b && a < 0) then
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
]]>
|
||||
.
|
||||
|
||||
The opening tag can be indented 1-3 spaces, but not 4:
|
||||
|
||||
.
|
||||
<!-- foo -->
|
||||
<!-- foo -->
|
||||
.
|
||||
<!-- foo -->
|
||||
<pre><code><!-- foo -->
|
||||
</code></pre>
|
||||
.
|
||||
|
||||
The opening tag must be on a line (or lines) by itself:
|
||||
|
||||
.
|
||||
<table><tr><td>
|
||||
foo
|
||||
</td></tr></table>
|
||||
.
|
||||
<p><table><tr><td> foo </td></tr></table></p>
|
||||
.
|
||||
|
||||
.
|
||||
<!-- foo -->bar
|
||||
.
|
||||
<p><!-- foo -->bar</p>
|
||||
.
|
||||
|
||||
The opening tag need not be an HTML block tag or even an HTML tag:
|
||||
|
||||
.
|
||||
<a>
|
||||
foo
|
||||
</a>
|
||||
.
|
||||
<a>
|
||||
foo
|
||||
</a>
|
||||
.
|
||||
|
||||
.
|
||||
<foo>
|
||||
bar
|
||||
</foo>
|
||||
.
|
||||
<foo>
|
||||
bar
|
||||
</foo>
|
||||
.
|
||||
|
||||
So, note the difference:
|
||||
|
||||
.
|
||||
<del>
|
||||
bar
|
||||
</del>
|
||||
|
||||
<del>bar</del>
|
||||
.
|
||||
<del>
|
||||
bar
|
||||
</del>
|
||||
<p><del>bar</del></p>
|
||||
.
|
||||
|
||||
This rule differs from John Gruber's original markdown syntax
|
||||
specification, which says:
|
||||
|
||||
> The only restrictions are that block-level HTML elements —
|
||||
> e.g. `<div>`, `<table>`, `<pre>`, `<p>`, etc. — must be separated from
|
||||
> surrounding content by blank lines, and the start and end tags of the
|
||||
> block should not be indented with tabs or spaces.
|
||||
|
||||
In some ways Gruber's rule is more restrictive than the one given
|
||||
here:
|
||||
|
||||
- It requires that an HTML block be preceded and followed by a blank line.
|
||||
- It does not allow the start tag to be indented.
|
||||
- It does not allow the end tag to be indented.
|
||||
- It requires that the open tag be an HTML block-level tag.
|
||||
|
||||
Indeed, most markdown implementations, including some of Gruber's
|
||||
own perl implementations, do not impose these restrictions.
|
||||
|
||||
However, unlike Gruber's rule, this one requires that the open
|
||||
tag be on a line by itself. It also differs from most markdown
|
||||
implementations in how it handles the case where there is no matching
|
||||
closing tag (a case not mentioned in Gruber's rule). In such a case,
|
||||
the rule stated above includes the whole rest of the document in the
|
||||
HTML block.
|
||||
|
@ -1,25 +0,0 @@
|
||||
# Build the api_test executable from the C test sources plus one C++
# translation unit (cplusplus.cpp), and link it against libcmark.
add_executable(api_test
  cplusplus.cpp
  harness.c
  harness.h
  main.c
)
# Headers are looked up in both the source tree and the build tree.
include_directories(
  ${PROJECT_SOURCE_DIR}/src
  ${PROJECT_BINARY_DIR}/src
)
target_link_libraries(api_test libcmark)

# Compiler flags
if(MSVC)
  # Force to always compile with W4.
  # Inspect the same variable that is rewritten (CMAKE_C_FLAGS); testing
  # CMAKE_CXX_FLAGS here could leave a stale /W<n> next to the new /W4.
  if(CMAKE_C_FLAGS MATCHES "/W[0-4]")
    string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
  else()
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4")
  endif()
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4127 /wd4244 /wd4267 /wd4706 /wd4800 /D_CRT_SECURE_NO_WARNINGS")
  # /TP makes MSVC compile the .c sources as C++.
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP")
# These are C flags, so key the decision on the C compiler's identity.
elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -std=c99 -pedantic")
endif()
|
@ -1,15 +0,0 @@
|
||||
#include <cstdlib>
|
||||
|
||||
#include "cmark.h"
|
||||
|
||||
#include "harness.h"
|
||||
|
||||
extern "C" void
|
||||
test_cplusplus(test_batch_runner *runner)
|
||||
{
|
||||
static const char md[] = "paragraph\n";
|
||||
char *html = cmark_markdown_to_html(md, sizeof(md) - 1);
|
||||
STR_EQ(runner, html, "<p>paragraph</p>\n", "libcmark works with C++");
|
||||
free(html);
|
||||
}
|
||||
|
@ -1,102 +0,0 @@
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "harness.h"
|
||||
|
||||
test_batch_runner*
|
||||
test_batch_runner_new()
|
||||
{
|
||||
return (test_batch_runner *)calloc(1, sizeof(test_batch_runner));
|
||||
}
|
||||
|
||||
static void
|
||||
test_result(test_batch_runner *runner, int cond, const char *msg, va_list ap)
|
||||
{
|
||||
++runner->test_num;
|
||||
|
||||
if (cond) {
|
||||
++runner->num_passed;
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "FAILED test %d: ", runner->test_num);
|
||||
vfprintf(stderr, msg, ap);
|
||||
fprintf(stderr, "\n");
|
||||
++runner->num_failed;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SKIP(test_batch_runner *runner, int num_tests)
|
||||
{
|
||||
runner->test_num += num_tests;
|
||||
runner->num_skipped += num_tests;
|
||||
}
|
||||
|
||||
void
|
||||
OK(test_batch_runner *runner, int cond, const char *msg, ...)
|
||||
{
|
||||
va_list ap;
|
||||
va_start(ap, msg);
|
||||
test_result(runner, cond, msg, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
void
|
||||
INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg, ...)
|
||||
{
|
||||
int cond = got == expected;
|
||||
|
||||
va_list ap;
|
||||
va_start(ap, msg);
|
||||
test_result(runner, cond, msg, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (!cond) {
|
||||
fprintf(stderr, " Got: %d\n", got);
|
||||
fprintf(stderr, " Expected: %d\n", expected);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
STR_EQ(test_batch_runner *runner, const char *got, const char *expected,
|
||||
const char *msg, ...)
|
||||
{
|
||||
int cond = strcmp(got, expected) == 0;
|
||||
|
||||
va_list ap;
|
||||
va_start(ap, msg);
|
||||
test_result(runner, cond, msg, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (!cond) {
|
||||
fprintf(stderr, " Got: \"%s\"\n", got);
|
||||
fprintf(stderr, " Expected: \"%s\"\n", expected);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
test_ok(test_batch_runner *runner)
|
||||
{
|
||||
return runner->num_failed == 0;
|
||||
}
|
||||
|
||||
void
|
||||
test_print_summary(test_batch_runner *runner)
|
||||
{
|
||||
int num_passed = runner->num_passed;
|
||||
int num_skipped = runner->num_skipped;
|
||||
int num_failed = runner->num_failed;
|
||||
|
||||
fprintf(stderr, "%d tests passed, %d failed, %d skipped\n",
|
||||
num_passed, num_failed, num_skipped);
|
||||
|
||||
if (test_ok(runner)) {
|
||||
fprintf(stderr, "PASS\n");
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "FAIL\n");
|
||||
}
|
||||
}
|
||||
|
@ -1,42 +0,0 @@
|
||||
#ifndef CMARK_API_TEST_HARNESS_H
|
||||
#define CMARK_API_TEST_HARNESS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Running tallies for one batch of tests; updated by the harness functions. */
typedef struct {
|
||||
int test_num; /* number of the most recent test; SKIP() also advances it */
|
||||
int num_passed; /* tests whose condition held */
|
||||
int num_failed; /* tests whose condition did not hold */
|
||||
int num_skipped; /* tests accounted for through SKIP() */
|
||||
} test_batch_runner;
|
||||
|
||||
test_batch_runner*
|
||||
test_batch_runner_new();
|
||||
|
||||
void
|
||||
SKIP(test_batch_runner *runner, int num_tests);
|
||||
|
||||
void
|
||||
OK(test_batch_runner *runner, int cond, const char *msg, ...);
|
||||
|
||||
void
|
||||
INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg, ...);
|
||||
|
||||
void
|
||||
STR_EQ(test_batch_runner *runner, const char *got, const char *expected,
|
||||
const char *msg, ...);
|
||||
|
||||
int
|
||||
test_ok(test_batch_runner *runner);
|
||||
|
||||
void
|
||||
test_print_summary(test_batch_runner *runner);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -1,622 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define CMARK_NO_SHORT_NAMES
|
||||
#include "cmark.h"
|
||||
#include "node.h"
|
||||
|
||||
#include "harness.h"
|
||||
|
||||
#define UTF8_REPL "\xEF\xBF\xBD"
|
||||
|
||||
void
|
||||
test_cplusplus(test_batch_runner *runner);
|
||||
|
||||
static const cmark_node_type node_types[] = {
|
||||
CMARK_NODE_DOCUMENT,
|
||||
CMARK_NODE_BLOCK_QUOTE,
|
||||
CMARK_NODE_LIST,
|
||||
CMARK_NODE_LIST_ITEM,
|
||||
CMARK_NODE_CODE_BLOCK,
|
||||
CMARK_NODE_HTML,
|
||||
CMARK_NODE_PARAGRAPH,
|
||||
CMARK_NODE_HEADER,
|
||||
CMARK_NODE_HRULE,
|
||||
CMARK_NODE_REFERENCE_DEF,
|
||||
CMARK_NODE_TEXT,
|
||||
CMARK_NODE_SOFTBREAK,
|
||||
CMARK_NODE_LINEBREAK,
|
||||
CMARK_NODE_INLINE_CODE,
|
||||
CMARK_NODE_INLINE_HTML,
|
||||
CMARK_NODE_EMPH,
|
||||
CMARK_NODE_STRONG,
|
||||
CMARK_NODE_LINK,
|
||||
CMARK_NODE_IMAGE
|
||||
};
|
||||
static const int num_node_types = sizeof(node_types) / sizeof(*node_types);
|
||||
|
||||
static void
|
||||
test_md_to_html(test_batch_runner *runner, const char *markdown,
|
||||
const char *expected_html, const char *msg);
|
||||
|
||||
static void
|
||||
test_content(test_batch_runner *runner, cmark_node_type type,
|
||||
int allowed_content);
|
||||
|
||||
static void
|
||||
test_char(test_batch_runner *runner, int valid, const char *utf8,
|
||||
const char *msg);
|
||||
|
||||
static void
|
||||
test_incomplete_char(test_batch_runner *runner, const char *utf8,
|
||||
const char *msg);
|
||||
|
||||
static void
|
||||
test_continuation_byte(test_batch_runner *runner, const char *utf8);
|
||||
|
||||
static void
|
||||
constructor(test_batch_runner *runner)
|
||||
{
|
||||
for (int i = 0; i < num_node_types; ++i) {
|
||||
cmark_node_type type = node_types[i];
|
||||
cmark_node *node = cmark_node_new(type);
|
||||
OK(runner, node != NULL, "new type %d", type);
|
||||
INT_EQ(runner, cmark_node_get_type(node), type,
|
||||
"get_type %d", type);
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_HEADER:
|
||||
INT_EQ(runner, cmark_node_get_header_level(node), 1,
|
||||
"default header level is 1");
|
||||
node->as.header.level = 1;
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LIST:
|
||||
INT_EQ(runner, cmark_node_get_list_type(node),
|
||||
CMARK_BULLET_LIST,
|
||||
"default is list type is bullet");
|
||||
INT_EQ(runner, cmark_node_get_list_start(node), 1,
|
||||
"default is list start is 1");
|
||||
INT_EQ(runner, cmark_node_get_list_tight(node), 0,
|
||||
"default is list is loose");
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
cmark_node_free(node);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
accessors(test_batch_runner *runner)
|
||||
{
|
||||
static const char markdown[] =
|
||||
"## Header\n"
|
||||
"\n"
|
||||
"* Item 1\n"
|
||||
"* Item 2\n"
|
||||
"\n"
|
||||
"2. Item 1\n"
|
||||
"\n"
|
||||
"3. Item 2\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" code\n"
|
||||
"\n"
|
||||
"``` lang\n"
|
||||
"fenced\n"
|
||||
"```\n"
|
||||
"\n"
|
||||
"<div>html</div>\n"
|
||||
"\n"
|
||||
"[link](url 'title')\n";
|
||||
|
||||
cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1);
|
||||
|
||||
// Getters
|
||||
|
||||
cmark_node *header = cmark_node_first_child(doc);
|
||||
INT_EQ(runner, cmark_node_get_header_level(header), 2,
|
||||
"get_header_level");
|
||||
|
||||
cmark_node *bullet_list = cmark_node_next(header);
|
||||
INT_EQ(runner, cmark_node_get_list_type(bullet_list),
|
||||
CMARK_BULLET_LIST, "get_list_type bullet");
|
||||
INT_EQ(runner, cmark_node_get_list_tight(bullet_list), 1,
|
||||
"get_list_tight tight");
|
||||
|
||||
cmark_node *ordered_list = cmark_node_next(bullet_list);
|
||||
INT_EQ(runner, cmark_node_get_list_type(ordered_list),
|
||||
CMARK_ORDERED_LIST, "get_list_type ordered");
|
||||
INT_EQ(runner, cmark_node_get_list_start(ordered_list), 2,
|
||||
"get_list_start");
|
||||
INT_EQ(runner, cmark_node_get_list_tight(ordered_list), 0,
|
||||
"get_list_tight loose");
|
||||
|
||||
cmark_node *code = cmark_node_next(ordered_list);
|
||||
STR_EQ(runner, cmark_node_get_string_content(code), "code\n",
|
||||
"get_string_content indented code");
|
||||
|
||||
cmark_node *fenced = cmark_node_next(code);
|
||||
STR_EQ(runner, cmark_node_get_string_content(fenced), "fenced\n",
|
||||
"get_string_content fenced code");
|
||||
STR_EQ(runner, cmark_node_get_fence_info(fenced), "lang",
|
||||
"get_fence_info");
|
||||
|
||||
cmark_node *html = cmark_node_next(fenced);
|
||||
STR_EQ(runner, cmark_node_get_string_content(html),
|
||||
"<div>html</div>\n", "get_string_content html");
|
||||
|
||||
cmark_node *paragraph = cmark_node_next(html);
|
||||
INT_EQ(runner, cmark_node_get_start_line(paragraph), 19,
|
||||
"get_start_line");
|
||||
INT_EQ(runner, cmark_node_get_start_column(paragraph), 1,
|
||||
"get_start_column");
|
||||
INT_EQ(runner, cmark_node_get_end_line(paragraph), 19,
|
||||
"get_end_line");
|
||||
|
||||
cmark_node *link = cmark_node_first_child(paragraph);
|
||||
STR_EQ(runner, cmark_node_get_url(link), "url",
|
||||
"get_url");
|
||||
STR_EQ(runner, cmark_node_get_title(link), "title",
|
||||
"get_title");
|
||||
|
||||
cmark_node *string = cmark_node_first_child(link);
|
||||
STR_EQ(runner, cmark_node_get_string_content(string), "link",
|
||||
"get_string_content string");
|
||||
|
||||
// Setters
|
||||
|
||||
OK(runner, cmark_node_set_header_level(header, 3),
|
||||
"set_header_level");
|
||||
|
||||
OK(runner, cmark_node_set_list_type(bullet_list, CMARK_ORDERED_LIST),
|
||||
"set_list_type ordered");
|
||||
OK(runner, cmark_node_set_list_start(bullet_list, 3),
|
||||
"set_list_start");
|
||||
OK(runner, cmark_node_set_list_tight(bullet_list, 0),
|
||||
"set_list_tight loose");
|
||||
|
||||
OK(runner, cmark_node_set_list_type(ordered_list, CMARK_BULLET_LIST),
|
||||
"set_list_type bullet");
|
||||
OK(runner, cmark_node_set_list_tight(ordered_list, 1),
|
||||
"set_list_tight tight");
|
||||
|
||||
OK(runner, cmark_node_set_string_content(code, "CODE\n"),
|
||||
"set_string_content indented code");
|
||||
|
||||
OK(runner, cmark_node_set_string_content(fenced, "FENCED\n"),
|
||||
"set_string_content fenced code");
|
||||
OK(runner, cmark_node_set_fence_info(fenced, "LANG"),
|
||||
"set_fence_info");
|
||||
|
||||
OK(runner, cmark_node_set_string_content(html, "<div>HTML</div>\n"),
|
||||
"set_string_content html");
|
||||
|
||||
OK(runner, cmark_node_set_url(link, "URL"),
|
||||
"set_url");
|
||||
OK(runner, cmark_node_set_title(link, "TITLE"),
|
||||
"set_title");
|
||||
|
||||
OK(runner, cmark_node_set_string_content(string, "LINK"),
|
||||
"set_string_content string");
|
||||
|
||||
char *rendered_html = cmark_render_html(doc);
|
||||
static const char expected_html[] =
|
||||
"<h3>Header</h3>\n"
|
||||
"<ol start=\"3\">\n"
|
||||
"<li>\n"
|
||||
"<p>Item 1</p>\n"
|
||||
"</li>\n"
|
||||
"<li>\n"
|
||||
"<p>Item 2</p>\n"
|
||||
"</li>\n"
|
||||
"</ol>\n"
|
||||
"<ul>\n"
|
||||
"<li>Item 1</li>\n"
|
||||
"<li>Item 2</li>\n"
|
||||
"</ul>\n"
|
||||
"<pre><code>CODE\n"
|
||||
"</code></pre>\n"
|
||||
"<pre><code class=\"language-LANG\">FENCED\n"
|
||||
"</code></pre>\n"
|
||||
"<div>HTML</div>\n"
|
||||
"<p><a href=\"URL\" title=\"TITLE\">LINK</a></p>\n";
|
||||
STR_EQ(runner, rendered_html, expected_html, "setters work");
|
||||
free(rendered_html);
|
||||
|
||||
// Getter errors
|
||||
|
||||
INT_EQ(runner, cmark_node_get_header_level(bullet_list), 0,
|
||||
"get_header_level error");
|
||||
INT_EQ(runner, cmark_node_get_list_type(header), CMARK_NO_LIST,
|
||||
"get_list_type error");
|
||||
INT_EQ(runner, cmark_node_get_list_start(code), 0,
|
||||
"get_list_start error");
|
||||
INT_EQ(runner, cmark_node_get_list_tight(fenced), 0,
|
||||
"get_list_tight error");
|
||||
OK(runner, cmark_node_get_string_content(ordered_list) == NULL,
|
||||
"get_string_content error");
|
||||
OK(runner, cmark_node_get_fence_info(paragraph) == NULL,
|
||||
"get_fence_info error");
|
||||
OK(runner, cmark_node_get_url(html) == NULL,
|
||||
"get_url error");
|
||||
OK(runner, cmark_node_get_title(header) == NULL,
|
||||
"get_title error");
|
||||
|
||||
// Setter errors
|
||||
|
||||
OK(runner, !cmark_node_set_header_level(bullet_list, 3),
|
||||
"set_header_level error");
|
||||
OK(runner, !cmark_node_set_list_type(header, CMARK_ORDERED_LIST),
|
||||
"set_list_type error");
|
||||
OK(runner, !cmark_node_set_list_start(code, 3),
|
||||
"set_list_start error");
|
||||
OK(runner, !cmark_node_set_list_tight(fenced, 0),
|
||||
"set_list_tight error");
|
||||
OK(runner, !cmark_node_set_string_content(ordered_list, "content\n"),
|
||||
"set_string_content error");
|
||||
OK(runner, !cmark_node_set_fence_info(paragraph, "lang"),
|
||||
"set_fence_info error");
|
||||
OK(runner, !cmark_node_set_url(html, "url"),
|
||||
"set_url error");
|
||||
OK(runner, !cmark_node_set_title(header, "title"),
|
||||
"set_title error");
|
||||
|
||||
OK(runner, !cmark_node_set_header_level(header, 0),
|
||||
"set_header_level too small");
|
||||
OK(runner, !cmark_node_set_header_level(header, 7),
|
||||
"set_header_level too large");
|
||||
OK(runner, !cmark_node_set_list_type(bullet_list, CMARK_NO_LIST),
|
||||
"set_list_type invalid");
|
||||
OK(runner, !cmark_node_set_list_start(bullet_list, -1),
|
||||
"set_list_start negative");
|
||||
|
||||
cmark_node_free(doc);
|
||||
}
|
||||
|
||||
static void
|
||||
node_check(test_batch_runner *runner) {
|
||||
// Construct an incomplete tree.
|
||||
cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT);
|
||||
cmark_node *p1 = cmark_node_new(CMARK_NODE_PARAGRAPH);
|
||||
cmark_node *p2 = cmark_node_new(CMARK_NODE_PARAGRAPH);
|
||||
doc->first_child = p1;
|
||||
p1->next = p2;
|
||||
|
||||
INT_EQ(runner, cmark_node_check(doc, NULL), 4, "node_check works");
|
||||
INT_EQ(runner, cmark_node_check(doc, NULL), 0,
|
||||
"node_check fixes tree");
|
||||
|
||||
cmark_node_free(doc);
|
||||
}
|
||||
|
||||
static void
|
||||
create_tree(test_batch_runner *runner)
|
||||
{
|
||||
char *html;
|
||||
cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT);
|
||||
|
||||
cmark_node *p = cmark_node_new(CMARK_NODE_PARAGRAPH);
|
||||
OK(runner, !cmark_node_insert_before(doc, p),
|
||||
"insert before root fails");
|
||||
OK(runner, !cmark_node_insert_after(doc, p),
|
||||
"insert after root fails");
|
||||
OK(runner, cmark_node_append_child(doc, p), "append1");
|
||||
INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append1 consistent");
|
||||
OK(runner, cmark_node_parent(p) == doc, "node_parent");
|
||||
|
||||
cmark_node *emph = cmark_node_new(CMARK_NODE_EMPH);
|
||||
OK(runner, cmark_node_prepend_child(p, emph), "prepend1");
|
||||
INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend1 consistent");
|
||||
|
||||
cmark_node *str1 = cmark_node_new(CMARK_NODE_TEXT);
|
||||
cmark_node_set_string_content(str1, "Hello, ");
|
||||
OK(runner, cmark_node_prepend_child(p, str1), "prepend2");
|
||||
INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend2 consistent");
|
||||
|
||||
cmark_node *str3 = cmark_node_new(CMARK_NODE_TEXT);
|
||||
cmark_node_set_string_content(str3, "!");
|
||||
OK(runner, cmark_node_append_child(p, str3), "append2");
|
||||
INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append2 consistent");
|
||||
|
||||
cmark_node *str2 = cmark_node_new(CMARK_NODE_TEXT);
|
||||
cmark_node_set_string_content(str2, "world");
|
||||
OK(runner, cmark_node_append_child(emph, str2), "append3");
|
||||
INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append3 consistent");
|
||||
|
||||
html = cmark_render_html(doc);
|
||||
STR_EQ(runner, html, "<p>Hello, <em>world</em>!</p>\n",
|
||||
"render_html");
|
||||
free(html);
|
||||
|
||||
OK(runner, cmark_node_insert_before(str1, str3), "ins before1");
|
||||
INT_EQ(runner, cmark_node_check(doc, NULL), 0,
|
||||
"ins before1 consistent");
|
||||
// 31e
|
||||
OK(runner, cmark_node_first_child(p) == str3, "ins before1 works");
|
||||
|
||||
OK(runner, cmark_node_insert_before(str1, emph), "ins before2");
|
||||
INT_EQ(runner, cmark_node_check(doc, NULL), 0,
|
||||
"ins before2 consistent");
|
||||
// 3e1
|
||||
OK(runner, cmark_node_last_child(p) == str1, "ins before2 works");
|
||||
|
||||
OK(runner, cmark_node_insert_after(str1, str3), "ins after1");
|
||||
INT_EQ(runner, cmark_node_check(doc, NULL), 0,
|
||||
"ins after1 consistent");
|
||||
// e13
|
||||
OK(runner, cmark_node_next(str1) == str3, "ins after1 works");
|
||||
|
||||
OK(runner, cmark_node_insert_after(str1, emph), "ins after2");
|
||||
INT_EQ(runner, cmark_node_check(doc, NULL), 0,
|
||||
"ins after2 consistent");
|
||||
// 1e3
|
||||
OK(runner, cmark_node_previous(emph) == str1, "ins after2 works");
|
||||
|
||||
cmark_node_unlink(emph);
|
||||
|
||||
html = cmark_render_html(doc);
|
||||
STR_EQ(runner, html, "<p>Hello, !</p>\n",
|
||||
"render_html after shuffling");
|
||||
free(html);
|
||||
|
||||
cmark_node_free(doc);
|
||||
|
||||
// TODO: Test that the contents of an unlinked inline are valid
|
||||
// after the parent block was destroyed. This doesn't work so far.
|
||||
cmark_node_free(emph);
|
||||
}
|
||||
|
||||
void
|
||||
hierarchy(test_batch_runner *runner)
|
||||
{
|
||||
cmark_node *bquote1 = cmark_node_new(CMARK_NODE_BLOCK_QUOTE);
|
||||
cmark_node *bquote2 = cmark_node_new(CMARK_NODE_BLOCK_QUOTE);
|
||||
cmark_node *bquote3 = cmark_node_new(CMARK_NODE_BLOCK_QUOTE);
|
||||
|
||||
OK(runner, cmark_node_append_child(bquote1, bquote2),
|
||||
"append bquote2");
|
||||
OK(runner, cmark_node_append_child(bquote2, bquote3),
|
||||
"append bquote3");
|
||||
OK(runner, !cmark_node_append_child(bquote3, bquote3),
|
||||
"adding a node as child of itself fails");
|
||||
OK(runner, !cmark_node_append_child(bquote3, bquote1),
|
||||
"adding a parent as child fails");
|
||||
|
||||
cmark_node_free(bquote1);
|
||||
|
||||
int max_node_type = CMARK_NODE_LAST_BLOCK > CMARK_NODE_LAST_INLINE
|
||||
? CMARK_NODE_LAST_BLOCK : CMARK_NODE_LAST_INLINE;
|
||||
OK(runner, max_node_type < 32, "all node types < 32");
|
||||
|
||||
int list_item_flag = 1 << CMARK_NODE_LIST_ITEM;
|
||||
int top_level_blocks =
|
||||
(1 << CMARK_NODE_BLOCK_QUOTE) |
|
||||
(1 << CMARK_NODE_LIST) |
|
||||
(1 << CMARK_NODE_CODE_BLOCK) |
|
||||
(1 << CMARK_NODE_HTML) |
|
||||
(1 << CMARK_NODE_PARAGRAPH) |
|
||||
(1 << CMARK_NODE_HEADER) |
|
||||
(1 << CMARK_NODE_HRULE) |
|
||||
(1 << CMARK_NODE_REFERENCE_DEF);
|
||||
int all_inlines =
|
||||
(1 << CMARK_NODE_TEXT) |
|
||||
(1 << CMARK_NODE_SOFTBREAK) |
|
||||
(1 << CMARK_NODE_LINEBREAK) |
|
||||
(1 << CMARK_NODE_INLINE_CODE) |
|
||||
(1 << CMARK_NODE_INLINE_HTML) |
|
||||
(1 << CMARK_NODE_EMPH) |
|
||||
(1 << CMARK_NODE_STRONG) |
|
||||
(1 << CMARK_NODE_LINK) |
|
||||
(1 << CMARK_NODE_IMAGE);
|
||||
|
||||
test_content(runner, CMARK_NODE_DOCUMENT, top_level_blocks);
|
||||
test_content(runner, CMARK_NODE_BLOCK_QUOTE, top_level_blocks);
|
||||
test_content(runner, CMARK_NODE_LIST, list_item_flag);
|
||||
test_content(runner, CMARK_NODE_LIST_ITEM, top_level_blocks);
|
||||
test_content(runner, CMARK_NODE_CODE_BLOCK , 0);
|
||||
test_content(runner, CMARK_NODE_HTML, 0);
|
||||
test_content(runner, CMARK_NODE_PARAGRAPH, all_inlines);
|
||||
test_content(runner, CMARK_NODE_HEADER, all_inlines);
|
||||
test_content(runner, CMARK_NODE_HRULE, 0);
|
||||
test_content(runner, CMARK_NODE_REFERENCE_DEF, 0);
|
||||
test_content(runner, CMARK_NODE_TEXT, 0);
|
||||
test_content(runner, CMARK_NODE_SOFTBREAK, 0);
|
||||
test_content(runner, CMARK_NODE_LINEBREAK, 0);
|
||||
test_content(runner, CMARK_NODE_INLINE_CODE, 0);
|
||||
test_content(runner, CMARK_NODE_INLINE_HTML, 0);
|
||||
test_content(runner, CMARK_NODE_EMPH, all_inlines);
|
||||
test_content(runner, CMARK_NODE_STRONG, all_inlines);
|
||||
test_content(runner, CMARK_NODE_LINK, all_inlines);
|
||||
test_content(runner, CMARK_NODE_IMAGE, all_inlines);
|
||||
}
|
||||
|
||||
static void
|
||||
test_content(test_batch_runner *runner, cmark_node_type type,
|
||||
int allowed_content)
|
||||
{
|
||||
cmark_node *node = cmark_node_new(type);
|
||||
|
||||
for (int i = 0; i < num_node_types; ++i) {
|
||||
cmark_node_type child_type = node_types[i];
|
||||
cmark_node *child = cmark_node_new(child_type);
|
||||
|
||||
int got = cmark_node_append_child(node, child);
|
||||
int expected = (allowed_content >> child_type) & 1;
|
||||
|
||||
INT_EQ(runner, got, expected,
|
||||
"add %d as child of %d", child_type, type);
|
||||
|
||||
cmark_node_free(child);
|
||||
}
|
||||
|
||||
cmark_node_free(node);
|
||||
}
|
||||
|
||||
static void
|
||||
parser(test_batch_runner *runner)
|
||||
{
|
||||
test_md_to_html(runner, "No newline", "<p>No newline</p>\n",
|
||||
"document without trailing newline");
|
||||
}
|
||||
|
||||
static void
|
||||
render_html(test_batch_runner *runner)
|
||||
{
|
||||
char *html;
|
||||
|
||||
static const char markdown[] =
|
||||
"foo *bar*\n"
|
||||
"\n"
|
||||
"paragraph 2\n";
|
||||
cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1);
|
||||
|
||||
cmark_node *paragraph = cmark_node_first_child(doc);
|
||||
html = cmark_render_html(paragraph);
|
||||
STR_EQ(runner, html, "<p>foo <em>bar</em></p>\n",
|
||||
"render single paragraph");
|
||||
free(html);
|
||||
|
||||
cmark_node *string = cmark_node_first_child(paragraph);
|
||||
html = cmark_render_html(string);
|
||||
STR_EQ(runner, html, "foo ", "render single inline");
|
||||
free(html);
|
||||
|
||||
cmark_node *emph = cmark_node_next(string);
|
||||
html = cmark_render_html(emph);
|
||||
STR_EQ(runner, html, "<em>bar</em>", "render inline with children");
|
||||
free(html);
|
||||
|
||||
cmark_node_free(doc);
|
||||
}
|
||||
|
||||
static void
|
||||
utf8(test_batch_runner *runner)
|
||||
{
|
||||
// Ranges
|
||||
test_char(runner, 1, "\x01", "valid utf8 01");
|
||||
test_char(runner, 1, "\x7F", "valid utf8 7F");
|
||||
test_char(runner, 0, "\x80", "invalid utf8 80");
|
||||
test_char(runner, 0, "\xBF", "invalid utf8 BF");
|
||||
test_char(runner, 0, "\xC0\x80", "invalid utf8 C080");
|
||||
test_char(runner, 0, "\xC1\xBF", "invalid utf8 C1BF");
|
||||
test_char(runner, 1, "\xC2\x80", "valid utf8 C280");
|
||||
test_char(runner, 1, "\xDF\xBF", "valid utf8 DFBF");
|
||||
test_char(runner, 0, "\xE0\x80\x80", "invalid utf8 E08080");
|
||||
test_char(runner, 0, "\xE0\x9F\xBF", "invalid utf8 E09FBF");
|
||||
test_char(runner, 1, "\xE0\xA0\x80", "valid utf8 E0A080");
|
||||
test_char(runner, 1, "\xED\x9F\xBF", "valid utf8 ED9FBF");
|
||||
test_char(runner, 0, "\xED\xA0\x80", "invalid utf8 EDA080");
|
||||
test_char(runner, 0, "\xED\xBF\xBF", "invalid utf8 EDBFBF");
|
||||
test_char(runner, 0, "\xF0\x80\x80\x80", "invalid utf8 F0808080");
|
||||
test_char(runner, 0, "\xF0\x8F\xBF\xBF", "invalid utf8 F08FBFBF");
|
||||
test_char(runner, 1, "\xF0\x90\x80\x80", "valid utf8 F0908080");
|
||||
test_char(runner, 1, "\xF4\x8F\xBF\xBF", "valid utf8 F48FBFBF");
|
||||
test_char(runner, 0, "\xF4\x90\x80\x80", "invalid utf8 F4908080");
|
||||
test_char(runner, 0, "\xF7\xBF\xBF\xBF", "invalid utf8 F7BFBFBF");
|
||||
test_char(runner, 0, "\xF8", "invalid utf8 F8");
|
||||
test_char(runner, 0, "\xFF", "invalid utf8 FF");
|
||||
|
||||
// Incomplete byte sequences at end of input
|
||||
test_incomplete_char(runner, "\xE0\xA0", "invalid utf8 E0A0");
|
||||
test_incomplete_char(runner, "\xF0\x90\x80", "invalid utf8 F09080");
|
||||
|
||||
// Invalid continuation bytes
|
||||
test_continuation_byte(runner, "\xC2\x80");
|
||||
test_continuation_byte(runner, "\xE0\xA0\x80");
|
||||
test_continuation_byte(runner, "\xF0\x90\x80\x80");
|
||||
|
||||
// Test string containing null character
|
||||
static const char string_with_null[] = "((((\0))))";
|
||||
char *html = cmark_markdown_to_html(string_with_null,
|
||||
sizeof(string_with_null) - 1);
|
||||
STR_EQ(runner, html, "<p>((((" UTF8_REPL "))))</p>\n",
|
||||
"utf8 with U+0000");
|
||||
free(html);
|
||||
}
|
||||
|
||||
static void
|
||||
test_char(test_batch_runner *runner, int valid, const char *utf8,
|
||||
const char *msg)
|
||||
{
|
||||
char buf[20];
|
||||
sprintf(buf, "((((%s))))", utf8);
|
||||
|
||||
if (valid) {
|
||||
char expected[30];
|
||||
sprintf(expected, "<p>((((%s))))</p>\n", utf8);
|
||||
test_md_to_html(runner, buf, expected, msg);
|
||||
}
|
||||
else {
|
||||
test_md_to_html(runner, buf, "<p>((((" UTF8_REPL "))))</p>\n",
|
||||
msg);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
test_incomplete_char(test_batch_runner *runner, const char *utf8,
|
||||
const char *msg)
|
||||
{
|
||||
char buf[20];
|
||||
sprintf(buf, "----%s", utf8);
|
||||
test_md_to_html(runner, buf, "<p>----" UTF8_REPL "</p>\n", msg);
|
||||
}
|
||||
|
||||
static void
|
||||
test_continuation_byte(test_batch_runner *runner, const char *utf8)
|
||||
{
|
||||
int len = strlen(utf8);
|
||||
|
||||
for (int pos = 1; pos < len; ++pos) {
|
||||
char buf[20];
|
||||
sprintf(buf, "((((%s))))", utf8);
|
||||
buf[4+pos] = '\x20';
|
||||
|
||||
char expected[50];
|
||||
strcpy(expected, "<p>((((" UTF8_REPL "\x20");
|
||||
for (int i = pos + 1; i < len; ++i) {
|
||||
strcat(expected, UTF8_REPL);
|
||||
}
|
||||
strcat(expected, "))))</p>\n");
|
||||
|
||||
char *html = cmark_markdown_to_html(buf, strlen(buf));
|
||||
STR_EQ(runner, html, expected,
|
||||
"invalid utf8 continuation byte %d/%d", pos, len);
|
||||
free(html);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
test_md_to_html(test_batch_runner *runner, const char *markdown,
|
||||
const char *expected_html, const char *msg)
|
||||
{
|
||||
char *html = cmark_markdown_to_html(markdown, strlen(markdown));
|
||||
STR_EQ(runner, html, expected_html, msg);
|
||||
free(html);
|
||||
}
|
||||
|
||||
int main() {
|
||||
int retval;
|
||||
test_batch_runner *runner = test_batch_runner_new();
|
||||
|
||||
constructor(runner);
|
||||
accessors(runner);
|
||||
node_check(runner);
|
||||
create_tree(runner);
|
||||
hierarchy(runner);
|
||||
parser(runner);
|
||||
render_html(runner);
|
||||
utf8(runner);
|
||||
test_cplusplus(runner);
|
||||
|
||||
test_print_summary(runner);
|
||||
retval = test_ok(runner) ? 0 : 1;
|
||||
free(runner);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
@ -1,595 +0,0 @@
|
||||
## Module statistics.py
|
||||
##
|
||||
## Copyright (c) 2013 Steven D'Aprano <steve+python@pearwood.info>.
|
||||
##
|
||||
## Licensed under the Apache License, Version 2.0 (the "License");
|
||||
## you may not use this file except in compliance with the License.
|
||||
## You may obtain a copy of the License at
|
||||
##
|
||||
## http://www.apache.org/licenses/LICENSE-2.0
|
||||
##
|
||||
## Unless required by applicable law or agreed to in writing, software
|
||||
## distributed under the License is distributed on an "AS IS" BASIS,
|
||||
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
## See the License for the specific language governing permissions and
|
||||
## limitations under the License.
|
||||
|
||||
|
||||
"""
|
||||
Basic statistics module.
|
||||
|
||||
This module provides functions for calculating statistics of data, including
|
||||
averages, variance, and standard deviation.
|
||||
|
||||
Calculating averages
|
||||
--------------------
|
||||
|
||||
================== =============================================
|
||||
Function Description
|
||||
================== =============================================
|
||||
mean Arithmetic mean (average) of data.
|
||||
median Median (middle value) of data.
|
||||
median_low Low median of data.
|
||||
median_high High median of data.
|
||||
median_grouped Median, or 50th percentile, of grouped data.
|
||||
mode Mode (most common value) of data.
|
||||
================== =============================================
|
||||
|
||||
Calculate the arithmetic mean ("the average") of data:
|
||||
|
||||
>>> mean([-1.0, 2.5, 3.25, 5.75])
|
||||
2.625
|
||||
|
||||
|
||||
Calculate the standard median of discrete data:
|
||||
|
||||
>>> median([2, 3, 4, 5])
|
||||
3.5
|
||||
|
||||
|
||||
Calculate the median, or 50th percentile, of data grouped into class intervals
|
||||
centred on the data values provided. E.g. if your data points are rounded to
|
||||
the nearest whole number:
|
||||
|
||||
>>> median_grouped([2, 2, 3, 3, 3, 4]) #doctest: +ELLIPSIS
|
||||
2.8333333333...
|
||||
|
||||
This should be interpreted in this way: you have two data points in the class
|
||||
interval 1.5-2.5, three data points in the class interval 2.5-3.5, and one in
|
||||
the class interval 3.5-4.5. The median of these data points is 2.8333...
|
||||
|
||||
|
||||
Calculating variability or spread
|
||||
---------------------------------
|
||||
|
||||
================== =============================================
|
||||
Function Description
|
||||
================== =============================================
|
||||
pvariance Population variance of data.
|
||||
variance Sample variance of data.
|
||||
pstdev Population standard deviation of data.
|
||||
stdev Sample standard deviation of data.
|
||||
================== =============================================
|
||||
|
||||
Calculate the standard deviation of sample data:
|
||||
|
||||
>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) #doctest: +ELLIPSIS
|
||||
4.38961843444...
|
||||
|
||||
If you have previously calculated the mean, you can pass it as the optional
|
||||
second argument to the four "spread" functions to avoid recalculating it:
|
||||
|
||||
>>> data = [1, 2, 2, 4, 4, 4, 5, 6]
|
||||
>>> mu = mean(data)
|
||||
>>> pvariance(data, mu)
|
||||
2.5
|
||||
|
||||
|
||||
Exceptions
|
||||
----------
|
||||
|
||||
A single exception is defined: StatisticsError is a subclass of ValueError.
|
||||
|
||||
"""
|
||||
|
||||
__all__ = [ 'StatisticsError',
|
||||
'pstdev', 'pvariance', 'stdev', 'variance',
|
||||
'median', 'median_low', 'median_high', 'median_grouped',
|
||||
'mean', 'mode',
|
||||
]
|
||||
|
||||
|
||||
import collections
|
||||
import math
|
||||
|
||||
from fractions import Fraction
|
||||
from decimal import Decimal
|
||||
|
||||
|
||||
# === Exceptions ===
|
||||
|
||||
class StatisticsError(ValueError):
    """Raised by the statistics functions in this module (e.g. on empty data)."""
|
||||
|
||||
|
||||
# === Private utilities ===
|
||||
|
||||
def _sum(data, start=0):
    """_sum(data [, start]) -> value

    Return a high-precision sum of the numeric values in ``data``, plus
    ``start`` (default 0). With empty ``data`` the result is ``start``.

    Every value is converted to an exact (numerator, denominator) pair and
    the numerators are accumulated per denominator, so intermediate
    round-off is avoided:

    >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
    11.0
    >>> _sum([1e50, 1, -1e50] * 1000)  # Built-in sum returns zero.
    1000.0

    Fractions and Decimals are supported as well:

    >>> from fractions import Fraction as F
    >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
    Fraction(63, 20)

    >>> from decimal import Decimal as D
    >>> _sum([D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")])
    Decimal('0.6963')

    Mixing types raises TypeError, except that int may be combined with
    one other type.
    """
    # At most two types may be seen: int, plus the first non-int type
    # encountered. _check_type() grows this set or raises TypeError.
    seen_types = set([int, type(start)])
    numerator, denominator = _exact_ratio(start)
    by_denominator = {denominator: numerator}  # {denominator: numerator sum}
    for value in data:
        _check_type(type(value), seen_types)
        numerator, denominator = _exact_ratio(value)
        by_denominator[denominator] = (
            by_denominator.get(denominator, 0) + numerator
        )
    # Work out the result type: int when nothing else was seen, otherwise
    # the single non-int type.
    assert len(seen_types) in (1, 2)
    if len(seen_types) == 1:
        assert next(iter(seen_types)) is int
        T = int
    else:
        T = (seen_types - set([int])).pop()
    # _exact_ratio() reports INF/NAN with a denominator of None; such a
    # value decides the result on its own.
    if None in by_denominator:
        assert issubclass(T, (float, Decimal))
        assert not math.isfinite(by_denominator[None])
        return T(by_denominator[None])
    total = sum(
        (Fraction(n, d) for d, n in sorted(by_denominator.items())),
        Fraction(),
    )
    if issubclass(T, int):
        assert total.denominator == 1
        return T(total.numerator)
    if issubclass(T, Decimal):
        # Decimal cannot be built from a Fraction directly.
        return T(total.numerator)/total.denominator
    return T(total)
|
||||
|
||||
|
||||
def _check_type(T, allowed):
|
||||
if T not in allowed:
|
||||
if len(allowed) == 1:
|
||||
allowed.add(T)
|
||||
else:
|
||||
types = ', '.join([t.__name__ for t in allowed] + [T.__name__])
|
||||
raise TypeError("unsupported mixed types: %s" % types)
|
||||
|
||||
|
||||
def _exact_ratio(x):
|
||||
"""Convert Real number x exactly to (numerator, denominator) pair.
|
||||
|
||||
>>> _exact_ratio(0.25)
|
||||
(1, 4)
|
||||
|
||||
x is expected to be an int, Fraction, Decimal or float.
|
||||
"""
|
||||
try:
|
||||
try:
|
||||
# int, Fraction
|
||||
return (x.numerator, x.denominator)
|
||||
except AttributeError:
|
||||
# float
|
||||
try:
|
||||
return x.as_integer_ratio()
|
||||
except AttributeError:
|
||||
# Decimal
|
||||
try:
|
||||
return _decimal_to_ratio(x)
|
||||
except AttributeError:
|
||||
msg = "can't convert type '{}' to numerator/denominator"
|
||||
raise TypeError(msg.format(type(x).__name__)) from None
|
||||
except (OverflowError, ValueError):
|
||||
# INF or NAN
|
||||
if __debug__:
|
||||
# Decimal signalling NANs cannot be converted to float :-(
|
||||
if isinstance(x, Decimal):
|
||||
assert not x.is_finite()
|
||||
else:
|
||||
assert not math.isfinite(x)
|
||||
return (x, None)
|
||||
|
||||
|
||||
# FIXME This is faster than Fraction.from_decimal, but still too slow.
|
||||
def _decimal_to_ratio(d):
|
||||
"""Convert Decimal d to exact integer ratio (numerator, denominator).
|
||||
|
||||
>>> from decimal import Decimal
|
||||
>>> _decimal_to_ratio(Decimal("2.6"))
|
||||
(26, 10)
|
||||
|
||||
"""
|
||||
sign, digits, exp = d.as_tuple()
|
||||
if exp in ('F', 'n', 'N'): # INF, NAN, sNAN
|
||||
assert not d.is_finite()
|
||||
raise ValueError
|
||||
num = 0
|
||||
for digit in digits:
|
||||
num = num*10 + digit
|
||||
if exp < 0:
|
||||
den = 10**-exp
|
||||
else:
|
||||
num *= 10**exp
|
||||
den = 1
|
||||
if sign:
|
||||
num = -num
|
||||
return (num, den)
|
||||
|
||||
|
||||
def _counts(data):
|
||||
# Generate a table of sorted (value, frequency) pairs.
|
||||
table = collections.Counter(iter(data)).most_common()
|
||||
if not table:
|
||||
return table
|
||||
# Extract the values with the highest frequency.
|
||||
maxfreq = table[0][1]
|
||||
for i in range(1, len(table)):
|
||||
if table[i][1] != maxfreq:
|
||||
table = table[:i]
|
||||
break
|
||||
return table
|
||||
|
||||
|
||||
# === Measures of central tendency (averages) ===
|
||||
|
||||
def mean(data):
    """Return the sample arithmetic mean of data.

    >>> mean([1, 2, 3, 4, 4])
    2.8

    >>> from fractions import Fraction as F
    >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)])
    Fraction(13, 21)

    >>> from decimal import Decimal as D
    >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")])
    Decimal('0.5625')

    If ``data`` is empty, StatisticsError will be raised.
    """
    # An iterator has no len(); materialise it first.
    values = list(data) if iter(data) is data else data
    count = len(values)
    if count >= 1:
        return _sum(values)/count
    raise StatisticsError('mean requires at least one data point')
|
||||
|
||||
|
||||
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
|
||||
def median(data):
    """Return the median (middle value) of numeric data.

    With an odd number of data points the middle one is returned. With an
    even number, the result is the average of the two middle values:

    >>> median([1, 3, 5])
    3
    >>> median([1, 3, 5, 7])
    4.0

    Raises StatisticsError if ``data`` is empty.
    """
    ordered = sorted(data)
    count = len(ordered)
    if count == 0:
        raise StatisticsError("no median for empty data")
    middle, is_odd = divmod(count, 2)
    if is_odd:
        return ordered[middle]
    return (ordered[middle - 1] + ordered[middle])/2
|
||||
|
||||
|
||||
def median_low(data):
    """Return the low median of numeric data.

    With an odd number of data points the middle value is returned; with
    an even number, the smaller of the two middle values is returned.

    >>> median_low([1, 3, 5])
    3
    >>> median_low([1, 3, 5, 7])
    3

    Raises StatisticsError if ``data`` is empty.
    """
    ordered = sorted(data)
    count = len(ordered)
    if count == 0:
        raise StatisticsError("no median for empty data")
    # (count - 1)//2 is the middle index for odd counts and the lower of
    # the two middle indices for even counts.
    return ordered[(count - 1)//2]
|
||||
|
||||
|
||||
def median_high(data):
    """Return the high median of data.

    With an odd number of data points the middle value is returned; with
    an even number, the larger of the two middle values is returned.

    >>> median_high([1, 3, 5])
    3
    >>> median_high([1, 3, 5, 7])
    5

    Raises StatisticsError if ``data`` is empty.
    """
    ordered = sorted(data)
    if not ordered:
        raise StatisticsError("no median for empty data")
    upper_middle = len(ordered)//2
    return ordered[upper_middle]
|
||||
|
||||
|
||||
def median_grouped(data, interval=1):
    """Return the 50th percentile (median) of grouped continuous data.

    >>> median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5])
    3.7
    >>> median_grouped([52, 52, 53, 54])
    52.5

    This calculates the median as the 50th percentile, and should be
    used when your data is continuous and grouped. In the above example,
    the values 1, 2, 3, etc. actually represent the midpoint of classes
    0.5-1.5, 1.5-2.5, 2.5-3.5, etc. The middle value falls somewhere in
    class 3.5-4.5, and interpolation is used to estimate it.

    Optional argument ``interval`` represents the class interval, and
    defaults to 1. Changing the class interval naturally will change the
    interpolated 50th percentile value:

    >>> median_grouped([1, 3, 3, 5, 7], interval=1)
    3.25
    >>> median_grouped([1, 3, 3, 5, 7], interval=2)
    3.5

    This function does not check whether the data points are at least
    ``interval`` apart.

    Raises StatisticsError if ``data`` is empty, and TypeError if the
    midpoint value or ``interval`` is a str or bytes object. A single data
    point is returned unchanged.
    """
    data = sorted(data)
    n = len(data)
    if n == 0:
        raise StatisticsError("no median for empty data")
    elif n == 1:
        return data[0]
    # Find the value at the midpoint. Remember this corresponds to the
    # centre of the class interval.
    x = data[n//2]
    # Reject text explicitly, before any arithmetic is attempted on it.
    for obj in (x, interval):
        if isinstance(obj, (str, bytes)):
            raise TypeError('expected number but got %r' % obj)
    try:
        L = x - interval/2  # The lower limit of the median interval.
    except TypeError:
        # Mixed type. For now we just coerce to float.
        L = float(x) - float(interval)/2
    cf = data.index(x)  # Number of values below the median interval.
    # FIXME The following line could be more efficient for big lists.
    f = data.count(x)  # Number of data points in the median interval.
    return L + interval*(n/2 - cf)/f
|
||||
|
||||
|
||||
def mode(data):
    """Return the most common data point from discrete or nominal data.

    ``mode`` assumes discrete data, and returns a single value:

    >>> mode([1, 1, 2, 3, 3, 3, 3, 4])
    3

    Nominal (non-numeric) data works too:

    >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
    'red'

    If there is not exactly one most common value (including when
    ``data`` is empty), ``mode`` raises StatisticsError.
    """
    # (value, frequency) pairs tied for the highest frequency.
    leaders = _counts(data)
    if not leaders:
        raise StatisticsError('no mode for empty data')
    if len(leaders) != 1:
        raise StatisticsError(
            'no unique mode; found %d equally common values' % len(leaders)
        )
    return leaders[0][0]
|
||||
|
||||
|
||||
# === Measures of spread ===
|
||||
|
||||
# See http://mathworld.wolfram.com/Variance.html
|
||||
# http://mathworld.wolfram.com/SampleVariance.html
|
||||
# http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
|
||||
#
|
||||
# Under no circumstances use the so-called "computational formula for
|
||||
# variance", as that is only suitable for hand calculations with a small
|
||||
# amount of low-precision data. It has terrible numeric properties.
|
||||
#
|
||||
# See a comparison of three computational methods here:
|
||||
# http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/
|
||||
|
||||
def _ss(data, c=None):
    """Return sum of square deviations of sequence data.

    When ``c`` is None the mean of ``data`` is computed first and the
    deviations are taken from it in a second pass. Otherwise deviations
    are taken from ``c`` as given; use that form with care, since a wrong
    centre leads to garbage results.

    ``data`` is iterated more than once and passed to len(), so it must
    be a sequence rather than a one-shot iterator.
    """
    centre = mean(data) if c is None else c
    squared_total = _sum((x - centre)**2 for x in data)
    # The following sum should mathematically equal zero, but due to
    # rounding error may not.
    correction = _sum((x - centre) for x in data)**2/len(data)
    ss = squared_total - correction
    assert not ss < 0, 'negative sum of square deviations: %f' % ss
    return ss
|
||||
|
||||
|
||||
def variance(data, xbar=None):
    """Return the sample variance of data.

    data should be an iterable of Real-valued numbers, with at least two
    values; fewer raises StatisticsError. The optional argument xbar, if
    given, should be the mean of the data. If it is missing or None, the
    mean is calculated automatically.

    Use this function when your data is a sample from a population. To
    calculate the variance from the entire population, see ``pvariance``.

    Examples:

    >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
    >>> variance(data)
    1.3720238095238095

    A mean that is already known can be passed as ``xbar`` to avoid
    recalculating it:

    >>> m = mean(data)
    >>> variance(data, m)
    1.3720238095238095

    This function does not check that ``xbar`` is actually the mean of
    ``data``. Giving arbitrary values for ``xbar`` may lead to invalid or
    impossible results.

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('31.01875')

    >>> from fractions import Fraction as F
    >>> variance([F(1, 6), F(1, 2), F(5, 3)])
    Fraction(67, 108)

    """
    # An iterator has no len(); materialise it first.
    values = list(data) if iter(data) is data else data
    count = len(values)
    if count < 2:
        raise StatisticsError('variance requires at least two data points')
    # Sample variance divides by n - 1.
    return _ss(values, xbar)/(count - 1)
|
||||
|
||||
|
||||
def pvariance(data, mu=None):
    """Return the population variance of ``data``.

    data should be an iterable of Real-valued numbers, with at least one
    value; empty data raises StatisticsError. The optional argument mu,
    if given, should be the mean of the data. If it is missing or None,
    the mean is calculated automatically.

    Use this function to calculate the variance from the entire
    population. To estimate the variance from a sample, the ``variance``
    function is usually a better choice.

    Examples:

    >>> data = [0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]
    >>> pvariance(data)
    1.25

    A mean that is already known can be passed as the second argument to
    avoid recalculating it:

    >>> mu = mean(data)
    >>> pvariance(data, mu)
    1.25

    This function does not check that ``mu`` is actually the mean of
    ``data``. Giving arbitrary values for ``mu`` may lead to invalid or
    impossible results.

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('24.815')

    >>> from fractions import Fraction as F
    >>> pvariance([F(1, 4), F(5, 4), F(1, 2)])
    Fraction(13, 72)

    """
    # An iterator has no len(); materialise it first.
    values = list(data) if iter(data) is data else data
    count = len(values)
    if count < 1:
        raise StatisticsError('pvariance requires at least one data point')
    # Population variance divides by n.
    return _ss(values, mu)/count
|
||||
|
||||
|
||||
def stdev(data, xbar=None):
    """Return the square root of the sample variance.

    See ``variance`` for arguments and other details.

    >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    1.0810874155219827

    """
    sample_var = variance(data, xbar)
    try:
        # Prefer the value's own sqrt() method when it has one.
        root = sample_var.sqrt()
    except AttributeError:
        root = math.sqrt(sample_var)
    return root
|
||||
|
||||
|
||||
def pstdev(data, mu=None):
    """Return the square root of the population variance.

    See ``pvariance`` for arguments and other details.

    >>> pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    0.986893273527251

    """
    population_var = pvariance(data, mu)
    try:
        # Prefer the value's own sqrt() method when it has one.
        root = population_var.sqrt()
    except AttributeError:
        root = math.sqrt(population_var)
    return root
|
@ -1,19 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import statistics
|
||||
|
||||
def pairs(l, n):
    """Group the sequence ``l`` into consecutive tuples of ``n`` items.

    Returns a zip iterator; leftover items that do not fill a complete
    tuple are dropped.
    """
    # Column k holds every n-th item starting at offset k; zipping the
    # columns back together yields the consecutive groups.
    columns = [l[offset::n] for offset in range(n)]
    return zip(*columns)
|
||||
|
||||
# data comes in pairs:
|
||||
# n - time for running the program with no input
|
||||
# m - time for running it with the benchmark input
|
||||
# we measure (m - n)
|
||||
|
||||
# Each consecutive pair of stdin lines is (baseline time, benchmark time);
# the measurement of interest is the difference between the two.
timing_lines = sys.stdin.readlines()
values = [float(loaded) - float(baseline)
          for (baseline, loaded) in pairs(timing_lines, 2)]

summary = (statistics.mean(values),
           statistics.median(values),
           statistics.stdev(values))
print("mean = %.4f, median = %.4f, stdev = %.4f" % summary)
|
||||
|
@ -1,33 +0,0 @@
|
||||
# Benchmarks
|
||||
|
||||
Some benchmarks, run on an ancient Thinkpad running Intel Core 2 Duo at 2GHz.
|
||||
|
||||
|Implementation | Time (sec)| Factor |
|
||||
|-------------------|-----------:|--------:|
|
||||
| Markdown.pl | 2921.24 | 14606.2 |
|
||||
| PHP markdown | 20.85 | 104.3 |
|
||||
| kramdown | 20.83 | 104.1 |
|
||||
| lunamark | 6.295 | 31.5 |
|
||||
| cheapskate | 5.760 | 28.8 |
|
||||
| peg-markdown | 5.450 | 27.3 |
|
||||
| **commonmark.js** | 2.675 | 13.4 |
|
||||
| marked | 1.855 | 9.3 |
|
||||
| discount | 1.705 | 8.5 |
|
||||
| **cmark** | 0.295 | 1.5 |
|
||||
| sundown | 0.200 | 1.0 |
|
||||
|
||||
|
||||
To run these benchmarks, use `make bench PROG=/path/to/program`.
|
||||
|
||||
The input text is a 10MB Markdown file built by concatenating 20 copies
|
||||
of the Markdown source of the first edition of [*Pro
|
||||
Git*](https://github.com/progit/progit/tree/master/en) by Scott Chacon.
|
||||
|
||||
`time` is used to measure execution speed. The reported
|
||||
time is the *difference* between the time to run the program
|
||||
with the benchmark input and the time to run it with no input.
|
||||
(This procedure ensures that implementations in dynamic languages are
|
||||
not penalized by startup time.) A median of ten runs is taken. The
|
||||
process is reniced to a high priority so that the system doesn't
|
||||
interrupt runs.
|
||||
|
@ -1,10 +0,0 @@
|
||||
[since 0.12]
|
||||
|
||||
* Updated path of test program.
|
||||
* Use terminology "plain textual content" instead of "string."
|
||||
* Added condition that conforming parsers strip or replace NULL characters.
|
||||
* Changed Example 196 to reflect the spec's rules. It should not be a loose
|
||||
list as it has no blank lines.
|
||||
* Adjusted semantically insignificant formatting of HTML output.
|
||||
* Added example to spec of shortcut link with following space (#214).
|
||||
|
@ -1,912 +0,0 @@
|
||||
# CaseFolding-3.2.0.txt
|
||||
# Date: 2002-03-22,20:54:33 GMT [MD]
|
||||
#
|
||||
# Case Folding Properties
|
||||
#
|
||||
# This file is a supplement to the UnicodeData file.
|
||||
# It provides a case folding mapping generated from the Unicode Character Database.
|
||||
# If all characters are mapped according to the full mapping below, then
|
||||
# case differences (according to UnicodeData.txt and SpecialCasing.txt)
|
||||
# are eliminated.
|
||||
#
|
||||
# The data supports both implementations that require simple case foldings
|
||||
# (where string lengths don't change), and implementations that allow full case folding
|
||||
# (where string lengths may grow). Note that where they can be supported, the
|
||||
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
|
||||
#
|
||||
# NOTE: case folding does not preserve normalization formats!
|
||||
#
|
||||
# For information on case folding, see
|
||||
# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
|
||||
#
|
||||
# ================================================================================
|
||||
# Format
|
||||
# ================================================================================
|
||||
# The entries in this file are in the following machine-readable format:
|
||||
#
|
||||
# <code>; <status>; <mapping>; # <name>
|
||||
#
|
||||
# The status field is:
|
||||
# C: common case folding, common mappings shared by both simple and full mappings.
|
||||
# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
|
||||
# S: simple case folding, mappings to single characters where different from F.
|
||||
# T: special case for uppercase I and dotted uppercase I
|
||||
# - For non-Turkic languages, this mapping is normally not used.
|
||||
# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
|
||||
#
|
||||
# Usage:
|
||||
# A. To do a simple case folding, use the mappings with status C + S.
|
||||
# B. To do a full case folding, use the mappings with status C + F.
|
||||
#
|
||||
# The mappings with status T can be used or omitted depending on the desired case-folding
|
||||
# behavior. (The default option is to exclude them.)
|
||||
#
|
||||
# =================================================================
|
||||
|
||||
0041; C; 0061; # LATIN CAPITAL LETTER A
|
||||
0042; C; 0062; # LATIN CAPITAL LETTER B
|
||||
0043; C; 0063; # LATIN CAPITAL LETTER C
|
||||
0044; C; 0064; # LATIN CAPITAL LETTER D
|
||||
0045; C; 0065; # LATIN CAPITAL LETTER E
|
||||
0046; C; 0066; # LATIN CAPITAL LETTER F
|
||||
0047; C; 0067; # LATIN CAPITAL LETTER G
|
||||
0048; C; 0068; # LATIN CAPITAL LETTER H
|
||||
0049; C; 0069; # LATIN CAPITAL LETTER I
|
||||
0049; T; 0131; # LATIN CAPITAL LETTER I
|
||||
004A; C; 006A; # LATIN CAPITAL LETTER J
|
||||
004B; C; 006B; # LATIN CAPITAL LETTER K
|
||||
004C; C; 006C; # LATIN CAPITAL LETTER L
|
||||
004D; C; 006D; # LATIN CAPITAL LETTER M
|
||||
004E; C; 006E; # LATIN CAPITAL LETTER N
|
||||
004F; C; 006F; # LATIN CAPITAL LETTER O
|
||||
0050; C; 0070; # LATIN CAPITAL LETTER P
|
||||
0051; C; 0071; # LATIN CAPITAL LETTER Q
|
||||
0052; C; 0072; # LATIN CAPITAL LETTER R
|
||||
0053; C; 0073; # LATIN CAPITAL LETTER S
|
||||
0054; C; 0074; # LATIN CAPITAL LETTER T
|
||||
0055; C; 0075; # LATIN CAPITAL LETTER U
|
||||
0056; C; 0076; # LATIN CAPITAL LETTER V
|
||||
0057; C; 0077; # LATIN CAPITAL LETTER W
|
||||
0058; C; 0078; # LATIN CAPITAL LETTER X
|
||||
0059; C; 0079; # LATIN CAPITAL LETTER Y
|
||||
005A; C; 007A; # LATIN CAPITAL LETTER Z
|
||||
00B5; C; 03BC; # MICRO SIGN
|
||||
00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
|
||||
00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
00C6; C; 00E6; # LATIN CAPITAL LETTER AE
|
||||
00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
|
||||
00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
|
||||
00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
|
||||
00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
|
||||
00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
|
||||
00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
|
||||
0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
|
||||
0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
||||
010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||
010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
|
||||
010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
|
||||
0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
|
||||
0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
|
||||
011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
||||
011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||
0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
||||
0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
|
||||
0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
|
||||
012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
|
||||
012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
|
||||
012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
|
||||
0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
||||
0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
|
||||
013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
|
||||
013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
|
||||
0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
|
||||
0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
014A; C; 014B; # LATIN CAPITAL LETTER ENG
|
||||
014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
|
||||
014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
|
||||
0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||
0152; C; 0153; # LATIN CAPITAL LIGATURE OE
|
||||
0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
|
||||
0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
|
||||
015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
||||
015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
|
||||
0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
|
||||
0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
|
||||
0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
|
||||
0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
|
||||
016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
|
||||
016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
|
||||
016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||
0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
|
||||
0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
|
||||
0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
|
||||
017F; C; 0073; # LATIN SMALL LETTER LONG S
|
||||
0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
|
||||
0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
|
||||
0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
|
||||
0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
|
||||
0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
|
||||
0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
|
||||
018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
|
||||
018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
|
||||
018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
|
||||
018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
|
||||
0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
|
||||
0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
|
||||
0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
|
||||
0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
|
||||
0196; C; 0269; # LATIN CAPITAL LETTER IOTA
|
||||
0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
|
||||
0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
|
||||
019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
|
||||
019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
|
||||
019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
|
||||
01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
|
||||
01A2; C; 01A3; # LATIN CAPITAL LETTER OI
|
||||
01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
|
||||
01A6; C; 0280; # LATIN LETTER YR
|
||||
01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
|
||||
01A9; C; 0283; # LATIN CAPITAL LETTER ESH
|
||||
01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
|
||||
01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
|
||||
01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
|
||||
01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
|
||||
01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
|
||||
01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
|
||||
01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
|
||||
01B7; C; 0292; # LATIN CAPITAL LETTER EZH
|
||||
01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
|
||||
01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
|
||||
01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
|
||||
01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
|
||||
01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
|
||||
01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
|
||||
01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
|
||||
01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
|
||||
01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
|
||||
01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
|
||||
01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
|
||||
01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
|
||||
01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
|
||||
01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
|
||||
01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
|
||||
01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
|
||||
01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
|
||||
01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
|
||||
01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
|
||||
01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
|
||||
01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
|
||||
01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
|
||||
01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
|
||||
01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
|
||||
01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
|
||||
01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
|
||||
01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
|
||||
01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
|
||||
01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
|
||||
01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
|
||||
01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
|
||||
01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
|
||||
01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
|
||||
01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
|
||||
0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
|
||||
0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
|
||||
0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
|
||||
0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
|
||||
0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
|
||||
020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
|
||||
020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
|
||||
020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
|
||||
0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
|
||||
0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
|
||||
0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
|
||||
0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
|
||||
0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
|
||||
021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
|
||||
021C; C; 021D; # LATIN CAPITAL LETTER YOGH
|
||||
021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
|
||||
0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
|
||||
0222; C; 0223; # LATIN CAPITAL LETTER OU
|
||||
0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
|
||||
0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
|
||||
0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
|
||||
022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
|
||||
022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
|
||||
022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
|
||||
0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
|
||||
0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
|
||||
0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
|
||||
0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
|
||||
0392; C; 03B2; # GREEK CAPITAL LETTER BETA
|
||||
0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
|
||||
0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
|
||||
0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
|
||||
0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
|
||||
0397; C; 03B7; # GREEK CAPITAL LETTER ETA
|
||||
0398; C; 03B8; # GREEK CAPITAL LETTER THETA
|
||||
0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
|
||||
039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
|
||||
039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
|
||||
039C; C; 03BC; # GREEK CAPITAL LETTER MU
|
||||
039D; C; 03BD; # GREEK CAPITAL LETTER NU
|
||||
039E; C; 03BE; # GREEK CAPITAL LETTER XI
|
||||
039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
|
||||
03A0; C; 03C0; # GREEK CAPITAL LETTER PI
|
||||
03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
|
||||
03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
|
||||
03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
|
||||
03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
|
||||
03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
|
||||
03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
|
||||
03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
|
||||
03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
|
||||
03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
|
||||
03D0; C; 03B2; # GREEK BETA SYMBOL
|
||||
03D1; C; 03B8; # GREEK THETA SYMBOL
|
||||
03D5; C; 03C6; # GREEK PHI SYMBOL
|
||||
03D6; C; 03C0; # GREEK PI SYMBOL
|
||||
03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
|
||||
03DA; C; 03DB; # GREEK LETTER STIGMA
|
||||
03DC; C; 03DD; # GREEK LETTER DIGAMMA
|
||||
03DE; C; 03DF; # GREEK LETTER KOPPA
|
||||
03E0; C; 03E1; # GREEK LETTER SAMPI
|
||||
03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
|
||||
03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
|
||||
03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
|
||||
03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
|
||||
03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
|
||||
03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
|
||||
03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
|
||||
03F0; C; 03BA; # GREEK KAPPA SYMBOL
|
||||
03F1; C; 03C1; # GREEK RHO SYMBOL
|
||||
03F2; C; 03C3; # GREEK LUNATE SIGMA SYMBOL
|
||||
03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
|
||||
03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
|
||||
0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
|
||||
0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
|
||||
0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
|
||||
0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
|
||||
0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
|
||||
0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
|
||||
0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
|
||||
0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
|
||||
040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
|
||||
040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
|
||||
040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
|
||||
040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
|
||||
040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
|
||||
040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
|
||||
0410; C; 0430; # CYRILLIC CAPITAL LETTER A
|
||||
0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
|
||||
0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
|
||||
0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
|
||||
0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
|
||||
0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
|
||||
0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
|
||||
0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
|
||||
0418; C; 0438; # CYRILLIC CAPITAL LETTER I
|
||||
0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
|
||||
041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
|
||||
041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
|
||||
041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
|
||||
041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
|
||||
041E; C; 043E; # CYRILLIC CAPITAL LETTER O
|
||||
041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
|
||||
0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
|
||||
0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
|
||||
0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
|
||||
0423; C; 0443; # CYRILLIC CAPITAL LETTER U
|
||||
0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
|
||||
0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
|
||||
0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
|
||||
0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
|
||||
0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
|
||||
0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
|
||||
042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
|
||||
042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
042D; C; 044D; # CYRILLIC CAPITAL LETTER E
|
||||
042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
|
||||
042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
|
||||
0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
|
||||
0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
|
||||
0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
|
||||
0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
|
||||
0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
|
||||
046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
|
||||
046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
|
||||
046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
|
||||
0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
|
||||
0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
|
||||
0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
|
||||
0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
|
||||
0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
|
||||
047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
|
||||
047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
||||
047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
|
||||
0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
|
||||
048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
|
||||
048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
|
||||
048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
|
||||
0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
|
||||
0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
|
||||
0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
|
||||
0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
|
||||
049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
|
||||
049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
|
||||
049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
|
||||
04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
|
||||
04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
|
||||
04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
|
||||
04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
|
||||
04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
|
||||
04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
|
||||
04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
|
||||
04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
|
||||
04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
|
||||
04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
|
||||
04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
|
||||
04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
|
||||
04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
|
||||
04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
|
||||
04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
|
||||
04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
|
||||
04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
|
||||
04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
|
||||
04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
|
||||
04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
|
||||
04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
||||
04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
|
||||
04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
|
||||
04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
|
||||
04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
|
||||
04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
|
||||
04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
|
||||
04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
|
||||
04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
|
||||
04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
|
||||
04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
||||
04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
|
||||
04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
|
||||
04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
|
||||
04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
|
||||
04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
|
||||
04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
|
||||
04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
|
||||
04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
|
||||
04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
|
||||
0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
|
||||
0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
|
||||
0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
|
||||
0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
|
||||
0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
|
||||
050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
|
||||
050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
|
||||
050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
|
||||
0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
|
||||
0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
|
||||
0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
|
||||
0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
|
||||
0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
|
||||
0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
|
||||
0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
|
||||
0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
|
||||
0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
|
||||
053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
|
||||
053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
|
||||
053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
|
||||
053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
|
||||
053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
|
||||
053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
|
||||
0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
|
||||
0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
|
||||
0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
|
||||
0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
|
||||
0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
|
||||
0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
|
||||
0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
|
||||
0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
|
||||
0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
|
||||
0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
|
||||
054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
|
||||
054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
|
||||
054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
|
||||
054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
|
||||
054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
|
||||
054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
|
||||
0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
|
||||
0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
|
||||
0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
|
||||
0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
|
||||
0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
|
||||
0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
|
||||
0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
|
||||
0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
|
||||
1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
|
||||
1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
|
||||
1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
|
||||
1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
|
||||
1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
|
||||
1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
|
||||
1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
|
||||
1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
|
||||
1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
|
||||
1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
|
||||
1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
|
||||
1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
|
||||
1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
|
||||
1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
|
||||
1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
|
||||
1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
|
||||
1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
|
||||
1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
|
||||
1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
|
||||
1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
|
||||
1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
|
||||
1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
|
||||
1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
|
||||
1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
|
||||
1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
|
||||
1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
|
||||
1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
|
||||
1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
|
||||
1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
|
||||
1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
|
||||
1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
|
||||
1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
|
||||
1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
|
||||
1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
|
||||
1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
|
||||
1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
|
||||
1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
|
||||
1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
|
||||
1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
|
||||
1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
|
||||
1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
|
||||
1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
|
||||
1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
|
||||
1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
|
||||
1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
|
||||
1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
|
||||
1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
|
||||
1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
|
||||
1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
|
||||
1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
|
||||
1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
|
||||
1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
|
||||
1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
|
||||
1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
|
||||
1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
|
||||
1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
|
||||
1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
|
||||
1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
|
||||
1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
|
||||
1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
|
||||
1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
|
||||
1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
|
||||
1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
|
||||
1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
|
||||
1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
|
||||
1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
|
||||
1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
|
||||
1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
|
||||
1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
|
||||
1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
|
||||
1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
|
||||
1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
|
||||
1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
|
||||
1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
|
||||
1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
|
||||
1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
|
||||
1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
|
||||
1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
|
||||
1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
|
||||
1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
|
||||
1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
|
||||
1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
|
||||
1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
|
||||
1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
|
||||
1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
|
||||
1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
|
||||
1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
|
||||
1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
|
||||
1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
|
||||
1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
|
||||
1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
|
||||
1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
|
||||
1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
|
||||
1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
|
||||
1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
|
||||
1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
|
||||
1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
|
||||
1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
|
||||
1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
|
||||
1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
|
||||
1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
|
||||
1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
|
||||
1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
|
||||
1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
|
||||
1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
|
||||
1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
|
||||
1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
|
||||
1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
|
||||
1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
|
||||
1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
|
||||
1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
|
||||
1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
|
||||
1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
|
||||
1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
|
||||
1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
|
||||
1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
|
||||
1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
|
||||
1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
|
||||
1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
|
||||
1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
|
||||
1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
|
||||
1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
|
||||
1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
|
||||
1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
|
||||
1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
|
||||
1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
|
||||
1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
|
||||
1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
|
||||
1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
|
||||
1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
|
||||
1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
|
||||
1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
|
||||
1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
|
||||
1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
|
||||
1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
|
||||
1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
|
||||
1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
|
||||
1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
|
||||
1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
|
||||
1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
|
||||
1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
|
||||
1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
|
||||
1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
|
||||
1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
|
||||
1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
|
||||
1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
|
||||
1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
|
||||
1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
|
||||
1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
|
||||
1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
|
||||
1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
|
||||
1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
|
||||
1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
|
||||
1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
|
||||
1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
|
||||
1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
|
||||
1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
|
||||
1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
|
||||
1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
|
||||
1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
|
||||
1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
|
||||
1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
|
||||
1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
|
||||
1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
|
||||
1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
|
||||
1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
|
||||
1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
|
||||
1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
|
||||
1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
|
||||
1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
|
||||
1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
|
||||
1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
|
||||
1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
|
||||
1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
|
||||
1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
|
||||
1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
|
||||
1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
|
||||
1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
|
||||
1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
|
||||
1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
|
||||
1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
|
||||
1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
|
||||
1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
|
||||
1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
|
||||
1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
|
||||
1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
|
||||
1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
|
||||
1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
|
||||
1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
|
||||
1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
|
||||
1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
|
||||
1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
|
||||
1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
|
||||
1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
|
||||
1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
|
||||
1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
|
||||
1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
|
||||
1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
|
||||
1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
|
||||
1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
|
||||
1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
|
||||
1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
|
||||
1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
|
||||
1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
|
||||
1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
|
||||
1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
|
||||
1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||||
1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
|
||||
1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
|
||||
1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
|
||||
1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
|
||||
1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
|
||||
1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
|
||||
1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
|
||||
1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
|
||||
1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
|
||||
1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
|
||||
1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
|
||||
1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
|
||||
1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
|
||||
1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
|
||||
1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
|
||||
1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
|
||||
1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
|
||||
1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
|
||||
1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
|
||||
1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
|
||||
1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
|
||||
1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
|
||||
1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
2126; C; 03C9; # OHM SIGN
|
||||
212A; C; 006B; # KELVIN SIGN
|
||||
212B; C; 00E5; # ANGSTROM SIGN
|
||||
2160; C; 2170; # ROMAN NUMERAL ONE
|
||||
2161; C; 2171; # ROMAN NUMERAL TWO
|
||||
2162; C; 2172; # ROMAN NUMERAL THREE
|
||||
2163; C; 2173; # ROMAN NUMERAL FOUR
|
||||
2164; C; 2174; # ROMAN NUMERAL FIVE
|
||||
2165; C; 2175; # ROMAN NUMERAL SIX
|
||||
2166; C; 2176; # ROMAN NUMERAL SEVEN
|
||||
2167; C; 2177; # ROMAN NUMERAL EIGHT
|
||||
2168; C; 2178; # ROMAN NUMERAL NINE
|
||||
2169; C; 2179; # ROMAN NUMERAL TEN
|
||||
216A; C; 217A; # ROMAN NUMERAL ELEVEN
|
||||
216B; C; 217B; # ROMAN NUMERAL TWELVE
|
||||
216C; C; 217C; # ROMAN NUMERAL FIFTY
|
||||
216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
|
||||
216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
|
||||
216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
|
||||
24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
|
||||
24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
|
||||
24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
|
||||
24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
|
||||
24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
|
||||
24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
|
||||
24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
|
||||
24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
|
||||
24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
|
||||
24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
|
||||
24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
|
||||
24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
|
||||
24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
|
||||
24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
|
||||
24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
|
||||
24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
|
||||
24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
|
||||
24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
|
||||
24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
|
||||
24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
|
||||
24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
|
||||
24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
|
||||
24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
|
||||
24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
|
||||
24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
|
||||
24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
|
||||
FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
|
||||
FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
|
||||
FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
|
||||
FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
|
||||
FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
|
||||
FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
|
||||
FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
|
||||
FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
|
||||
FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
|
||||
FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
|
||||
FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
|
||||
FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
|
||||
FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
|
||||
FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
|
||||
FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
|
||||
FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
|
||||
FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
|
||||
FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
|
||||
FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
|
||||
FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
|
||||
FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
|
||||
FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
|
||||
FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
|
||||
FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
|
||||
FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
|
||||
FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
|
||||
FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
|
||||
FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
|
||||
FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
|
||||
FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
|
||||
FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
|
||||
FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
|
||||
FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
|
||||
FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
|
||||
FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
|
||||
FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
10400; C; 10428; # DESERET CAPITAL LETTER LONG I
|
||||
10401; C; 10429; # DESERET CAPITAL LETTER LONG E
|
||||
10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
|
||||
10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
|
||||
10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
|
||||
10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
|
||||
10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
|
||||
10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
|
||||
10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
|
||||
10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
|
||||
1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
|
||||
1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
|
||||
1040C; C; 10434; # DESERET CAPITAL LETTER AY
|
||||
1040D; C; 10435; # DESERET CAPITAL LETTER OW
|
||||
1040E; C; 10436; # DESERET CAPITAL LETTER WU
|
||||
1040F; C; 10437; # DESERET CAPITAL LETTER YEE
|
||||
10410; C; 10438; # DESERET CAPITAL LETTER H
|
||||
10411; C; 10439; # DESERET CAPITAL LETTER PEE
|
||||
10412; C; 1043A; # DESERET CAPITAL LETTER BEE
|
||||
10413; C; 1043B; # DESERET CAPITAL LETTER TEE
|
||||
10414; C; 1043C; # DESERET CAPITAL LETTER DEE
|
||||
10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
|
||||
10416; C; 1043E; # DESERET CAPITAL LETTER JEE
|
||||
10417; C; 1043F; # DESERET CAPITAL LETTER KAY
|
||||
10418; C; 10440; # DESERET CAPITAL LETTER GAY
|
||||
10419; C; 10441; # DESERET CAPITAL LETTER EF
|
||||
1041A; C; 10442; # DESERET CAPITAL LETTER VEE
|
||||
1041B; C; 10443; # DESERET CAPITAL LETTER ETH
|
||||
1041C; C; 10444; # DESERET CAPITAL LETTER THEE
|
||||
1041D; C; 10445; # DESERET CAPITAL LETTER ES
|
||||
1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
|
||||
1041F; C; 10447; # DESERET CAPITAL LETTER ESH
|
||||
10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
|
||||
10421; C; 10449; # DESERET CAPITAL LETTER ER
|
||||
10422; C; 1044A; # DESERET CAPITAL LETTER EL
|
||||
10423; C; 1044B; # DESERET CAPITAL LETTER EM
|
||||
10424; C; 1044C; # DESERET CAPITAL LETTER EN
|
||||
10425; C; 1044D; # DESERET CAPITAL LETTER ENG
|
@ -1,151 +0,0 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>commonmark.js demo</title>
|
||||
<script src="//code.jquery.com/jquery-1.11.0.min.js"></script>
|
||||
<script src="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
|
||||
<link href="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
|
||||
<script src="js/commonmark.js"></script>
|
||||
<script type="text/javascript">
|
||||
|
||||
var writer = new commonmark.HtmlRenderer();
|
||||
var reader = new commonmark.DocParser();
|
||||
|
||||
function getQueryVariable(variable)
|
||||
{
|
||||
var query = window.location.search.substring(1);
|
||||
var vars = query.split("&");
|
||||
for (var i=0;i<vars.length;i++) {
|
||||
var pair = vars[i].split("=");
|
||||
if(pair[0] == variable){return decodeURIComponent(pair[1]);}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
$(document).ready(function() {
|
||||
var timer;
|
||||
var x;
|
||||
var parsed;
|
||||
var render = function() {
|
||||
if (parsed === undefined) {
|
||||
return;
|
||||
}
|
||||
var startTime = new Date().getTime();
|
||||
var result = writer.renderBlock(parsed);
|
||||
var endTime = new Date().getTime();
|
||||
var renderTime = endTime - startTime;
|
||||
// $("#html").text(result);
|
||||
$("#preview").html(result);
|
||||
$("#html").text(result);
|
||||
$("#ast").text(commonmark.ASTRenderer(parsed));
|
||||
$("#rendertime").text(renderTime);
|
||||
};
|
||||
var parseAndRender = function () {
|
||||
if (x) { x.abort() } // If there is an existing XHR, abort it.
|
||||
clearTimeout(timer); // Clear the timer so we don't end up with dupes.
|
||||
timer = setTimeout(function() { // assign timer a new timeout
|
||||
var startTime = new Date().getTime();
|
||||
parsed = reader.parse($("#text").val());
|
||||
var endTime = new Date().getTime();
|
||||
var parseTime = endTime - startTime;
|
||||
$("#parsetime").text(parseTime);
|
||||
$(".timing").css('visibility','visible');
|
||||
/*
|
||||
var warnings = parsed.warnings;
|
||||
$("#warnings").html('');
|
||||
for (i=0; i < warnings.length; i++) {
|
||||
var w = warnings[i];
|
||||
var warning = $("#warnings").append('<li></li>');
|
||||
$("#warnings li").last().text('Line ' + w.line + ' column ' + w.column + ': ' + w.message);
|
||||
}
|
||||
*/
|
||||
render();
|
||||
}, 0); // ms delay
|
||||
};
|
||||
var initial_text = getQueryVariable("text");
|
||||
if (initial_text) {
|
||||
$("#text").val(initial_text);
|
||||
// show HTML tab if text is from query
|
||||
$('#result-tabs a[href="#result"]').tab('show');
|
||||
}
|
||||
// make tab insert a tab in the text box:
|
||||
$("#text").keydown(function (e) {
|
||||
if (e.which == 9) {
|
||||
e.preventDefault();
|
||||
this.value += "\t";
|
||||
}
|
||||
});
|
||||
parseAndRender();
|
||||
$("#clear-text-box").click(function(e) {
|
||||
$("#text").val('');
|
||||
window.location.search = "";
|
||||
parseAndRender();
|
||||
});
|
||||
$("#permalink").click(function(e) {
|
||||
window.location.pathname = "/index.html";
|
||||
window.location.search = "text=" + encodeURIComponent($("#text").val());
|
||||
});
|
||||
$("#text").bind('keyup paste cut mouseup', parseAndRender);
|
||||
$(".option").change(render);
|
||||
});
|
||||
</script>
|
||||
<style type="text/css">
|
||||
h1.title { font-family: monospace; font-size: 120%; font-weight: bold;
|
||||
margin-top: 0.5em; margin-bottom: 0; }
|
||||
textarea#text { height: 400px; width: 95%; font-family: monospace; font-size: 92%; }
|
||||
pre code#html { font-size: 92%; font-family: monospace; }
|
||||
pre#htmlpre { height: 400px; overflow: scroll; resize: vertical; width: 95%; }
|
||||
div#astpre { height: 400px; overflow: scroll; resize: vertical; width: 95%; }
|
||||
div#preview { height: 400px; overflow: scroll; resize: vertical; width: 95%; }
|
||||
div.row { margin-top: 1em; }
|
||||
blockquote { font-size: 100%; }
|
||||
footer { color: #555; text-align: center; margin: 1em; }
|
||||
pre { display: block; padding: 0.5em; color: #333; background: #f8f8ff }
|
||||
#warnings li { color: red; font-weight: bold; }
|
||||
label { padding-left: 1em; padding-top: 0; padding-bottom: 0; }
|
||||
div.timing { color: gray; visibility: hidden; height: 2em; }
|
||||
p#text-controls { height: 1em; margin-top: 1em; }
|
||||
a#permalink { margin-left: 1em; }
|
||||
span.timing { font-weight: bold; }
|
||||
span.timing { font-weight: bold; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col-md-6">
|
||||
<h1 class="title">commonmark.js dingus</h1>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col-md-6">
|
||||
<p id="text-controls"><a id="clear-text-box">clear</a> <a
|
||||
id="permalink">permalink</a></p>
|
||||
<textarea id="text"></textarea>
|
||||
<ul id="warnings"></ul>
|
||||
<div class="timing">Parsed in <span class="timing" id="parsetime"></span>
|
||||
ms. Rendered in <span class="timing" id="rendertime"></span> ms.</div>
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<ul id="result-tabs" class="nav nav-tabs" role="tablist">
|
||||
<li class="active"><a href="#preview" role="tab" data-toggle="tab">Preview</a></li>
|
||||
<li><a href="#result" role="tab" data-toggle="tab">HTML</a></li>
|
||||
<li><a href="#result-ast" role="tab" data-toggle="tab">AST</a></li>
|
||||
</ul>
|
||||
<div class="tab-content">
|
||||
<div id="preview" class="tab-pane active">
|
||||
</div>
|
||||
<div id="result" class="tab-pane">
|
||||
<pre id="htmlpre"><code id="html"></code></pre>
|
||||
</div>
|
||||
<div id="result-ast" class="tab-pane">
|
||||
<pre id="astpre"><code id="ast"></code></pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@ -1,3 +0,0 @@
|
||||
commonmark.js
|
||||
*.tgz
|
||||
index.html
|
@ -1,30 +0,0 @@
|
||||
Copyright (c) 2014, John MacFarlane
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
|
||||
* Neither the name of John MacFarlane nor the names of other
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -1,24 +0,0 @@
|
||||
CommonMark
|
||||
==========
|
||||
|
||||
CommonMark is a rationalized version of Markdown syntax,
|
||||
with a [spec][the spec] and BSD3-licensed reference
|
||||
implementations in C and JavaScript.
|
||||
|
||||
For more information, see <http://commonmark.org>.
|
||||
|
||||
To play with this library without installing it, see
|
||||
the live dingus at <http://spec.commonmark.org/dingus.html>.
|
||||
|
||||
This package includes the commonmark library and a
|
||||
command-line executable, `commonmark`.
|
||||
|
||||
Basic usage example:
|
||||
|
||||
var reader = new commonmark.DocParser();
|
||||
var writer = new commonmark.HtmlRenderer();
|
||||
var parsed = reader.parse("Hello *world*");
|
||||
var result = writer.render(parsed);
|
||||
|
||||
[the spec]: http://spec.commonmark.org
|
||||
|
@ -1,405 +0,0 @@
|
||||
|
||||
/**
|
||||
* References:
|
||||
*
|
||||
* - http://en.wikipedia.org/wiki/ANSI_escape_code
|
||||
* - http://www.termsys.demon.co.uk/vtansi.htm
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* Module dependencies.
|
||||
*/
|
||||
|
||||
var emitNewlineEvents = require('./newlines')
|
||||
, prefix = '\x1b[' // For all escape codes
|
||||
, suffix = 'm' // Only for color codes
|
||||
|
||||
/**
|
||||
* The ANSI escape sequences.
|
||||
*/
|
||||
|
||||
var codes = {
|
||||
up: 'A'
|
||||
, down: 'B'
|
||||
, forward: 'C'
|
||||
, back: 'D'
|
||||
, nextLine: 'E'
|
||||
, previousLine: 'F'
|
||||
, horizontalAbsolute: 'G'
|
||||
, eraseData: 'J'
|
||||
, eraseLine: 'K'
|
||||
, scrollUp: 'S'
|
||||
, scrollDown: 'T'
|
||||
, savePosition: 's'
|
||||
, restorePosition: 'u'
|
||||
, queryPosition: '6n'
|
||||
, hide: '?25l'
|
||||
, show: '?25h'
|
||||
}
|
||||
|
||||
/**
|
||||
* Rendering ANSI codes.
|
||||
*/
|
||||
|
||||
var styles = {
|
||||
bold: 1
|
||||
, italic: 3
|
||||
, underline: 4
|
||||
, inverse: 7
|
||||
}
|
||||
|
||||
/**
|
||||
* The negating ANSI code for the rendering modes.
|
||||
*/
|
||||
|
||||
var reset = {
|
||||
bold: 22
|
||||
, italic: 23
|
||||
, underline: 24
|
||||
, inverse: 27
|
||||
}
|
||||
|
||||
/**
|
||||
* The standard, styleable ANSI colors.
|
||||
*/
|
||||
|
||||
var colors = {
|
||||
white: 37
|
||||
, black: 30
|
||||
, blue: 34
|
||||
, cyan: 36
|
||||
, green: 32
|
||||
, magenta: 35
|
||||
, red: 31
|
||||
, yellow: 33
|
||||
, grey: 90
|
||||
, brightBlack: 90
|
||||
, brightRed: 91
|
||||
, brightGreen: 92
|
||||
, brightYellow: 93
|
||||
, brightBlue: 94
|
||||
, brightMagenta: 95
|
||||
, brightCyan: 96
|
||||
, brightWhite: 97
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a Cursor instance based off the given `writable stream` instance.
|
||||
*/
|
||||
|
||||
function ansi (stream, options) {
  // reuse the cursor already cached on this stream, if there is one
  var cached = stream._ansicursor
  if (cached) return cached

  // otherwise build a cursor and cache it on the stream for later calls
  var created = new Cursor(stream, options)
  stream._ansicursor = created
  return created
}
|
||||
module.exports = exports = ansi
|
||||
|
||||
/**
|
||||
* The `Cursor` class.
|
||||
*/
|
||||
|
||||
function Cursor (stream, options) {
  // allow `Cursor(stream)` without `new`
  if (!(this instanceof Cursor)) {
    return new Cursor(stream, options)
  }
  // `typeof null` is 'object', so falsy values are rejected explicitly;
  // otherwise a null stream would fail with a TypeError on `stream.write`
  // instead of the descriptive error below
  if (!stream || typeof stream != 'object' || typeof stream.write != 'function') {
    throw new Error('a valid Stream instance must be passed in')
  }

  // the stream to use
  this.stream = stream

  // when 'enabled' is false then all the functions are no-ops except for write()
  // (an undefined `options.enabled` falls back to the stream's `isTTY` flag)
  this.enabled = options && options.enabled
  if (typeof this.enabled === 'undefined') {
    this.enabled = stream.isTTY
  }
  this.enabled = !!this.enabled

  // when `buffering` is true, `write()` calls are buffered in
  // memory until `flush()` is invoked
  this.buffering = !!(options && options.buffering)
  this._buffer = []

  // controls the foreground and background colors; the second argument is
  // the base that each Colorer adds to the color codes it writes
  this.fg = this.foreground = new Colorer(this, 0)
  this.bg = this.background = new Colorer(this, 10)

  // defaults: no rendering style is active yet
  this.Bold = false
  this.Italic = false
  this.Underline = false
  this.Inverse = false

  // keep track of the number of "newlines" that get encountered
  this.newlines = 0
  emitNewlineEvents(stream)
  stream.on('newline', function () {
    this.newlines++
  }.bind(this))
}
|
||||
exports.Cursor = Cursor
|
||||
|
||||
/**
|
||||
* Helper function that calls `write()` on the underlying Stream.
|
||||
* Returns `this` instead of the write() return value to keep
|
||||
* the chaining going.
|
||||
*/
|
||||
|
||||
Cursor.prototype.write = function (data) {
  // while buffering, keep the raw arguments object in memory;
  // flush() joins the buffered pieces and writes them out later
  if (this.buffering) {
    this._buffer.push(arguments)
    return this
  }

  // not buffering: forward every argument to the underlying stream
  var out = this.stream
  out.write.apply(out, arguments)
  return this
}
|
||||
|
||||
/**
|
||||
* Buffer `write()` calls into memory.
|
||||
*
|
||||
* @api public
|
||||
*/
|
||||
|
||||
Cursor.prototype.buffer = function () {
|
||||
this.buffering = true
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out the in-memory buffer.
|
||||
*
|
||||
* @api public
|
||||
*/
|
||||
|
||||
Cursor.prototype.flush = function () {
|
||||
this.buffering = false
|
||||
var str = this._buffer.map(function (args) {
|
||||
if (args.length != 1) throw new Error('unexpected args length! ' + args.length);
|
||||
return args[0];
|
||||
}).join('');
|
||||
this._buffer.splice(0); // empty
|
||||
this.write(str);
|
||||
return this
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The `Colorer` class manages both the background and foreground colors.
|
||||
*/
|
||||
|
||||
function Colorer (cursor, base) {
|
||||
this.current = null
|
||||
this.cursor = cursor
|
||||
this.base = base
|
||||
}
|
||||
exports.Colorer = Colorer
|
||||
|
||||
/**
|
||||
* Write an ANSI color code, ensuring that the same code doesn't get rewritten.
|
||||
*/
|
||||
|
||||
Colorer.prototype._setColorCode = function setColorCode (code) {
  // codes are compared and remembered as strings
  var c = String(code)
  // same code as last time: write nothing, but still return the Colorer so
  // both paths of this method hand back the same value
  if (this.current === c) return this
  // the escape sequence is only written when the owning cursor is enabled;
  // the code is remembered either way
  this.cursor.enabled && this.cursor.write(prefix + c + suffix)
  this.current = c
  return this
}
|
||||
|
||||
|
||||
/**
|
||||
* Set up the positional ANSI codes.
|
||||
*/
|
||||
|
||||
Object.keys(codes).forEach(function (name) {
|
||||
var code = String(codes[name])
|
||||
Cursor.prototype[name] = function () {
|
||||
var c = code
|
||||
if (arguments.length > 0) {
|
||||
c = toArray(arguments).map(Math.round).join(';') + code
|
||||
}
|
||||
this.enabled && this.write(prefix + c)
|
||||
return this
|
||||
}
|
||||
})
|
||||
|
||||
/**
|
||||
* Set up the functions for the rendering ANSI codes.
|
||||
*/
|
||||
|
||||
Object.keys(styles).forEach(function (style) {
  // e.g. 'bold' -> 'Bold': the per-cursor flag that records whether the
  // style is currently switched on
  var name = style[0].toUpperCase() + style.substring(1)
    , c = styles[style]
    , r = reset[style]

  // cursor.bold(), cursor.italic(), ...: switch the style on
  Cursor.prototype[style] = function () {
    // already on: write nothing, but return the cursor so that chained
    // calls such as `cursor.bold().bold().write(...)` keep working
    if (this[name]) return this
    this.enabled && this.write(prefix + c + suffix)
    this[name] = true
    return this
  }

  // cursor.resetBold(), cursor.resetItalic(), ...: switch the style off
  Cursor.prototype['reset' + name] = function () {
    // already off: same reasoning as above, keep the chain alive
    if (!this[name]) return this
    this.enabled && this.write(prefix + r + suffix)
    this[name] = false
    return this
  }
})
|
||||
|
||||
/**
|
||||
* Setup the functions for the standard colors.
|
||||
*/
|
||||
|
||||
Object.keys(colors).forEach(function (color) {
|
||||
var code = colors[color]
|
||||
|
||||
Colorer.prototype[color] = function () {
|
||||
this._setColorCode(this.base + code)
|
||||
return this.cursor
|
||||
}
|
||||
|
||||
Cursor.prototype[color] = function () {
|
||||
return this.foreground[color]()
|
||||
}
|
||||
})
|
||||
|
||||
/**
|
||||
* Makes a beep sound!
|
||||
*/
|
||||
|
||||
Cursor.prototype.beep = function () {
|
||||
this.enabled && this.write('\x07')
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* Moves cursor to specific position
|
||||
*/
|
||||
|
||||
Cursor.prototype.goto = function (x, y) {
|
||||
x = x | 0
|
||||
y = y | 0
|
||||
this.enabled && this.write(prefix + y + ';' + x + 'H')
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the color.
|
||||
*/
|
||||
|
||||
Colorer.prototype.reset = function () {
|
||||
this._setColorCode(this.base + 39)
|
||||
return this.cursor
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets all ANSI formatting on the stream.
|
||||
*/
|
||||
|
||||
Cursor.prototype.reset = function () {
|
||||
this.enabled && this.write(prefix + '0' + suffix)
|
||||
this.Bold = false
|
||||
this.Italic = false
|
||||
this.Underline = false
|
||||
this.Inverse = false
|
||||
this.foreground.current = null
|
||||
this.background.current = null
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the foreground color with the given RGB values.
|
||||
* The closest match out of the 216 colors is picked.
|
||||
*/
|
||||
|
||||
Colorer.prototype.rgb = function (r, g, b) {
|
||||
var base = this.base + 38
|
||||
, code = rgb(r, g, b)
|
||||
this._setColorCode(base + ';5;' + code)
|
||||
return this.cursor
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as `cursor.fg.rgb(r, g, b)`.
|
||||
*/
|
||||
|
||||
Cursor.prototype.rgb = function (r, g, b) {
|
||||
return this.foreground.rgb(r, g, b)
|
||||
}
|
||||
|
||||
/**
|
||||
* Accepts CSS color codes for use with ANSI escape codes.
|
||||
* For example: `#FF0000` would be bright red.
|
||||
*/
|
||||
|
||||
Colorer.prototype.hex = function (color) {
|
||||
return this.rgb.apply(this, hex(color))
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as `cursor.fg.hex(color)`.
|
||||
*/
|
||||
|
||||
Cursor.prototype.hex = function (color) {
|
||||
return this.foreground.hex(color)
|
||||
}
|
||||
|
||||
|
||||
// UTIL FUNCTIONS //
|
||||
|
||||
/**
|
||||
* Translates a 255 RGB value to a 0-5 ANSI RGB value,
|
||||
* then returns the single ANSI color code to use.
|
||||
*/
|
||||
|
||||
function rgb (r, g, b) {
  // scale each channel by 5/255; rgb5() rounds the scaled values and
  // combines them into a single color code
  return rgb5(r / 255 * 5, g / 255 * 5, b / 255 * 5)
}
|
||||
|
||||
/**
|
||||
* Turns rgb 0-5 values into a single ANSI color code to use.
|
||||
*/
|
||||
|
||||
function rgb5 (r, g, b) {
  // round every channel to the nearest integer
  var levels = [r, g, b].map(function (channel) {
    return Math.round(channel)
  })
  // weight red by 36 and green by 6, add blue, and offset the sum by 16
  return 16 + (levels[0] * 36) + (levels[1] * 6) + levels[2]
}
|
||||
|
||||
/**
|
||||
* Accepts a hex CSS color code string (# is optional) and
|
||||
* translates it into an Array of 3 RGB 0-255 values, which
|
||||
* can then be used with rgb().
|
||||
*/
|
||||
|
||||
function hex (color) {
  // drop the optional leading '#'
  var c = color[0] === '#' ? color.substring(1) : color

  // expand the 3-digit CSS shorthand ('f00' -> 'ff0000'); without this the
  // 2-character slices below would yield [240, 0, NaN] for '#F00'
  if (c.length === 3) {
    c = c[0] + c[0] + c[1] + c[1] + c[2] + c[2]
  }

  // two hex digits per channel, in red / green / blue order
  var r = c.substring(0, 2)
    , g = c.substring(2, 4)
    , b = c.substring(4, 6)
  return [parseInt(r, 16), parseInt(g, 16), parseInt(b, 16)]
}
|
||||
|
||||
/**
|
||||
* Turns an array-like object into a real array.
|
||||
*/
|
||||
|
||||
function toArray (a) {
  // the length is read once up front, then each indexed entry is copied
  var copy = []
    , total = a.length
    , pos = 0
  while (pos < total) {
    copy.push(a[pos])
    pos++
  }
  return copy
}
|
@ -1,71 +0,0 @@
|
||||
|
||||
/**
|
||||
* Accepts any node Stream instance and hijacks its "write()" function,
|
||||
* so that it can count any newlines that get written to the output.
|
||||
*
|
||||
* When a '\n' byte is encountered, then a "newline" event will be emitted
|
||||
* on the stream, with no arguments. It is up to the listeners to determine
|
||||
* any necessary deltas required for their use-case.
|
||||
*
|
||||
* Ex:
|
||||
*
|
||||
* var cursor = ansi(process.stdout)
|
||||
* , ln = 0
|
||||
* process.stdout.on('newline', function () {
|
||||
* ln++
|
||||
* })
|
||||
*/
|
||||
|
||||
/**
|
||||
* Module dependencies.
|
||||
*/
|
||||
|
||||
var assert = require('assert')
|
||||
var NEWLINE = '\n'.charCodeAt(0)
|
||||
|
||||
function emitNewlineEvents (stream) {
  // already emitting newline events: do not wrap write() a second time
  if (stream._emittingNewlines) return

  var originalWrite = stream.write

  stream.write = function (data) {
    // first perform the real write
    var result = originalWrite.apply(stream, arguments)

    // nobody listens for 'newline': skip scanning the data
    if (!(stream.listeners('newline').length > 0)) return result

    // strings are scanned by char code, anything else (a buffer) by index
    var isString = typeof data == 'string'
      , total = data.length
    for (var pos = 0; pos < total; pos++) {
      processByte(stream, isString ? data.charCodeAt(pos) : data[pos])
    }

    return result
  }

  // mark the stream so later calls leave the wrapped write() alone
  stream._emittingNewlines = true
}
|
||||
module.exports = emitNewlineEvents
|
||||
|
||||
|
||||
/**
|
||||
* Processes an individual byte being written to a stream
|
||||
*/
|
||||
|
||||
function processByte (stream, b) {
|
||||
assert.equal(typeof b, 'number')
|
||||
if (b === NEWLINE) {
|
||||
stream.emit('newline')
|
||||
}
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
var Benchmark = require('benchmark').Benchmark;
|
||||
var suite = new Benchmark.Suite;
|
||||
var fs = require('fs');
|
||||
var sm = require('./lib/index.js');
|
||||
// https://github.com/coreyti/showdown
|
||||
var showdown = require('../../showdown/src/showdown');
|
||||
// https://github.com/chjj/marked
|
||||
var marked = require('../../marked/marked.min.js');
|
||||
|
||||
var benchfile = process.argv[2];
|
||||
|
||||
var contents = fs.readFileSync(benchfile, 'utf8');
|
||||
|
||||
// var converter = new showdown.converter();
|
||||
|
||||
suite.add('commonmark.js markdown->html', function() {
|
||||
var doc = new sm.DocParser().parse(contents);
|
||||
var renderer = new sm.HtmlRenderer();
|
||||
renderer.renderBlock(doc);
|
||||
})
|
||||
|
||||
.add('showdown.js markdown->html', function() {
|
||||
var converter = new showdown.converter();
|
||||
converter.makeHtml(contents);
|
||||
})
|
||||
|
||||
.add('marked.js markdown->html', function() {
|
||||
marked(contents);
|
||||
})
|
||||
|
||||
.on('cycle', function(event) {
|
||||
console.log(String(event.target));
|
||||
})
|
||||
.run();
|
||||
|
@ -1,33 +0,0 @@
|
||||
#!/usr/bin/env node
// Command-line front end: reads the files named on the command line (or
// /dev/stdin when none are given), joins their contents with newlines,
// parses the result and writes the rendering to stdout.
// With `--ast` as the first argument the parsed tree is dumped with
// util.inspect instead of being rendered by commonmark.HtmlRenderer.
var fs = require('fs');
var util = require('util');
var commonmark = require('../lib/index.js');

var parser = new commonmark.DocParser();
var renderer;
var inps = [];
var files = [];

if (process.argv[2] === '--ast') {
    files = process.argv.slice(3);
    // stand-in "renderer" that prints the tree at unlimited depth
    renderer = { render: function(x) {
        return util.inspect(x, null, Infinity) + '\n';
    } };
} else {
    files = process.argv.slice(2);
    renderer = new commonmark.HtmlRenderer();
}

if (files.length === 0) {
    files = ['/dev/stdin'];
}

// read each input directly; the original assigned to an undeclared `file`
// variable here, which created an implicit global
for (var i = 0; i < files.length; i++) {
    inps.push(fs.readFileSync(files[i], 'utf8'));
}

process.stdout.write(renderer.render(parser.parse(inps.join('\n'))));
|
@ -1,12 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>CommonMark dingus</title>
|
||||
<meta http-equiv="refresh" content="0;URL='/dingus.html'" >
|
||||
</head>
|
||||
<body>
|
||||
<p>The most recent version of the CommonMark dingus can be found
|
||||
at <a
|
||||
href="http://try.commonmark.org/dingus.html/">/dingus.html/</a>.</p>
|
||||
</body>
|
||||
</html>
|
@ -1,698 +0,0 @@
|
||||
var C_GREATERTHAN = 62;
|
||||
var C_SPACE = 32;
|
||||
var C_OPEN_BRACKET = 91;
|
||||
|
||||
var InlineParser = require('./inlines');
|
||||
var unescapeString = new InlineParser().unescapeString;
|
||||
|
||||
// True when s is empty or consists solely of whitespace characters,
// i.e. when it contains no non-whitespace character at all.
var isBlank = function(s) {
    var hasVisibleChar = /\S/.test(s);
    return !hasVisibleChar;
};
|
||||
|
||||
// Convert tabs to spaces on each line using a 4-space tab stop.
// Every tab is replaced by as many spaces (1 to 4) as are needed to reach
// the next column that is a multiple of 4. Text without tabs is returned
// unchanged.
var detabLine = function(text) {
    if (text.indexOf('\t') == -1) {
        return text;
    }
    // Input offset just past the previous tab. After a tab has been
    // expanded the output sits on a tab stop, so only the characters since
    // that tab determine how much padding the next one needs.
    var lastStop = 0;
    return text.replace(/\t/g, function(match, offset) {
        // The padding source must be four spaces wide: slicing off the
        // columns already used since the last stop leaves exactly the
        // spaces still missing up to the next stop.
        var result = '    '.slice((offset - lastStop) % 4);
        lastStop = offset + 1;
        return result;
    });
};
|
||||
|
||||
// Try to match regex re against the part of s that starts at offset.
// Returns the position of the match within the full string s, or -1 when
// there is no match.
var matchAt = function(re, s, offset) {
    var found = s.slice(offset).match(re);
    // found.index is relative to the slice, so shift it back by offset
    return found ? offset + found.index : -1;
};
|
||||
|
||||
var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)';
|
||||
var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
|
||||
"/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
|
||||
var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i');
|
||||
|
||||
var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
|
||||
|
||||
|
||||
// DOC PARSER
|
||||
|
||||
// These are methods of a DocParser object, defined below.
|
||||
|
||||
var makeBlock = function(tag, start_line, start_column) {
    // Build a fresh, open block record of type `tag`. The properties are
    // assigned one by one, in a fixed order.
    var block = {};
    block.t = tag;
    block.open = true;
    block.last_line_blank = false;
    block.start_line = start_line;
    block.start_column = start_column;
    // a new block initially ends on the line it starts on
    block.end_line = start_line;
    block.children = [];
    block.parent = null;
    // string_content is formed by concatenating strings, in finalize:
    block.string_content = "";
    block.strings = [];
    block.inline_content = [];
    return block;
};
|
||||
|
||||
// Decides whether a block of type parent_type may hold a child of type
// child_type: Document, BlockQuote and ListItem accept any child, while a
// List accepts only ListItem children. Every other parent type accepts none.
var canContain = function(parent_type, child_type) {
    if (parent_type == 'Document') {
        return true;
    }
    if (parent_type == 'BlockQuote') {
        return true;
    }
    if (parent_type == 'ListItem') {
        return true;
    }
    return (parent_type == 'List' && child_type == 'ListItem');
};
|
||||
|
||||
// Tells whether a block of the given type takes lines of text:
// only Paragraph, IndentedCode and FencedCode blocks do.
var acceptsLines = function(block_type) {
    if (block_type == 'Paragraph') {
        return true;
    }
    if (block_type == 'IndentedCode') {
        return true;
    }
    return block_type == 'FencedCode';
};
|
||||
|
||||
// Reports whether a block ends with a blank line. The check follows the
// last child of List and ListItem blocks downwards, so lists and sublists
// are inspected through to their final block; other block types are judged
// by their own last_line_blank flag only.
var endsWithBlankLine = function(block) {
    var node = block;
    for (;;) {
        if (node.last_line_blank) {
            return true;
        }
        var isListLike = (node.t == 'List' || node.t == 'ListItem');
        if (!(isListLike && node.children.length > 0)) {
            return false;
        }
        // descend into the last child and test again
        node = node.children[node.children.length - 1];
    }
};
|
||||
|
||||
// Break out of all containing lists: finalize every block from `block` up
// to and including the outermost enclosing list, then reset the tip of the
// document to that list's parent. (This is used to implement the "two blank
// lines break out of all lists" feature.) Does nothing when `block` is not
// inside a list.
var breakOutOfLists = function(block, line_number) {
    // walk up the parent chain, remembering the highest List encountered
    var outermost = null;
    var node = block;
    do {
        if (node.t === 'List') {
            outermost = node;
        }
        node = node.parent;
    } while (node);

    if (!outermost) {
        return;
    }

    // finalize everything below the outermost list, innermost first ...
    var current = block;
    while (current != outermost) {
        this.finalize(current, line_number);
        current = current.parent;
    }
    // ... then the list itself, and continue from its parent
    this.finalize(outermost, line_number);
    this.tip = outermost.parent;
};
|
||||
|
||||
// Append the part of line ln that starts at offset to the strings of the
// block at the tip. The caller is expected to have checked that the tip
// accepts lines; a closed tip is reported by throwing an object with a msg.
var addLine = function(ln, offset) {
    var remainder = ln.slice(offset);
    var target = this.tip;
    if (target.open) {
        target.strings.push(remainder);
        return;
    }
    throw({ msg: "Attempted to add line (" + ln + ") to closed container." });
};
|
||||
|
||||
// Create a block of type tag and attach it as a child of the tip. While the
// tip cannot contain such a block it is finalized, and the check is repeated
// against whatever the tip is afterwards. The new block becomes the tip and
// is returned.
var addChild = function(tag, line_number, offset) {
    while (!canContain(this.tip.t, tag)) {
        this.finalize(this.tip, line_number);
    }

    var host = this.tip;
    // columns are 1-based: offset 0 = column 1
    var child = makeBlock(tag, line_number, offset + 1);
    host.children.push(child);
    child.parent = host;
    this.tip = child;
    return child;
};
|
||||
|
||||
// Parse a list marker at position offset of line ln and return data on the
// marker, or null when there is no marker there (a line that matches the
// horizontal-rule pattern is never treated as a list marker).
//
// The returned object has:
//   type        'Bullet' or 'Ordered'
//   bullet_char the marker character (bullet lists only)
//   start       the item number as an integer (ordered lists only)
//   delimiter   '.' or ')' (ordered lists only)
//   padding     width of the marker plus the spaces counted as following it
var parseListMarker = function(ln, offset) {
    var rest = ln.slice(offset);
    var match;
    var spaces_after_marker;
    var data = {};
    if (rest.match(reHrule)) {
        return null;
    }
    if ((match = rest.match(/^[*+-]( +|$)/))) {
        spaces_after_marker = match[1].length;
        data.type = 'Bullet';
        data.bullet_char = match[0][0];

    } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) {
        spaces_after_marker = match[3].length;
        data.type = 'Ordered';
        // explicit radix: the digits are always decimal, and engines that
        // guess the radix may read a leading zero ("010") as octal
        data.start = parseInt(match[1], 10);
        data.delimiter = match[2];
    } else {
        return null;
    }
    // the marker (plus trailing spaces) is all there is on the line
    var blank_item = match[0].length === rest.length;
    if (spaces_after_marker >= 5 ||
        spaces_after_marker < 1 ||
        blank_item) {
        // five or more spaces, no space at all, or an empty item:
        // count the bare marker plus a single column
        data.padding = match[0].length - spaces_after_marker + 1;
    } else {
        // one to four spaces: they all belong to the padding
        data.padding = match[0].length;
    }
    return data;
};
|
||||
|
||||
// Compares the marker data of a list with that of a list item: they match
// when type, delimiter and bullet character are all strictly equal. This is
// used in agglomerating list items into lists.
var listsMatch = function(list_data, item_data) {
    var fields = ['type', 'delimiter', 'bullet_char'];
    for (var k = 0; k < fields.length; k++) {
        if (list_data[fields[k]] !== item_data[fields[k]]) {
            return false;
        }
    }
    return true;
};
|
||||
|
||||
// Analyze a line of text and update the document appropriately.
|
||||
// We parse markdown text by calling this on each line of input,
|
||||
// then finalizing the document.
|
||||
var incorporateLine = function(ln, line_number) {
|
||||
|
||||
var all_matched = true;
|
||||
var last_child;
|
||||
var first_nonspace;
|
||||
var offset = 0;
|
||||
var match;
|
||||
var data;
|
||||
var blank;
|
||||
var indent;
|
||||
var last_matched_container;
|
||||
var i;
|
||||
var CODE_INDENT = 4;
|
||||
|
||||
var container = this.doc;
|
||||
var oldtip = this.tip;
|
||||
|
||||
// Convert tabs to spaces:
|
||||
ln = detabLine(ln);
|
||||
|
||||
// For each containing block, try to parse the associated line start.
|
||||
// Bail out on failure: container will point to the last matching block.
|
||||
// Set all_matched to false if not all containers match.
|
||||
while (container.children.length > 0) {
|
||||
last_child = container.children[container.children.length - 1];
|
||||
if (!last_child.open) {
|
||||
break;
|
||||
}
|
||||
container = last_child;
|
||||
|
||||
match = matchAt(/[^ ]/, ln, offset);
|
||||
if (match === -1) {
|
||||
first_nonspace = ln.length;
|
||||
blank = true;
|
||||
} else {
|
||||
first_nonspace = match;
|
||||
blank = false;
|
||||
}
|
||||
indent = first_nonspace - offset;
|
||||
|
||||
switch (container.t) {
|
||||
case 'BlockQuote':
|
||||
if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) {
|
||||
offset = first_nonspace + 1;
|
||||
if (ln.charCodeAt(offset) === C_SPACE) {
|
||||
offset++;
|
||||
}
|
||||
} else {
|
||||
all_matched = false;
|
||||
}
|
||||
break;
|
||||
|
||||
case 'ListItem':
|
||||
if (indent >= container.list_data.marker_offset +
|
||||
container.list_data.padding) {
|
||||
offset += container.list_data.marker_offset +
|
||||
container.list_data.padding;
|
||||
} else if (blank) {
|
||||
offset = first_nonspace;
|
||||
} else {
|
||||
all_matched = false;
|
||||
}
|
||||
break;
|
||||
|
||||
case 'IndentedCode':
|
||||
if (indent >= CODE_INDENT) {
|
||||
offset += CODE_INDENT;
|
||||
} else if (blank) {
|
||||
offset = first_nonspace;
|
||||
} else {
|
||||
all_matched = false;
|
||||
}
|
||||
break;
|
||||
|
||||
case 'Header':
|
||||
case 'HorizontalRule':
|
||||
// a header can never contain > 1 line, so fail to match:
|
||||
all_matched = false;
|
||||
if (blank) {
|
||||
container.last_line_blank = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case 'FencedCode':
|
||||
// skip optional spaces of fence offset
|
||||
i = container.fence_offset;
|
||||
while (i > 0 && ln.charCodeAt(offset) === C_SPACE) {
|
||||
offset++;
|
||||
i--;
|
||||
}
|
||||
break;
|
||||
|
||||
case 'HtmlBlock':
|
||||
if (blank) {
|
||||
container.last_line_blank = true;
|
||||
all_matched = false;
|
||||
}
|
||||
break;
|
||||
|
||||
case 'Paragraph':
|
||||
if (blank) {
|
||||
container.last_line_blank = true;
|
||||
all_matched = false;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
}
|
||||
|
||||
if (!all_matched) {
|
||||
container = container.parent; // back up to last matching block
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
last_matched_container = container;
|
||||
|
||||
// This function is used to finalize and close any unmatched
|
||||
// blocks. We aren't ready to do this now, because we might
|
||||
// have a lazy paragraph continuation, in which case we don't
|
||||
// want to close unmatched blocks. So we store this closure for
|
||||
// use later, when we have more information.
|
||||
// Set once the unmatched blocks have been closed for this line. It lives in
// the enclosing scope so that it persists between calls; declared inside
// the closure it would be re-created (as undefined) on every call.
var already_done = false;
var closeUnmatchedBlocks = function(mythis) {
// nothing left to do on repeated calls for the same line
if (already_done) {
return;
}
// finalize any blocks not matched, walking from the old tip up to
// (but not including) the last matched container
while (oldtip != last_matched_container) {
mythis.finalize(oldtip, line_number);
oldtip = oldtip.parent;
}
already_done = true;
};
|
||||
|
||||
// Check to see if we've hit 2nd blank line; if so break out of list:
|
||||
if (blank && container.last_line_blank) {
|
||||
this.breakOutOfLists(container, line_number);
|
||||
}
|
||||
|
||||
// Unless last matched container is a code block, try new container starts,
|
||||
// adding children to the last matched container:
|
||||
while (container.t != 'FencedCode' &&
|
||||
container.t != 'IndentedCode' &&
|
||||
container.t != 'HtmlBlock' &&
|
||||
// this is a little performance optimization:
|
||||
matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== -1) {
|
||||
|
||||
match = matchAt(/[^ ]/, ln, offset);
|
||||
if (match === -1) {
|
||||
first_nonspace = ln.length;
|
||||
blank = true;
|
||||
} else {
|
||||
first_nonspace = match;
|
||||
blank = false;
|
||||
}
|
||||
indent = first_nonspace - offset;
|
||||
|
||||
if (indent >= CODE_INDENT) {
|
||||
// indented code
|
||||
if (this.tip.t != 'Paragraph' && !blank) {
|
||||
offset += CODE_INDENT;
|
||||
closeUnmatchedBlocks(this);
|
||||
container = this.addChild('IndentedCode', line_number, offset);
|
||||
} else { // indent > 4 in a lazy paragraph continuation
|
||||
break;
|
||||
}
|
||||
|
||||
} else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) {
|
||||
// blockquote
|
||||
offset = first_nonspace + 1;
|
||||
// optional following space
|
||||
if (ln.charCodeAt(offset) === C_SPACE) {
|
||||
offset++;
|
||||
}
|
||||
closeUnmatchedBlocks(this);
|
||||
container = this.addChild('BlockQuote', line_number, offset);
|
||||
|
||||
} else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) {
|
||||
// ATX header
|
||||
offset = first_nonspace + match[0].length;
|
||||
closeUnmatchedBlocks(this);
|
||||
container = this.addChild('Header', line_number, first_nonspace);
|
||||
container.level = match[0].trim().length; // number of #s
|
||||
// remove trailing ###s:
|
||||
container.strings =
|
||||
[ln.slice(offset).replace(/^ *#+ *$/, '').replace(/ +#+ *$/,'')];
|
||||
break;
|
||||
|
||||
} else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) {
|
||||
// fenced code block
|
||||
var fence_length = match[0].length;
|
||||
closeUnmatchedBlocks(this);
|
||||
container = this.addChild('FencedCode', line_number, first_nonspace);
|
||||
container.fence_length = fence_length;
|
||||
container.fence_char = match[0][0];
|
||||
container.fence_offset = first_nonspace - offset;
|
||||
offset = first_nonspace + fence_length;
|
||||
break;
|
||||
|
||||
} else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== -1) {
|
||||
// html block
|
||||
closeUnmatchedBlocks(this);
|
||||
container = this.addChild('HtmlBlock', line_number, first_nonspace);
|
||||
// note, we don't adjust offset because the tag is part of the text
|
||||
break;
|
||||
|
||||
} else if (container.t == 'Paragraph' &&
|
||||
container.strings.length === 1 &&
|
||||
((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
|
||||
// setext header line
|
||||
closeUnmatchedBlocks(this);
|
||||
container.t = 'Header'; // convert Paragraph to SetextHeader
|
||||
container.level = match[0][0] === '=' ? 1 : 2;
|
||||
offset = ln.length;
|
||||
|
||||
} else if (matchAt(reHrule, ln, first_nonspace) !== -1) {
|
||||
// hrule
|
||||
closeUnmatchedBlocks(this);
|
||||
container = this.addChild('HorizontalRule', line_number, first_nonspace);
|
||||
offset = ln.length - 1;
|
||||
break;
|
||||
|
||||
} else if ((data = parseListMarker(ln, first_nonspace))) {
|
||||
// list item
|
||||
closeUnmatchedBlocks(this);
|
||||
data.marker_offset = indent;
|
||||
offset = first_nonspace + data.padding;
|
||||
|
||||
// add the list if needed
|
||||
if (container.t !== 'List' ||
|
||||
!(listsMatch(container.list_data, data))) {
|
||||
container = this.addChild('List', line_number, first_nonspace);
|
||||
container.list_data = data;
|
||||
}
|
||||
|
||||
// add the list item
|
||||
container = this.addChild('ListItem', line_number, first_nonspace);
|
||||
container.list_data = data;
|
||||
|
||||
} else {
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
if (acceptsLines(container.t)) {
|
||||
// if it's a line container, it can't contain other containers
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// What remains at the offset is a text line. Add the text to the
|
||||
// appropriate container.
|
||||
|
||||
match = matchAt(/[^ ]/, ln, offset);
|
||||
if (match === -1) {
|
||||
first_nonspace = ln.length;
|
||||
blank = true;
|
||||
} else {
|
||||
first_nonspace = match;
|
||||
blank = false;
|
||||
}
|
||||
indent = first_nonspace - offset;
|
||||
|
||||
// First check for a lazy paragraph continuation:
|
||||
if (this.tip !== last_matched_container &&
|
||||
!blank &&
|
||||
this.tip.t == 'Paragraph' &&
|
||||
this.tip.strings.length > 0) {
|
||||
// lazy paragraph continuation
|
||||
|
||||
this.last_line_blank = false;
|
||||
this.addLine(ln, offset);
|
||||
|
||||
} else { // not a lazy continuation
|
||||
|
||||
// finalize any blocks not matched
|
||||
closeUnmatchedBlocks(this);
|
||||
|
||||
// Block quote lines are never blank as they start with >
|
||||
// and we don't count blanks in fenced code for purposes of tight/loose
|
||||
// lists or breaking out of lists. We also don't set last_line_blank
|
||||
// on an empty list item.
|
||||
container.last_line_blank = blank &&
|
||||
!(container.t == 'BlockQuote' ||
|
||||
container.t == 'Header' ||
|
||||
container.t == 'FencedCode' ||
|
||||
(container.t == 'ListItem' &&
|
||||
container.children.length === 0 &&
|
||||
container.start_line == line_number));
|
||||
|
||||
var cont = container;
|
||||
while (cont.parent) {
|
||||
cont.parent.last_line_blank = false;
|
||||
cont = cont.parent;
|
||||
}
|
||||
|
||||
switch (container.t) {
|
||||
case 'IndentedCode':
|
||||
case 'HtmlBlock':
|
||||
this.addLine(ln, offset);
|
||||
break;
|
||||
|
||||
case 'FencedCode':
|
||||
// check for closing code fence:
|
||||
match = (indent <= 3 &&
|
||||
ln.charAt(first_nonspace) == container.fence_char &&
|
||||
ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/));
|
||||
if (match && match[0].length >= container.fence_length) {
|
||||
// don't add closing fence to container; instead, close it:
|
||||
this.finalize(container, line_number);
|
||||
} else {
|
||||
this.addLine(ln, offset);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'Header':
|
||||
case 'HorizontalRule':
|
||||
// nothing to do; we already added the contents.
|
||||
break;
|
||||
|
||||
default:
|
||||
if (acceptsLines(container.t)) {
|
||||
this.addLine(ln, first_nonspace);
|
||||
} else if (blank) {
|
||||
// do nothing
|
||||
} else if (container.t != 'HorizontalRule' &&
|
||||
container.t != 'Header') {
|
||||
// create paragraph container for line
|
||||
container = this.addChild('Paragraph', line_number, first_nonspace);
|
||||
this.addLine(ln, first_nonspace);
|
||||
} else {
|
||||
console.log("Line " + line_number.toString() +
|
||||
" with container type " + container.t +
|
||||
" did not match any condition.");
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Close `block` and run the post-processing its type requires:
//   - Paragraph: join the collected lines, then peel leading link
//     reference definitions off the front (a paragraph that consisted
//     only of definitions becomes a 'ReferenceDef');
//   - Header / HtmlBlock: join the collected lines;
//   - IndentedCode / FencedCode: build the code text and retag the block
//     as 'CodeBlock' (a fenced block's first line becomes its info string);
//   - List: decide whether the list is tight or loose.
// Afterwards the parser's tip moves to the block's parent (or this.top).
// Returns 0 without touching anything if the block is already closed;
// otherwise returns undefined.
var finalize = function(block, line_number) {
    var kind;
    var consumed;
    var items, itemCount, itemIdx, entry, isLastItem;
    var parts, partCount, partIdx, isLastPart;

    if (!block.open) {
        // already closed: nothing to do
        return 0;
    }
    block.open = false;
    block.end_line = (line_number > block.start_line) ? line_number - 1
                                                      : line_number;

    // Dispatch on the type the block had when finalization started; some
    // branches below retag the block.
    kind = block.t;

    if (kind === 'Paragraph') {
        block.string_content = block.strings.join('\n').replace(/^ */m,'');

        // Consume as many leading link reference definitions as parse.
        while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET) {
            consumed = this.inlineParser.parseReference(block.string_content,
                                                        this.refmap);
            if (!consumed) {
                break;
            }
            block.string_content = block.string_content.slice(consumed);
            if (isBlank(block.string_content)) {
                block.t = 'ReferenceDef';
                break;
            }
        }

    } else if (kind === 'Header' || kind === 'HtmlBlock') {
        block.string_content = block.strings.join('\n');

    } else if (kind === 'IndentedCode') {
        // trailing blank lines collapse into a single newline
        block.string_content =
            block.strings.join('\n').replace(/(\n *)*$/,'\n');
        block.t = 'CodeBlock';

    } else if (kind === 'FencedCode') {
        // the first collected line is the info string
        block.info = unescapeString(block.strings[0].trim());
        block.string_content = (block.strings.length == 1) ?
            '' :
            block.strings.slice(1).join('\n') + '\n';
        block.t = 'CodeBlock';

    } else if (kind === 'List') {
        block.tight = true; // tight unless a separating blank line is found

        items = block.children;
        itemCount = items.length;
        for (itemIdx = 0; itemIdx < itemCount; itemIdx++) {
            entry = items[itemIdx];
            isLastItem = itemIdx == itemCount - 1;

            // a non-final item that ends with a blank line loosens the list
            if (endsWithBlankLine(entry) && !isLastItem) {
                block.tight = false;
                break;
            }

            // so does a blank line between the item's own children (the
            // very last child of the very last item does not count)
            parts = entry.children;
            partCount = parts.length;
            for (partIdx = 0; partIdx < partCount; partIdx++) {
                isLastPart = partIdx == partCount - 1;
                if (endsWithBlankLine(parts[partIdx]) &&
                    !(isLastItem && isLastPart)) {
                    block.tight = false;
                    break;
                }
            }
        }
    }

    this.tip = block.parent || this.top;
};
|
||||
|
||||
// Build a fresh copy of `block` (recursing into its children) in which
// the raw string content of paragraphs and headers has been replaced by
// parsed inline content. The input tree is not modified; only the fields
// relevant to each block type are carried over to the copy.
var processInlines = function(block) {
    var kind = block.t;
    var copy = {
        t: kind,
        start_line: block.start_line,
        start_column: block.start_column,
        end_line: block.end_line
    };
    var kids, converted, k;

    if (kind === 'Paragraph' || kind === 'Header') {
        copy.inline_content =
            this.inlineParser.parse(block.string_content.trim(), this.refmap);
        if (kind === 'Header') {
            copy.level = block.level;
        }
    } else if (kind === 'List') {
        copy.list_data = block.list_data;
        copy.tight = block.tight;
    } else if (kind === 'CodeBlock') {
        copy.string_content = block.string_content;
        copy.info = block.info;
    } else if (kind === 'HtmlBlock') {
        copy.string_content = block.string_content;
    }

    kids = block.children;
    if (kids) {
        converted = [];
        k = 0;
        while (k < kids.length) {
            converted.push(this.processInlines(kids[k]));
            k++;
        }
        copy.children = converted;
    }
    return copy;
};
|
||||
|
||||
// The main parsing function. Resets the parser state (document root, tip
// and reference map), feeds the input to incorporateLine one line at a
// time (line numbers are 1-based), finalizes every block that is still
// open, and returns the tree produced by processInlines.
//
// `input` is the whole document as a string. Lines may be terminated by
// "\r\n", "\n" or "\r". A single trailing line ending is dropped so that
// it does not produce an extra empty last line.
var parse = function(input) {
    this.doc = makeBlock('Document', 1, 1);
    this.tip = this.doc;
    this.refmap = {};

    // Strip one trailing terminator of ANY supported kind. Stripping only
    // "\n" would turn a final "\r\n" into a dangling "\r", which the split
    // below would then report as an additional empty line.
    var lines = input.replace(/(?:\r\n|\n|\r)$/, '').split(/\r\n|\n|\r/);
    var len = lines.length;
    for (var i = 0; i < len; i++) {
        this.incorporateLine(lines[i], i + 1);
    }

    // finalize() moves this.tip to the parent of the block it closes, so
    // this walks up the chain of open blocks until none is left.
    // NOTE(review): lines are numbered from 1 above, yet len - 1 is passed
    // as the closing line number here; left as is, confirm the intended
    // end_line for blocks closed at end of input.
    while (this.tip) {
        this.finalize(this.tip, len - 1);
    }
    return this.processInlines(this.doc);
};
|
||||
|
||||
|
||||
// The DocParser object. Returns a new block parser whose state is an
// empty document: `doc` is the document root, `tip` is the block currently
// being extended (initially the root itself) and `refmap` collects link
// reference definitions. The remaining properties are the block-parsing
// functions of this module attached as methods. Works with or without
// `new`, since the object is returned explicitly.
function DocParser(){
    // Create the root first so that `tip` can point at the same object.
    // Inside an object literal `this.doc` does not refer to the literal's
    // own `doc` property, so it cannot be used to initialise `tip`.
    var doc = makeBlock('Document', 1, 1);
    return {
        doc: doc,
        tip: doc,
        refmap: {},
        inlineParser: new InlineParser(),
        breakOutOfLists: breakOutOfLists,
        addLine: addLine,
        addChild: addChild,
        incorporateLine: incorporateLine,
        finalize: finalize,
        processInlines: processInlines,
        parse: parse
    };
}
|
||||
|
||||
module.exports = DocParser;
|
@ -1,58 +0,0 @@
|
||||
// derived from https://github.com/mathiasbynens/String.fromCodePoint
|
||||
/*! http://mths.be/fromcodepoint v0.2.1 by @mathias */
|
||||
// Exports a function that turns a code point into a string and yields
// U+FFFD (the replacement character) instead of failing on an invalid
// code point. The engine's own String.fromCodePoint is used when it
// exists; otherwise a hand-written fallback builds the UTF-16 code units.
var fromCodePoint;

if (String.fromCodePoint) {

    // Native path: delegate, mapping the RangeError thrown for an invalid
    // code point to U+FFFD. Any other error is re-thrown.
    fromCodePoint = function (_) {
        try {
            return String.fromCodePoint(_);
        } catch (e) {
            if (e instanceof RangeError) {
                return String.fromCharCode(0xFFFD);
            }
            throw e;
        }
    };

} else {

    // Fallback path: captured once so the conversion below does not
    // depend on later changes to these built-ins.
    var stringFromCharCode = String.fromCharCode;
    var floor = Math.floor;

    fromCodePoint = function(_) {
        var MAX_SIZE = 0x4000;     // flush threshold for buffered code units
        var pending = [];          // code units not yet turned into text
        var total = arguments.length;
        var text = '';
        var position, codePoint, isValid;

        if (!total) {
            return '';
        }

        for (position = 0; position < total; position++) {
            codePoint = Number(arguments[position]);

            // must be a finite integer within 0..0x10FFFF
            isValid = isFinite(codePoint) &&
                      codePoint >= 0 &&
                      codePoint <= 0x10FFFF &&
                      floor(codePoint) == codePoint;
            if (!isValid) {
                return String.fromCharCode(0xFFFD);
            }

            if (codePoint <= 0xFFFF) {
                // BMP code point: a single code unit
                pending.push(codePoint);
            } else {
                // Astral code point: split into a surrogate pair
                // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
                codePoint -= 0x10000;
                pending.push((codePoint >> 10) + 0xD800,
                             (codePoint % 0x400) + 0xDC00);
            }

            // flush on the last argument or when the buffer grew too large
            if (position + 1 == total || pending.length > MAX_SIZE) {
                text += stringFromCharCode.apply(null, pending);
                pending.length = 0;
            }
        }
        return text;
    };

}

module.exports = fromCodePoint;
|
@ -1,168 +0,0 @@
|
||||
// Build the HTML text for one element.
//   tag         - element name
//   attribs     - list of [name, value] pairs (may be falsy for none);
//                 values are inserted as given, so callers escape them
//   contents    - inner HTML; when falsy the element is rendered empty
//   selfclosing - when contents is falsy, emit "<tag ... />" instead of
//                 "<tag ...></tag>"
var inTags = function(tag, attribs, contents, selfclosing) {
    var markup = '<' + tag;
    var k, pair;

    if (attribs) {
        // stop at the first missing entry
        for (k = 0; (pair = attribs[k]) !== undefined; k++) {
            markup = markup.concat(' ', pair[0], '="', pair[1], '"');
        }
    }

    if (contents) {
        return markup.concat('>', contents, '</', tag, '>');
    }
    if (selfclosing) {
        return markup + ' />';
    }
    return markup.concat('></', tag, '>');
};
|
||||
|
||||
// Render a single inline AST node as an HTML string. Text and code are
// escaped, raw HTML is passed through untouched, and container nodes
// (emphasis, strong, links) render their children recursively. An
// unknown node type is reported on the console and renders as "".
var renderInline = function(inline) {
    var kind = inline.t;
    var attrs;

    if (kind === 'Text') {
        return this.escape(inline.c);
    }
    if (kind === 'Softbreak') {
        return this.softbreak;
    }
    if (kind === 'Hardbreak') {
        return inTags('br',[],"",true) + '\n';
    }
    if (kind === 'Emph') {
        return inTags('em', [], this.renderInlines(inline.c));
    }
    if (kind === 'Strong') {
        return inTags('strong', [], this.renderInlines(inline.c));
    }
    if (kind === 'Html') {
        return inline.c;
    }
    if (kind === 'Link') {
        attrs = [['href', this.escape(inline.destination, true)]];
        if (inline.title) {
            attrs.push(['title', this.escape(inline.title, true)]);
        }
        return inTags('a', attrs, this.renderInlines(inline.label));
    }
    if (kind === 'Image') {
        // The alt text is the rendered label with markup removed: tags
        // carrying an alt attribute are replaced by that attribute's
        // value, every other tag is dropped.
        attrs = [['src', this.escape(inline.destination, true)],
                 ['alt', this.renderInlines(inline.label).
                             replace(/\<[^>]*alt="([^"]*)"[^>]*\>/g, '$1').
                             replace(/\<[^>]*\>/g,'')]];
        if (inline.title) {
            attrs.push(['title', this.escape(inline.title, true)]);
        }
        return inTags('img', attrs, "", true);
    }
    if (kind === 'Code') {
        return inTags('code', [], this.escape(inline.c));
    }

    console.log("Unknown inline type " + inline.t);
    return "";
};
|
||||
|
||||
// Render a list of inline nodes by concatenating the HTML produced for
// each one, in order. An empty list renders as "".
var renderInlines = function(inlines) {
    var html = '';
    var at = 0;
    while (at < inlines.length) {
        html += this.renderInline(inlines[at]);
        at++;
    }
    return html;
};
|
||||
|
||||
// Render a single block-level AST node as an HTML string.
// `in_tight_list` is truthy when the node sits inside a tight list, in
// which case paragraphs are rendered without their <p> wrapper. An
// unknown block type is reported on the console and renders as "".
var renderBlock = function(block, in_tight_list) {
    var kind = block.t;
    var inner, tagName, attrs, words;

    if (kind === 'Document') {
        inner = this.renderBlocks(block.children);
        // a non-empty document ends with a newline
        return inner === '' ? '' : inner + '\n';
    }

    if (kind === 'Paragraph') {
        inner = this.renderInlines(block.inline_content);
        return in_tight_list ? inner : inTags('p', [], inner);
    }

    if (kind === 'BlockQuote') {
        inner = this.renderBlocks(block.children);
        return inTags('blockquote', [],
                      inner === '' ? this.innersep
                                   : this.innersep + inner + this.innersep);
    }

    if (kind === 'ListItem') {
        inner = this.renderBlocks(block.children, in_tight_list);
        // put block-level markup on its own lines inside the <li>
        if (/^[<]/.test(inner)) {
            inner = '\n' + inner;
        }
        if (/[>]$/.test(inner)) {
            inner = inner + '\n';
        }
        return inTags('li', [], inner, false).trim();
    }

    if (kind === 'List') {
        tagName = block.list_data.type == 'Bullet' ? 'ul' : 'ol';
        // a start attribute is only emitted for a start other than 1
        attrs = (!block.list_data.start || block.list_data.start == 1) ?
            [] : [['start', block.list_data.start.toString()]];
        return inTags(tagName, attrs,
                      this.innersep +
                      this.renderBlocks(block.children, block.tight) +
                      this.innersep);
    }

    if (kind === 'Header') {
        tagName = 'h' + block.level;
        return inTags(tagName, [], this.renderInlines(block.inline_content));
    }

    if (kind === 'CodeBlock') {
        // the first word of the info string becomes a language-* class
        words = block.info ? block.info.split(/ +/) : [];
        attrs = (words.length === 0 || words[0].length === 0) ?
            [] : [['class','language-' + this.escape(words[0],true)]];
        return inTags('pre', [],
                      inTags('code', attrs, this.escape(block.string_content)));
    }

    if (kind === 'HtmlBlock') {
        return block.string_content;
    }

    if (kind === 'ReferenceDef') {
        return "";
    }

    if (kind === 'HorizontalRule') {
        return inTags('hr',[],"",true);
    }

    console.log("Unknown block type " + block.t);
    return "";
};
|
||||
|
||||
// Render a list of block nodes and join the results with this.blocksep.
// 'ReferenceDef' nodes are skipped entirely, so they do not contribute a
// separator. `in_tight_list` is forwarded to renderBlock unchanged.
var renderBlocks = function(blocks, in_tight_list) {
    var pieces = [];
    var at = 0;
    var node;
    while (at < blocks.length) {
        node = blocks[at];
        at++;
        if (node.t === 'ReferenceDef') {
            continue;
        }
        pieces.push(this.renderBlock(node, in_tight_list));
    }
    return pieces.join(this.blocksep);
};
|
||||
|
||||
// The HtmlRenderer object. Returns a renderer carrying the default
// options plus the rendering functions of this module as methods;
// `render` is an alias of `renderBlock`.
function HtmlRenderer(){
    return {
        // default options:
        blocksep: '\n',  // space between blocks
        innersep: '\n',  // space between block container tag and contents
        softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML
                         // set to "<br />" to make them hard breaks
                         // set to " " if you want to ignore line wrapping in source

        // Escape the characters that are special in HTML text and in
        // double-quoted attribute values: & < > ".
        //   s                 - the string to escape
        //   preserve_entities - when truthy, an "&" that already starts a
        //                       numeric or named entity reference is left
        //                       alone so it is not escaped twice
        // Each character must be replaced by its entity reference;
        // replacing it by itself leaves the text unescaped.
        escape: function(s, preserve_entities) {
            var ampersand = preserve_entities ?
                /[&](?![#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)/gi :
                /[&]/g;
            // "&" goes first so the entities produced below stay intact
            return s.replace(ampersand, '&amp;')
                    .replace(/[<]/g, '&lt;')
                    .replace(/[>]/g, '&gt;')
                    .replace(/["]/g, '&quot;');
        },
        renderInline: renderInline,
        renderInlines: renderInlines,
        renderBlock: renderBlock,
        renderBlocks: renderBlocks,
        render: renderBlock
    };
}
|
||||
|
||||
module.exports = HtmlRenderer;
|
File diff suppressed because it is too large
Load Diff
@ -1,20 +0,0 @@
|
||||
// commonmark.js - CommonMark in JavaScript
|
||||
// Copyright (C) 2014 John MacFarlane
|
||||
// License: BSD3.
|
||||
|
||||
// Basic usage:
|
||||
//
|
||||
// var commonmark = require('commonmark');
|
||||
// var parser = new commonmark.DocParser();
|
||||
// var renderer = new commonmark.HtmlRenderer();
|
||||
// console.log(renderer.render(parser.parse('Hello *world*')));
|
||||
|
||||
var util = require('util');
|
||||
|
||||
// Produce a printable dump of a parsed document tree, expanding nested
// objects to any depth.
var renderAST = function(tree) {
    var inspectOptions = { depth: null };
    return util.inspect(tree, inspectOptions);
};
|
||||
|
||||
module.exports.DocParser = require('./blocks');
|
||||
module.exports.HtmlRenderer = require('./html-renderer');
|
||||
module.exports.ASTRenderer = renderAST;
|
@ -1,854 +0,0 @@
|
||||
var fromCodePoint = require('./from-code-point.js');
|
||||
var entityToChar = require('./html5-entities.js').entityToChar;
|
||||
|
||||
// Character codes of the characters that matter to the inline parser,
// as returned by String.prototype.charCodeAt.
var C_NEWLINE = 10,         // "\n"
    C_SPACE = 32,           // " "
    C_ASTERISK = 42,        // "*"
    C_UNDERSCORE = 95,      // "_"
    C_BACKTICK = 96,        // "`"
    C_OPEN_BRACKET = 91,    // "["
    C_CLOSE_BRACKET = 93,   // "]"
    C_LESSTHAN = 60,        // "<"
    C_GREATERTHAN = 62,     // ">"
    C_BANG = 33,            // "!"
    C_BACKSLASH = 92,       // "\"
    C_AMPERSAND = 38,       // "&"
    C_OPEN_PAREN = 40,      // "("
    C_COLON = 58;           // ":"
|
||||
|
||||
// Regular-expression source fragments used by the inline parser. They
// are plain strings so that they can be combined before compilation.

// Backslash escapes
var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]';
var ESCAPED_CHAR = '\\\\' + ESCAPABLE;

// Quoted / parenthesised runs (link titles) and link destinations
var IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + '|[^"\\x00])*"';
var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'';
var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)';
var REG_CHAR = '[^\\\\()\\x00-\\x20]';
var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)';

// Raw HTML: tags and their attributes
var TAGNAME = '[A-Za-z][A-Za-z0-9]*';
var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*';
var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+";
var SINGLEQUOTEDVALUE = "'[^']*'";
var DOUBLEQUOTEDVALUE = '"[^"]*"';
var ATTRIBUTEVALUE = "(?:" +
    [UNQUOTEDVALUE, SINGLEQUOTEDVALUE, DOUBLEQUOTEDVALUE].join("|") + ")";
var ATTRIBUTEVALUESPEC = "(?:\\s*=\\s*" + ATTRIBUTEVALUE + ")";
var ATTRIBUTE = "(?:\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)";
var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*\\s*/?>";
var CLOSETAG = "</" + TAGNAME + "\\s*[>]";

// Raw HTML: everything that is not an element tag
var HTMLCOMMENT = "<!--([^-]+|[-][^-]+)*-->";
var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]";
var DECLARATION = "<![A-Z]+\\s+[^>]*>";
var CDATA = "<!\\[CDATA\\[([^\\]]+|\\][^\\]]|\\]\\][^>])*\\]\\]>";
var HTMLTAG = "(?:" +
    [OPENTAG, CLOSETAG, HTMLCOMMENT,
     PROCESSINGINSTRUCTION, DECLARATION, CDATA].join("|") + ")";

// Numeric (hex or decimal) and named entity references
var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});";

// Compiled expressions. Those starting with '^' are meant to be applied
// to the remainder of the subject at the current position.

var reHtmlTag = new RegExp('^' + HTMLTAG, 'i');

// a link title in double quotes, single quotes or parentheses
var reLinkTitle = new RegExp(
    '^(?:' + IN_DOUBLE_QUOTES + '|' + IN_SINGLE_QUOTES + '|' + IN_PARENS + ')');

// a link destination wrapped in <...>
var reLinkDestinationBraces = new RegExp(
    '^(?:[<](?:[^<>\\n\\\\\\x00]|' + ESCAPED_CHAR + '|\\\\)*[>])');

// a bare link destination
var reLinkDestination = new RegExp(
    '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*');

var reEscapable = new RegExp(ESCAPABLE);

var reAllEscapedChar = new RegExp('\\\\(' + ESCAPABLE + ')', 'g');

var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')');

var reEntityHere = new RegExp('^' + ENTITY, 'i');

var reEntity = new RegExp(ENTITY, 'gi');

// Matches either a character with a special meaning in markdown or a
// run of non-special characters. Runs of _, * or ` are matched as one
// clump because they have to be handled as a group.
var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m;
|
||||
|
||||
// Return `s` with backslash escapes replaced by the escaped character
// and entity references replaced by whatever entityToChar yields for
// them. Backslash escapes are handled first.
var unescapeString = function(s) {
    var withoutEscapes = s.replace(reAllEscapedChar, '$1');
    return withoutEscapes.replace(reEntity, entityToChar);
};
|
||||
|
||||
// Normalize a reference label so that labels which should be treated as
// equal compare equal: strip leading and trailing whitespace, collapse
// EVERY internal run of whitespace to a single space, and fold case (by
// upper-casing). Returns the normalized string.
var normalizeReference = function(s) {
    return s.trim()
        // the g flag is required: without it only the first run of
        // whitespace would be collapsed
        .replace(/\s+/g, ' ')
        .toUpperCase();
};
|
||||
|
||||
// INLINE PARSER
|
||||
|
||||
// These are methods of an InlineParser object, defined below.
|
||||
// An InlineParser keeps track of a subject (a string to be
|
||||
// parsed) and a position in that subject.
|
||||
|
||||
// Apply `re` to the part of the subject that starts at the current
// position. On success, move the position past the end of the match and
// return the matched text; otherwise leave the position alone and return
// null. (An unanchored `re` may match further along, in which case the
// text before the match is skipped as well.)
var match = function(re) {
    var rest = this.subject.slice(this.pos);
    var found = re.exec(rest);
    if (!found) {
        return null;
    }
    this.pos += found.index + found[0].length;
    return found[0];
};
|
||||
|
||||
// Return the code of the character at the current subject position, or
// -1 if there are no more characters. The position is not changed.
var peek = function() {
    return (this.pos < this.subject.length) ?
        this.subject.charCodeAt(this.pos) :
        -1;
};
|
||||
|
||||
// Skip zero or more spaces, including at most one newline (and the
// spaces following it). Always returns 1.
var spnl = function() {
    var reSpacesAndOneNewline = /^ *(?:\n *)?/;
    this.match(reSpacesAndOneNewline);
    return 1;
};
|
||||
|
||||
// All of the parsers below try to match something at the current position
|
||||
// in the subject. If they succeed in matching anything, they
|
||||
// return the inline matched, advancing the subject.
|
||||
|
||||
// Attempt to parse a run of backticks at the current position. If a
// later run of exactly the same length closes it, a 'Code' inline is
// pushed whose content has whitespace runs collapsed to one space and is
// trimmed; otherwise the opening run is pushed as literal 'Text' and
// parsing resumes right after it. Returns 0 when the subject does not
// start with a backtick here, true otherwise.
var parseBackticks = function(inlines) {
    var opening = this.match(/^`+/);
    if (!opening) {
        return 0;
    }
    var contentStart = this.pos;
    var run, raw;

    // look at each following backtick run until one has the right length
    while ((run = this.match(/`+/m))) {
        if (run !== opening) {
            continue;
        }
        raw = this.subject.slice(contentStart, this.pos - opening.length);
        inlines.push({ t: 'Code', c: raw.replace(/[ \n]+/g,' ').trim() });
        return true;
    }

    // no closing run: rewind and emit the opening backticks literally
    this.pos = contentStart;
    inlines.push({ t: 'Text', c: opening });
    return true;
};
|
||||
|
||||
// Parse a backslash at the current position and push one inline for it:
// a 'Hardbreak' if it is followed by a newline, the escaped character as
// 'Text' if it is followed by an escapable character, or a literal
// backslash otherwise. Returns false (consuming nothing) when the
// current character is not a backslash, true otherwise.
var parseBackslash = function(inlines) {
    var text = this.subject;
    var at = this.pos;

    if (text.charCodeAt(at) !== C_BACKSLASH) {
        return false;
    }

    var following = text.charAt(at + 1);
    if (following === '\n') {
        this.pos = at + 2;
        inlines.push({ t: 'Hardbreak' });
    } else if (reEscapable.test(following)) {
        this.pos = at + 2;
        inlines.push({ t: 'Text', c: following });
    } else {
        // not an escape: consume only the backslash itself
        this.pos++;
        inlines.push({t: 'Text', c: '\\'});
    }
    return true;
};
|
||||
|
||||
// Attempt to parse an autolink (an email address or a URL with a known
// scheme, wrapped in pointy brackets) at the current position. On
// success a 'Link' inline is pushed whose label is the bracketed text
// and whose destination is that text URI-encoded (with "mailto:" in
// front for an email address), and true is returned. Returns false if
// neither form matches.
var parseAutolink = function(inlines) {
    var reEmailAutolink = /^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/;
    var reUriAutolink = /^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i;

    var scheme = 'mailto:';
    var bracketed = this.match(reEmailAutolink);
    if (!bracketed) {
        // not an email address: try an absolute URI instead
        scheme = '';
        bracketed = this.match(reUriAutolink);
    }
    if (!bracketed) {
        return false;
    }

    // drop the surrounding < >
    var target = bracketed.slice(1,-1);
    inlines.push({
        t: 'Link',
        label: [{ t: 'Text', c: target }],
        destination: scheme + encodeURI(unescape(target)) });
    return true;
};
|
||||
|
||||
// Attempt to parse a raw HTML tag at the current position. On success
// the matched text is pushed unchanged as an 'Html' inline and true is
// returned; otherwise nothing is pushed and false is returned.
var parseHtmlTag = function(inlines) {
    var tagText = this.match(reHtmlTag);
    if (!tagText) {
        return false;
    }
    inlines.push({ t: 'Html', c: tagText });
    return true;
};
|
||||
|
||||
// Look at the run of characters with code `cc` that starts at the
// current position and report how long it is and whether, judging by the
// characters on either side, it could open and/or close emphasis. The
// position is restored before returning, so nothing is consumed.
// Returns { numdelims, can_open, can_close }.
var scanDelims = function(cc) {
    var origin = this.pos;
    // the start and the end of the subject count as a newline
    var before = origin === 0 ? '\n' : this.subject.charAt(origin - 1);
    var runLength = 0;

    while (this.peek() === cc) {
        runLength++;
        this.pos++;
    }

    var codeAfter = this.peek();
    var after = codeAfter === -1 ? '\n' : fromCodePoint(codeAfter);

    // a run can open if it is not followed by whitespace, and can close
    // if it is not preceded by whitespace
    var opens = runLength > 0 && !(/\s/.test(after));
    var closes = runLength > 0 && !(/\s/.test(before));
    if (cc === C_UNDERSCORE) {
        // underscores next to an ASCII letter or digit on the outer side
        // are treated as part of a word
        opens = opens && !((/[a-z0-9]/i).test(before));
        closes = closes && !((/[a-z0-9]/i).test(after));
    }

    this.pos = origin;
    return { numdelims: runLength,
             can_open: opens,
             can_close: closes };
};
|
||||
|
||||
// Constructors for inline nodes. Each returns a fresh object whose `t`
// names the node type and whose `c` holds the given content.

// Emphasis wrapping the inlines `ils`.
var Emph = function(ils) {
    var node = { t: 'Emph' };
    node.c = ils;
    return node;
};

// Strong emphasis wrapping the inlines `ils`.
var Strong = function(ils) {
    var node = { t: 'Strong' };
    node.c = ils;
    return node;
};

// Literal text `s`.
var Str = function(s) {
    var node = { t: 'Text' };
    node.c = s;
    return node;
};
|
||||
|
||||
// Handle a run of emphasis delimiters with character code `cc` at the
// current position. The run is consumed and pushed as literal text, and
// an entry describing it is put on top of the delimiter stack so that
// it can be turned into emphasis later. Returns false (consuming
// nothing) if there is no such run here, true otherwise.
var parseEmphasis = function(cc,inlines) {
    var scan = this.scanDelims(cc);
    var runLength = scan.numdelims;
    var runStart = this.pos;

    if (runLength === 0) {
        return false;
    }

    this.pos += runLength;
    inlines.push(Str(this.subject.slice(runStart, this.pos)));

    // Put an entry for this run on top of the delimiter stack (a doubly
    // linked list); `pos` is the index of the text node just pushed.
    var below = this.delimiters;
    var entry = { cc: cc,
                  numdelims: runLength,
                  pos: inlines.length - 1,
                  previous: below,
                  next: null,
                  can_open: scan.can_open,
                  can_close: scan.can_close};
    this.delimiters = entry;
    if (below !== null) {
        below.next = entry;
    }

    return true;
};
|
||||
|
||||
// Unlink `delim` from the delimiter stack (a doubly linked list whose
// top is this.delimiters). If `delim` was the top of the stack, the
// entry below it becomes the new top.
var removeDelimiter = function(delim) {
    var below = delim.previous;
    var above = delim.next;

    if (below !== null) {
        below.next = above;
    }
    if (above === null) {
        // delim was the top of the stack
        this.delimiters = below;
    } else {
        above.previous = below;
    }
};
|
||||
|
||||
// Remove every null entry from `inlines` in place, keeping the remaining
// entries in their original order. Only null counts as a gap.
var removeGaps = function(inlines) {
    var kept = 0;
    var read, entry;
    for (read = 0; read < inlines.length; read++) {
        entry = inlines[read];
        if (entry === null) {
            continue;
        }
        // slide the entry down over the gaps seen so far
        inlines[kept] = entry;
        kept++;
    }
    // drop the leftover tail
    inlines.splice(kept);
};
|
||||
|
||||
// Resolve the emphasis delimiters recorded on the delimiter stack above
// `stack_bottom`. Closers are visited from the bottom of that range
// upwards; each is paired with the nearest opener below it that uses the
// same character, and the inlines between the two are wrapped in an Emph
// or Strong node. `inlines` is modified in place; on return the
// delimiter stack has been cut back to `stack_bottom`.
var processEmphasis = function(inlines, stack_bottom) {
    var opener, closer;
    var openerText, closerText;
    var walker, walkerBelow, closerAbove;
    var used;
    var inner;
    var slot;

    // start from the lowest entry above stack_bottom
    closer = this.delimiters;
    while (closer !== null && closer.previous !== stack_bottom) {
        closer = closer.previous;
    }

    // move upwards, handling every entry that can close emphasis
    while (closer !== null) {
        if (!(closer.can_close &&
              (closer.cc === C_UNDERSCORE || closer.cc === C_ASTERISK))) {
            closer = closer.next;
            continue;
        }

        // look back for the nearest opener using the same character
        opener = closer.previous;
        while (opener !== null && opener !== stack_bottom &&
               !(opener.cc === closer.cc && opener.can_open)) {
            opener = opener.previous;
        }
        if (opener === null || opener === stack_bottom) {
            // nothing to pair with
            closer = closer.next;
            continue;
        }

        // number of delimiter characters this pairing uses up
        if (closer.numdelims < 3 || opener.numdelims < 3) {
            used = closer.numdelims <= opener.numdelims ?
                closer.numdelims : opener.numdelims;
        } else {
            used = closer.numdelims % 2 === 0 ? 2 : 1;
        }

        openerText = inlines[opener.pos];
        closerText = inlines[closer.pos];

        // take the used delimiters away from both stack entries and
        // from the text nodes that hold them
        opener.numdelims -= used;
        closer.numdelims -= used;
        openerText.c = openerText.c.slice(0, openerText.c.length - used);
        closerText.c = closerText.c.slice(0, closerText.c.length - used);

        // everything between the two text nodes becomes the content
        inner = inlines.slice(opener.pos + 1, closer.pos);
        removeGaps(inner);

        // The new node takes the first slot after the opener; the other
        // slots in between become gaps so that the positions stored on
        // the stack stay valid.
        inlines[opener.pos + 1] = used === 1 ? Emph(inner) : Strong(inner);
        for (slot = opener.pos + 2; slot < closer.pos; slot++) {
            inlines[slot] = null;
        }

        // drop the stack entries between opener and closer
        walker = closer.previous;
        while (walker !== null && walker !== opener) {
            walkerBelow = walker.previous;
            this.removeDelimiter(walker);
            walker = walkerBelow;
        }

        // an opener with no delimiters left disappears entirely
        if (opener.numdelims === 0) {
            inlines[opener.pos] = null;
            this.removeDelimiter(opener);
        }

        // Likewise for the closer, after which the scan moves on; a
        // closer that still has delimiters is examined again.
        if (closer.numdelims === 0) {
            inlines[closer.pos] = null;
            closerAbove = closer.next;
            this.removeDelimiter(closer);
            closer = closerAbove;
        }
    }

    removeGaps(inlines);

    // remove all remaining delimiters above stack_bottom
    while (this.delimiters != stack_bottom) {
        this.removeDelimiter(this.delimiters);
    }
};
|
||||
|
||||
// Attempt to parse a link title at the current position. Returns the
// title without its surrounding delimiters and with escapes and entities
// resolved, or null if there is no title here.
var parseLinkTitle = function() {
    var quoted = this.match(reLinkTitle);
    if (!quoted) {
        return null;
    }
    // drop the first and the last character (the delimiters)
    return unescapeString(quoted.slice(1, quoted.length - 1));
};
|
||||
|
||||
// Attempt to parse a link destination at the current position, either
// wrapped in <...> or bare. Returns the destination with escapes and
// entities resolved and then URI-encoded, or null if nothing matched.
var parseLinkDestination = function() {
    var raw = this.match(reLinkDestinationBraces);
    if (raw) {
        // chop off the surrounding < >
        raw = raw.slice(1, raw.length - 1);
    } else {
        raw = this.match(reLinkDestination);
        if (raw === null) {
            return null;
        }
    }
    return encodeURI(unescape(unescapeString(raw)));
};
|
||||
|
||||
// Attempt to parse a link label ("[...]" without unescaped brackets
// inside, at most 1000 characters between the brackets) at the current
// position. Returns the number of characters parsed, or 0 if there is no
// label here.
var parseLinkLabel = function() {
    var label = this.match(/^\[(?:[^\\\[\]]|\\[\[\]]){0,1000}\]/);
    if (label === null) {
        return 0;
    }
    return label.length;
};
|
||||
|
||||
// Parse a raw link label `s`, given with its surrounding [ ], and return
// its inline contents. (Note: this is not a method of InlineParser.)
var parseRawLabel = function(s) {
    // A fresh parser with an empty reference map is used on purpose:
    // links must not resolve inside nested brackets.
    var labelParser = new InlineParser();
    var inner = s.substr(1, s.length - 2);
    return labelParser.parse(inner, {});
};
|
||||
|
||||
// Consume an opening bracket: push "[" as literal text and record it on
// top of the delimiter stack as a potential link opener. Always returns
// true.
var parseOpenBracket = function(inlines) {
    var bracketAt = this.pos;
    this.pos += 1;
    inlines.push(Str("["));

    // Stack entry for this opener: `pos` is the index of the text node
    // just pushed, `index` is where the bracket sits in the subject.
    var below = this.delimiters;
    var entry = { cc: C_OPEN_BRACKET,
                  numdelims: 1,
                  pos: inlines.length - 1,
                  previous: below,
                  next: null,
                  can_open: true,
                  can_close: false,
                  index: bracketAt };
    this.delimiters = entry;
    if (below !== null) {
        below.next = entry;
    }
    return true;
};
|
||||
|
||||
// Consume a "!".  If the next character is "[", consume it too, append
// Str("![") to inlines and add a "!" delimiter to the delimiter stack.
// Otherwise just append Str("!").  Always returns true.
var parseBang = function(inlines) {
    var bangIndex = this.pos;
    this.pos += 1;

    if (this.peek() !== C_OPEN_BRACKET) {
        inlines.push(Str("!"));
        return true;
    }

    this.pos += 1;
    inlines.push(Str("!["));

    // New top-of-stack entry for this opener.  `index` records the
    // position of the "[" (one past the "!") in the subject.
    var older = this.delimiters;
    var entry = { cc: C_BANG,
                  numdelims: 1,
                  pos: inlines.length - 1,
                  previous: older,
                  next: null,
                  can_open: true,
                  can_close: false,
                  index: bangIndex + 1 };
    this.delimiters = entry;
    if (older !== null) {
        older.next = entry;
    }
    return true;
};
|
||||
|
||||
// Try to match a close bracket against an opener ("[" or "![") in the
// delimiter stack.  Appends either a Link/Image node or a plain "]"
// character to inlines.  If there is a matching opener it is removed from
// the delimiter stack.  Always returns true.
var parseCloseBracket = function(inlines) {
    var startpos;
    var is_image;
    var dest;
    var title;
    var matched = false;
    var link_text;
    var i;
    var opener, closer_above;
    var savepos, beforelabel;
    // n and reflabel were previously assigned without declaration, which
    // created implicit globals (and throws in strict mode).
    var n, reflabel;
    var link;

    this.pos += 1;
    startpos = this.pos;

    // look through stack of delimiters for a [ or !
    opener = this.delimiters;
    while (opener !== null) {
        if (opener.cc === C_OPEN_BRACKET || opener.cc === C_BANG) {
            break;
        }
        opener = opener.previous;
    }

    if (opener === null) {
        // no matched opener, just return a literal
        inlines.push(Str("]"));
        return true;
    }

    // If we got here, opener is a potential opener
    is_image = opener.cc === C_BANG;
    // instead of copying a slice, we null out the
    // parts of inlines that don't correspond to link_text;
    // later, we'll collapse them.  This is awkward, and could
    // be simplified if we made inlines a linked list rather than
    // an array:
    link_text = inlines.slice(0);
    for (i = 0; i < opener.pos + 1; i++) {
        link_text[i] = null;
    }

    // Check to see if we have a link/image

    // Inline link?
    if (this.peek() === C_OPEN_PAREN) {
        this.pos++;
        if (this.spnl() &&
            ((dest = this.parseLinkDestination()) !== null) &&
            this.spnl() &&
            // make sure there's a space before the title:
            (/^\s/.test(this.subject.charAt(this.pos - 1)) &&
             (title = this.parseLinkTitle() || '') || true) &&
            this.spnl() &&
            this.match(/^\)/)) {
            matched = true;
        }
    } else {

        // Next, see if there's a link label
        savepos = this.pos;
        this.spnl();
        beforelabel = this.pos;
        n = this.parseLinkLabel();
        if (n === 0 || n === 2) {
            // empty or missing second label: the reference label is the
            // bracketed text itself, from the opener up to this "]"
            reflabel = this.subject.slice(opener.index, startpos);
        } else {
            reflabel = this.subject.slice(beforelabel, beforelabel + n);
        }
        if (n === 0) {
            // If shortcut reference link, rewind before spaces we skipped.
            this.pos = savepos;
        }

        // lookup rawlabel in refmap
        link = this.refmap[normalizeReference(reflabel)];
        if (link) {
            dest = link.destination;
            title = link.title;
            matched = true;
        }
    }

    if (matched) {
        this.processEmphasis(link_text, opener.previous);

        // remove the part of inlines that became link_text.
        // see note above on why we need to do this instead of splice:
        for (i = opener.pos; i < inlines.length; i++) {
            inlines[i] = null;
        }

        // processEmphasis will remove this and later delimiters.
        // Now, for a link, we also remove earlier link openers.
        // (no links in links)
        if (!is_image) {
            opener = this.delimiters;
            closer_above = null;
            while (opener !== null) {
                if (opener.cc === C_OPEN_BRACKET) {
                    if (closer_above) {
                        closer_above.previous = opener.previous;
                    } else {
                        this.delimiters = opener.previous;
                    }
                } else {
                    closer_above = opener;
                }
                opener = opener.previous;
            }
        }

        inlines.push({t: is_image ? 'Image' : 'Link',
                      destination: dest,
                      title: title,
                      label: link_text});
        return true;

    } else { // no match

        this.removeDelimiter(opener);  // remove this opener from stack
        this.pos = startpos;
        inlines.push(Str("]"));
        return true;
    }

};
|
||||
|
||||
// Attempt to parse an entity at the current position.  On success a Text
// node holding entityToChar() of the matched text is appended to inlines
// and true is returned; otherwise returns false.
var parseEntity = function(inlines) {
    var entity = this.match(reEntityHere);
    if (!entity) {
        return false;
    }
    inlines.push({ t: 'Text', c: entityToChar(entity) });
    return true;
};
|
||||
|
||||
// Parse whatever reMain matches at the current position (a run of
// ordinary characters, or a single character with a special meaning in
// markdown) as a plain string and append it to inlines as a Text node.
// Returns true on a match, false otherwise.
var parseString = function(inlines) {
    var text = this.match(reMain);
    if (!text) {
        return false;
    }
    inlines.push({ t: 'Text', c: text });
    return true;
};
|
||||
|
||||
// Parse a newline, together with any spaces directly before it.  The
// matched text includes the newline itself, so a length above 2 means at
// least two preceding spaces: that yields a Hardbreak, anything shorter a
// Softbreak.  Returns false when no newline is found here.
var parseNewline = function(inlines) {
    var run = this.match(/^ *\n/);
    if (!run) {
        return false;
    }
    if (run.length > 2) {
        inlines.push({ t: 'Hardbreak' });
    } else if (run.length > 0) {
        inlines.push({ t: 'Softbreak' });
    }
    return true;
};
|
||||
|
||||
// Attempt to parse an image.  If the opening "!" is followed by a link,
// the link node just added to inlines is retagged as an Image; otherwise
// a literal "!" is appended.  Returns false only when there is no "!" at
// the current position.
// NOTE(review): this relies on this.parseLink, but no parseLink (nor
// parseImage) is registered on the parser object built by InlineParser in
// this file -- this looks like dead code; confirm before relying on it.
var parseImage = function(inlines) {
    if (!this.match(/^!/)) {
        return false;
    }
    if (this.parseLink(inlines)) {
        inlines[inlines.length - 1].t = 'Image';
    } else {
        inlines.push({ t: 'Text', c: '!' });
    }
    return true;
};
|
||||
|
||||
// Attempt to parse a link reference definition at the start of s:
//   [label]: destination "optional title"
// On success the definition is stored in refmap under its normalized
// label (an existing entry for that label is kept) and the number of
// characters consumed is returned.  Returns 0 if s does not start with a
// reference definition.
var parseReference = function(s, refmap) {
    this.subject = s;
    this.pos = 0;
    this.label_nest_level = 0;
    var start = this.pos;

    // label:
    var labelLength = this.parseLinkLabel();
    if (labelLength === 0) {
        return 0;
    }
    var rawlabel = this.subject.substr(0, labelLength);

    // colon:
    if (this.peek() !== C_COLON) {
        this.pos = start;
        return 0;
    }
    this.pos++;

    // link url
    this.spnl();
    var dest = this.parseLinkDestination();
    if (dest === null || dest.length === 0) {
        this.pos = start;
        return 0;
    }

    // optional title
    var afterDest = this.pos;
    this.spnl();
    var title = this.parseLinkTitle();
    if (title === null) {
        title = '';
        // no title: rewind before the spaces we skipped
        this.pos = afterDest;
    }

    // make sure we're at line end:
    if (this.match(/^ *(?:\n|$)/) === null) {
        this.pos = start;
        return 0;
    }

    var key = normalizeReference(rawlabel);
    if (!refmap[key]) {
        refmap[key] = { destination: dest, title: title };
    }
    return this.pos - start;
};
|
||||
|
||||
// Parse the next inline element in subject, advancing the subject
// position and adding the result to the inlines list.
// Returns false only when the end of the subject has been reached
// (peek() === -1).  Otherwise returns true: if the handler selected for
// the current character does not produce anything, that one character is
// consumed and added as literal Text.
var parseInline = function(inlines) {
    var c = this.peek();
    if (c === -1) {
        return false;
    }
    var res;
    switch(c) {
    case C_NEWLINE:
    case C_SPACE:
        res = this.parseNewline(inlines);
        break;
    case C_BACKSLASH:
        res = this.parseBackslash(inlines);
        break;
    case C_BACKTICK:
        res = this.parseBackticks(inlines);
        break;
    case C_ASTERISK:
    case C_UNDERSCORE:
        res = this.parseEmphasis(c, inlines);
        break;
    case C_OPEN_BRACKET:
        res = this.parseOpenBracket(inlines);
        break;
    case C_BANG:
        res = this.parseBang(inlines);
        break;
    case C_CLOSE_BRACKET:
        res = this.parseCloseBracket(inlines);
        break;
    case C_LESSTHAN:
        // an autolink takes precedence over a raw HTML tag
        res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines);
        break;
    case C_AMPERSAND:
        res = this.parseEntity(inlines);
        break;
    default:
        res = this.parseString(inlines);
        break;
    }
    if (!res) {
        // nothing matched: fall back to a one-character literal
        this.pos += 1;
        inlines.push({t: 'Text', c: fromCodePoint(c)});
    }

    return true;
};
|
||||
|
||||
// Parse s as a list of inlines, using refmap (or an empty map when none
// is given) to resolve references.  Resets the parser position and
// delimiter stack, consumes the whole subject, then resolves emphasis
// over the collected inlines and returns them.
var parseInlines = function(s, refmap) {
    var inlines = [];
    var more;

    this.subject = s;
    this.pos = 0;
    this.refmap = refmap || {};
    this.delimiters = null;

    do {
        more = this.parseInline(inlines);
    } while (more);

    this.processEmphasis(inlines, null);
    return inlines;
};
|
||||
|
||||
// The InlineParser object: returns a fresh parser whose state fields are
// reset and whose methods are the functions defined in this file.
function InlineParser(){
    var parser = {
        // --- state ---
        subject: '',
        label_nest_level: 0, // reset to 0 by parseReference
        delimiters: null,    // top of the delimiter stack
        pos: 0,
        refmap: {},
        // --- scanning helpers ---
        match: match,
        peek: peek,
        spnl: spnl,
        unescapeString: unescapeString,
        // --- inline handlers ---
        parseBackticks: parseBackticks,
        parseBackslash: parseBackslash,
        parseAutolink: parseAutolink,
        parseHtmlTag: parseHtmlTag,
        scanDelims: scanDelims,
        parseEmphasis: parseEmphasis,
        parseLinkTitle: parseLinkTitle,
        parseLinkDestination: parseLinkDestination,
        parseLinkLabel: parseLinkLabel,
        parseOpenBracket: parseOpenBracket,
        parseCloseBracket: parseCloseBracket,
        parseBang: parseBang,
        parseEntity: parseEntity,
        parseString: parseString,
        parseNewline: parseNewline,
        parseReference: parseReference,
        parseInline: parseInline,
        processEmphasis: processEmphasis,
        removeDelimiter: removeDelimiter,
        // --- entry point ---
        parse: parseInlines
    };
    return parser;
}
|
||||
|
||||
module.exports = InlineParser;
|
@ -1,25 +0,0 @@
|
||||
{ "name": "commonmark",
|
||||
"description": "a strongly specified, highly compatible variant of Markdown",
|
||||
"version": "0.12.0",
|
||||
"homepage": "http://commonmark.org",
|
||||
"keywords":
|
||||
[ "markdown",
|
||||
"commonmark",
|
||||
"md",
|
||||
"stmd" ],
|
||||
"repository":
|
||||
{ "type": "git",
|
||||
"url": "https://github.com/jgm/CommonMark.git" },
|
||||
"author": "John MacFarlane",
|
||||
"bugs": { "url": "https://github.com/jgm/CommonMark/issues" },
|
||||
"license": "BSD-3-Clause",
|
||||
"main": "./lib/index.js",
|
||||
"bin": { "commonmark": "./bin/commonmark" },
|
||||
"scripts": { "test": "node ./test.js" },
|
||||
"directories": {
|
||||
"lib": "./lib"
|
||||
},
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
}
|
@ -1,82 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
var fs = require('fs');
|
||||
var commonmark = require('./lib/index.js');
|
||||
var ansi = require('./ansi/ansi');
|
||||
var cursor = ansi(process.stdout);
|
||||
|
||||
var writer = new commonmark.HtmlRenderer();
|
||||
var reader = new commonmark.DocParser();
|
||||
|
||||
var passed = 0;
|
||||
var failed = 0;
|
||||
|
||||
// Make whitespace visible in test output: every tab becomes '→' and every
// space becomes '␣'.
var showSpaces = function(s) {
    var tabsShown = s.replace(/\t/g, '→');
    return tabsShown.replace(/ /g, '␣');
};
|
||||
|
||||
// Read spec.txt, extract every example (markdown and expected HTML
// delimited by lines containing a single "."), render each one and report
// the results grouped by spec section.  The process exit status is set to
// 1 when the spec cannot be read or when any example fails, so callers
// (make, CI) can detect a failing run.
fs.readFile('spec.txt', 'utf8', function(err, data) {
    if (err) {
        process.exitCode = 1;
        return console.log(err);
    }
    var i;
    var examples = [];
    var current_section = "";
    var example_number = 0;
    var tests = data
        .replace(/\r\n?/g, "\n") // Normalize newlines for platform independence
        .replace(/^<!-- END TESTS -->(.|[\n])*/m, '');

    // replace() is used only as an iterator over matches; its result is
    // discarded.  A match is either an example or a section header.
    tests.replace(/^\.\n([\s\S]*?)^\.\n([\s\S]*?)^\.$|^#{1,6} *(.*)$/gm,
        function(_,markdownSubmatch,htmlSubmatch,sectionSubmatch){
            if (sectionSubmatch) {
                current_section = sectionSubmatch;
            } else {
                example_number++;
                examples.push({markdown: markdownSubmatch,
                               html: htmlSubmatch,
                               section: current_section,
                               number: example_number});
            }
        });

    current_section = "";

    console.time("Elapsed time");

    for (i = 0; i < examples.length; i++) {
        var example = examples[i];
        if (example.section !== current_section) {
            if (current_section !== '') {
                cursor.write('\n');
            }
            current_section = example.section;
            cursor.reset().write(current_section).reset().write('  ');
        }
        // the spec writes tabs as '→'; turn them back into real tabs
        var actual = writer.renderBlock(reader.parse(example.markdown.replace(/→/g, '\t')));
        if (actual === example.html) {
            passed++;
            cursor.green().write('✓').reset();
        } else {
            failed++;
            cursor.write('\n');

            cursor.red().write('✘ Example ' + example.number + '\n');
            cursor.cyan();
            cursor.write('=== markdown ===============\n');
            cursor.write(showSpaces(example.markdown));
            cursor.write('=== expected ===============\n');
            cursor.write(showSpaces(example.html));
            cursor.write('=== got ====================\n');
            cursor.write(showSpaces(actual));
            cursor.reset();
        }
    }
    cursor.write('\n' + passed.toString() + ' tests passed, ' +
                 failed.toString() + ' failed.\n');

    console.timeEnd("Elapsed time");

    if (failed > 0) {
        // report failure through the exit status without cutting off
        // pending output
        process.exitCode = 1;
    }
});
|
||||
|
@ -1,5 +0,0 @@
|
||||
# Install the cmark manual pages: man1/cmark.1 into share/man/man1 and
# man3/cmark.3 into share/man/man3.
foreach(section 1 3)
  install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man${section}/cmark.${section}
          DESTINATION share/man/man${section})
endforeach()
|
@ -1,102 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Creates a man page from a C file.
|
||||
|
||||
# Comments beginning with `/**` are treated as Groff man, except that
|
||||
# 'this' is converted to \fIthis\fR, and ''this'' to \fBthis\fR.
|
||||
|
||||
# Non-blank lines immediately following a man page comment are treated
|
||||
# as function signatures or examples and parsed into .Ft, .Fo, .Fa, .Fc. The
|
||||
# immediately preceding man documentation chunk is printed after the example
|
||||
# as a comment on it.
|
||||
|
||||
# That's about it!
|
||||
|
||||
import sys, re, os
|
||||
from datetime import date
|
||||
|
||||
# Patterns used to classify lines of the C header.  Raw strings are used so
# that regex escapes such as \w and \s are not treated as (invalid) Python
# string escapes.
comment_start_re = re.compile(r'^\/\*\* ?')
comment_delim_re = re.compile(r'^[/ ]\** ?')
comment_end_re = re.compile(r'^ \**\/')
function_re = re.compile(r'^ *(?:CMARK_EXPORT\s+)?(?P<type>(?:const\s+)?\w+(?:\s*[*])?)\s*(?P<name>\w+)\s*\((?P<args>[^)]*)\)')
blank_re = re.compile(r'^\s*$')
macro_re = re.compile('CMARK_EXPORT *')
typedef_start_re = re.compile('typedef.*{$')
typedef_end_re = re.compile('}')
single_quote_re = re.compile(r"(?<!\w)'([^']+)'(?!\w)")
double_quote_re = re.compile(r"(?<!\w)''([^']+)''(?!\w)")

def handle_quotes(s):
    """Convert quote markup in s to groff font escapes.

    ''text'' becomes \\fBtext\\fR (bold) and 'text' becomes \\fItext\\fR
    (italic).  The doubled form is replaced first: if the single-quote
    pattern ran first it would match the inner quotes of ''text'' and the
    bold form would never be produced.
    """
    bolded = re.sub(double_quote_re, r'\\fB\g<1>\\fR', s)
    return re.sub(single_quote_re, r'\\fI\g<1>\\fR', bolded)
|
||||
|
||||
# Parser state shared across the line loop below.
typedef = False   # truthy while inside a "typedef ... {" signature block
mdlines = []      # output lines of the man page body
chunk = []        # lines of the man comment currently being collected
sig = []          # lines of the signature/example following a man comment

if len(sys.argv) > 1:
    sourcefile = sys.argv[1]
else:
    # stdout carries the generated man page, so usage goes to stderr
    sys.stderr.write("Usage: make_man_page.py sourcefile\n")
    sys.exit(1)

with open(sourcefile, 'r') as cmarkh:
    state = 'default'
    for line in cmarkh:
        # state transition
        oldstate = state
        if comment_start_re.match(line):
            state = 'man'
        elif comment_end_re.match(line) and state == 'man':
            # closing line of a man comment carries no content
            continue
        elif comment_delim_re.match(line) and state == 'man':
            state = 'man'
        elif not typedef and blank_re.match(line):
            state = 'default'
        elif typedef and typedef_end_re.match(line):
            typedef = False
        elif state == 'man':
            # first non-comment line after a man comment starts a signature
            state = 'signature'
            typedef = typedef_start_re.match(line)

        # handle line
        if state == 'man':
            chunk.append(handle_quotes(re.sub(comment_delim_re, '', line)))
        elif state == 'signature':
            ln = re.sub(macro_re, '', line)
            if typedef or not re.match(blank_re, ln):
                sig.append(ln)
        elif oldstate == 'signature' and state != 'signature':
            # a signature just ended: emit it, followed by its comment
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            rawsig = ''.join(sig)
            m = function_re.match(rawsig)
            if m:
                mdlines.append('\\fI' + m.group('type') + '\\fR' + ' ')
                mdlines.append('\\fB' + m.group('name') + '\\fR' + '(')
                first = True
                for argument in re.split(',', m.group('args')):
                    if not first:
                        mdlines.append(', ')
                    first = False
                    mdlines.append('\\fI' + argument.strip() + '\\fR')
                mdlines.append(')\n')
            else:
                # not a function prototype: print verbatim in a code block
                mdlines.append('.nf\n\\f[C]\n.RS 0n\n')
                mdlines += sig
                mdlines.append('.RE\n\\f[]\n.fi\n')
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            mdlines.append('.PP\n')
            mdlines += chunk
            chunk = []
            sig = []
        elif oldstate == 'man' and state != 'signature':
            # a man comment with no signature after it
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            mdlines += chunk # add man chunk
            chunk = []
            mdlines.append('\n')

sys.stdout.write('.TH ' + os.path.basename(sourcefile).replace('.h','') + ' 3 "' + date.today().strftime('%B %d, %Y') + '" "LOCAL" "Library Functions Manual"\n')
sys.stdout.write(''.join(mdlines))
|
@ -1,31 +0,0 @@
|
||||
.TH "cmark" "1" "November 30, 2014" "LOCAL" "General Commands Manual"
|
||||
.SH "NAME"
|
||||
\fBcmark\fR
|
||||
\- convert CommonMark formatted text to HTML
|
||||
.SH "SYNOPSIS"
|
||||
.HP 6n
|
||||
\fBcmark\fR
|
||||
[\fB\-\-ast\fR]
|
||||
file*
|
||||
.SH "DESCRIPTION"
|
||||
\fBcmark\fR
|
||||
acts as a pipe, reading from
|
||||
\fRstdin\fR
|
||||
or from the specified files and writing to
|
||||
\fRstdout\fR.
|
||||
It converts Markdown formatted plain text to HTML, using the conventions
|
||||
described in the CommonMark spec.
|
||||
If multiple files are specified, the contents of the files are simply
|
||||
concatenated before parsing.
|
||||
.SH "OPTIONS"
|
||||
.TP 12n
|
||||
\-\-ast
|
||||
Print an abstract syntax tree instead of HTML.
|
||||
.TP 12n
|
||||
\-\-help
|
||||
Print usage information.
|
||||
.TP 12n
|
||||
\-\-version
|
||||
Print version.
|
||||
.SH "AUTHORS"
|
||||
John MacFarlane
|
@ -1,275 +0,0 @@
|
||||
.TH cmark 3 "December 05, 2014" "LOCAL" "Library Functions Manual"
|
||||
.SH NAME
|
||||
|
||||
.B cmark
|
||||
\- CommonMark parsing, manipulating, and rendering
|
||||
|
||||
.SH SIMPLE INTERFACE
|
||||
|
||||
.nf
|
||||
\f[C]
|
||||
.RS 0n
|
||||
#define CMARK_VERSION "0.1"
|
||||
.RE
|
||||
\f[]
|
||||
.fi
|
||||
|
||||
.PP
|
||||
Current version of library.
|
||||
|
||||
\fIchar *\fR \fBcmark_markdown_to_html\fR(\fIconst char *text\fR, \fIint len\fR)
|
||||
|
||||
.PP
|
||||
Convert \fItext\fR (assumed to be a UTF-8 encoded string with length
|
||||
\fIlen\fR) from CommonMark Markdown to HTML, returning a null-terminated,
|
||||
UTF-8-encoded string.
|
||||
|
||||
.SH NODE STRUCTURE
|
||||
|
||||
.nf
|
||||
\f[C]
|
||||
.RS 0n
|
||||
typedef enum {
|
||||
/* Block */
|
||||
CMARK_NODE_DOCUMENT,
|
||||
CMARK_NODE_BLOCK_QUOTE,
|
||||
CMARK_NODE_LIST,
|
||||
CMARK_NODE_LIST_ITEM,
|
||||
CMARK_NODE_CODE_BLOCK,
|
||||
CMARK_NODE_HTML,
|
||||
CMARK_NODE_PARAGRAPH,
|
||||
CMARK_NODE_HEADER,
|
||||
CMARK_NODE_HRULE,
|
||||
CMARK_NODE_REFERENCE_DEF,
|
||||
|
||||
CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
|
||||
CMARK_NODE_LAST_BLOCK = CMARK_NODE_REFERENCE_DEF,
|
||||
|
||||
/* Inline */
|
||||
CMARK_NODE_TEXT,
|
||||
CMARK_NODE_SOFTBREAK,
|
||||
CMARK_NODE_LINEBREAK,
|
||||
CMARK_NODE_INLINE_CODE,
|
||||
CMARK_NODE_INLINE_HTML,
|
||||
CMARK_NODE_EMPH,
|
||||
CMARK_NODE_STRONG,
|
||||
CMARK_NODE_LINK,
|
||||
CMARK_NODE_IMAGE,
|
||||
|
||||
CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT,
|
||||
CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE,
|
||||
} cmark_node_type;
|
||||
.RE
|
||||
\f[]
|
||||
.fi
|
||||
|
||||
.PP
|
||||
|
||||
.nf
|
||||
\f[C]
|
||||
.RS 0n
|
||||
typedef enum {
|
||||
CMARK_NO_LIST,
|
||||
CMARK_BULLET_LIST,
|
||||
CMARK_ORDERED_LIST
|
||||
} cmark_list_type;
|
||||
.RE
|
||||
\f[]
|
||||
.fi
|
||||
|
||||
.PP
|
||||
|
||||
.nf
|
||||
\f[C]
|
||||
.RS 0n
|
||||
typedef enum {
|
||||
CMARK_PERIOD_DELIM,
|
||||
CMARK_PAREN_DELIM
|
||||
} cmark_delim_type;
|
||||
.RE
|
||||
\f[]
|
||||
.fi
|
||||
|
||||
.PP
|
||||
|
||||
|
||||
.SH CREATING AND DESTROYING NODES
|
||||
|
||||
\fIcmark_node*\fR \fBcmark_node_new\fR(\fIcmark_node_type type\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIvoid\fR \fBcmark_node_free\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIcmark_node*\fR \fBcmark_node_next\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
.SH TREE TRAVERSAL
|
||||
|
||||
\fIcmark_node*\fR \fBcmark_node_previous\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIcmark_node*\fR \fBcmark_node_parent\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIcmark_node*\fR \fBcmark_node_first_child\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIcmark_node*\fR \fBcmark_node_last_child\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
|
||||
.SH ACCESSORS
|
||||
|
||||
\fIcmark_node_type\fR \fBcmark_node_get_type\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIconst char*\fR \fBcmark_node_get_string_content\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_set_string_content\fR(\fIcmark_node *node\fR, \fIconst char *content\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_get_header_level\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_set_header_level\fR(\fIcmark_node *node\fR, \fIint level\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIcmark_list_type\fR \fBcmark_node_get_list_type\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_set_list_type\fR(\fIcmark_node *node\fR, \fIcmark_list_type type\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_get_list_start\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_set_list_start\fR(\fIcmark_node *node\fR, \fIint start\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_get_list_tight\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_set_list_tight\fR(\fIcmark_node *node\fR, \fIint tight\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIconst char*\fR \fBcmark_node_get_fence_info\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_set_fence_info\fR(\fIcmark_node *node\fR, \fIconst char *info\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIconst char*\fR \fBcmark_node_get_url\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_set_url\fR(\fIcmark_node *node\fR, \fIconst char *url\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIconst char*\fR \fBcmark_node_get_title\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_set_title\fR(\fIcmark_node *node\fR, \fIconst char *title\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_get_start_line\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_get_start_column\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_get_end_line\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
|
||||
.SH TREE MANIPULATION
|
||||
|
||||
\fIvoid\fR \fBcmark_node_unlink\fR(\fIcmark_node *node\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_insert_before\fR(\fIcmark_node *node\fR, \fIcmark_node *sibling\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_insert_after\fR(\fIcmark_node *node\fR, \fIcmark_node *sibling\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_prepend_child\fR(\fIcmark_node *node\fR, \fIcmark_node *child\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIint\fR \fBcmark_node_append_child\fR(\fIcmark_node *node\fR, \fIcmark_node *child\fR)
|
||||
|
||||
.PP
|
||||
|
||||
|
||||
.SH PARSING
|
||||
|
||||
\fIcmark_parser *\fR \fBcmark_parser_new\fR(\fI\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIvoid\fR \fBcmark_parser_free\fR(\fIcmark_parser *parser\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIcmark_node *\fR \fBcmark_parser_finish\fR(\fIcmark_parser *parser\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIvoid\fR \fBcmark_parser_feed\fR(\fIcmark_parser *parser\fR, \fIconst char *buffer\fR, \fIsize_t len\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIcmark_node *\fR \fBcmark_parse_document\fR(\fIconst char *buffer\fR, \fIsize_t len\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIcmark_node *\fR \fBcmark_parse_file\fR(\fIFILE *f\fR)
|
||||
|
||||
.PP
|
||||
|
||||
|
||||
.SH RENDERING
|
||||
|
||||
\fIchar *\fR \fBcmark_render_ast\fR(\fIcmark_node *root\fR)
|
||||
|
||||
.PP
|
||||
|
||||
\fIchar *\fR \fBcmark_render_html\fR(\fIcmark_node *root\fR)
|
||||
|
||||
.PP
|
||||
|
||||
.SH AUTHORS
|
||||
|
||||
John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
|
||||
|
@ -1,22 +0,0 @@
|
||||
# Reads case-folding data lines of the form "CODE; TYPE; SUBST; ..." from
# STDIN and prints a C switch statement that pushes, for every code point
# with a C or F entry, its replacement code points.
binmode STDOUT;
print(" switch (c) {\n");
my $lastchar = "";
while (<STDIN>) {
  if (/^[A-F0-9]/ and / [CF]; /) {
    my ($char, $type, $subst) = m/([A-F0-9]+); ([CF]); ([^;]+)/;
    # Skip a code point already emitted: a second "case" with the same
    # value would not compile.  ("break" is not a Perl loop control
    # keyword; "next" is what moves on to the following input line.)
    if ($char eq $lastchar) {
      next;
    }
    my @subst = $subst =~ m/(\w+)/g;
    printf(" case 0x%s:\n", $char);
    foreach (@subst) {
      printf(" bufpush(0x%s);\n", $_);
    }
    printf(" break;\n");
    $lastchar = $char;
  }
}
printf(" default:\n");
printf(" bufpush(c);\n");
print(" }\n");
|
||||
|
@ -1 +0,0 @@
|
||||
@nmake.exe /nologo /f Makefile.nmake %*
|
File diff suppressed because it is too large
Load Diff
@ -1,17 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
var fs = require('fs');
|
||||
var util = require('util');
|
||||
|
||||
// Read spec.txt, collect every example (markdown and html, delimited by
// lines containing a single ".") and print the list to stdout.  The
// example count goes to stderr so it does not mix with the data.
fs.readFile('spec.txt', 'utf8', function(err, data) {
    if (err) {
        // stdout carries the example dump, so report the failure on
        // stderr and through the exit status instead
        process.exitCode = 1;
        return console.error(err);
    }
    var examples = [];
    // replace() is used only to iterate over matches; its result is unused
    data.replace(/^\.\n([\s\S]*?)^\.\n([\s\S]*?)^\.$/gm,
        function(_,x,y){
            examples.push({markdown: x, html: y});
        });
    console.log(util.inspect(examples, { depth: null }));
    console.warn(examples.length + ' examples');
});
|
@ -1,36 +0,0 @@
|
||||
#!/usr/bin/env perl
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
my $stage = 0;
|
||||
my $example = 0;
|
||||
my @match;
|
||||
my $section = "";
|
||||
|
||||
while (<STDIN>) {
|
||||
if (/^\.$/) {
|
||||
if ($stage == 0) {
|
||||
$example++;
|
||||
print "\n<div class=\"example\" id=\"example-$example\" data-section=\"$section\">\n";
|
||||
print "<div class=\"examplenum\"><a href=\"#example-$example\">Example $example</a> <a class=\"dingus\" title=\"open in interactive dingus\">(interact)</a></div>\n\n";
|
||||
print "````````````````````````````````````````````````````````` markdown\n";
|
||||
} elsif ($stage == 1) {
|
||||
print "`````````````````````````````````````````````````````````\n\n";
|
||||
print "````````````````````````````````````````````````````````` html\n";
|
||||
} elsif ($stage == 2) {
|
||||
print "`````````````````````````````````````````````````````````\n\n";
|
||||
print "</div>\n\n";
|
||||
} else {
|
||||
die "Encountered unknown stage $stage";
|
||||
}
|
||||
$stage = ($stage + 1) % 3;
|
||||
} else {
|
||||
if ($stage == 0 && (@match = ($_ =~ /^#{1,6} *(.*)/))) {
|
||||
$section = $match[0];
|
||||
}
|
||||
if ($stage != 0) {
|
||||
$_ =~ s/ /␣/g;
|
||||
}
|
||||
print $_;
|
||||
}
|
||||
}
|
@ -1,37 +0,0 @@
|
||||
#!/usr/bin/env runhaskell
|
||||
|
||||
import Text.Pandoc.JSON
|
||||
import Text.Pandoc.Walk
|
||||
|
||||
-- Pandoc JSON filter entry point: rewrite anchors first, then example divs.
main = toJSONFilter transform
  where
    transform :: Pandoc -> Pandoc
    transform doc = walk exampleDivs (walk anchors doc)
|
||||
|
||||
-- Rewrite an "example" Div (an "examplenum" Div followed by a markdown
-- code block and an html code block) into raw LaTeX minipages that set the
-- two code blocks side by side.  Code is hard-wrapped by inserting a
-- newline after every 49 characters.  Any other block is returned as is.
exampleDivs :: Block -> Block
exampleDivs (Div (ident, ["example"], kvs)
              [ d@(Div (_,["examplenum"],_) _)
              , c1@(CodeBlock (_,["markdown"],_) _)
              , c2@(CodeBlock (_,["html"],_) _)
              ]) =
    Div (ident, ["example"], kvs)
      [ latex "\\begin{minipage}[t]{\\textwidth}\n{\\scriptsize "
      , d
      , latex "\\vspace{-1em}}"
      , latex "\\begin{minipage}[t]{0.49\\textwidth}\n\\definecolor{shadecolor}{gray}{0.85}\n"
      , wrapCode c1
      , latex "\\end{minipage}\n\\hfill\n\\begin{minipage}[t]{0.49\\textwidth}\n\\definecolor{shadecolor}{gray}{0.95}\n"
      , wrapCode c2
      , latex "\\end{minipage}\n\\end{minipage}"
      ]
  where
    latex = RawBlock (Format "latex")
    wrapCode (CodeBlock attrs code) = CodeBlock attrs (breakEvery code)
    -- a remainder is only non-empty when the code is longer than 49
    breakEvery code =
      case splitAt 49 code of
        (chunk, [])   -> chunk
        (chunk, rest) -> chunk ++ ('\n' : breakEvery rest)
exampleDivs x = x
|
||||
|
||||
-- Turn a raw HTML inline starting with <a id="..." into a LaTeX
-- \hyperdef/\label pair named after the id (the text up to the next
-- double quote).  Any other inline is returned unchanged.
anchors :: Inline -> Inline
anchors (RawInline (Format "html") ('<':'a':' ':'i':'d':'=':'"':rest)) =
    RawInline (Format "latex") hyperdef
  where
    label    = takeWhile (/= '"') rest
    hyperdef = concat ["\\hyperdef{}{", label, "}{\\label{", label, "}}"]
anchors other = other
|
@ -1,144 +0,0 @@
|
||||
# Build script for the static "cmark" library (libcmark).
#
# This file is meant to be included from a parent project: it relies on
# PROJECT_NAME being set by the caller (used for the export header name).
cmake_minimum_required(VERSION 2.8)

set(LIBRARY "libcmark")

set(HEADERS
  cmark.h
  parser.h
  buffer.h
  node.h
  chunk.h
  references.h
  debug.h
  bench.h
  utf8.h
  scanners.h
  inlines.h
  html/html_unescape.h
  html/houdini.h
  )

set(LIBRARY_SOURCES
  cmark.c
  node.c
  blocks.c
  inlines.c
  print.c
  scanners.c
  scanners.re
  utf8.c
  buffer.c
  references.c
  html/html.c
  html/html_unescape.gperf
  html/houdini_href_e.c
  html/houdini_html_e.c
  html/houdini_html_u.c
  ${HEADERS}
  )

#set(PROGRAM "cmark")
#set(PROGRAM_SOURCES
#  ${LIBRARY_SOURCES}
#  main.c
#  )

include_directories(. html ${CMAKE_CURRENT_BINARY_DIR})
include_directories(../../../i)

# scanners.c is generated from scanners.re by re2c; the output is written
# back into the source directory.
set(RE2C re2c)
if (MSVC)
  # Use native (backslash) paths on the re2c command line under MSVC.
  file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR} DOS_CURRENT_SOURCE_DIR)
  add_custom_command( OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/scanners.c
                      DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/scanners.re
                      COMMAND ${RE2C} --case-insensitive -b -i
                          --no-generation-date
                          -o ${DOS_CURRENT_SOURCE_DIR}\\scanners.c
                          ${DOS_CURRENT_SOURCE_DIR}\\scanners.re )
else(MSVC)
  add_custom_command( OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/scanners.c
                      DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/scanners.re
                      COMMAND ${RE2C} --case-insensitive -b -i
                          --no-generation-date
                          -o ${CMAKE_CURRENT_SOURCE_DIR}/scanners.c
                          ${CMAKE_CURRENT_SOURCE_DIR}/scanners.re )
endif(MSVC)

include (GenerateExportHeader)

#add_executable(${PROGRAM} ${PROGRAM_SOURCES})
#add_compiler_export_flags()
#
## Disable the PUBLIC declarations when compiling the executable:
#set_target_properties(${PROGRAM} PROPERTIES
#  COMPILE_FLAGS -DCMARK_STATIC_DEFINE)

# Check integrity of node structure when compiled as debug:
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES")

# Hide symbols by default. This is a C library, so the fallback branch must
# look at the C compiler; the CXX compiler id is empty when C++ is not
# enabled, which silently skipped the flag for Clang.
if (${CMAKE_MAJOR_VERSION} GREATER 1 AND ${CMAKE_MINOR_VERSION} GREATER 8)
  set(CMAKE_C_VISIBILITY_PRESET hidden)
  set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
endif ()

# SHARED add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES})
add_library(${LIBRARY} STATIC ${LIBRARY_SOURCES})
set_target_properties(${LIBRARY}
  PROPERTIES OUTPUT_NAME "cmark")

set_property(TARGET ${LIBRARY}
  APPEND PROPERTY MACOSX_RPATH true)

generate_export_header(${LIBRARY}
    BASE_NAME ${PROJECT_NAME})

#if (MSVC)
#  set_property(TARGET ${PROGRAM}
#    APPEND PROPERTY LINK_FLAGS /INCREMENTAL:NO)
#endif(MSVC)

#install(TARGETS ${PROGRAM} # SHARED ${LIBRARY}
#  RUNTIME DESTINATION bin
#  LIBRARY DESTINATION lib
#  )

install(FILES cmark.h ${CMAKE_CURRENT_BINARY_DIR}/cmark_export.h
  DESTINATION include
  )

# Feature tests; the results are substituted into config.h below.
include(CheckIncludeFile)
include(CheckCSourceCompiles)
CHECK_INCLUDE_FILE(stdbool.h HAVE_STDBOOL_H)
CHECK_C_SOURCE_COMPILES(
  "int main() { __builtin_expect(0,0); return 0; }"
  HAVE___BUILTIN_EXPECT)
CHECK_C_SOURCE_COMPILES("
  int f(void) __attribute__ (());
  int main() { return 0; }
" HAVE___ATTRIBUTE__)

CONFIGURE_FILE(
  ${CMAKE_CURRENT_SOURCE_DIR}/config.h.in
  ${CMAKE_CURRENT_BINARY_DIR}/config.h)

# Always compile with warnings
if(MSVC)
  # Force to always compile with W4. The test inspects the same variable
  # that is rewritten (the C flags), so an existing /Wn is replaced rather
  # than a second /W4 being appended.
  if(CMAKE_C_FLAGS MATCHES "/W[0-4]")
    string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
  else()
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4")
  endif()
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4127 /wd4244 /wd4267 /wd4706 /wd4800 /D_CRT_SECURE_NO_WARNINGS")
elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -std=c99 -pedantic")
endif()

# Compile as C++ under MSVC
if(MSVC)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP")
endif()

# Setting the TIMER environment variable at configure time enables the
# timing macros in bench.h.
if($ENV{TIMER})
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DTIMER=1")
endif($ENV{TIMER})
|
@ -1,27 +0,0 @@
|
||||
#ifndef CMARK_BENCH_H
#define CMARK_BENCH_H

#include <stdio.h>
#include <time.h>

/*
 * Ad-hoc timing helpers, active only when TIMER is defined; otherwise both
 * macros expand to nothing.
 *
 *   start_timer();      records clock() (in seconds) as the current start,
 *                       remembering the previous start value.
 *   end_timer("label")  prints the elapsed time since the matching
 *                       start_timer() to stderr and restores the previous
 *                       start value.
 *
 * The elapsed value is (seconds * 1000000), i.e. microseconds, so the
 * message is labelled "us" (it previously said "ns").
 *
 * NOTE(review): the three variables below are defined, not just declared,
 * in this header; confirm that is acceptable for every toolchain in use
 * when several translation units include it with TIMER set.
 */
#ifdef TIMER
float _cmark_start_time;
float _cmark_end_time;
float _cmark_save_time;

#define start_timer() \
	_cmark_save_time = _cmark_start_time; \
	_cmark_start_time = (float)clock()/CLOCKS_PER_SEC

#define end_timer(M) \
	_cmark_end_time = (float)clock()/CLOCKS_PER_SEC; \
	fprintf(stderr, "[TIME] (%s:%d) %4.f us " M "\n", __FILE__, \
		__LINE__, (_cmark_end_time - _cmark_start_time) * 1000000); \
	_cmark_start_time = _cmark_save_time;

#else
#define start_timer()
#define end_timer(M)
#endif

#endif
|
@ -1,897 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "parser.h"
|
||||
#include "cmark.h"
|
||||
#include "node.h"
|
||||
#include "references.h"
|
||||
#include "utf8.h"
|
||||
#include "scanners.h"
|
||||
#include "inlines.h"
|
||||
#include "html/houdini.h"
|
||||
#include "buffer.h"
|
||||
#include "debug.h"
|
||||
|
||||
// Number of leading spaces that turns a line into indented code.
#define CODE_INDENT 4

// Byte at index n of the chunk pointed to by i (no bounds check).
#define peek_at(i, n) ((i)->data[(n)])
|
||||
|
||||
static void
|
||||
S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
|
||||
bool eof);
|
||||
|
||||
static void
|
||||
S_process_line(cmark_parser *parser, const unsigned char *buffer,
|
||||
size_t bytes);
|
||||
|
||||
// Allocate a zeroed node of kind `tag`, marked open, whose start and end
// line are both `start_line`, with an initialised string_content buffer.
// Returns NULL when the allocation fails.
static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column)
{
    cmark_node *node = (cmark_node *)calloc(1, sizeof(cmark_node));

    if (node == NULL) {
        return NULL;
    }

    node->type = tag;
    node->open = true;
    node->start_line = start_line;
    node->end_line = start_line;
    node->start_column = start_column;
    strbuf_init(&node->string_content, 32);

    return node;
}
|
||||
|
||||
// Create a root document cmark_node.
|
||||
static cmark_node* make_document()
|
||||
{
|
||||
cmark_node *e = make_block(NODE_DOCUMENT, 1, 1);
|
||||
return e;
|
||||
}
|
||||
|
||||
cmark_parser *cmark_parser_new()
|
||||
{
|
||||
cmark_parser *parser = (cmark_parser*)malloc(sizeof(cmark_parser));
|
||||
cmark_node *document = make_document();
|
||||
strbuf *line = (strbuf*)malloc(sizeof(strbuf));
|
||||
strbuf *buf = (strbuf*)malloc(sizeof(strbuf));
|
||||
cmark_strbuf_init(line, 256);
|
||||
cmark_strbuf_init(buf, 0);
|
||||
|
||||
parser->refmap = cmark_reference_map_new();
|
||||
parser->root = document;
|
||||
parser->current = document;
|
||||
parser->line_number = 0;
|
||||
parser->curline = line;
|
||||
parser->linebuf = buf;
|
||||
|
||||
return parser;
|
||||
}
|
||||
|
||||
// Release the parser, its two scratch buffers and its reference map.
// The document tree (parser->root) is not touched here.
void cmark_parser_free(cmark_parser *parser)
{
    strbuf *current_line = parser->curline;
    strbuf *pending = parser->linebuf;

    cmark_strbuf_free(current_line);
    free(current_line);

    cmark_strbuf_free(pending);
    free(pending);

    cmark_reference_map_free(parser->refmap);
    free(parser);
}
|
||||
|
||||
static void finalize(cmark_parser *parser, cmark_node* b, int line_number);
|
||||
|
||||
// Returns true if line has only space characters, else false.
|
||||
static bool is_blank(strbuf *s, int offset)
|
||||
{
|
||||
while (offset < s->size) {
|
||||
switch (s->ptr[offset]) {
|
||||
case '\n':
|
||||
return true;
|
||||
case ' ':
|
||||
offset++;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Whether a node of type `parent_type` may hold a child of type
// `child_type`: documents, block quotes and list items accept anything;
// a list accepts only list items; nothing else accepts children.
static inline bool can_contain(cmark_node_type parent_type, cmark_node_type child_type)
{
    if (parent_type == NODE_DOCUMENT) {
        return true;
    }
    if (parent_type == NODE_BLOCK_QUOTE) {
        return true;
    }
    if (parent_type == NODE_LIST_ITEM) {
        return true;
    }
    return parent_type == NODE_LIST && child_type == NODE_LIST_ITEM;
}
|
||||
|
||||
// Whether a block of this type collects raw text lines: paragraphs,
// headers and code blocks do.
static inline bool accepts_lines(cmark_node_type block_type)
{
    if (block_type == NODE_PARAGRAPH) {
        return true;
    }
    if (block_type == NODE_HEADER) {
        return true;
    }
    return block_type == NODE_CODE_BLOCK;
}
|
||||
|
||||
// Append the part of `ch` that starts at `offset` to the node's
// string_content. The node must still be open.
static void add_line(cmark_node* cmark_node, chunk *ch, int offset)
{
    assert(cmark_node->open);

    strbuf_put(&cmark_node->string_content,
               ch->data + offset,
               ch->len - offset);
}
|
||||
|
||||
// Drop trailing blank lines from `ln`: find the last character that is not
// a space, tab, CR or LF, then cut the buffer at the first newline found
// from that position on. A buffer with only whitespace is cleared.
static void remove_trailing_blank_lines(strbuf *ln)
{
    int i = ln->size - 1;

    // Scan backwards over trailing whitespace.
    while (i >= 0) {
        unsigned char c = ln->ptr[i];

        if (!(c == ' ' || c == '\t' || c == '\r' || c == '\n')) {
            break;
        }
        i--;
    }

    if (i < 0) {
        strbuf_clear(ln);
        return;
    }

    i = strbuf_strchr(ln, '\n', i);
    if (i >= 0) {
        strbuf_truncate(ln, i);
    }
}
|
||||
|
||||
// Check to see if a cmark_node ends with a blank line, descending
|
||||
// if needed into lists and sublists.
|
||||
static bool ends_with_blank_line(cmark_node* cmark_node)
|
||||
{
|
||||
if (cmark_node->last_line_blank) {
|
||||
return true;
|
||||
}
|
||||
if ((cmark_node->type == NODE_LIST || cmark_node->type == NODE_LIST_ITEM) && cmark_node->last_child) {
|
||||
return ends_with_blank_line(cmark_node->last_child);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Break out of all containing lists
|
||||
static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr, int line_number)
|
||||
{
|
||||
cmark_node *container = *bptr;
|
||||
cmark_node *b = parser->root;
|
||||
// find first containing NODE_LIST:
|
||||
while (b && b->type != NODE_LIST) {
|
||||
b = b->last_child;
|
||||
}
|
||||
if (b) {
|
||||
while (container && container != b) {
|
||||
finalize(parser, container, line_number);
|
||||
container = container->parent;
|
||||
}
|
||||
finalize(parser, b, line_number);
|
||||
*bptr = b->parent;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Close block `b` (no-op if it is already closed), set its end line, and do
// the per-type post-processing:
//  - paragraph: strip leading reference definitions into the parser's
//    refmap; a paragraph left blank becomes NODE_REFERENCE_DEF;
//  - indented code: trim trailing blank lines and end with one newline;
//  - fenced code: the first line of the content becomes the info string;
//  - list: work out whether the list is tight or loose.
static void finalize(cmark_parser *parser, cmark_node* b, int line_number)
{
    int info_len;
    int consumed;
    cmark_node* item;
    cmark_node* subitem;

    if (!b->open) {
        return; // already closed, nothing to do
    }
    b->open = false;

    b->end_line = (line_number > b->start_line) ? line_number - 1 : line_number;

    switch (b->type) {
    case NODE_PARAGRAPH:
        // Consume reference definitions one at a time from the front.
        for (;;) {
            if (strbuf_at(&b->string_content, 0) != '[') {
                break;
            }
            consumed = cmark_parse_reference_inline(&b->string_content, parser->refmap);
            if (!consumed) {
                break;
            }
            strbuf_drop(&b->string_content, consumed);
        }
        if (is_blank(&b->string_content, 0)) {
            b->type = NODE_REFERENCE_DEF;
        }
        break;

    case NODE_CODE_BLOCK:
        if (b->as.code.fenced) {
            // first line of contents becomes info
            info_len = strbuf_strchr(&b->string_content, '\n', 0);

            houdini_unescape_html_f(&b->as.code.info,
                                    b->string_content.ptr,
                                    info_len);

            strbuf_drop(&b->string_content, info_len + 1);

            strbuf_trim(&b->as.code.info);
            strbuf_unescape(&b->as.code.info);
        } else {
            // indented code
            remove_trailing_blank_lines(&b->string_content);
            strbuf_putc(&b->string_content, '\n');
        }
        break;

    case NODE_LIST:
        // determine tight/loose status; tight unless shown otherwise
        b->as.list.tight = true;

        for (item = b->first_child; item && b->as.list.tight; item = item->next) {
            // a non-final list item ending with a blank line makes it loose
            if (item->last_line_blank && item->next) {
                b->as.list.tight = false;
                break;
            }

            // look inside the item for blank lines between its children
            for (subitem = item->first_child; subitem; subitem = subitem->next) {
                if (ends_with_blank_line(subitem) &&
                    (item->next || subitem->next)) {
                    b->as.list.tight = false;
                    break;
                }
            }
        }
        break;

    default:
        break;
    }
}
|
||||
|
||||
// Add a cmark_node as child of another. Return pointer to child.
|
||||
static cmark_node* add_child(cmark_parser *parser, cmark_node* parent,
|
||||
cmark_node_type block_type, int start_line, int start_column)
|
||||
{
|
||||
assert(parent);
|
||||
|
||||
// if 'parent' isn't the kind of cmark_node that can accept this child,
|
||||
// then back up til we hit a cmark_node that can.
|
||||
while (!can_contain(parent->type, block_type)) {
|
||||
finalize(parser, parent, start_line);
|
||||
parent = parent->parent;
|
||||
}
|
||||
|
||||
cmark_node* child = make_block(block_type, start_line, start_column);
|
||||
child->parent = parent;
|
||||
|
||||
if (parent->last_child) {
|
||||
parent->last_child->next = child;
|
||||
child->prev = parent->last_child;
|
||||
} else {
|
||||
parent->first_child = child;
|
||||
child->prev = NULL;
|
||||
}
|
||||
parent->last_child = child;
|
||||
return child;
|
||||
}
|
||||
|
||||
|
||||
typedef struct BlockStack {
|
||||
struct BlockStack *previous;
|
||||
cmark_node *next_sibling;
|
||||
} block_stack;
|
||||
|
||||
// Walk through cmark_node and all children, recursively, parsing
|
||||
// string content into inline content where appropriate.
|
||||
static void process_inlines(cmark_node* cur, cmark_reference_map *refmap)
|
||||
{
|
||||
block_stack* stack = NULL;
|
||||
block_stack* newstack = NULL;
|
||||
|
||||
while (cur != NULL) {
|
||||
switch (cur->type) {
|
||||
case NODE_PARAGRAPH:
|
||||
case NODE_HEADER:
|
||||
cmark_parse_inlines(cur, refmap);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (cur->first_child) {
|
||||
newstack = (block_stack*)malloc(sizeof(block_stack));
|
||||
if (newstack == NULL) break;
|
||||
newstack->previous = stack;
|
||||
stack = newstack;
|
||||
stack->next_sibling = cur->next;
|
||||
cur = cur->first_child;
|
||||
} else {
|
||||
cur = cur->next;
|
||||
}
|
||||
|
||||
while (cur == NULL && stack != NULL) {
|
||||
cur = stack->next_sibling;
|
||||
newstack = stack->previous;
|
||||
free(stack);
|
||||
stack = newstack;
|
||||
}
|
||||
}
|
||||
while (stack != NULL) {
|
||||
newstack = stack->previous;
|
||||
free(stack);
|
||||
stack = newstack;
|
||||
}
|
||||
}
|
||||
|
||||
// Attempts to parse a list item marker (bullet or enumerated).
|
||||
// On success, returns length of the marker, and populates
|
||||
// data with the details. On failure, returns 0.
|
||||
static int parse_list_marker(chunk *input, int pos, cmark_list **dataptr)
|
||||
{
|
||||
unsigned char c;
|
||||
int startpos;
|
||||
cmark_list *data;
|
||||
|
||||
startpos = pos;
|
||||
c = peek_at(input, pos);
|
||||
|
||||
if ((c == '*' || c == '-' || c == '+') && !scan_hrule(input, pos)) {
|
||||
pos++;
|
||||
if (!isspace(peek_at(input, pos))) {
|
||||
return 0;
|
||||
}
|
||||
data = (cmark_list *)calloc(1, sizeof(*data));
|
||||
if(data == NULL) {
|
||||
return 0;
|
||||
} else {
|
||||
data->marker_offset = 0; // will be adjusted later
|
||||
data->list_type = CMARK_BULLET_LIST;
|
||||
data->bullet_char = c;
|
||||
data->start = 1;
|
||||
data->delimiter = CMARK_PERIOD_DELIM;
|
||||
data->tight = false;
|
||||
}
|
||||
} else if (isdigit(c)) {
|
||||
int start = 0;
|
||||
|
||||
do {
|
||||
start = (10 * start) + (peek_at(input, pos) - '0');
|
||||
pos++;
|
||||
} while (isdigit(peek_at(input, pos)));
|
||||
|
||||
c = peek_at(input, pos);
|
||||
if (c == '.' || c == ')') {
|
||||
pos++;
|
||||
if (!isspace(peek_at(input, pos))) {
|
||||
return 0;
|
||||
}
|
||||
data = (cmark_list *)calloc(1, sizeof(*data));
|
||||
if(data == NULL) {
|
||||
return 0;
|
||||
} else {
|
||||
data->marker_offset = 0; // will be adjusted later
|
||||
data->list_type = CMARK_ORDERED_LIST;
|
||||
data->bullet_char = 0;
|
||||
data->start = start;
|
||||
data->delimiter = (c == '.' ? CMARK_PERIOD_DELIM : CMARK_PAREN_DELIM);
|
||||
data->tight = false;
|
||||
}
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
*dataptr = data;
|
||||
return (pos - startpos);
|
||||
}
|
||||
|
||||
// Return 1 if list item belongs in list, else 0.
|
||||
static int lists_match(cmark_list *list_data, cmark_list *item_data)
|
||||
{
|
||||
return (list_data->list_type == item_data->list_type &&
|
||||
list_data->delimiter == item_data->delimiter &&
|
||||
// list_data->marker_offset == item_data.marker_offset &&
|
||||
list_data->bullet_char == item_data->bullet_char);
|
||||
}
|
||||
|
||||
// Close every block that is still open, from the current block up to and
// including the root, then parse inline content across the whole tree.
// Returns the root node.
static cmark_node *finalize_document(cmark_parser *parser)
{
    cmark_node *open_block;

    for (open_block = parser->current;
         open_block != parser->root;
         open_block = parser->current) {
        finalize(parser, open_block, parser->line_number);
        parser->current = open_block->parent;
    }

    finalize(parser, parser->root, parser->line_number);
    process_inlines(parser->root, parser->refmap);

    return parser->root;
}
|
||||
|
||||
cmark_node *cmark_parse_file(FILE *f)
|
||||
{
|
||||
unsigned char buffer[4096];
|
||||
cmark_parser *parser = cmark_parser_new();
|
||||
size_t bytes;
|
||||
cmark_node *document;
|
||||
|
||||
while ((bytes = fread(buffer, 1, sizeof(buffer), f)) > 0) {
|
||||
bool eof = bytes < sizeof(buffer);
|
||||
S_parser_feed(parser, buffer, bytes, eof);
|
||||
if (eof) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
document = cmark_parser_finish(parser);
|
||||
cmark_parser_free(parser);
|
||||
return document;
|
||||
}
|
||||
|
||||
cmark_node *cmark_parse_document(const char *buffer, size_t len)
|
||||
{
|
||||
cmark_parser *parser = cmark_parser_new();
|
||||
cmark_node *document;
|
||||
|
||||
S_parser_feed(parser, (const unsigned char *)buffer, len, true);
|
||||
|
||||
document = cmark_parser_finish(parser);
|
||||
cmark_parser_free(parser);
|
||||
return document;
|
||||
}
|
||||
|
||||
void
|
||||
cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len)
|
||||
{
|
||||
S_parser_feed(parser, (const unsigned char *)buffer, len, false);
|
||||
}
|
||||
|
||||
static void
|
||||
S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
|
||||
bool eof)
|
||||
{
|
||||
const unsigned char *end = buffer + len;
|
||||
|
||||
while (buffer < end) {
|
||||
const unsigned char *eol
|
||||
= (const unsigned char *)memchr(buffer, '\n',
|
||||
end - buffer);
|
||||
size_t line_len;
|
||||
|
||||
if (eol) {
|
||||
line_len = eol + 1 - buffer;
|
||||
}
|
||||
else if (eof) {
|
||||
line_len = end - buffer;
|
||||
}
|
||||
else {
|
||||
strbuf_put(parser->linebuf, buffer, end - buffer);
|
||||
break;
|
||||
}
|
||||
|
||||
if (parser->linebuf->size > 0) {
|
||||
strbuf_put(parser->linebuf, buffer, line_len);
|
||||
S_process_line(parser, parser->linebuf->ptr,
|
||||
parser->linebuf->size);
|
||||
strbuf_clear(parser->linebuf);
|
||||
}
|
||||
else {
|
||||
S_process_line(parser, buffer, line_len);
|
||||
}
|
||||
|
||||
buffer += line_len;
|
||||
}
|
||||
}
|
||||
|
||||
// Strip a closing run of '#' characters from a header line. After trimming
// trailing whitespace, the run is removed only if it is non-empty and is
// preceded by a space; the result is then right-trimmed again.
static void chop_trailing_hashtags(chunk *ch)
{
    int last;
    int n;

    chunk_rtrim(ch);
    last = ch->len - 1;

    // step back over the trailing #s
    for (n = last; n >= 0 && peek_at(ch, n) == '#'; n--) {
    }

    // nothing to remove unless some #s were skipped and a space precedes them
    if (n == last || n < 0 || peek_at(ch, n) != ' ') {
        return;
    }

    ch->len = n;
    chunk_rtrim(ch);
}
|
||||
|
||||
static void
|
||||
S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
|
||||
{
|
||||
cmark_node* last_matched_container;
|
||||
int offset = 0;
|
||||
int matched = 0;
|
||||
int lev = 0;
|
||||
int i;
|
||||
cmark_list *data = NULL;
|
||||
bool all_matched = true;
|
||||
cmark_node* container;
|
||||
cmark_node* cur = parser->current;
|
||||
bool blank = false;
|
||||
int first_nonspace;
|
||||
int indent;
|
||||
chunk input;
|
||||
|
||||
utf8proc_detab(parser->curline, buffer, bytes);
|
||||
|
||||
// Add a newline to the end if not present:
|
||||
// TODO this breaks abstraction:
|
||||
if (parser->curline->ptr[parser->curline->size - 1] != '\n') {
|
||||
strbuf_putc(parser->curline, '\n');
|
||||
}
|
||||
input.data = parser->curline->ptr;
|
||||
input.len = parser->curline->size;
|
||||
|
||||
// container starts at the document root.
|
||||
container = parser->root;
|
||||
|
||||
parser->line_number++;
|
||||
|
||||
// for each containing cmark_node, try to parse the associated line start.
|
||||
// bail out on failure: container will point to the last matching cmark_node.
|
||||
|
||||
while (container->last_child && container->last_child->open) {
|
||||
container = container->last_child;
|
||||
|
||||
first_nonspace = offset;
|
||||
while (peek_at(&input, first_nonspace) == ' ') {
|
||||
first_nonspace++;
|
||||
}
|
||||
|
||||
indent = first_nonspace - offset;
|
||||
blank = peek_at(&input, first_nonspace) == '\n';
|
||||
|
||||
if (container->type == NODE_BLOCK_QUOTE) {
|
||||
matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
|
||||
if (matched) {
|
||||
offset = first_nonspace + 1;
|
||||
if (peek_at(&input, offset) == ' ')
|
||||
offset++;
|
||||
} else {
|
||||
all_matched = false;
|
||||
}
|
||||
|
||||
} else if (container->type == NODE_LIST_ITEM) {
|
||||
|
||||
if (indent >= container->as.list.marker_offset +
|
||||
container->as.list.padding) {
|
||||
offset += container->as.list.marker_offset +
|
||||
container->as.list.padding;
|
||||
} else if (blank) {
|
||||
offset = first_nonspace;
|
||||
} else {
|
||||
all_matched = false;
|
||||
}
|
||||
|
||||
} else if (container->type == NODE_CODE_BLOCK) {
|
||||
|
||||
if (!container->as.code.fenced) { // indented
|
||||
if (indent >= CODE_INDENT) {
|
||||
offset += CODE_INDENT;
|
||||
} else if (blank) {
|
||||
offset = first_nonspace;
|
||||
} else {
|
||||
all_matched = false;
|
||||
}
|
||||
} else {
|
||||
// skip optional spaces of fence offset
|
||||
i = container->as.code.fence_offset;
|
||||
while (i > 0 && peek_at(&input, offset) == ' ') {
|
||||
offset++;
|
||||
i--;
|
||||
}
|
||||
}
|
||||
|
||||
} else if (container->type == NODE_HEADER) {
|
||||
|
||||
// a header can never contain more than one line
|
||||
all_matched = false;
|
||||
if (blank) {
|
||||
container->last_line_blank = true;
|
||||
}
|
||||
|
||||
} else if (container->type == NODE_HTML) {
|
||||
|
||||
if (blank) {
|
||||
container->last_line_blank = true;
|
||||
all_matched = false;
|
||||
}
|
||||
|
||||
} else if (container->type == NODE_PARAGRAPH) {
|
||||
|
||||
if (blank) {
|
||||
container->last_line_blank = true;
|
||||
all_matched = false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!all_matched) {
|
||||
container = container->parent; // back up to last matching cmark_node
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
last_matched_container = container;
|
||||
|
||||
// check to see if we've hit 2nd blank line, break out of list:
|
||||
if (blank && container->last_line_blank) {
|
||||
break_out_of_lists(parser, &container, parser->line_number);
|
||||
}
|
||||
|
||||
// unless last matched container is code cmark_node, try new container starts:
|
||||
while (container->type != NODE_CODE_BLOCK &&
|
||||
container->type != NODE_HTML) {
|
||||
|
||||
first_nonspace = offset;
|
||||
while (peek_at(&input, first_nonspace) == ' ')
|
||||
first_nonspace++;
|
||||
|
||||
indent = first_nonspace - offset;
|
||||
blank = peek_at(&input, first_nonspace) == '\n';
|
||||
|
||||
if (indent >= CODE_INDENT) {
|
||||
if (cur->type != NODE_PARAGRAPH && !blank) {
|
||||
offset += CODE_INDENT;
|
||||
container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, offset + 1);
|
||||
container->as.code.fenced = false;
|
||||
container->as.code.fence_char = 0;
|
||||
container->as.code.fence_length = 0;
|
||||
container->as.code.fence_offset = 0;
|
||||
strbuf_init(&container->as.code.info, 0);
|
||||
} else { // indent > 4 in lazy line
|
||||
break;
|
||||
}
|
||||
|
||||
} else if (peek_at(&input, first_nonspace) == '>') {
|
||||
|
||||
offset = first_nonspace + 1;
|
||||
// optional following character
|
||||
if (peek_at(&input, offset) == ' ')
|
||||
offset++;
|
||||
container = add_child(parser, container, NODE_BLOCK_QUOTE, parser->line_number, offset + 1);
|
||||
|
||||
} else if ((matched = scan_atx_header_start(&input, first_nonspace))) {
|
||||
|
||||
offset = first_nonspace + matched;
|
||||
container = add_child(parser, container, NODE_HEADER, parser->line_number, offset + 1);
|
||||
|
||||
int hashpos = chunk_strchr(&input, '#', first_nonspace);
|
||||
int level = 0;
|
||||
|
||||
while (peek_at(&input, hashpos) == '#') {
|
||||
level++;
|
||||
hashpos++;
|
||||
}
|
||||
container->as.header.level = level;
|
||||
container->as.header.setext = false;
|
||||
|
||||
} else if ((matched = scan_open_code_fence(&input, first_nonspace))) {
|
||||
|
||||
container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, first_nonspace + 1);
|
||||
container->as.code.fenced = true;
|
||||
container->as.code.fence_char = peek_at(&input, first_nonspace);
|
||||
container->as.code.fence_length = matched;
|
||||
container->as.code.fence_offset = first_nonspace - offset;
|
||||
strbuf_init(&container->as.code.info, 0);
|
||||
offset = first_nonspace + matched;
|
||||
|
||||
} else if ((matched = scan_html_block_tag(&input, first_nonspace))) {
|
||||
|
||||
container = add_child(parser, container, NODE_HTML, parser->line_number, first_nonspace + 1);
|
||||
// note, we don't adjust offset because the tag is part of the text
|
||||
|
||||
} else if (container->type == NODE_PARAGRAPH &&
|
||||
(lev = scan_setext_header_line(&input, first_nonspace)) &&
|
||||
// check that there is only one line in the paragraph:
|
||||
strbuf_strrchr(&container->string_content, '\n',
|
||||
strbuf_len(&container->string_content) - 2) < 0) {
|
||||
|
||||
container->type = NODE_HEADER;
|
||||
container->as.header.level = lev;
|
||||
container->as.header.setext = true;
|
||||
offset = input.len - 1;
|
||||
|
||||
} else if (!(container->type == NODE_PARAGRAPH && !all_matched) &&
|
||||
(matched = scan_hrule(&input, first_nonspace))) {
|
||||
|
||||
// it's only now that we know the line is not part of a setext header:
|
||||
container = add_child(parser, container, NODE_HRULE, parser->line_number, first_nonspace + 1);
|
||||
finalize(parser, container, parser->line_number);
|
||||
container = container->parent;
|
||||
offset = input.len - 1;
|
||||
|
||||
} else if ((matched = parse_list_marker(&input, first_nonspace, &data))) {
|
||||
|
||||
// compute padding:
|
||||
offset = first_nonspace + matched;
|
||||
i = 0;
|
||||
while (i <= 5 && peek_at(&input, offset + i) == ' ') {
|
||||
i++;
|
||||
}
|
||||
// i = number of spaces after marker, up to 5
|
||||
if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') {
|
||||
data->padding = matched + 1;
|
||||
if (i > 0) {
|
||||
offset += 1;
|
||||
}
|
||||
} else {
|
||||
data->padding = matched + i;
|
||||
offset += i;
|
||||
}
|
||||
|
||||
// check container; if it's a list, see if this list item
|
||||
// can continue the list; otherwise, create a list container.
|
||||
|
||||
data->marker_offset = indent;
|
||||
|
||||
if (container->type != NODE_LIST ||
|
||||
!lists_match(&container->as.list, data)) {
|
||||
container = add_child(parser, container, NODE_LIST, parser->line_number,
|
||||
first_nonspace + 1);
|
||||
|
||||
memcpy(&container->as.list, data, sizeof(*data));
|
||||
}
|
||||
|
||||
// add the list item
|
||||
container = add_child(parser, container, NODE_LIST_ITEM, parser->line_number,
|
||||
first_nonspace + 1);
|
||||
/* TODO: static */
|
||||
memcpy(&container->as.list, data, sizeof(*data));
|
||||
free(data);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
if (accepts_lines(container->type)) {
|
||||
// if it's a line container, it can't contain other containers
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// what remains at offset is a text line. add the text to the
|
||||
// appropriate container.
|
||||
|
||||
first_nonspace = offset;
|
||||
while (peek_at(&input, first_nonspace) == ' ')
|
||||
first_nonspace++;
|
||||
|
||||
indent = first_nonspace - offset;
|
||||
blank = peek_at(&input, first_nonspace) == '\n';
|
||||
|
||||
// cmark_node quote lines are never blank as they start with >
|
||||
// and we don't count blanks in fenced code for purposes of tight/loose
|
||||
// lists or breaking out of lists. we also don't set last_line_blank
|
||||
// on an empty list item.
|
||||
container->last_line_blank = (blank &&
|
||||
container->type != NODE_BLOCK_QUOTE &&
|
||||
container->type != NODE_HEADER &&
|
||||
(container->type != NODE_CODE_BLOCK &&
|
||||
container->as.code.fenced) &&
|
||||
!(container->type == NODE_LIST_ITEM &&
|
||||
container->first_child == NULL &&
|
||||
container->start_line == parser->line_number));
|
||||
|
||||
cmark_node *cont = container;
|
||||
while (cont->parent) {
|
||||
cont->parent->last_line_blank = false;
|
||||
cont = cont->parent;
|
||||
}
|
||||
|
||||
if (cur != last_matched_container &&
|
||||
container == last_matched_container &&
|
||||
!blank &&
|
||||
cur->type == NODE_PARAGRAPH &&
|
||||
strbuf_len(&cur->string_content) > 0) {
|
||||
|
||||
add_line(cur, &input, offset);
|
||||
|
||||
} else { // not a lazy continuation
|
||||
|
||||
// finalize any blocks that were not matched and set cur to container:
|
||||
while (cur != last_matched_container) {
|
||||
finalize(parser, cur, parser->line_number);
|
||||
cur = cur->parent;
|
||||
assert(cur != NULL);
|
||||
}
|
||||
|
||||
if (container->type == NODE_CODE_BLOCK &&
|
||||
!container->as.code.fenced) {
|
||||
|
||||
add_line(container, &input, offset);
|
||||
|
||||
} else if (container->type == NODE_CODE_BLOCK &&
|
||||
container->as.code.fenced) {
|
||||
matched = 0;
|
||||
|
||||
if (indent <= 3 &&
|
||||
peek_at(&input, first_nonspace) == container->as.code.fence_char) {
|
||||
int fence_len = scan_close_code_fence(&input, first_nonspace);
|
||||
if (fence_len > container->as.code.fence_length)
|
||||
matched = 1;
|
||||
}
|
||||
|
||||
if (matched) {
|
||||
// if closing fence, don't add line to container; instead, close it:
|
||||
finalize(parser, container, parser->line_number);
|
||||
container = container->parent; // back up to parent
|
||||
} else {
|
||||
add_line(container, &input, offset);
|
||||
}
|
||||
|
||||
} else if (container->type == NODE_HTML) {
|
||||
|
||||
add_line(container, &input, offset);
|
||||
|
||||
} else if (blank) {
|
||||
|
||||
// ??? do nothing
|
||||
|
||||
} else if (container->type == NODE_HEADER) {
|
||||
|
||||
chop_trailing_hashtags(&input);
|
||||
add_line(container, &input, first_nonspace);
|
||||
finalize(parser, container, parser->line_number);
|
||||
container = container->parent;
|
||||
|
||||
} else if (accepts_lines(container->type)) {
|
||||
|
||||
add_line(container, &input, first_nonspace);
|
||||
|
||||
} else if (container->type != NODE_HRULE &&
|
||||
container->type != NODE_HEADER) {
|
||||
|
||||
// create paragraph container for line
|
||||
container = add_child(parser, container, NODE_PARAGRAPH, parser->line_number, first_nonspace + 1);
|
||||
add_line(container, &input, first_nonspace);
|
||||
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
parser->current = container;
|
||||
}
|
||||
strbuf_clear(parser->curline);
|
||||
|
||||
}
|
||||
|
||||
// Finish parsing: process any partial line still pending in linebuf, close
// all open blocks and parse inlines, release the current-line buffer, and
// return the document root. When CMARK_DEBUG_NODES is set, the tree is
// checked and the process aborts if the check reports a problem.
cmark_node *cmark_parser_finish(cmark_parser *parser)
{
    strbuf *pending = parser->linebuf;

    if (pending->size) {
        S_process_line(parser, pending->ptr, pending->size);
        strbuf_clear(pending);
    }

    finalize_document(parser);
    strbuf_free(parser->curline);

#if CMARK_DEBUG_NODES
    if (cmark_node_check(parser->root, stderr)) {
        abort();
    }
#endif

    return parser->root;
}
|
@ -1,375 +0,0 @@
|
||||
#include <stdarg.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
/* Used as default value for strbuf->ptr so that people can always
 * assume ptr is non-NULL and zero terminated even for new strbufs.
 */
unsigned char cmark_strbuf__initbuf[1];

/* Sentinel stored in strbuf->ptr to mark a buffer whose growth failed. */
unsigned char cmark_strbuf__oom[1];

/* Make sure buffer `b` has room for `d` bytes; returns -1 from the
 * enclosing function when growing fails. The size test now uses the macro
 * argument `b` (it previously referred to a variable named `buf`
 * regardless of what was passed in).
 */
#define ENSURE_SIZE(b, d) \
	if ((d) > (b)->asize && strbuf_grow(b, (d)) < 0)	\
		return -1;

/* Smaller of two values. Arguments are fully parenthesized so that
 * expressions containing lower-precedence operators expand correctly;
 * each argument may still be evaluated more than once.
 */
#ifndef MIN
#define MIN(x,y)  (((x) < (y)) ? (x) : (y))
#endif
|
||||
|
||||
/* Reset `buf` to the empty state (pointing at the shared static initial
 * buffer) and, when `initial_size` is non-zero, grow it to that size.
 */
void cmark_strbuf_init(strbuf *buf, int initial_size)
{
	buf->ptr = cmark_strbuf__initbuf;
	buf->size = 0;
	buf->asize = 0;

	if (initial_size != 0) {
		cmark_strbuf_grow(buf, initial_size);
	}
}
|
||||
|
||||
/* Ensure `buf` can hold at least `target_size` bytes.
 *
 * Returns 0 on success (including when the buffer is already big enough)
 * and -1 on failure. A buffer already marked out-of-memory always fails.
 * When the reallocation fails and `mark_oom` is true, the buffer is marked
 * out-of-memory by pointing it at the OOM sentinel.
 */
int cmark_strbuf_try_grow(strbuf *buf, int target_size, bool mark_oom)
{
	unsigned char *storage = NULL;
	int capacity;

	if (buf->ptr == cmark_strbuf__oom)
		return -1;

	if (buf->asize >= target_size)
		return 0;

	if (buf->asize != 0) {
		/* extend the existing allocation */
		capacity = buf->asize;
		storage = buf->ptr;
	} else {
		/* nothing allocated yet: start fresh at the requested size */
		capacity = target_size;
	}

	/* grow the buffer size by 1.5, until it's big enough
	 * to fit our target size */
	while (capacity < target_size)
		capacity = (capacity << 1) - (capacity >> 1);

	/* round allocation up to multiple of 8 */
	capacity = (capacity + 7) & ~7;

	storage = (unsigned char *)realloc(storage, capacity);
	if (storage == NULL) {
		if (mark_oom)
			buf->ptr = cmark_strbuf__oom;
		return -1;
	}

	buf->ptr = storage;
	buf->asize = capacity;

	/* truncate the existing buffer size if necessary */
	if (buf->size >= buf->asize)
		buf->size = buf->asize - 1;
	buf->ptr[buf->size] = '\0';

	return 0;
}
|
||||
|
||||
/*
 * Grow `buf` to hold at least `target_size` bytes; on allocation failure
 * the buffer is marked out-of-memory. Returns 0 on success, -1 on failure.
 */
int cmark_strbuf_grow(cmark_strbuf *buf, int target_size)
{
	const bool mark_oom = true;

	return cmark_strbuf_try_grow(buf, target_size, mark_oom);
}
|
||||
|
||||
bool cmark_strbuf_oom(const cmark_strbuf *buf)
|
||||
{
|
||||
return (buf->ptr == cmark_strbuf__oom);
|
||||
}
|
||||
|
||||
size_t cmark_strbuf_len(const cmark_strbuf *buf)
|
||||
{
|
||||
return buf->size;
|
||||
}
|
||||
|
||||
/*
 * Release the storage owned by `buf` and reset it to the empty state.
 * A NULL `buf` is ignored.
 */
void cmark_strbuf_free(strbuf *buf)
{
	if (buf != NULL) {
		unsigned char *mem = buf->ptr;

		/* the two static sentinels were never heap-allocated */
		if (!(mem == cmark_strbuf__initbuf || mem == cmark_strbuf__oom))
			free(mem);

		cmark_strbuf_init(buf, 0);
	}
}
|
||||
|
||||
/* Empty `buf` without giving up its allocation. */
void cmark_strbuf_clear(strbuf *buf)
{
	/* only write the terminator when the buffer has a capacity of its own */
	if (buf->asize > 0)
		buf->ptr[0] = '\0';

	buf->size = 0;
}
|
||||
|
||||
/*
 * Replace the contents of `buf` with the first `len` bytes of `data`.
 * A NULL `data` or a non-positive `len` clears the buffer instead.
 * Returns 0 on success, -1 if the buffer could not be grown.
 */
int cmark_strbuf_set(strbuf *buf, const unsigned char *data, int len)
{
	if (data == NULL || len <= 0) {
		cmark_strbuf_clear(buf);
		return 0;
	}

	/* when data already is the buffer's own storage there is nothing to copy */
	if (data != buf->ptr) {
		ENSURE_SIZE(buf, len + 1);
		memmove(buf->ptr, data, len);
	}

	buf->size = len;
	buf->ptr[len] = '\0';

	return 0;
}
|
||||
|
||||
/*
 * Replace the contents of `buf` with the C string `string`.
 * A NULL `string` is passed on with length 0.
 */
int cmark_strbuf_sets(strbuf *buf, const char *string)
{
	int length = 0;

	if (string)
		length = strlen(string);

	return cmark_strbuf_set(buf, (const unsigned char *)string, length);
}
|
||||
|
||||
/*
 * Append the single byte `c` to `buf`, keeping the data NUL-terminated.
 * Returns 0 on success, -1 if the buffer could not be grown.
 */
int cmark_strbuf_putc(strbuf *buf, int c)
{
	/* room for the new byte plus the terminator */
	ENSURE_SIZE(buf, buf->size + 2);

	buf->ptr[buf->size] = c;
	buf->size++;
	buf->ptr[buf->size] = '\0';

	return 0;
}
|
||||
|
||||
/*
 * Append `len` bytes from `data` to `buf`, keeping the data NUL-terminated.
 * A non-positive `len` is a no-op. Returns 0 on success, -1 if the buffer
 * could not be grown.
 */
int cmark_strbuf_put(strbuf *buf, const unsigned char *data, int len)
{
	if (len > 0) {
		/* existing content + new bytes + terminator */
		ENSURE_SIZE(buf, buf->size + len + 1);

		memmove(buf->ptr + buf->size, data, len);
		buf->size += len;
		buf->ptr[buf->size] = '\0';
	}

	return 0;
}
|
||||
|
||||
/*
 * Append the C string `string` to `buf`.
 *
 * A NULL `string` appends nothing and returns 0, mirroring
 * cmark_strbuf_sets(), which also accepts NULL; previously NULL was handed
 * straight to strlen(). Returns 0 on success, -1 if the buffer could not
 * be grown.
 */
int cmark_strbuf_puts(strbuf *buf, const char *string)
{
	if (string == NULL)
		return 0;

	return cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string));
}
|
||||
|
||||
/*
 * Append printf-style formatted output to `buf`.
 *
 * The buffer is first sized from a guess (current size plus twice the
 * length of the format string); if the formatted text does not fit, the
 * buffer is grown to the exact size reported by vsnprintf() and the
 * formatting is repeated.
 *
 * `ap` itself is never consumed here: each formatting attempt works on its
 * own va_copy, because a va_list that has been passed to vsnprintf() has an
 * indeterminate value and must not be reused for the retry.
 *
 * Returns 0 on success. Returns -1 if the buffer could not be grown or if
 * vsnprintf() reports an error; in the latter case the buffer's storage is
 * released and the buffer is marked out-of-memory.
 */
int cmark_strbuf_vprintf(strbuf *buf, const char *format, va_list ap)
{
	const int expected_size = buf->size + (strlen(format) * 2);
	va_list attempt;
	int len;

	ENSURE_SIZE(buf, expected_size);

	while (1) {
		va_copy(attempt, ap);
		len = vsnprintf(
			(char *)buf->ptr + buf->size,
			buf->asize - buf->size,
			format, attempt
			);
		va_end(attempt);

		if (len < 0) {
			/*
			 * cmark_strbuf_free() skips the static sentinels and
			 * resets size/asize, so nothing stale is left behind
			 * once the buffer is flagged as failed.
			 */
			cmark_strbuf_free(buf);
			buf->ptr = cmark_strbuf__oom;
			return -1;
		}

		/* the output (plus terminator) fit into the space that was available */
		if (len + 1 <= buf->asize - buf->size) {
			buf->size += len;
			break;
		}

		/* too small: grow to the exact requirement and format again */
		ENSURE_SIZE(buf, buf->size + len + 1);
	}

	return 0;
}
|
||||
|
||||
/*
 * Variadic front end for cmark_strbuf_vprintf(): append formatted output
 * to `buf` and return that function's result.
 */
int cmark_strbuf_printf(strbuf *buf, const char *format, ...)
{
	va_list args;
	int status;

	va_start(args, format);
	status = cmark_strbuf_vprintf(buf, format, args);
	va_end(args);

	return status;
}
|
||||
|
||||
void cmark_strbuf_copy_cstr(char *data, int datasize, const strbuf *buf)
|
||||
{
|
||||
int copylen;
|
||||
|
||||
assert(data && datasize && buf);
|
||||
|
||||
data[0] = '\0';
|
||||
|
||||
if (buf->size == 0 || buf->asize <= 0)
|
||||
return;
|
||||
|
||||
copylen = buf->size;
|
||||
if (copylen > datasize - 1)
|
||||
copylen = datasize - 1;
|
||||
memmove(data, buf->ptr, copylen);
|
||||
data[copylen] = '\0';
|
||||
}
|
||||
|
||||
/* Exchange the complete state (pointer, capacity, size) of two buffers. */
void cmark_strbuf_swap(strbuf *buf_a, strbuf *buf_b)
{
	strbuf saved_b = *buf_b;

	*buf_b = *buf_a;
	*buf_a = saved_b;
}
|
||||
|
||||
/*
 * Hand the buffer's storage over to the caller and reset `buf` to the
 * empty state. If the buffer has no capacity or is marked out-of-memory,
 * a newly calloc'ed empty string is returned instead and `buf` is left
 * untouched.
 */
unsigned char *cmark_strbuf_detach(strbuf *buf)
{
	unsigned char *owned;

	if (buf->ptr == cmark_strbuf__oom || buf->asize == 0) {
		/* return an empty string */
		return (unsigned char *)calloc(1, 1);
	}

	owned = buf->ptr;
	cmark_strbuf_init(buf, 0);

	return owned;
}
|
||||
|
||||
/*
 * Free whatever `buf` holds, then make it adopt the NUL-terminated string
 * `ptr` as its storage. `asize` is the capacity to record; 0, or a value
 * smaller than the string length, falls back to strlen(ptr) + 1.
 * With a NULL `ptr` the buffer is simply grown to `asize`.
 */
void cmark_strbuf_attach(strbuf *buf, unsigned char *ptr, int asize)
{
	cmark_strbuf_free(buf);

	if (ptr == NULL) {
		cmark_strbuf_grow(buf, asize);
		return;
	}

	buf->ptr = ptr;
	buf->size = strlen((char *)ptr);

	if (asize == 0 || asize < buf->size)
		buf->asize = buf->size + 1;
	else
		buf->asize = asize;
}
|
||||
|
||||
int cmark_strbuf_cmp(const strbuf *a, const strbuf *b)
|
||||
{
|
||||
int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size));
|
||||
return (result != 0) ? result :
|
||||
(a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
|
||||
}
|
||||
|
||||
int cmark_strbuf_strchr(const strbuf *buf, int c, int pos)
|
||||
{
|
||||
const unsigned char *p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos);
|
||||
if (!p)
|
||||
return -1;
|
||||
|
||||
return (int)(p - (const unsigned char *)buf->ptr);
|
||||
}
|
||||
|
||||
int cmark_strbuf_strrchr(const strbuf *buf, int c, int pos)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = pos; i >= 0; i--) {
|
||||
if (buf->ptr[i] == (unsigned char) c)
|
||||
return i;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
 * Shorten `buf` to `len` bytes. Nothing happens when `len` is not smaller
 * than the current size.
 */
void cmark_strbuf_truncate(strbuf *buf, int len)
{
	if (len >= buf->size)
		return;

	buf->size = len;
	buf->ptr[len] = '\0';
}
|
||||
|
||||
/*
 * Remove the first `n` bytes of `buf`, shifting the remainder to the front.
 *
 * A non-positive `n` is a no-op. An `n` larger than the current size is
 * clamped to the size, so the buffer simply becomes empty; without the
 * clamp the size would go negative and the memmove() length and the
 * terminator index would be out of range.
 */
void cmark_strbuf_drop(strbuf *buf, int n)
{
	if (n <= 0)
		return;

	if (n > buf->size)
		n = buf->size;

	buf->size -= n;
	if (buf->size)
		memmove(buf->ptr, buf->ptr + n, buf->size);

	buf->ptr[buf->size] = '\0';
}
|
||||
|
||||
/* Strip trailing whitespace (as classified by isspace) from `buf`. */
void cmark_strbuf_rtrim(strbuf *buf)
{
	if (buf->size == 0)
		return;

	while (buf->size > 0 && isspace(buf->ptr[buf->size - 1]))
		buf->size--;

	buf->ptr[buf->size] = '\0';
}
|
||||
|
||||
/* Strip leading and trailing whitespace (as classified by isspace) from `buf`. */
void cmark_strbuf_trim(strbuf *buf)
{
	int lead;

	if (buf->size == 0)
		return;

	/* count the whitespace bytes at the front */
	for (lead = 0; lead < buf->size && isspace(buf->ptr[lead]); lead++)
		;

	cmark_strbuf_drop(buf, lead);
	cmark_strbuf_rtrim(buf);
}
|
||||
|
||||
// Destructively modify string, collapsing consecutive
|
||||
// space and newline characters into a single space.
|
||||
void cmark_strbuf_normalize_whitespace(strbuf *s)
|
||||
{
|
||||
bool last_char_was_space = false;
|
||||
int r, w;
|
||||
|
||||
for (r = 0, w = 0; r < s->size; ++r) {
|
||||
switch (s->ptr[r]) {
|
||||
case ' ':
|
||||
case '\n':
|
||||
if (last_char_was_space)
|
||||
break;
|
||||
|
||||
s->ptr[w++] = ' ';
|
||||
last_char_was_space = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
s->ptr[w++] = s->ptr[r];
|
||||
last_char_was_space = false;
|
||||
}
|
||||
}
|
||||
|
||||
cmark_strbuf_truncate(s, w);
|
||||
}
|
||||
|
||||
// Destructively unescape a string: remove backslashes before punctuation chars.
|
||||
extern void cmark_strbuf_unescape(strbuf *buf)
|
||||
{
|
||||
int r, w;
|
||||
|
||||
for (r = 0, w = 0; r < buf->size; ++r) {
|
||||
if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1]))
|
||||
continue;
|
||||
|
||||
buf->ptr[w++] = buf->ptr[r];
|
||||
}
|
||||
|
||||
cmark_strbuf_truncate(buf, w);
|
||||
}
|
@ -1,177 +0,0 @@
|
||||
#ifndef CMARK_BUFFER_H
|
||||
#define CMARK_BUFFER_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdarg.h>
|
||||
#include "config.h"
|
||||
#include "cmark_export.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Growable byte buffer. Field order matters for CMARK_GH_BUF_INIT. */
typedef struct {
	unsigned char *ptr;	/* storage holding the data */
	int asize;		/* allocated capacity in bytes (0 = nothing allocated) */
	int size;		/* number of content bytes in use */
} cmark_strbuf;
|
||||
|
||||
CMARK_EXPORT
|
||||
extern unsigned char cmark_strbuf__initbuf[];
|
||||
|
||||
CMARK_EXPORT
|
||||
extern unsigned char cmark_strbuf__oom[];
|
||||
|
||||
#define CMARK_GH_BUF_INIT { cmark_strbuf__initbuf, 0, 0 }
|
||||
|
||||
/**
|
||||
* Initialize a strbuf structure.
|
||||
*
|
||||
* For the cases where GH_BUF_INIT cannot be used to do static
|
||||
* initialization.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
void cmark_strbuf_init(cmark_strbuf *buf, int initial_size);
|
||||
|
||||
/**
|
||||
* Attempt to grow the buffer to hold at least `target_size` bytes.
|
||||
*
|
||||
* If the allocation fails, this will return an error. If mark_oom is true,
|
||||
* this will mark the buffer as invalid for future operations; if false,
|
||||
* existing buffer content will be preserved, but calling code must handle
|
||||
* that buffer was not expanded.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom);
|
||||
|
||||
/**
|
||||
* Grow the buffer to hold at least `target_size` bytes.
|
||||
*
|
||||
* If the allocation fails, this will return an error and the buffer will be
|
||||
* marked as invalid for future operations, invalidating its contents.
|
||||
*
|
||||
* @return 0 on success or -1 on failure
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
int cmark_strbuf_grow(cmark_strbuf *buf, int target_size);
|
||||
|
||||
CMARK_EXPORT
|
||||
void cmark_strbuf_free(cmark_strbuf *buf);
|
||||
CMARK_EXPORT
|
||||
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
|
||||
|
||||
/**
|
||||
* Test if there have been any reallocation failures with this strbuf.
|
||||
*
|
||||
* Any function that writes to a strbuf can fail due to memory allocation
|
||||
* issues. If one fails, the strbuf will be marked with an OOM error and
|
||||
* further calls to modify the buffer will fail. Check strbuf_oom() at the
|
||||
* end of your sequence and it will be true if you ran out of memory at any
|
||||
* point with that buffer.
|
||||
*
|
||||
* @return false if no error, true if allocation error
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
bool cmark_strbuf_oom(const cmark_strbuf *buf);
|
||||
|
||||
CMARK_EXPORT
|
||||
size_t cmark_strbuf_len(const cmark_strbuf *buf);
|
||||
|
||||
CMARK_EXPORT
|
||||
int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b);
|
||||
|
||||
CMARK_EXPORT
|
||||
void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize);
|
||||
CMARK_EXPORT
|
||||
unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
|
||||
CMARK_EXPORT
|
||||
void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf);
|
||||
|
||||
static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf)
|
||||
{
|
||||
return (char *)buf->ptr;
|
||||
}
|
||||
|
||||
#define cmark_strbuf_at(buf, n) ((buf)->ptr[n])
|
||||
|
||||
/*
|
||||
* Functions below that return int value error codes will return 0 on
|
||||
* success or -1 on failure (which generally means an allocation failed).
|
||||
* Using a strbuf where the allocation has failed will result in -1 from
|
||||
* all further calls using that buffer. As a result, you can ignore the
|
||||
* return code of these functions and call them in a series then just call
|
||||
* strbuf_oom at the end.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len);
|
||||
CMARK_EXPORT
|
||||
int cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
|
||||
CMARK_EXPORT
|
||||
int cmark_strbuf_putc(cmark_strbuf *buf, int c);
|
||||
CMARK_EXPORT
|
||||
int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len);
|
||||
CMARK_EXPORT
|
||||
int cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
|
||||
CMARK_EXPORT
|
||||
int cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...)
|
||||
CMARK_ATTRIBUTE((format (printf, 2, 3)));
|
||||
CMARK_EXPORT
|
||||
int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap);
|
||||
CMARK_EXPORT
|
||||
void cmark_strbuf_clear(cmark_strbuf *buf);
|
||||
|
||||
CMARK_EXPORT
|
||||
int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos);
|
||||
CMARK_EXPORT
|
||||
int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos);
|
||||
CMARK_EXPORT
|
||||
void cmark_strbuf_drop(cmark_strbuf *buf, int n);
|
||||
CMARK_EXPORT
|
||||
void cmark_strbuf_truncate(cmark_strbuf *buf, int len);
|
||||
CMARK_EXPORT
|
||||
void cmark_strbuf_rtrim(cmark_strbuf *buf);
|
||||
CMARK_EXPORT
|
||||
void cmark_strbuf_trim(cmark_strbuf *buf);
|
||||
CMARK_EXPORT
|
||||
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
|
||||
CMARK_EXPORT
|
||||
void cmark_strbuf_unescape(cmark_strbuf *s);
|
||||
|
||||
// Convenience macros
|
||||
#define strbuf cmark_strbuf
|
||||
#define strbuf__initbuf cmark_strbuf__initbuf
|
||||
#define strbuf__oom cmark_strbuf__oom
|
||||
#define GH_BUF_INIT CMARK_GH_BUF_INIT
|
||||
#define strbuf_init cmark_strbuf_init
|
||||
#define strbuf_try_grow cmark_strbuf_try_grow
|
||||
#define strbuf_grow cmark_strbuf_grow
|
||||
#define strbuf_free cmark_strbuf_free
|
||||
#define strbuf_swap cmark_strbuf_swap
|
||||
#define strbuf_oom cmark_strbuf_oom
|
||||
#define strbuf_len cmark_strbuf_len
|
||||
#define strbuf_cmp cmark_strbuf_cmp
|
||||
#define strbuf_attach cmark_strbuf_attach
|
||||
#define strbuf_detach cmark_strbuf_detach
|
||||
#define strbuf_copy_cstr cmark_strbuf_copy_cstr
|
||||
#define strbuf_at cmark_strbuf_at
|
||||
#define strbuf_set cmark_strbuf_set
|
||||
#define strbuf_sets cmark_strbuf_sets
|
||||
#define strbuf_putc cmark_strbuf_putc
|
||||
#define strbuf_put cmark_strbuf_put
|
||||
#define strbuf_puts cmark_strbuf_puts
|
||||
#define strbuf_printf cmark_strbuf_printf
|
||||
#define strbuf_vprintf cmark_strbuf_vprintf
|
||||
#define strbuf_clear cmark_strbuf_clear
|
||||
#define strbuf_strchr cmark_strbuf_strchr
|
||||
#define strbuf_strrchr cmark_strbuf_strrchr
|
||||
#define strbuf_drop cmark_strbuf_drop
|
||||
#define strbuf_truncate cmark_strbuf_truncate
|
||||
#define strbuf_rtrim cmark_strbuf_rtrim
|
||||
#define strbuf_trim cmark_strbuf_trim
|
||||
#define strbuf_normalize_whitespace cmark_strbuf_normalize_whitespace
|
||||
#define strbuf_unescape cmark_strbuf_unescape
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
@ -1,122 +0,0 @@
|
||||
#ifndef CMARK_CHUNK_H
|
||||
#define CMARK_CHUNK_H
|
||||
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "buffer.h"
|
||||
|
||||
/* A run of bytes that is either borrowed (alloc == 0) or owned (alloc != 0). */
typedef struct {
	unsigned char *data;	// first byte of the run
	int len;		// number of bytes in the run
	int alloc;		// non-zero: data is owned; also implies a NUL-terminated string
} cmark_chunk;
|
||||
|
||||
/* Release the chunk's data if the chunk owns it, then reset the chunk to empty. */
static inline void cmark_chunk_free(cmark_chunk *c)
{
	if (c->alloc)
		free(c->data);

	c->len = 0;
	c->alloc = 0;
	c->data = NULL;
}
|
||||
|
||||
/*
 * Skip leading whitespace by advancing the data pointer. Asserts that the
 * chunk does not own its data, since the start pointer is moved.
 */
static inline void cmark_chunk_ltrim(cmark_chunk *c)
{
	assert(!c->alloc);

	while (c->len != 0 && isspace(*c->data)) {
		c->len--;
		c->data++;
	}
}
|
||||
|
||||
/* Drop trailing whitespace by shortening the chunk's length. */
static inline void cmark_chunk_rtrim(cmark_chunk *c)
{
	while (c->len > 0 && isspace(c->data[c->len - 1]))
		c->len--;
}
|
||||
|
||||
/* Remove whitespace from both ends of the chunk: left side first, then right. */
static inline void cmark_chunk_trim(cmark_chunk *c)
{
	cmark_chunk_ltrim(c);	/* leading whitespace */
	cmark_chunk_rtrim(c);	/* trailing whitespace */
}
|
||||
|
||||
/*
 * Index of the first byte equal to `c` at or after `offset`. When the byte
 * does not occur, the chunk length is returned (not -1).
 */
static inline int cmark_chunk_strchr(cmark_chunk *ch, int c, int offset)
{
	const unsigned char *hit =
		(unsigned char *)memchr(ch->data + offset, c, ch->len - offset);

	if (hit == NULL)
		return ch->len;

	return (int)(hit - ch->data);
}
|
||||
|
||||
/*
 * Return the chunk's contents as a NUL-terminated C string.
 *
 * A chunk that already owns its data is returned as is. Otherwise the
 * bytes are copied into a new NUL-terminated allocation, which the chunk
 * then owns (alloc is set).
 *
 * Returns NULL if the allocation fails. In that case the chunk is left
 * exactly as it was, rather than ending up with a NULL data pointer, a
 * non-zero length and the ownership flag set.
 */
static inline const char *cmark_chunk_to_cstr(cmark_chunk *c)
{
	unsigned char *str;

	if (c->alloc) {
		return (char *)c->data;
	}

	str = (unsigned char *)malloc(c->len + 1);
	if (str == NULL) {
		return NULL;
	}

	/* an empty chunk may carry a NULL data pointer; do not hand that to memcpy */
	if (c->len > 0) {
		memcpy(str, c->data, c->len);
	}
	str[c->len] = 0;

	c->data = str;
	c->alloc = 1;

	return (char *)str;
}
|
||||
|
||||
/*
 * Replace the chunk's contents with an owned copy of the C string `str`.
 *
 * The copy is made before the old data is released, so `str` may point
 * into the chunk's current data. A NULL `str`, or a failed allocation,
 * leaves the chunk empty (data NULL, len 0, alloc 0) instead of
 * dereferencing a NULL pointer.
 */
static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str)
{
	unsigned char *copy = NULL;
	size_t length = 0;

	if (str != NULL) {
		length = strlen(str);
		copy = (unsigned char *)malloc(length + 1);
		if (copy != NULL) {
			/* length + 1 also copies the terminator */
			memcpy(copy, str, length + 1);
		} else {
			length = 0;
		}
	}

	/* only now is it safe to let go of the previous contents */
	if (c->alloc) {
		free(c->data);
	}

	c->data = copy;
	c->len = (int)length;
	c->alloc = (copy != NULL) ? 1 : 0;
}
|
||||
|
||||
static inline cmark_chunk cmark_chunk_literal(const char *data)
|
||||
{
|
||||
cmark_chunk c = {(unsigned char *)data, data ? strlen(data) : 0, 0};
|
||||
return c;
|
||||
}
|
||||
|
||||
static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, int pos, int len)
|
||||
{
|
||||
cmark_chunk c = {ch->data + pos, len, 0};
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
 * Turn the contents of a strbuf into an owning chunk; the data pointer
 * comes from cmark_strbuf_detach().
 */
static inline cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf)
{
	cmark_chunk result;

	/* read the length before detaching: the detach call may reset buf */
	result.alloc = 1;
	result.len = buf->size;
	result.data = cmark_strbuf_detach(buf);

	return result;
}
|
||||
|
||||
// Convenience macros
|
||||
#define chunk cmark_chunk
|
||||
#define chunk_free cmark_chunk_free
|
||||
#define chunk_ltrim cmark_chunk_ltrim
|
||||
#define chunk_rtrim cmark_chunk_rtrim
|
||||
#define chunk_trim cmark_chunk_trim
|
||||
#define chunk_strchr cmark_chunk_strchr
|
||||
#define chunk_to_cstr cmark_chunk_to_cstr
|
||||
#define chunk_literal cmark_chunk_literal
|
||||
#define chunk_dup cmark_chunk_dup
|
||||
#define chunk_buf_detach cmark_chunk_buf_detach
|
||||
|
||||
#endif
|
@ -1,21 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include "node.h"
|
||||
#include "html/houdini.h"
|
||||
#include "cmark.h"
|
||||
#include "buffer.h"
|
||||
|
||||
char *cmark_markdown_to_html(const char *text, int len)
|
||||
{
|
||||
cmark_node *doc;
|
||||
char *result;
|
||||
|
||||
doc = cmark_parse_document(text, len);
|
||||
|
||||
result = cmark_render_html(doc);
|
||||
cmark_node_free(doc);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -1,343 +0,0 @@
|
||||
#ifndef CMARK_H
|
||||
#define CMARK_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include "cmark_export.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** .SH NAME
|
||||
*
|
||||
* .B cmark
|
||||
* \- CommonMark parsing, manipulating, and rendering
|
||||
*/
|
||||
|
||||
/** .SH SIMPLE INTERFACE
|
||||
*/
|
||||
|
||||
/** Current version of library.
|
||||
*/
|
||||
#define CMARK_VERSION "0.1"
|
||||
|
||||
/** Convert 'text' (assumed to be a UTF-8 encoded string with length
|
||||
* 'len' from CommonMark Markdown to HTML, returning a null-terminated,
|
||||
* UTF-8-encoded string.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
char *cmark_markdown_to_html(const char *text, int len);
|
||||
|
||||
/** .SH NODE STRUCTURE
|
||||
*/
|
||||
|
||||
/**
|
||||
*/
|
||||
typedef enum {
	/* Block-level node types */
	CMARK_NODE_DOCUMENT,
	CMARK_NODE_BLOCK_QUOTE,
	CMARK_NODE_LIST,
	CMARK_NODE_LIST_ITEM,
	CMARK_NODE_CODE_BLOCK,
	CMARK_NODE_HTML,
	CMARK_NODE_PARAGRAPH,
	CMARK_NODE_HEADER,
	CMARK_NODE_HRULE,
	CMARK_NODE_REFERENCE_DEF,

	/* Aliases marking the first and last block type */
	CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
	CMARK_NODE_LAST_BLOCK = CMARK_NODE_REFERENCE_DEF,

	/* Inline node types; numbering continues after the last block type */
	CMARK_NODE_TEXT,
	CMARK_NODE_SOFTBREAK,
	CMARK_NODE_LINEBREAK,
	CMARK_NODE_INLINE_CODE,
	CMARK_NODE_INLINE_HTML,
	CMARK_NODE_EMPH,
	CMARK_NODE_STRONG,
	CMARK_NODE_LINK,
	CMARK_NODE_IMAGE,

	/* Aliases marking the first and last inline type */
	CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT,
	CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE
} cmark_node_type;
|
||||
|
||||
|
||||
/**
|
||||
*/
|
||||
typedef enum {
	CMARK_NO_LIST,		/* first enumerator, value 0 */
	CMARK_BULLET_LIST,
	CMARK_ORDERED_LIST
} cmark_list_type;
|
||||
|
||||
/**
|
||||
*/
|
||||
typedef enum {
	CMARK_PERIOD_DELIM,	/* first enumerator, value 0 */
	CMARK_PAREN_DELIM
} cmark_delim_type;
|
||||
|
||||
typedef struct cmark_node cmark_node;
|
||||
typedef struct cmark_parser cmark_parser;
|
||||
|
||||
/**
|
||||
* .SH CREATING AND DESTROYING NODES
|
||||
*/
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT cmark_node*
|
||||
cmark_node_new(cmark_node_type type);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT void
|
||||
cmark_node_free(cmark_node *node);
|
||||
|
||||
/**
|
||||
* .SH TREE TRAVERSAL
|
||||
*/
|
||||
CMARK_EXPORT cmark_node*
|
||||
cmark_node_next(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT cmark_node*
|
||||
cmark_node_previous(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT cmark_node*
|
||||
cmark_node_parent(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT cmark_node*
|
||||
cmark_node_first_child(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT cmark_node*
|
||||
cmark_node_last_child(cmark_node *node);
|
||||
|
||||
/**
|
||||
* .SH ACCESSORS
|
||||
*/
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT cmark_node_type
|
||||
cmark_node_get_type(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT const char*
|
||||
cmark_node_get_string_content(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_set_string_content(cmark_node *node, const char *content);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_get_header_level(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_set_header_level(cmark_node *node, int level);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT cmark_list_type
|
||||
cmark_node_get_list_type(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_set_list_type(cmark_node *node, cmark_list_type type);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_get_list_start(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_set_list_start(cmark_node *node, int start);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_get_list_tight(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_set_list_tight(cmark_node *node, int tight);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT const char*
|
||||
cmark_node_get_fence_info(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_set_fence_info(cmark_node *node, const char *info);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT const char*
|
||||
cmark_node_get_url(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_set_url(cmark_node *node, const char *url);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT const char*
|
||||
cmark_node_get_title(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_set_title(cmark_node *node, const char *title);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_get_start_line(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_get_start_column(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_get_end_line(cmark_node *node);
|
||||
|
||||
/**
|
||||
* .SH TREE MANIPULATION
|
||||
*/
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT void
|
||||
cmark_node_unlink(cmark_node *node);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_insert_before(cmark_node *node, cmark_node *sibling);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_insert_after(cmark_node *node, cmark_node *sibling);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_prepend_child(cmark_node *node, cmark_node *child);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT int
|
||||
cmark_node_append_child(cmark_node *node, cmark_node *child);
|
||||
|
||||
/**
|
||||
* .SH PARSING
|
||||
*/
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_parser *cmark_parser_new();
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
void cmark_parser_free(cmark_parser *parser);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_node *cmark_parser_finish(cmark_parser *parser);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_node *cmark_parse_document(const char *buffer, size_t len);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_node *cmark_parse_file(FILE *f);
|
||||
|
||||
/**
|
||||
* .SH RENDERING
|
||||
*/
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
char *cmark_render_ast(cmark_node *root);
|
||||
|
||||
/**
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
char *cmark_render_html(cmark_node *root);
|
||||
|
||||
/** .SH AUTHORS
|
||||
*
|
||||
* John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
|
||||
*/
|
||||
|
||||
#ifndef CMARK_NO_SHORT_NAMES
|
||||
#define NODE_DOCUMENT CMARK_NODE_DOCUMENT
|
||||
#define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE
|
||||
#define NODE_LIST CMARK_NODE_LIST
|
||||
#define NODE_LIST_ITEM CMARK_NODE_LIST_ITEM
|
||||
#define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK
|
||||
#define NODE_HTML CMARK_NODE_HTML
|
||||
#define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH
|
||||
#define NODE_HEADER CMARK_NODE_HEADER
|
||||
#define NODE_HRULE CMARK_NODE_HRULE
|
||||
#define NODE_REFERENCE_DEF CMARK_NODE_REFERENCE_DEF
|
||||
#define NODE_TEXT CMARK_NODE_TEXT
|
||||
#define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK
|
||||
#define NODE_LINEBREAK CMARK_NODE_LINEBREAK
|
||||
#define NODE_INLINE_CODE CMARK_NODE_INLINE_CODE
|
||||
#define NODE_INLINE_HTML CMARK_NODE_INLINE_HTML
|
||||
#define NODE_EMPH CMARK_NODE_EMPH
|
||||
#define NODE_STRONG CMARK_NODE_STRONG
|
||||
#define NODE_LINK CMARK_NODE_LINK
|
||||
#define NODE_IMAGE CMARK_NODE_IMAGE
|
||||
#define NODE_LINK_LABEL CMARK_NODE_LINK_LABEL
|
||||
#define BULLET_LIST CMARK_BULLET_LIST
|
||||
#define ORDERED_LIST CMARK_ORDERED_LIST
|
||||
#define PERIOD_DELIM CMARK_PERIOD_DELIM
|
||||
#define PAREN_DELIM CMARK_PAREN_DELIM
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@ -1,49 +0,0 @@
|
||||
#cmakedefine HAVE_STDBOOL_H
|
||||
|
||||
#ifdef HAVE_STDBOOL_H
|
||||
#include <stdbool.h>
|
||||
#elif !defined(__cplusplus)
|
||||
typedef char bool;
|
||||
#endif
|
||||
|
||||
#cmakedefine HAVE___BUILTIN_EXPECT
|
||||
|
||||
#cmakedefine HAVE___ATTRIBUTE__
|
||||
|
||||
#ifdef HAVE___ATTRIBUTE__
|
||||
#define CMARK_ATTRIBUTE(list) __attribute__ (list)
|
||||
#else
|
||||
#define CMARK_ATTRIBUTE(list)
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
#ifndef U3_ALL
|
||||
#define U3_ALL
|
||||
#define malloc(a) u3a_malloc(a)
|
||||
#define calloc(a,b) u3a_calloc(a,b)
|
||||
#define realloc(a,b) u3a_realloc(a,b)
|
||||
#define free(a) u3a_free(a)
|
||||
/* From i/n/a.h
|
||||
*/
|
||||
/* u3a_malloc(): aligned storage measured in bytes.
|
||||
*/
|
||||
void*
|
||||
u3a_malloc(size_t len_i);
|
||||
|
||||
/* u3a_calloc(): aligned storage measured in bytes.
|
||||
*/
|
||||
void*
|
||||
u3a_calloc(size_t num_i, size_t len_i);
|
||||
|
||||
/* u3a_realloc(): aligned realloc in bytes.
|
||||
*/
|
||||
void*
|
||||
u3a_realloc(void* lag_v, size_t len_i);
|
||||
|
||||
/* u3a_free(): free for aligned malloc.
|
||||
*/
|
||||
void
|
||||
u3a_free(void* tox_v);
|
||||
#endif
|
||||
#endif
|
||||
|
@ -1,36 +0,0 @@
|
||||
#ifndef CMARK_DEBUG_H
|
||||
#define CMARK_DEBUG_H
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define debug(M, ...)
|
||||
#else
|
||||
#define debug(M, ...) \
|
||||
fprintf(stderr, "DEBUG %s:%d: " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
|
||||
|
||||
#define log_err(M, ...) \
|
||||
fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
|
||||
clean_errno(), ##__VA_ARGS__)
|
||||
|
||||
#define log_warn(M, ...) \
|
||||
fprintf(stderr, "[WARN] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
|
||||
clean_errno(), ##__VA_ARGS__)
|
||||
|
||||
#define log_info(M, ...) fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, \
|
||||
__LINE__, ##__VA_ARGS__)
|
||||
|
||||
#define check(A, M, ...) \
|
||||
if(!(A)) { log_err(M, ##__VA_ARGS__); errno=0; goto error; }
|
||||
|
||||
#define sentinel(M, ...) \
|
||||
{ log_err(M, ##__VA_ARGS__); errno=0; goto error; }
|
||||
|
||||
#define check_debug(A, M, ...) \
|
||||
if(!(A)) { debug(M, ##__VA_ARGS__); errno=0; goto error; }
|
||||
|
||||
#endif
|
@ -1,52 +0,0 @@
|
||||
#ifndef CMARK_HOUDINI_H
|
||||
#define CMARK_HOUDINI_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef HAVE___BUILTIN_EXPECT
|
||||
# define likely(x) __builtin_expect((x),1)
|
||||
# define unlikely(x) __builtin_expect((x),0)
|
||||
#else
|
||||
# define likely(x) (x)
|
||||
# define unlikely(x) (x)
|
||||
#endif
|
||||
|
||||
#ifdef HOUDINI_USE_LOCALE
|
||||
# define _isxdigit(c) isxdigit(c)
|
||||
# define _isdigit(c) isdigit(c)
|
||||
#else
|
||||
/*
|
||||
* Helper _isdigit methods -- do not trust the current locale
|
||||
* */
|
||||
# define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
|
||||
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
|
||||
#endif
|
||||
|
||||
#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
|
||||
#define HOUDINI_UNESCAPED_SIZE(x) (x)
|
||||
|
||||
extern size_t houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size);
|
||||
extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size);
|
||||
extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure);
|
||||
extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size);
|
||||
extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size);
|
||||
extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size);
|
||||
extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size);
|
||||
extern int houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size);
|
||||
extern int houdini_unescape_uri(strbuf *ob, const uint8_t *src, size_t size);
|
||||
extern int houdini_unescape_url(strbuf *ob, const uint8_t *src, size_t size);
|
||||
extern int houdini_escape_js(strbuf *ob, const uint8_t *src, size_t size);
|
||||
extern int houdini_unescape_js(strbuf *ob, const uint8_t *src, size_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@ -1,107 +0,0 @@
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "html/houdini.h"
|
||||
|
||||
/*
|
||||
* The following characters will not be escaped:
|
||||
*
|
||||
* -_.+!*'(),%#@?=;:/,+&$ alphanum
|
||||
*
|
||||
* Note that this character set is the addition of:
|
||||
*
|
||||
* - The characters which are safe to be in an URL
|
||||
* - The characters which are *not* safe to be in
|
||||
* an URL because they are RESERVED characters.
|
||||
*
|
||||
* We assume (lazily) that any RESERVED char that
|
||||
* appears inside an URL is actually meant to
|
||||
* have its native function (i.e. as an URL
|
||||
* component/separator) and hence needs no escaping.
|
||||
*
|
||||
* There are two exceptions: the characters & (amp)
|
||||
* and ' (single quote) do not appear in the table.
|
||||
* They are meant to appear in the URL as components,
|
||||
* yet they require special HTML-entity escaping
|
||||
* to generate valid HTML markup.
|
||||
*
|
||||
* All other characters will be escaped to %XX.
|
||||
*
|
||||
*/
|
||||
/*
 * HREF_SAFE[c] is 1 when byte value c is copied into an href unchanged
 * and 0 when it has to be escaped.  One row per 16 byte values; the
 * leading comment on each row is the value of its first entry.
 */
static const char HREF_SAFE[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
||||
|
||||
int
|
||||
houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size)
|
||||
{
|
||||
static const uint8_t hex_chars[] = "0123456789ABCDEF";
|
||||
size_t i = 0, org;
|
||||
uint8_t hex_str[3];
|
||||
|
||||
hex_str[0] = '%';
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
while (i < size && HREF_SAFE[src[i]] != 0)
|
||||
i++;
|
||||
|
||||
if (likely(i > org))
|
||||
strbuf_put(ob, src + org, i - org);
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
switch (src[i]) {
|
||||
/* amp appears all the time in URLs, but needs
|
||||
* HTML-entity escaping to be inside an href */
|
||||
case '&':
|
||||
strbuf_puts(ob, "&");
|
||||
break;
|
||||
|
||||
/* the single quote is a valid URL character
|
||||
* according to the standard; it needs HTML
|
||||
* entity escaping too */
|
||||
case '\'':
|
||||
strbuf_puts(ob, "'");
|
||||
break;
|
||||
|
||||
/* the space can be escaped to %20 or a plus
|
||||
* sign. we're going with the generic escape
|
||||
* for now. the plus thing is more commonly seen
|
||||
* when building GET strings */
|
||||
#if 0
|
||||
case ' ':
|
||||
strbuf_putc(ob, '+');
|
||||
break;
|
||||
#endif
|
||||
|
||||
/* every other character goes with a %XX escaping */
|
||||
default:
|
||||
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
||||
hex_str[2] = hex_chars[src[i] & 0xF];
|
||||
strbuf_put(ob, hex_str, 3);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
@ -1,81 +0,0 @@
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "html/houdini.h"
|
||||
|
||||
/**
|
||||
* According to the OWASP rules:
|
||||
*
|
||||
* & --> &amp;
* < --> &lt;
* > --> &gt;
* " --> &quot;
* ' --> &#x27;     &apos; is not recommended
* / --> &#x2F;     forward slash is included as it helps end an HTML entity
|
||||
*
|
||||
*/
|
||||
/*
 * HTML_ESCAPE_TABLE[c] is 0 for a byte that is copied through unchanged;
 * otherwise it is the index into HTML_ESCAPES of the replacement text.
 * Non-zero entries: '"' = 1, '&' = 2, '\'' = 3, '/' = 4, '<' = 5, '>' = 6.
 * One row per 16 byte values.
 */
static const char HTML_ESCAPE_TABLE[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
||||
|
||||
/*
 * Replacement text for each non-zero code in HTML_ESCAPE_TABLE.
 * Index 0 is unused (code 0 means "no escaping").  The single quote and
 * the forward slash are written as decimal character references.
 */
static const char *HTML_ESCAPES[] = {
	"",        /* 0: unused */
	"&quot;",  /* 1: '"'  */
	"&amp;",   /* 2: '&'  */
	"&#39;",   /* 3: '\'' */
	"&#47;",   /* 4: '/'  */
	"&lt;",    /* 5: '<'  */
	"&gt;"     /* 6: '>'  */
};
|
||||
|
||||
int
|
||||
houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure)
|
||||
{
|
||||
size_t i = 0, org, esc = 0;
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
strbuf_put(ob, src + org, i - org);
|
||||
|
||||
/* escaping */
|
||||
if (unlikely(i >= size))
|
||||
break;
|
||||
|
||||
/* The forward slash is only escaped in secure mode */
|
||||
if ((src[i] == '/' || src[i] == '\'') && !secure) {
|
||||
strbuf_putc(ob, src[i]);
|
||||
} else {
|
||||
strbuf_puts(ob, HTML_ESCAPES[esc]);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size)
|
||||
{
|
||||
return houdini_escape_html0(ob, src, size, 1);
|
||||
}
|
@ -1,112 +0,0 @@
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "buffer.h"
|
||||
#include "houdini.h"
|
||||
#include "utf8.h"
|
||||
#include "html_unescape.h"
|
||||
|
||||
size_t
|
||||
houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
if (size > 3 && src[0] == '#') {
|
||||
int codepoint = 0;
|
||||
|
||||
if (_isdigit(src[1])) {
|
||||
for (i = 1; i < size && _isdigit(src[i]); ++i) {
|
||||
int cp = (codepoint * 10) + (src[i] - '0');
|
||||
|
||||
if (cp < codepoint)
|
||||
return 0;
|
||||
|
||||
codepoint = cp;
|
||||
}
|
||||
}
|
||||
|
||||
else if (src[1] == 'x' || src[1] == 'X') {
|
||||
for (i = 2; i < size && _isxdigit(src[i]); ++i) {
|
||||
int cp = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
|
||||
|
||||
if (cp < codepoint)
|
||||
return 0;
|
||||
|
||||
codepoint = cp;
|
||||
}
|
||||
}
|
||||
|
||||
if (i < size && src[i] == ';' && codepoint) {
|
||||
utf8proc_encode_char(codepoint, ob);
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
else {
|
||||
if (size > MAX_WORD_LENGTH)
|
||||
size = MAX_WORD_LENGTH;
|
||||
|
||||
for (i = MIN_WORD_LENGTH; i < size; ++i) {
|
||||
if (src[i] == ' ')
|
||||
break;
|
||||
|
||||
if (src[i] == ';') {
|
||||
const struct html_ent *entity = find_entity((char *)src, i);
|
||||
|
||||
if (entity != NULL) {
|
||||
strbuf_put(ob, entity->utf8, entity->utf8_len);
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size)
|
||||
{
|
||||
size_t i = 0, org, ent;
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
while (i < size && src[i] != '&')
|
||||
i++;
|
||||
|
||||
if (likely(i > org)) {
|
||||
if (unlikely(org == 0)) {
|
||||
if (i >= size)
|
||||
return 0;
|
||||
|
||||
strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
||||
}
|
||||
|
||||
strbuf_put(ob, src + org, i - org);
|
||||
}
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
i++;
|
||||
|
||||
ent = houdini_unescape_ent(ob, src + i, size - i);
|
||||
i += ent;
|
||||
|
||||
/* not really an entity */
|
||||
if (ent == 0)
|
||||
strbuf_putc(ob, '&');
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Like houdini_unescape_html(), but always leaves the result in `ob`:
 * when houdini_unescape_html() reports 0, the input is appended as is.
 */
void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size)
{
	if (houdini_unescape_html(ob, src, size))
		return;

	strbuf_put(ob, src, size);
}
|
@ -1,357 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "cmark.h"
|
||||
#include "node.h"
|
||||
#include "buffer.h"
|
||||
#include "html/houdini.h"
|
||||
|
||||
// Functions to convert cmark_nodes to HTML strings.
|
||||
|
||||
static bool
|
||||
finish_node(strbuf *html, cmark_node *node, bool tight);
|
||||
|
||||
// Append 'source' to 'dest' with HTML escaping (non-secure mode).
// A negative 'length' means 'source' is NUL-terminated and its length
// is taken with strlen().
static void escape_html(strbuf *dest, const unsigned char *source, int length)
{
	const int n = (length < 0) ? (int)strlen((char *)source) : length;

	houdini_escape_html0(dest, source, (size_t)n, 0);
}
|
||||
|
||||
// Append 'source' to 'dest' escaped for use as an href/src value.
// A negative 'length' means 'source' is NUL-terminated and its length
// is taken with strlen().
static void escape_href(strbuf *dest, const unsigned char *source, int length)
{
	const int n = (length < 0) ? (int)strlen((char *)source) : length;

	houdini_escape_href(dest, source, (size_t)n);
}
|
||||
|
||||
// Make sure the output ends with a newline, unless it is still empty.
static inline void cr(strbuf *html)
{
	if (html->size == 0)
		return;

	if (html->ptr[html->size - 1] == '\n')
		return;

	strbuf_putc(html, '\n');
}
|
||||
|
||||
// Convert the inline children of a node to a plain string.
|
||||
static void inlines_to_plain_html(strbuf *html, cmark_node* node)
|
||||
{
|
||||
cmark_node* cur = node->first_child;
|
||||
|
||||
if (cur == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
switch(cur->type) {
|
||||
case NODE_TEXT:
|
||||
case NODE_INLINE_CODE:
|
||||
case NODE_INLINE_HTML:
|
||||
escape_html(html, cur->as.literal.data, cur->as.literal.len);
|
||||
break;
|
||||
|
||||
case NODE_LINEBREAK:
|
||||
case NODE_SOFTBREAK:
|
||||
strbuf_putc(html, ' ');
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (cur->first_child) {
|
||||
cur = cur->first_child;
|
||||
continue;
|
||||
}
|
||||
|
||||
next_sibling:
|
||||
if (cur->next) {
|
||||
cur = cur->next;
|
||||
continue;
|
||||
}
|
||||
cur = cur->parent;
|
||||
if (cur == node) {
|
||||
break;
|
||||
}
|
||||
goto next_sibling;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Convert a cmark_node to HTML.
|
||||
static void node_to_html(strbuf *html, cmark_node *node)
|
||||
{
|
||||
cmark_node *cur;
|
||||
char start_header[] = "<h0>";
|
||||
bool tight = false;
|
||||
bool visit_children;
|
||||
strbuf *info;
|
||||
|
||||
if (node == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
cur = node;
|
||||
while (true) {
|
||||
// Only NODE_IMAGE wants to skip its children.
|
||||
visit_children = true;
|
||||
|
||||
switch(cur->type) {
|
||||
case NODE_DOCUMENT:
|
||||
break;
|
||||
|
||||
case NODE_PARAGRAPH:
|
||||
if (!tight) {
|
||||
cr(html);
|
||||
strbuf_puts(html, "<p>");
|
||||
}
|
||||
break;
|
||||
|
||||
case NODE_BLOCK_QUOTE:
|
||||
cr(html);
|
||||
strbuf_puts(html, "<blockquote>\n");
|
||||
// BLOCK_QUOTE doesn't use any of the 'as' structs,
|
||||
// so the 'list' member can be used to store the
|
||||
// current value of 'tight'.
|
||||
cur->as.list.tight = tight;
|
||||
tight = false;
|
||||
break;
|
||||
|
||||
case NODE_LIST_ITEM:
|
||||
cr(html);
|
||||
strbuf_puts(html, "<li>");
|
||||
break;
|
||||
|
||||
case NODE_LIST: {
|
||||
cmark_list *list = &cur->as.list;
|
||||
bool tmp;
|
||||
|
||||
// make sure a list starts at the beginning of the line:
|
||||
cr(html);
|
||||
|
||||
if (list->list_type == CMARK_BULLET_LIST) {
|
||||
strbuf_puts(html, "<ul>\n");
|
||||
}
|
||||
else if (list->start == 1) {
|
||||
strbuf_puts(html, "<ol>\n");
|
||||
}
|
||||
else {
|
||||
strbuf_printf(html, "<ol start=\"%d\">\n",
|
||||
list->start);
|
||||
}
|
||||
|
||||
// Store the current value of 'tight' by swapping.
|
||||
tmp = list->tight;
|
||||
list->tight = tight;
|
||||
tight = tmp;
|
||||
break;
|
||||
}
|
||||
|
||||
case NODE_HEADER:
|
||||
cr(html);
|
||||
start_header[2] = '0' + cur->as.header.level;
|
||||
strbuf_puts(html, start_header);
|
||||
break;
|
||||
|
||||
case NODE_CODE_BLOCK:
|
||||
info = &cur->as.code.info;
|
||||
cr(html);
|
||||
|
||||
if (&cur->as.code.fence_length == 0
|
||||
|| strbuf_len(info) == 0) {
|
||||
strbuf_puts(html, "<pre><code>");
|
||||
}
|
||||
else {
|
||||
int first_tag = strbuf_strchr(info, ' ', 0);
|
||||
if (first_tag < 0)
|
||||
first_tag = strbuf_len(info);
|
||||
|
||||
strbuf_puts(html,
|
||||
"<pre><code class=\"language-");
|
||||
escape_html(html, info->ptr, first_tag);
|
||||
strbuf_puts(html, "\">");
|
||||
}
|
||||
|
||||
escape_html(html, cur->string_content.ptr, cur->string_content.size);
|
||||
break;
|
||||
|
||||
case NODE_HTML:
|
||||
cr(html);
|
||||
strbuf_put(html, cur->string_content.ptr, cur->string_content.size);
|
||||
break;
|
||||
|
||||
case NODE_HRULE:
|
||||
cr(html);
|
||||
strbuf_puts(html, "<hr />\n");
|
||||
break;
|
||||
|
||||
case NODE_REFERENCE_DEF:
|
||||
break;
|
||||
|
||||
case NODE_TEXT:
|
||||
escape_html(html, cur->as.literal.data, cur->as.literal.len);
|
||||
break;
|
||||
|
||||
case NODE_LINEBREAK:
|
||||
strbuf_puts(html, "<br />\n");
|
||||
break;
|
||||
|
||||
case NODE_SOFTBREAK:
|
||||
strbuf_putc(html, '\n');
|
||||
break;
|
||||
|
||||
case NODE_INLINE_CODE:
|
||||
strbuf_puts(html, "<code>");
|
||||
escape_html(html, cur->as.literal.data, cur->as.literal.len);
|
||||
break;
|
||||
|
||||
case NODE_INLINE_HTML:
|
||||
strbuf_put(html,
|
||||
cur->as.literal.data,
|
||||
cur->as.literal.len);
|
||||
break;
|
||||
|
||||
case NODE_LINK:
|
||||
strbuf_puts(html, "<a href=\"");
|
||||
if (cur->as.link.url)
|
||||
escape_href(html, cur->as.link.url, -1);
|
||||
|
||||
if (cur->as.link.title) {
|
||||
strbuf_puts(html, "\" title=\"");
|
||||
escape_html(html, cur->as.link.title, -1);
|
||||
}
|
||||
|
||||
strbuf_puts(html, "\">");
|
||||
break;
|
||||
|
||||
case NODE_IMAGE:
|
||||
strbuf_puts(html, "<img src=\"");
|
||||
if (cur->as.link.url)
|
||||
escape_href(html, cur->as.link.url, -1);
|
||||
|
||||
strbuf_puts(html, "\" alt=\"");
|
||||
inlines_to_plain_html(html, cur);
|
||||
|
||||
if (cur->as.link.title) {
|
||||
strbuf_puts(html, "\" title=\"");
|
||||
escape_html(html, cur->as.link.title, -1);
|
||||
}
|
||||
|
||||
strbuf_puts(html, "\" />");
|
||||
visit_children = false;
|
||||
break;
|
||||
|
||||
case NODE_STRONG:
|
||||
strbuf_puts(html, "<strong>");
|
||||
break;
|
||||
|
||||
case NODE_EMPH:
|
||||
strbuf_puts(html, "<em>");
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
if (visit_children && cur->first_child) {
|
||||
cur = cur->first_child;
|
||||
continue;
|
||||
}
|
||||
|
||||
next_sibling:
|
||||
tight = finish_node(html, cur, tight);
|
||||
if (cur == node) {
|
||||
break;
|
||||
}
|
||||
if (cur->next) {
|
||||
cur = cur->next;
|
||||
continue;
|
||||
}
|
||||
cur = cur->parent;
|
||||
goto next_sibling;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the restored value of 'tight'.
|
||||
static bool
|
||||
finish_node(strbuf *html, cmark_node *node, bool tight)
|
||||
{
|
||||
char end_header[] = "</h0>\n";
|
||||
|
||||
switch (node->type) {
|
||||
case NODE_PARAGRAPH:
|
||||
if (!tight) {
|
||||
strbuf_puts(html, "</p>\n");
|
||||
}
|
||||
break;
|
||||
|
||||
case NODE_BLOCK_QUOTE: {
|
||||
cmark_list *list = &node->as.list;
|
||||
strbuf_puts(html, "</blockquote>\n");
|
||||
// Restore old 'tight' value.
|
||||
tight = list->tight;
|
||||
list->tight = false;
|
||||
break;
|
||||
}
|
||||
|
||||
case NODE_LIST_ITEM:
|
||||
strbuf_puts(html, "</li>\n");
|
||||
break;
|
||||
|
||||
case NODE_LIST: {
|
||||
cmark_list *list = &node->as.list;
|
||||
bool tmp;
|
||||
strbuf_puts(html,
|
||||
list->list_type == CMARK_BULLET_LIST ?
|
||||
"</ul>\n" : "</ol>\n");
|
||||
// Restore old 'tight' value.
|
||||
tmp = tight;
|
||||
tight = list->tight;
|
||||
list->tight = tmp;
|
||||
break;
|
||||
}
|
||||
|
||||
case NODE_HEADER:
|
||||
end_header[3] = '0' + node->as.header.level;
|
||||
strbuf_puts(html, end_header);
|
||||
break;
|
||||
|
||||
case NODE_CODE_BLOCK:
|
||||
strbuf_puts(html, "</code></pre>\n");
|
||||
break;
|
||||
|
||||
case NODE_INLINE_CODE:
|
||||
strbuf_puts(html, "</code>");
|
||||
break;
|
||||
|
||||
case NODE_LINK:
|
||||
strbuf_puts(html, "</a>");
|
||||
break;
|
||||
|
||||
case NODE_STRONG:
|
||||
strbuf_puts(html, "</strong>");
|
||||
break;
|
||||
|
||||
case NODE_EMPH:
|
||||
strbuf_puts(html, "</em>");
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return tight;
|
||||
}
|
||||
|
||||
// Render the tree rooted at 'root' to HTML and return the text as a
// buffer detached from the working strbuf.
char *cmark_render_html(cmark_node *root)
{
	strbuf out = GH_BUF_INIT;
	char *rendered;

	node_to_html(&out, root);

	rendered = (char *)strbuf_detach(&out);
	strbuf_free(&out);

	return rendered;
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,987 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "node.h"
|
||||
#include "parser.h"
|
||||
#include "references.h"
|
||||
#include "cmark.h"
|
||||
#include "html/houdini.h"
|
||||
#include "utf8.h"
|
||||
#include "scanners.h"
|
||||
#include "inlines.h"
|
||||
|
||||
|
||||
// Macros for creating various kinds of simple inlines.
// The first three wrap make_literal() and take the literal chunk;
// the rest wrap make_simple() and take no argument.
#define make_str(s)       make_literal(CMARK_NODE_TEXT, s)
#define make_code(s)      make_literal(CMARK_NODE_INLINE_CODE, s)
#define make_raw_html(s)  make_literal(CMARK_NODE_INLINE_HTML, s)
#define make_linebreak()  make_simple(CMARK_NODE_LINEBREAK)
#define make_softbreak()  make_simple(CMARK_NODE_SOFTBREAK)
#define make_emph()       make_simple(CMARK_NODE_EMPH)
#define make_strong()     make_simple(CMARK_NODE_STRONG)
|
||||
|
||||
typedef struct delimiter {
|
||||
struct delimiter *previous;
|
||||
struct delimiter *next;
|
||||
cmark_node *inl_text;
|
||||
unsigned char delim_char;
|
||||
int position;
|
||||
bool can_open;
|
||||
bool can_close;
|
||||
} delimiter;
|
||||
|
||||
typedef struct {
|
||||
chunk input;
|
||||
int pos;
|
||||
cmark_reference_map *refmap;
|
||||
delimiter *last_delim;
|
||||
} subject;
|
||||
|
||||
static delimiter*
|
||||
S_insert_emph(subject *subj, delimiter *opener, delimiter *closer);
|
||||
|
||||
static int parse_inline(subject* subj, cmark_node * parent);
|
||||
|
||||
static void subject_from_buf(subject *e, strbuf *buffer,
|
||||
cmark_reference_map *refmap);
|
||||
static int subject_find_special_char(subject *subj);
|
||||
|
||||
// Build the link destination for an autolink: trim 'url', prefix
// "mailto:" when 'is_email' is set, and unescape HTML entities.
// Returns NULL when the trimmed url is empty.
static unsigned char *cmark_clean_autolink(chunk *url, int is_email)
{
	strbuf cleaned = GH_BUF_INIT;

	chunk_trim(url);

	if (url->len != 0) {
		if (is_email) {
			strbuf_puts(&cleaned, "mailto:");
		}

		houdini_unescape_html_f(&cleaned, url->data, url->len);
		return strbuf_detach(&cleaned);
	}

	return NULL;
}
|
||||
|
||||
// Allocate a link node whose only child is 'label' and which stores
// the given 'url' and 'title' pointers.  Returns NULL if the allocation
// fails, in which case 'label' is left untouched.
static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title)
{
	cmark_node *link = (cmark_node *)calloc(1, sizeof(*link));

	if (link == NULL) {
		return NULL;
	}

	link->type = CMARK_NODE_LINK;
	link->first_child = label;
	link->last_child = label;
	link->as.link.url = url;
	link->as.link.title = title;
	link->next = NULL;
	label->parent = link;

	return link;
}
|
||||
|
||||
// Create a title-less link node for an autolink; the destination is
// 'url' passed through cmark_clean_autolink().
static inline cmark_node* make_autolink(cmark_node* label, cmark_chunk url, int is_email)
{
	unsigned char *href = cmark_clean_autolink(&url, is_email);

	return make_link(label, href, NULL);
}
|
||||
|
||||
// Create an inline with a literal string value.
|
||||
static inline cmark_node* make_literal(cmark_node_type t, cmark_chunk s)
|
||||
{
|
||||
cmark_node * e = (cmark_node *)calloc(1, sizeof(*e));
|
||||
if(e != NULL) {
|
||||
e->type = t;
|
||||
e->as.literal = s;
|
||||
e->next = NULL;
|
||||
e->prev = NULL;
|
||||
e->parent = NULL;
|
||||
e->first_child = NULL;
|
||||
e->last_child = NULL;
|
||||
// These fields aren't used for inlines:
|
||||
e->start_line = 0;
|
||||
e->start_column = 0;
|
||||
e->end_line = 0;
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
// Create an inline with no value.
|
||||
static inline cmark_node* make_simple(cmark_node_type t)
|
||||
{
|
||||
cmark_node* e = (cmark_node *)calloc(1, sizeof(*e));
|
||||
if(e != NULL) {
|
||||
e->type = t;
|
||||
e->next = NULL;
|
||||
e->prev = NULL;
|
||||
e->parent = NULL;
|
||||
e->first_child = NULL;
|
||||
e->last_child = NULL;
|
||||
// These fields aren't used for inlines:
|
||||
e->start_line = 0;
|
||||
e->start_column = 0;
|
||||
e->end_line = 0;
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
// Return a freshly allocated copy of the NUL-terminated string 'buf',
// or NULL when 'buf' is NULL or the allocation fails.
static unsigned char *bufdup(const unsigned char *buf)
{
	unsigned char *copy;
	int n;

	if (buf == NULL) {
		return NULL;
	}

	n = strlen((char *)buf);
	copy = (unsigned char *)calloc(n + 1, sizeof(*copy));
	if (copy == NULL) {
		return NULL;
	}

	// n + 1 so the terminating NUL is copied too
	memcpy(copy, buf, n + 1);
	return copy;
}
|
||||
|
||||
static void subject_from_buf(subject *e, strbuf *buffer,
|
||||
cmark_reference_map *refmap)
|
||||
{
|
||||
e->input.data = buffer->ptr;
|
||||
e->input.len = buffer->size;
|
||||
e->input.alloc = 0;
|
||||
e->pos = 0;
|
||||
e->refmap = refmap;
|
||||
e->last_delim = NULL;
|
||||
|
||||
chunk_rtrim(&e->input);
|
||||
}
|
||||
|
||||
// Predicate for take_while(): 1 if 'c' is a backtick, 0 otherwise.
static inline int isbacktick(int c)
{
	return (c == '`') ? 1 : 0;
}
|
||||
|
||||
// Return the byte at the current position without consuming it,
// or 0 when the position is at or past the end of the input.
static inline unsigned char peek_char(subject *subj)
{
	if (subj->pos >= subj->input.len) {
		return 0;
	}

	return subj->input.data[subj->pos];
}
|
||||
|
||||
// Return the input byte at index 'pos'.  No bounds check is made;
// the caller must pass a valid index.
static inline unsigned char peek_at(subject *subj, int pos)
{
	return *(subj->input.data + pos);
}
|
||||
|
||||
// Return true if there are more characters in the subject.
|
||||
static inline int is_eof(subject* subj)
|
||||
{
|
||||
return (subj->pos >= subj->input.len);
|
||||
}
|
||||
|
||||
// Advance the subject by one byte.  Doesn't check for eof.
// The expansion is fully parenthesised so the macro behaves as a single
// expression wherever it is used.
#define advance(subj) ((subj)->pos += 1)
|
||||
|
||||
// Take characters while a predicate holds, and return a string.
|
||||
static inline chunk take_while(subject* subj, int (*f)(int))
|
||||
{
|
||||
unsigned char c;
|
||||
int startpos = subj->pos;
|
||||
int len = 0;
|
||||
|
||||
while ((c = peek_char(subj)) && (*f)(c)) {
|
||||
advance(subj);
|
||||
len++;
|
||||
}
|
||||
|
||||
return chunk_dup(&subj->input, startpos, len);
|
||||
}
|
||||
|
||||
// Try to process a backtick code span that began with a
|
||||
// span of ticks of length openticklength length (already
|
||||
// parsed). Return 0 if you don't find matching closing
|
||||
// backticks, otherwise return the position in the subject
|
||||
// after the closing backticks.
|
||||
static int scan_to_closing_backticks(subject* subj, int openticklength)
|
||||
{
|
||||
// read non backticks
|
||||
unsigned char c;
|
||||
while ((c = peek_char(subj)) && c != '`') {
|
||||
advance(subj);
|
||||
}
|
||||
if (is_eof(subj)) {
|
||||
return 0; // did not find closing ticks, return 0
|
||||
}
|
||||
int numticks = 0;
|
||||
while (peek_char(subj) == '`') {
|
||||
advance(subj);
|
||||
numticks++;
|
||||
}
|
||||
if (numticks != openticklength){
|
||||
return(scan_to_closing_backticks(subj, openticklength));
|
||||
}
|
||||
return (subj->pos);
|
||||
}
|
||||
|
||||
// Parse backtick code section or raw backticks, return an inline.
|
||||
// Assumes that the subject has a backtick at the current position.
|
||||
static cmark_node* handle_backticks(subject *subj)
|
||||
{
|
||||
chunk openticks = take_while(subj, isbacktick);
|
||||
int startpos = subj->pos;
|
||||
int endpos = scan_to_closing_backticks(subj, openticks.len);
|
||||
|
||||
if (endpos == 0) { // not found
|
||||
subj->pos = startpos; // rewind
|
||||
return make_str(openticks);
|
||||
} else {
|
||||
strbuf buf = GH_BUF_INIT;
|
||||
|
||||
strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
|
||||
strbuf_trim(&buf);
|
||||
strbuf_normalize_whitespace(&buf);
|
||||
|
||||
return make_code(chunk_buf_detach(&buf));
|
||||
}
|
||||
}
|
||||
|
||||
// Scan ***, **, or * and return number scanned, or 0.
|
||||
// Advances position.
|
||||
static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
|
||||
{
|
||||
int numdelims = 0;
|
||||
unsigned char char_before, char_after;
|
||||
|
||||
char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1);
|
||||
while (peek_char(subj) == c) {
|
||||
numdelims++;
|
||||
advance(subj);
|
||||
}
|
||||
char_after = peek_char(subj);
|
||||
*can_open = numdelims > 0 && !isspace(char_after);
|
||||
*can_close = numdelims > 0 && !isspace(char_before);
|
||||
if (c == '_') {
|
||||
*can_open = *can_open && !isalnum(char_before);
|
||||
*can_close = *can_close && !isalnum(char_after);
|
||||
}
|
||||
return numdelims;
|
||||
}
|
||||
|
||||
/*
|
||||
static void print_delimiters(subject *subj)
|
||||
{
|
||||
delimiter *delim;
|
||||
delim = subj->last_delim;
|
||||
while (delim != NULL) {
|
||||
printf("Item at %p: %d %d %d next(%p) prev(%p)\n",
|
||||
delim, delim->delim_char,
|
||||
delim->can_open, delim->can_close,
|
||||
delim->next, delim->previous);
|
||||
delim = delim->previous;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Unlink 'delim' from the subject's delimiter stack and free it.
// A NULL 'delim' is ignored.
static void remove_delimiter(subject *subj, delimiter *delim)
{
	delimiter *older, *newer;

	if (delim == NULL) {
		return;
	}

	older = delim->previous;
	newer = delim->next;

	if (newer != NULL) {
		newer->previous = older;
	} else {
		// end of list:
		assert(delim == subj->last_delim);
		subj->last_delim = older;
	}

	if (older != NULL) {
		older->next = newer;
	}

	free(delim);
}
|
||||
|
||||
// Push a new entry on top of the subject's delimiter stack, recording
// the current position.  Does nothing if the allocation fails.
static void push_delimiter(subject *subj, unsigned char c, bool can_open,
			   bool can_close, cmark_node *inl_text)
{
	delimiter *top = subj->last_delim;
	delimiter *entry = (delimiter*)malloc(sizeof(delimiter));

	if (entry == NULL) {
		return;
	}

	entry->delim_char = c;
	entry->can_open = can_open;
	entry->can_close = can_close;
	entry->inl_text = inl_text;
	entry->position = subj->pos;

	// link above the previous top of the stack
	entry->previous = top;
	entry->next = NULL;
	if (top != NULL) {
		top->next = entry;
	}

	subj->last_delim = entry;
}
|
||||
|
||||
// Parse strong/emph or a fallback.
|
||||
// Assumes the subject has '_' or '*' at the current position.
|
||||
static cmark_node* handle_strong_emph(subject* subj, unsigned char c)
|
||||
{
|
||||
int numdelims;
|
||||
cmark_node * inl_text;
|
||||
bool can_open, can_close;
|
||||
|
||||
numdelims = scan_delims(subj, c, &can_open, &can_close);
|
||||
|
||||
inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
|
||||
|
||||
if (can_open || can_close) {
|
||||
push_delimiter(subj, c, can_open, can_close, inl_text);
|
||||
}
|
||||
|
||||
return inl_text;
|
||||
}
|
||||
|
||||
// Resolve emphasis for every delimiter stacked above 'start_delim'
// (the whole stack when 'start_delim' is NULL), then pop and free those
// delimiters.
static void process_emphasis(subject *subj, delimiter *start_delim)
{
	delimiter *cur = subj->last_delim;

	// move back to first relevant delim.
	while (cur != NULL && cur->previous != start_delim) {
		cur = cur->previous;
	}

	// now move forward, looking for closers, and handling each
	while (cur != NULL) {
		delimiter *match = NULL;
		const bool emph_char =
			cur->delim_char == '*' || cur->delim_char == '_';

		if (cur->can_close && emph_char) {
			delimiter *cand;

			// Now look backwards for first matching opener:
			for (cand = cur->previous;
			     cand != NULL && cand != start_delim;
			     cand = cand->previous) {
				if (cand->can_open &&
				    cand->delim_char == cur->delim_char) {
					match = cand;
					break;
				}
			}
		}

		if (match != NULL) {
			cur = S_insert_emph(subj, match, cur);
		} else {
			cur = cur->next;
		}
	}

	// free all delimiters in list until start_delim:
	while (subj->last_delim != start_delim) {
		remove_delimiter(subj, subj->last_delim);
	}
}
|
||||
|
||||
static delimiter*
|
||||
S_insert_emph(subject *subj, delimiter *opener, delimiter *closer)
|
||||
{
|
||||
delimiter *delim, *tmp_delim;
|
||||
int use_delims;
|
||||
cmark_node *opener_inl = opener->inl_text;
|
||||
cmark_node *closer_inl = closer->inl_text;
|
||||
int opener_num_chars = opener_inl->as.literal.len;
|
||||
int closer_num_chars = closer_inl->as.literal.len;
|
||||
cmark_node *tmp, *emph, *first_child, *last_child;
|
||||
|
||||
// calculate the actual number of characters used from this closer
|
||||
if (closer_num_chars < 3 || opener_num_chars < 3) {
|
||||
use_delims = closer_num_chars <= opener_num_chars ?
|
||||
closer_num_chars : opener_num_chars;
|
||||
} else { // closer and opener both have >= 3 characters
|
||||
use_delims = closer_num_chars % 2 == 0 ? 2 : 1;
|
||||
}
|
||||
|
||||
// remove used characters from associated inlines.
|
||||
opener_num_chars -= use_delims;
|
||||
closer_num_chars -= use_delims;
|
||||
opener_inl->as.literal.len = opener_num_chars;
|
||||
closer_inl->as.literal.len = closer_num_chars;
|
||||
|
||||
// free delimiters between opener and closer
|
||||
delim = closer->previous;
|
||||
while (delim != NULL && delim != opener) {
|
||||
tmp_delim = delim->previous;
|
||||
remove_delimiter(subj, delim);
|
||||
delim = tmp_delim;
|
||||
}
|
||||
|
||||
first_child = opener_inl->next;
|
||||
last_child = closer_inl->prev;
|
||||
|
||||
// if opener has 0 characters, remove it and its associated inline
|
||||
if (opener_num_chars == 0) {
|
||||
// replace empty opener inline with emph
|
||||
chunk_free(&(opener_inl->as.literal));
|
||||
emph = opener_inl;
|
||||
emph->type = use_delims == 1 ? NODE_EMPH : NODE_STRONG;
|
||||
// remove opener from list
|
||||
remove_delimiter(subj, opener);
|
||||
}
|
||||
else {
|
||||
// create new emph or strong, and splice it in to our inlines
|
||||
// between the opener and closer
|
||||
emph = use_delims == 1 ? make_emph() : make_strong();
|
||||
emph->parent = opener_inl->parent;
|
||||
emph->prev = opener_inl;
|
||||
opener_inl->next = emph;
|
||||
}
|
||||
|
||||
// push children below emph
|
||||
emph->next = closer_inl;
|
||||
closer_inl->prev = emph;
|
||||
emph->first_child = first_child;
|
||||
emph->last_child = last_child;
|
||||
|
||||
// fix children pointers
|
||||
first_child->prev = NULL;
|
||||
last_child->next = NULL;
|
||||
for (tmp = first_child; tmp != NULL; tmp = tmp->next) {
|
||||
tmp->parent = emph;
|
||||
}
|
||||
|
||||
// if closer has 0 characters, remove it and its associated inline
|
||||
if (closer_num_chars == 0) {
|
||||
// remove empty closer inline
|
||||
cmark_node_free(closer_inl);
|
||||
// remove closer from list
|
||||
tmp_delim = closer->next;
|
||||
remove_delimiter(subj, closer);
|
||||
closer = tmp_delim;
|
||||
}
|
||||
|
||||
return closer;
|
||||
}
|
||||
|
||||
// Parse backslash-escape or just a backslash, returning an inline.
|
||||
static cmark_node* handle_backslash(subject *subj)
|
||||
{
|
||||
advance(subj);
|
||||
unsigned char nextchar = peek_char(subj);
|
||||
if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped
|
||||
advance(subj);
|
||||
return make_str(chunk_dup(&subj->input, subj->pos - 1, 1));
|
||||
} else if (nextchar == '\n') {
|
||||
advance(subj);
|
||||
return make_linebreak();
|
||||
} else {
|
||||
return make_str(chunk_literal("\\"));
|
||||
}
|
||||
}
|
||||
|
||||
// Parse an entity or a regular "&" string.
|
||||
// Assumes the subject has an '&' character at the current position.
|
||||
static cmark_node* handle_entity(subject* subj)
|
||||
{
|
||||
strbuf ent = GH_BUF_INIT;
|
||||
size_t len;
|
||||
|
||||
advance(subj);
|
||||
|
||||
len = houdini_unescape_ent(&ent,
|
||||
subj->input.data + subj->pos,
|
||||
subj->input.len - subj->pos
|
||||
);
|
||||
|
||||
if (len == 0)
|
||||
return make_str(chunk_literal("&"));
|
||||
|
||||
subj->pos += len;
|
||||
return make_str(chunk_buf_detach(&ent));
|
||||
}
|
||||
|
||||
// Like make_str, but parses entities.
|
||||
// Returns an inline sequence consisting of str and entity elements.
|
||||
static cmark_node *make_str_with_entities(chunk *content)
|
||||
{
|
||||
strbuf unescaped = GH_BUF_INIT;
|
||||
|
||||
if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
|
||||
return make_str(chunk_buf_detach(&unescaped));
|
||||
} else {
|
||||
return make_str(*content);
|
||||
}
|
||||
}
|
||||
|
||||
// Clean a URL: remove surrounding whitespace and surrounding <>,
|
||||
// and remove \ that escape punctuation.
|
||||
unsigned char *cmark_clean_url(chunk *url)
|
||||
{
|
||||
strbuf buf = GH_BUF_INIT;
|
||||
|
||||
chunk_trim(url);
|
||||
|
||||
if (url->len == 0)
|
||||
return NULL;
|
||||
|
||||
if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
|
||||
houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
|
||||
} else {
|
||||
houdini_unescape_html_f(&buf, url->data, url->len);
|
||||
}
|
||||
|
||||
strbuf_unescape(&buf);
|
||||
return strbuf_detach(&buf);
|
||||
}
|
||||
|
||||
// Normalize a raw link title.
// Returns NULL for an empty title. If the title is wrapped in a matching
// pair of delimiters -- '...', "..." or (...) -- the delimiters are
// dropped. The text is then passed through houdini_unescape_html_f and
// strbuf_unescape, and the detached buffer is returned.
//
// The delimiter pair is only stripped when the title is at least two bytes
// long. Without that guard a one-byte title consisting of a lone quote
// would match itself as both opener and closer and a length of -1 would be
// handed to the unescape routine.
unsigned char *cmark_clean_title(chunk *title)
{
    strbuf buf = GH_BUF_INIT;
    unsigned char first, last;

    if (title->len == 0) {
        return NULL;
    }

    first = title->data[0];
    last = title->data[title->len - 1];

    // remove surrounding quotes if any:
    if (title->len >= 2 &&
        ((first == '\'' && last == '\'') ||
         (first == '(' && last == ')') ||
         (first == '"' && last == '"'))) {
        houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
    } else {
        houdini_unescape_html_f(&buf, title->data, title->len);
    }

    strbuf_unescape(&buf);
    return strbuf_detach(&buf);
}
|
||||
|
||||
// Parse an autolink or HTML tag.
|
||||
// Assumes the subject has a '<' character at the current position.
|
||||
static cmark_node* handle_pointy_brace(subject* subj)
|
||||
{
|
||||
int matchlen = 0;
|
||||
chunk contents;
|
||||
|
||||
advance(subj); // advance past first <
|
||||
|
||||
// first try to match a URL autolink
|
||||
matchlen = scan_autolink_uri(&subj->input, subj->pos);
|
||||
if (matchlen > 0) {
|
||||
contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
|
||||
subj->pos += matchlen;
|
||||
|
||||
return make_autolink(
|
||||
make_str_with_entities(&contents),
|
||||
contents, 0
|
||||
);
|
||||
}
|
||||
|
||||
// next try to match an email autolink
|
||||
matchlen = scan_autolink_email(&subj->input, subj->pos);
|
||||
if (matchlen > 0) {
|
||||
contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
|
||||
subj->pos += matchlen;
|
||||
|
||||
return make_autolink(
|
||||
make_str_with_entities(&contents),
|
||||
contents, 1
|
||||
);
|
||||
}
|
||||
|
||||
// finally, try to match an html tag
|
||||
matchlen = scan_html_tag(&subj->input, subj->pos);
|
||||
if (matchlen > 0) {
|
||||
contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
|
||||
subj->pos += matchlen;
|
||||
return make_raw_html(contents);
|
||||
}
|
||||
|
||||
// if nothing matches, just return the opening <:
|
||||
return make_str(chunk_literal("<"));
|
||||
}
|
||||
|
||||
// Parse a link label. Returns 1 if successful.
|
||||
// Note: unescaped brackets are not allowed in labels.
|
||||
// The label begins with `[` and ends with the first `]` character
|
||||
// encountered. Backticks in labels do not start code spans.
|
||||
static int link_label(subject* subj, chunk *raw_label)
|
||||
{
|
||||
int startpos = subj->pos;
|
||||
int length = 0;
|
||||
unsigned char c;
|
||||
|
||||
// advance past [
|
||||
if (peek_char(subj) == '[') {
|
||||
advance(subj);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
while ((c = peek_char(subj)) && c != '[' && c != ']') {
|
||||
if (c == '\\') {
|
||||
advance(subj);
|
||||
length++;
|
||||
if (ispunct(peek_char(subj))) {
|
||||
advance(subj);
|
||||
length++;
|
||||
}
|
||||
} else {
|
||||
advance(subj);
|
||||
length++;
|
||||
}
|
||||
if (length > MAX_LINK_LABEL_LENGTH) {
|
||||
goto noMatch;
|
||||
}
|
||||
}
|
||||
|
||||
if (c == ']') { // match found
|
||||
*raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
|
||||
advance(subj); // advance past ]
|
||||
return 1;
|
||||
}
|
||||
|
||||
noMatch:
|
||||
subj->pos = startpos; // rewind
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
// Return a link, an image, or a literal close bracket.
|
||||
static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
|
||||
{
|
||||
int initial_pos;
|
||||
int starturl, endurl, starttitle, endtitle, endall;
|
||||
int n;
|
||||
int sps;
|
||||
cmark_reference *ref;
|
||||
bool is_image = false;
|
||||
chunk urlchunk, titlechunk;
|
||||
unsigned char *url, *title;
|
||||
delimiter *opener;
|
||||
delimiter *tmp_delim;
|
||||
cmark_node *link_text;
|
||||
cmark_node *inl;
|
||||
chunk raw_label;
|
||||
int found_label;
|
||||
|
||||
advance(subj); // advance past ]
|
||||
initial_pos = subj->pos;
|
||||
|
||||
// look through list of delimiters for a [ or !
|
||||
opener = subj->last_delim;
|
||||
while (opener) {
|
||||
if (opener->delim_char == '[' || opener->delim_char == '!') {
|
||||
break;
|
||||
}
|
||||
opener = opener->previous;
|
||||
}
|
||||
|
||||
if (opener == NULL) {
|
||||
return make_str(chunk_literal("]"));
|
||||
}
|
||||
|
||||
// If we got here, we matched a potential link/image text.
|
||||
is_image = opener->delim_char == '!';
|
||||
link_text = opener->inl_text->next;
|
||||
|
||||
// Now we check to see if it's a link/image.
|
||||
|
||||
// First, look for an inline link.
|
||||
if (peek_char(subj) == '(' &&
|
||||
((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
|
||||
((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
|
||||
|
||||
// try to parse an explicit link:
|
||||
starturl = subj->pos + 1 + sps; // after (
|
||||
endurl = starturl + n;
|
||||
starttitle = endurl + scan_spacechars(&subj->input, endurl);
|
||||
|
||||
// ensure there are spaces btw url and title
|
||||
endtitle = (starttitle == endurl) ? starttitle :
|
||||
starttitle + scan_link_title(&subj->input, starttitle);
|
||||
|
||||
endall = endtitle + scan_spacechars(&subj->input, endtitle);
|
||||
|
||||
if (peek_at(subj, endall) == ')') {
|
||||
subj->pos = endall + 1;
|
||||
|
||||
urlchunk = chunk_dup(&subj->input, starturl, endurl - starturl);
|
||||
titlechunk = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
|
||||
url = cmark_clean_url(&urlchunk);
|
||||
title = cmark_clean_title(&titlechunk);
|
||||
chunk_free(&urlchunk);
|
||||
chunk_free(&titlechunk);
|
||||
goto match;
|
||||
|
||||
} else {
|
||||
goto noMatch;
|
||||
}
|
||||
}
|
||||
|
||||
// Next, look for a following [link label] that matches in refmap.
|
||||
// skip spaces
|
||||
subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos);
|
||||
raw_label = chunk_literal("");
|
||||
found_label = link_label(subj, &raw_label);
|
||||
if (!found_label || raw_label.len == 0) {
|
||||
chunk_free(&raw_label);
|
||||
raw_label = chunk_dup(&subj->input, opener->position,
|
||||
initial_pos - opener->position - 1);
|
||||
}
|
||||
|
||||
if (!found_label) {
|
||||
// If we have a shortcut reference link, back up
|
||||
// to before the spacse we skipped.
|
||||
subj->pos = initial_pos;
|
||||
}
|
||||
|
||||
ref = cmark_reference_lookup(subj->refmap, &raw_label);
|
||||
chunk_free(&raw_label);
|
||||
|
||||
if (ref != NULL) { // found
|
||||
url = bufdup(ref->url);
|
||||
title = bufdup(ref->title);
|
||||
goto match;
|
||||
} else {
|
||||
goto noMatch;
|
||||
}
|
||||
|
||||
noMatch:
|
||||
// If we fall through to here, it means we didn't match a link:
|
||||
remove_delimiter(subj, opener); // remove this opener from delimiter list
|
||||
subj->pos = initial_pos;
|
||||
return make_str(chunk_literal("]"));
|
||||
|
||||
match:
|
||||
inl = opener->inl_text;
|
||||
inl->type = is_image ? NODE_IMAGE : NODE_LINK;
|
||||
chunk_free(&inl->as.literal);
|
||||
inl->first_child = link_text;
|
||||
process_emphasis(subj, opener->previous);
|
||||
inl->as.link.url = url;
|
||||
inl->as.link.title = title;
|
||||
inl->next = NULL;
|
||||
if (link_text) {
|
||||
cmark_node *tmp;
|
||||
link_text->prev = NULL;
|
||||
for (tmp = link_text; tmp->next != NULL; tmp = tmp->next) {
|
||||
tmp->parent = inl;
|
||||
}
|
||||
tmp->parent = inl;
|
||||
inl->last_child = tmp;
|
||||
}
|
||||
parent->last_child = inl;
|
||||
|
||||
// process_emphasis will remove this delimiter and all later ones.
|
||||
// Now, if we have a link, we also want to remove earlier link
|
||||
// delimiters. (This code can be removed if we decide to allow links
|
||||
// inside links.)
|
||||
if (!is_image) {
|
||||
opener = subj->last_delim;
|
||||
while (opener != NULL) {
|
||||
tmp_delim = opener->previous;
|
||||
if (opener->delim_char == '[') {
|
||||
remove_delimiter(subj, opener);
|
||||
}
|
||||
opener = tmp_delim;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Parse a hard or soft linebreak, returning an inline.
|
||||
// Assumes the subject has a newline at the current position.
|
||||
static cmark_node* handle_newline(subject *subj)
|
||||
{
|
||||
int nlpos = subj->pos;
|
||||
// skip over newline
|
||||
advance(subj);
|
||||
// skip spaces at beginning of line
|
||||
while (peek_char(subj) == ' ') {
|
||||
advance(subj);
|
||||
}
|
||||
if (nlpos > 1 &&
|
||||
peek_at(subj, nlpos - 1) == ' ' &&
|
||||
peek_at(subj, nlpos - 2) == ' ') {
|
||||
return make_linebreak();
|
||||
} else {
|
||||
return make_softbreak();
|
||||
}
|
||||
}
|
||||
|
||||
// Return the index of the next special character in the subject's input,
// searching from the position AFTER the current one (the current character
// is never examined). Returns the input length when no special character
// is found.
static int subject_find_special_char(subject *subj)
{
    // Byte-indexed lookup table; non-zero entries mark the special
    // characters "\n\\`&_*[]<!"
    static const int8_t SPECIAL_CHARS[256] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

    int idx;

    for (idx = subj->pos + 1; idx < subj->input.len; idx++) {
        if (SPECIAL_CHARS[subj->input.data[idx]]) {
            return idx;
        }
    }

    return subj->input.len;
}
|
||||
|
||||
// Parse an inline, advancing subject, and add it as a child of parent.
|
||||
// Return 0 if no inline can be parsed, 1 otherwise.
|
||||
static int parse_inline(subject* subj, cmark_node * parent)
|
||||
{
|
||||
cmark_node* new_inl = NULL;
|
||||
chunk contents;
|
||||
unsigned char c;
|
||||
int endpos;
|
||||
c = peek_char(subj);
|
||||
if (c == 0) {
|
||||
return 0;
|
||||
}
|
||||
switch(c){
|
||||
case '\n':
|
||||
new_inl = handle_newline(subj);
|
||||
break;
|
||||
case '`':
|
||||
new_inl = handle_backticks(subj);
|
||||
break;
|
||||
case '\\':
|
||||
new_inl = handle_backslash(subj);
|
||||
break;
|
||||
case '&':
|
||||
new_inl = handle_entity(subj);
|
||||
break;
|
||||
case '<':
|
||||
new_inl = handle_pointy_brace(subj);
|
||||
break;
|
||||
case '*':
|
||||
case '_':
|
||||
new_inl = handle_strong_emph(subj, c);
|
||||
break;
|
||||
case '[':
|
||||
advance(subj);
|
||||
new_inl = make_str(chunk_literal("["));
|
||||
push_delimiter(subj, '[', true, false, new_inl);
|
||||
break;
|
||||
case ']':
|
||||
new_inl = handle_close_bracket(subj, parent);
|
||||
break;
|
||||
case '!':
|
||||
advance(subj);
|
||||
if (peek_char(subj) == '[') {
|
||||
advance(subj);
|
||||
new_inl = make_str(chunk_literal("!["));
|
||||
push_delimiter(subj, '!', false, true, new_inl);
|
||||
} else {
|
||||
new_inl = make_str(chunk_literal("!"));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
endpos = subject_find_special_char(subj);
|
||||
contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
|
||||
subj->pos = endpos;
|
||||
|
||||
// if we're at a newline, strip trailing spaces.
|
||||
if (peek_char(subj) == '\n') {
|
||||
chunk_rtrim(&contents);
|
||||
}
|
||||
|
||||
new_inl = make_str(contents);
|
||||
}
|
||||
if (new_inl != NULL) {
|
||||
cmark_node_append_child(parent, new_inl);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Parse inlines from parent's string_content, adding as children of parent.
|
||||
extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap)
|
||||
{
|
||||
subject subj;
|
||||
subject_from_buf(&subj, &parent->string_content, refmap);
|
||||
|
||||
while (!is_eof(&subj) && parse_inline(&subj, parent)) ;
|
||||
|
||||
process_emphasis(&subj, NULL);
|
||||
}
|
||||
|
||||
// Parse zero or more space characters, including at most one newline.
|
||||
static void spnl(subject* subj)
|
||||
{
|
||||
bool seen_newline = false;
|
||||
while (peek_char(subj) == ' ' ||
|
||||
(!seen_newline &&
|
||||
(seen_newline = peek_char(subj) == '\n'))) {
|
||||
advance(subj);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse reference. Assumes string begins with '[' character.
|
||||
// Modify refmap if a reference is encountered.
|
||||
// Return 0 if no reference found, otherwise position of subject
|
||||
// after reference is parsed.
|
||||
int cmark_parse_reference_inline(strbuf *input, cmark_reference_map *refmap)
|
||||
{
|
||||
subject subj;
|
||||
|
||||
chunk lab;
|
||||
chunk url;
|
||||
chunk title;
|
||||
|
||||
int matchlen = 0;
|
||||
int beforetitle;
|
||||
|
||||
subject_from_buf(&subj, input, NULL);
|
||||
|
||||
// parse label:
|
||||
if (!link_label(&subj, &lab))
|
||||
return 0;
|
||||
|
||||
// colon:
|
||||
if (peek_char(&subj) == ':') {
|
||||
advance(&subj);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// parse link url:
|
||||
spnl(&subj);
|
||||
matchlen = scan_link_url(&subj.input, subj.pos);
|
||||
if (matchlen) {
|
||||
url = chunk_dup(&subj.input, subj.pos, matchlen);
|
||||
subj.pos += matchlen;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// parse optional link_title
|
||||
beforetitle = subj.pos;
|
||||
spnl(&subj);
|
||||
matchlen = scan_link_title(&subj.input, subj.pos);
|
||||
if (matchlen) {
|
||||
title = chunk_dup(&subj.input, subj.pos, matchlen);
|
||||
subj.pos += matchlen;
|
||||
} else {
|
||||
subj.pos = beforetitle;
|
||||
title = chunk_literal("");
|
||||
}
|
||||
// parse final spaces and newline:
|
||||
while (peek_char(&subj) == ' ') {
|
||||
advance(&subj);
|
||||
}
|
||||
if (peek_char(&subj) == '\n') {
|
||||
advance(&subj);
|
||||
} else if (peek_char(&subj) != 0) {
|
||||
return 0;
|
||||
}
|
||||
// insert reference into refmap
|
||||
cmark_reference_create(refmap, &lab, &url, &title);
|
||||
return subj.pos;
|
||||
}
|
@ -1,19 +0,0 @@
|
||||
#ifndef CMARK_INLINES_H
|
||||
#define CMARK_INLINES_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
unsigned char *cmark_clean_url(cmark_chunk *url);
|
||||
unsigned char *cmark_clean_title(cmark_chunk *title);
|
||||
|
||||
void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap);
|
||||
|
||||
int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@ -1,114 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include "config.h"
|
||||
#include "cmark.h"
|
||||
#include "debug.h"
|
||||
#include "bench.h"
|
||||
|
||||
// Print the command-line usage summary to stdout.
void print_usage()
{
    static const char *const usage_lines[] = {
        "Usage: cmark [FILE*]\n",
        "Options: --help, -h Print usage information\n",
        " --ast Print AST instead of HTML\n",
        " --version Print version\n",
    };
    size_t line;

    for (line = 0; line < sizeof(usage_lines) / sizeof(usage_lines[0]); line++) {
        printf("%s", usage_lines[line]);
    }
}
|
||||
|
||||
// Render `document` and write the result to stdout: as an AST dump when
// `ast` is true, as HTML otherwise. The rendered string is freed here
// after printing.
static void print_document(cmark_node *document, bool ast)
{
    char *rendered = ast ? cmark_render_ast(document)
                         : cmark_render_html(document);

    printf("%s", rendered);
    free(rendered);
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, numfps = 0;
|
||||
bool ast = false;
|
||||
int *files;
|
||||
char buffer[4096];
|
||||
cmark_parser *parser;
|
||||
size_t bytes;
|
||||
cmark_node *document;
|
||||
|
||||
parser = cmark_parser_new();
|
||||
files = (int *)malloc(argc * sizeof(*files));
|
||||
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "--version") == 0) {
|
||||
printf("cmark %s", CMARK_VERSION);
|
||||
printf(" - CommonMark converter (c) 2014 John MacFarlane\n");
|
||||
exit(0);
|
||||
} else if ((strcmp(argv[i], "--help") == 0) ||
|
||||
(strcmp(argv[i], "-h") == 0)) {
|
||||
print_usage();
|
||||
exit(0);
|
||||
} else if (strcmp(argv[i], "--ast") == 0) {
|
||||
ast = true;
|
||||
} else if (*argv[i] == '-') {
|
||||
print_usage();
|
||||
exit(1);
|
||||
} else { // treat as file argument
|
||||
files[numfps++] = i;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < numfps; i++) {
|
||||
FILE *fp = fopen(argv[files[i]], "r");
|
||||
if (fp == NULL) {
|
||||
fprintf(stderr, "Error opening file %s: %s\n",
|
||||
argv[files[i]], strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
start_timer();
|
||||
while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
|
||||
cmark_parser_feed(parser, buffer, bytes);
|
||||
if (bytes < sizeof(buffer)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
end_timer("processing lines");
|
||||
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
if (numfps == 0) {
|
||||
/*
|
||||
document = cmark_parse_file(stdin);
|
||||
print_document(document, ast);
|
||||
exit(0);
|
||||
*/
|
||||
|
||||
while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
|
||||
cmark_parser_feed(parser, buffer, bytes);
|
||||
if (bytes < sizeof(buffer)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
start_timer();
|
||||
document = cmark_parser_finish(parser);
|
||||
end_timer("finishing document");
|
||||
cmark_parser_free(parser);
|
||||
|
||||
start_timer();
|
||||
print_document(document, ast);
|
||||
end_timer("print_document");
|
||||
|
||||
start_timer();
|
||||
cmark_node_free(document);
|
||||
end_timer("free_blocks");
|
||||
|
||||
free(files);
|
||||
|
||||
return 0;
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user