diff --git a/hackage-server.cabal b/hackage-server.cabal index 4836c097..6b87d287 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -63,6 +63,12 @@ extra-source-files: tests/unpack-checks/LANGUAGE-GHC-9.2/Main.hs tests/unpack-checks/LANGUAGE-GHC-9.2/Setup.hs tests/unpack-checks/LANGUAGE-GHC-9.2/LANGUAGE-GHC.cabal + libstemmer_c/src_c/stem_ISO_8859_1_english.h + libstemmer_c/include/libstemmer.h + libstemmer_c/runtime/api.h + libstemmer_c/runtime/header.h + libstemmer_c/LICENSE + src/Distribution/Server/Util/NLP/LICENSE source-repository head type: git @@ -359,6 +365,7 @@ library lib-server Distribution.Server.Features.StaticFiles Distribution.Server.Features.ServerIntrospect Distribution.Server.Features.Sitemap + Distribution.Server.Util.NLP.Snowball if flag(debug) cpp-options: -DDEBUG @@ -418,8 +425,12 @@ library lib-server , xss-sanitize ^>= 0.3.6 if !flag(minimal) - build-depends: snowball ^>= 1.0 - , tokenize ^>= 0.3 + build-depends: tokenize ^>= 0.3 + + c-sources: libstemmer_c/src_c/stem_ISO_8859_1_english.c + libstemmer_c/runtime/api.c + libstemmer_c/runtime/utilities.c + libstemmer_c/libstemmer/libstemmer.c if flag(cabal-parsers) build-depends: cabal-parsers ^>= 0 diff --git a/libstemmer_c/LICENSE b/libstemmer_c/LICENSE new file mode 100644 index 00000000..8615bd9a --- /dev/null +++ b/libstemmer_c/LICENSE @@ -0,0 +1,24 @@ +Copyright (c) 2002, Richard Boulton +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/libstemmer_c/include/libstemmer.h b/libstemmer_c/include/libstemmer.h new file mode 100644 index 00000000..5acb564c --- /dev/null +++ b/libstemmer_c/include/libstemmer.h @@ -0,0 +1,20 @@ + +/* Make header file work when included from C++ */ +#ifdef __cplusplus +extern "C" { +#endif + +typedef unsigned char sb_symbol; + +struct SN_env * english_ISO_8859_1_stemmer_new(); + +void english_ISO_8859_1_stemmer_delete(struct SN_env * sn_env); + +const sb_symbol * english_ISO_8859_1_stemmer_stem(struct SN_env * sn_env, const sb_symbol * word, int size); + +int english_ISO_8859_1_stemmer_length(struct SN_env * sn_env); + +#ifdef __cplusplus +} +#endif + diff --git a/libstemmer_c/libstemmer/libstemmer.c b/libstemmer_c/libstemmer/libstemmer.c new file mode 100644 index 00000000..446f101a --- /dev/null +++ b/libstemmer_c/libstemmer/libstemmer.c @@ -0,0 +1,47 @@ + +#include +#include +#include "../include/libstemmer.h" +#include "../runtime/api.h" +#include "../src_c/stem_ISO_8859_1_english.h" + +extern struct SN_env * +english_ISO_8859_1_stemmer_new() +{ + struct SN_env * sn_env = english_ISO_8859_1_create_env(); + if (sn_env == NULL) + { + english_ISO_8859_1_stemmer_delete(sn_env); + return NULL; + } + + 
return sn_env; +} + +void +english_ISO_8859_1_stemmer_delete(struct SN_env * sn_env) +{ + if (sn_env == 0) return; + english_ISO_8859_1_close_env(sn_env); +} + +const sb_symbol * +english_ISO_8859_1_stemmer_stem(struct SN_env * sn_env, const sb_symbol * word, int size) +{ + int ret; + if (SN_set_current(sn_env, size, (const symbol *)(word))) + { + sn_env->l = 0; + return NULL; + } + ret = english_ISO_8859_1_stem(sn_env); + if (ret < 0) return NULL; + sn_env->p[sn_env->l] = 0; + return (const sb_symbol *)(sn_env->p); +} + +int +english_ISO_8859_1_stemmer_length(struct SN_env * sn_env) +{ + return sn_env->l; +} diff --git a/libstemmer_c/runtime/api.c b/libstemmer_c/runtime/api.c new file mode 100644 index 00000000..40039ef4 --- /dev/null +++ b/libstemmer_c/runtime/api.c @@ -0,0 +1,66 @@ + +#include /* for calloc, free */ +#include "header.h" + +extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size) +{ + struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env)); + if (z == NULL) return NULL; + z->p = create_s(); + if (z->p == NULL) goto error; + if (S_size) + { + int i; + z->S = (symbol * *) calloc(S_size, sizeof(symbol *)); + if (z->S == NULL) goto error; + + for (i = 0; i < S_size; i++) + { + z->S[i] = create_s(); + if (z->S[i] == NULL) goto error; + } + } + + if (I_size) + { + z->I = (int *) calloc(I_size, sizeof(int)); + if (z->I == NULL) goto error; + } + + if (B_size) + { + z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char)); + if (z->B == NULL) goto error; + } + + return z; +error: + SN_close_env(z, S_size); + return NULL; +} + +extern void SN_close_env(struct SN_env * z, int S_size) +{ + if (z == NULL) return; + if (S_size) + { + int i; + for (i = 0; i < S_size; i++) + { + lose_s(z->S[i]); + } + free(z->S); + } + free(z->I); + free(z->B); + if (z->p) lose_s(z->p); + free(z); +} + +extern int SN_set_current(struct SN_env * z, int size, const symbol * s) +{ + int err = replace_s(z, 0, z->l, size, s, NULL); + z->c = 0; + 
return err; +} + diff --git a/libstemmer_c/runtime/api.h b/libstemmer_c/runtime/api.h new file mode 100644 index 00000000..8b997f0c --- /dev/null +++ b/libstemmer_c/runtime/api.h @@ -0,0 +1,26 @@ + +typedef unsigned char symbol; + +/* Or replace 'char' above with 'short' for 16 bit characters. + + More precisely, replace 'char' with whatever type guarantees the + character width you need. Note however that sizeof(symbol) should divide + HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise + there is an alignment problem. In the unlikely event of a problem here, + consult Martin Porter. + +*/ + +struct SN_env { + symbol * p; + int c; int l; int lb; int bra; int ket; + symbol * * S; + int * I; + unsigned char * B; +}; + +extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size); +extern void SN_close_env(struct SN_env * z, int S_size); + +extern int SN_set_current(struct SN_env * z, int size, const symbol * s); + diff --git a/libstemmer_c/runtime/header.h b/libstemmer_c/runtime/header.h new file mode 100644 index 00000000..4d3078f5 --- /dev/null +++ b/libstemmer_c/runtime/header.h @@ -0,0 +1,58 @@ + +#include + +#include "api.h" + +#define MAXINT INT_MAX +#define MININT INT_MIN + +#define HEAD 2*sizeof(int) + +#define SIZE(p) ((int *)(p))[-1] +#define SET_SIZE(p, n) ((int *)(p))[-1] = n +#define CAPACITY(p) ((int *)(p))[-2] + +struct among +{ int s_size; /* number of chars in string */ + const symbol * s; /* search string */ + int substring_i;/* index to longest matching substring */ + int result; /* result of the lookup */ + int (* function)(struct SN_env *); +}; + +extern symbol * create_s(void); +extern void lose_s(symbol * p); + +extern int skip_utf8(const symbol * p, int c, int lb, int l, int n); + +extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); +extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); +extern int 
out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); +extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); + +extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); +extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); +extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); +extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); + +extern int eq_s(struct SN_env * z, int s_size, const symbol * s); +extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s); +extern int eq_v(struct SN_env * z, const symbol * p); +extern int eq_v_b(struct SN_env * z, const symbol * p); + +extern int find_among(struct SN_env * z, const struct among * v, int v_size); +extern int find_among_b(struct SN_env * z, const struct among * v, int v_size); + +extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment); +extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s); +extern int slice_from_v(struct SN_env * z, const symbol * p); +extern int slice_del(struct SN_env * z); + +extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s); +extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p); + +extern symbol * slice_to(struct SN_env * z, symbol * p); +extern symbol * assign_to(struct SN_env * z, symbol * p); + +extern void debug(struct SN_env * z, int number, int line_count); + diff --git a/libstemmer_c/runtime/utilities.c b/libstemmer_c/runtime/utilities.c new file mode 100644 index 00000000..1840f028 --- /dev/null +++ b/libstemmer_c/runtime/utilities.c @@ -0,0 +1,478 @@ + +#include +#include +#include + +#include "header.h" + +#define unless(C) if(!(C)) + +#define CREATE_SIZE 1 + +extern symbol * 
create_s(void) { + symbol * p; + void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)); + if (mem == NULL) return NULL; + p = (symbol *) (HEAD + (char *) mem); + CAPACITY(p) = CREATE_SIZE; + SET_SIZE(p, CREATE_SIZE); + return p; +} + +extern void lose_s(symbol * p) { + if (p == NULL) return; + free((char *) p - HEAD); +} + +/* + new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c + if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new + position, or 0 on failure. + + -- used to implement hop and next in the utf8 case. +*/ + +extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) { + int b; + if (n >= 0) { + for (; n > 0; n--) { + if (c >= l) return -1; + b = p[c++]; + if (b >= 0xC0) { /* 1100 0000 */ + while (c < l) { + b = p[c]; + if (b >= 0xC0 || b < 0x80) break; + /* break unless b is 10------ */ + c++; + } + } + } + } else { + for (; n < 0; n++) { + if (c <= lb) return -1; + b = p[--c]; + if (b >= 0x80) { /* 1000 0000 */ + while (c > lb) { + b = p[c]; + if (b >= 0xC0) break; /* 1100 0000 */ + c--; + } + } + } + } + return c; +} + +/* Code for character groupings: utf8 cases */ + +static int get_utf8(const symbol * p, int c, int l, int * slot) { + int b0, b1; + if (c >= l) return 0; + b0 = p[c++]; + if (b0 < 0xC0 || c == l) { /* 1100 0000 */ + * slot = b0; return 1; + } + b1 = p[c++]; + if (b0 < 0xE0 || c == l) { /* 1110 0000 */ + * slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2; + } + * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[c] & 0x3F); return 3; +} + +static int get_b_utf8(const symbol * p, int c, int lb, int * slot) { + int b0, b1; + if (c <= lb) return 0; + b0 = p[--c]; + if (b0 < 0x80 || c == lb) { /* 1000 0000 */ + * slot = b0; return 1; + } + b1 = p[--c]; + if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */ + * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2; + } + * slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3; +} + +extern int in_grouping_U(struct SN_env * z, 
const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + int w = get_utf8(z->p, z->c, z->l, & ch); + unless (w) return -1; + if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return w; + z->c += w; + } while (repeat); + return 0; +} + +extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + int w = get_b_utf8(z->p, z->c, z->lb, & ch); + unless (w) return -1; + if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return w; + z->c -= w; + } while (repeat); + return 0; +} + +extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + int w = get_utf8(z->p, z->c, z->l, & ch); + unless (w) return -1; + unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return w; + z->c += w; + } while (repeat); + return 0; +} + +extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + int w = get_b_utf8(z->p, z->c, z->lb, & ch); + unless (w) return -1; + unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return w; + z->c -= w; + } while (repeat); + return 0; +} + +/* Code for character groupings: non-utf8 cases */ + +extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + if (z->c >= z->l) return -1; + ch = z->p[z->c]; + if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return 1; + z->c++; + } while (repeat); + return 0; +} + +extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + if (z->c <= z->lb) return -1; + ch = z->p[z->c - 1]; + if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return 1; + z->c--; + } while (repeat); + return 0; +} + +extern int out_grouping(struct SN_env * z, 
const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + if (z->c >= z->l) return -1; + ch = z->p[z->c]; + unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return 1; + z->c++; + } while (repeat); + return 0; +} + +extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + if (z->c <= z->lb) return -1; + ch = z->p[z->c - 1]; + unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return 1; + z->c--; + } while (repeat); + return 0; +} + +extern int eq_s(struct SN_env * z, int s_size, const symbol * s) { + if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0; + z->c += s_size; return 1; +} + +extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) { + if (z->c - z->lb < s_size || memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0; + z->c -= s_size; return 1; +} + +extern int eq_v(struct SN_env * z, const symbol * p) { + return eq_s(z, SIZE(p), p); +} + +extern int eq_v_b(struct SN_env * z, const symbol * p) { + return eq_s_b(z, SIZE(p), p); +} + +extern int find_among(struct SN_env * z, const struct among * v, int v_size) { + + int i = 0; + int j = v_size; + + int c = z->c; int l = z->l; + symbol * q = z->p + c; + + const struct among * w; + + int common_i = 0; + int common_j = 0; + + int first_key_inspected = 0; + + while(1) { + int k = i + ((j - i) >> 1); + int diff = 0; + int common = common_i < common_j ? 
common_i : common_j; /* smaller */ + w = v + k; + { + int i2; for (i2 = common; i2 < w->s_size; i2++) { + if (c + common == l) { diff = -1; break; } + diff = q[common] - w->s[i2]; + if (diff != 0) break; + common++; + } + } + if (diff < 0) { j = k; common_j = common; } + else { i = k; common_i = common; } + if (j - i <= 1) { + if (i > 0) break; /* v->s has been inspected */ + if (j == i) break; /* only one item in v */ + + /* - but now we need to go round once more to get + v->s inspected. This looks messy, but is actually + the optimal approach. */ + + if (first_key_inspected) break; + first_key_inspected = 1; + } + } + while(1) { + w = v + i; + if (common_i >= w->s_size) { + z->c = c + w->s_size; + if (w->function == 0) return w->result; + { + int res = w->function(z); + z->c = c + w->s_size; + if (res) return w->result; + } + } + i = w->substring_i; + if (i < 0) return 0; + } +} + +/* find_among_b is for backwards processing. Same comments apply */ + +extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) { + + int i = 0; + int j = v_size; + + int c = z->c; int lb = z->lb; + symbol * q = z->p + c - 1; + + const struct among * w; + + int common_i = 0; + int common_j = 0; + + int first_key_inspected = 0; + + while(1) { + int k = i + ((j - i) >> 1); + int diff = 0; + int common = common_i < common_j ? 
common_i : common_j; + w = v + k; + { + int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) { + if (c - common == lb) { diff = -1; break; } + diff = q[- common] - w->s[i2]; + if (diff != 0) break; + common++; + } + } + if (diff < 0) { j = k; common_j = common; } + else { i = k; common_i = common; } + if (j - i <= 1) { + if (i > 0) break; + if (j == i) break; + if (first_key_inspected) break; + first_key_inspected = 1; + } + } + while(1) { + w = v + i; + if (common_i >= w->s_size) { + z->c = c - w->s_size; + if (w->function == 0) return w->result; + { + int res = w->function(z); + z->c = c - w->s_size; + if (res) return w->result; + } + } + i = w->substring_i; + if (i < 0) return 0; + } +} + + +/* Increase the size of the buffer pointed to by p to at least n symbols. + * If insufficient memory, returns NULL and frees the old buffer. + */ +static symbol * increase_size(symbol * p, int n) { + symbol * q; + int new_size = n + 20; + void * mem = realloc((char *) p - HEAD, + HEAD + (new_size + 1) * sizeof(symbol)); + if (mem == NULL) { + lose_s(p); + return NULL; + } + q = (symbol *) (HEAD + (char *)mem); + CAPACITY(q) = new_size; + return q; +} + +/* to replace symbols between c_bra and c_ket in z->p by the + s_size symbols at s. + Returns 0 on success, -1 on error. + Also, frees z->p (and sets it to NULL) on error. 
+*/ +extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr) +{ + int adjustment; + int len; + if (z->p == NULL) { + z->p = create_s(); + if (z->p == NULL) return -1; + } + adjustment = s_size - (c_ket - c_bra); + len = SIZE(z->p); + if (adjustment != 0) { + if (adjustment + len > CAPACITY(z->p)) { + z->p = increase_size(z->p, adjustment + len); + if (z->p == NULL) return -1; + } + memmove(z->p + c_ket + adjustment, + z->p + c_ket, + (len - c_ket) * sizeof(symbol)); + SET_SIZE(z->p, adjustment + len); + z->l += adjustment; + if (z->c >= c_ket) + z->c += adjustment; + else + if (z->c > c_bra) + z->c = c_bra; + } + unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol)); + if (adjptr != NULL) + *adjptr = adjustment; + return 0; +} + +static int slice_check(struct SN_env * z) { + + if (z->bra < 0 || + z->bra > z->ket || + z->ket > z->l || + z->p == NULL || + z->l > SIZE(z->p)) /* this line could be removed */ + { +#if 0 + fprintf(stderr, "faulty slice operation:\n"); + debug(z, -1, 0); +#endif + return -1; + } + return 0; +} + +extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) { + if (slice_check(z)) return -1; + return replace_s(z, z->bra, z->ket, s_size, s, NULL); +} + +extern int slice_from_v(struct SN_env * z, const symbol * p) { + return slice_from_s(z, SIZE(p), p); +} + +extern int slice_del(struct SN_env * z) { + return slice_from_s(z, 0, 0); +} + +extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) { + int adjustment; + if (replace_s(z, bra, ket, s_size, s, &adjustment)) + return -1; + if (bra <= z->bra) z->bra += adjustment; + if (bra <= z->ket) z->ket += adjustment; + return 0; +} + +extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) { + int adjustment; + if (replace_s(z, bra, ket, SIZE(p), p, &adjustment)) + return -1; + if (bra <= z->bra) z->bra += adjustment; + if (bra <= z->ket) z->ket += adjustment; + return 
0; +} + +extern symbol * slice_to(struct SN_env * z, symbol * p) { + if (slice_check(z)) { + lose_s(p); + return NULL; + } + { + int len = z->ket - z->bra; + if (CAPACITY(p) < len) { + p = increase_size(p, len); + if (p == NULL) + return NULL; + } + memmove(p, z->p + z->bra, len * sizeof(symbol)); + SET_SIZE(p, len); + } + return p; +} + +extern symbol * assign_to(struct SN_env * z, symbol * p) { + int len = z->l; + if (CAPACITY(p) < len) { + p = increase_size(p, len); + if (p == NULL) + return NULL; + } + memmove(p, z->p, len * sizeof(symbol)); + SET_SIZE(p, len); + return p; +} + +#if 0 +extern void debug(struct SN_env * z, int number, int line_count) { + int i; + int limit = SIZE(z->p); + /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/ + if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit); + for (i = 0; i <= limit; i++) { + if (z->lb == i) printf("{"); + if (z->bra == i) printf("["); + if (z->c == i) printf("|"); + if (z->ket == i) printf("]"); + if (z->l == i) printf("}"); + if (i < limit) + { int ch = z->p[i]; + if (ch == 0) ch = '#'; + printf("%c", ch); + } + } + printf("'\n"); +} +#endif diff --git a/libstemmer_c/src_c/stem_ISO_8859_1_english.c b/libstemmer_c/src_c/stem_ISO_8859_1_english.c new file mode 100644 index 00000000..556b6ab8 --- /dev/null +++ b/libstemmer_c/src_c/stem_ISO_8859_1_english.c @@ -0,0 +1,1117 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "../runtime/header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int english_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_exception2(struct SN_env * z); +static int r_exception1(struct SN_env * z); +static int r_Step_5(struct SN_env * z); +static int r_Step_4(struct SN_env * z); +static int r_Step_3(struct SN_env * z); +static int r_Step_2(struct SN_env * z); +static int r_Step_1c(struct SN_env * z); +static int r_Step_1b(struct SN_env * z); +static int 
r_Step_1a(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_shortv(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * english_ISO_8859_1_create_env(void); +extern void english_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[5] = { 'a', 'r', 's', 'e', 'n' }; +static const symbol s_0_1[6] = { 'c', 'o', 'm', 'm', 'u', 'n' }; +static const symbol s_0_2[5] = { 'g', 'e', 'n', 'e', 'r' }; + +static const struct among a_0[3] = +{ +/* 0 */ { 5, s_0_0, -1, -1, 0}, +/* 1 */ { 6, s_0_1, -1, -1, 0}, +/* 2 */ { 5, s_0_2, -1, -1, 0} +}; + +static const symbol s_1_0[1] = { '\'' }; +static const symbol s_1_1[3] = { '\'', 's', '\'' }; +static const symbol s_1_2[2] = { '\'', 's' }; + +static const struct among a_1[3] = +{ +/* 0 */ { 1, s_1_0, -1, 1, 0}, +/* 1 */ { 3, s_1_1, 0, 1, 0}, +/* 2 */ { 2, s_1_2, -1, 1, 0} +}; + +static const symbol s_2_0[3] = { 'i', 'e', 'd' }; +static const symbol s_2_1[1] = { 's' }; +static const symbol s_2_2[3] = { 'i', 'e', 's' }; +static const symbol s_2_3[4] = { 's', 's', 'e', 's' }; +static const symbol s_2_4[2] = { 's', 's' }; +static const symbol s_2_5[2] = { 'u', 's' }; + +static const struct among a_2[6] = +{ +/* 0 */ { 3, s_2_0, -1, 2, 0}, +/* 1 */ { 1, s_2_1, -1, 3, 0}, +/* 2 */ { 3, s_2_2, 1, 2, 0}, +/* 3 */ { 4, s_2_3, 1, 1, 0}, +/* 4 */ { 2, s_2_4, 1, -1, 0}, +/* 5 */ { 2, s_2_5, 1, -1, 0} +}; + +static const symbol s_3_1[2] = { 'b', 'b' }; +static const symbol s_3_2[2] = { 'd', 'd' }; +static const symbol s_3_3[2] = { 'f', 'f' }; +static const symbol s_3_4[2] = { 'g', 'g' }; +static const symbol s_3_5[2] = { 'b', 'l' }; +static const symbol s_3_6[2] = { 'm', 'm' }; +static const symbol s_3_7[2] = { 'n', 'n' }; +static const symbol s_3_8[2] = { 'p', 'p' }; +static 
const symbol s_3_9[2] = { 'r', 'r' }; +static const symbol s_3_10[2] = { 'a', 't' }; +static const symbol s_3_11[2] = { 't', 't' }; +static const symbol s_3_12[2] = { 'i', 'z' }; + +static const struct among a_3[13] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 2, s_3_1, 0, 2, 0}, +/* 2 */ { 2, s_3_2, 0, 2, 0}, +/* 3 */ { 2, s_3_3, 0, 2, 0}, +/* 4 */ { 2, s_3_4, 0, 2, 0}, +/* 5 */ { 2, s_3_5, 0, 1, 0}, +/* 6 */ { 2, s_3_6, 0, 2, 0}, +/* 7 */ { 2, s_3_7, 0, 2, 0}, +/* 8 */ { 2, s_3_8, 0, 2, 0}, +/* 9 */ { 2, s_3_9, 0, 2, 0}, +/* 10 */ { 2, s_3_10, 0, 1, 0}, +/* 11 */ { 2, s_3_11, 0, 2, 0}, +/* 12 */ { 2, s_3_12, 0, 1, 0} +}; + +static const symbol s_4_0[2] = { 'e', 'd' }; +static const symbol s_4_1[3] = { 'e', 'e', 'd' }; +static const symbol s_4_2[3] = { 'i', 'n', 'g' }; +static const symbol s_4_3[4] = { 'e', 'd', 'l', 'y' }; +static const symbol s_4_4[5] = { 'e', 'e', 'd', 'l', 'y' }; +static const symbol s_4_5[5] = { 'i', 'n', 'g', 'l', 'y' }; + +static const struct among a_4[6] = +{ +/* 0 */ { 2, s_4_0, -1, 2, 0}, +/* 1 */ { 3, s_4_1, 0, 1, 0}, +/* 2 */ { 3, s_4_2, -1, 2, 0}, +/* 3 */ { 4, s_4_3, -1, 2, 0}, +/* 4 */ { 5, s_4_4, 3, 1, 0}, +/* 5 */ { 5, s_4_5, -1, 2, 0} +}; + +static const symbol s_5_0[4] = { 'a', 'n', 'c', 'i' }; +static const symbol s_5_1[4] = { 'e', 'n', 'c', 'i' }; +static const symbol s_5_2[3] = { 'o', 'g', 'i' }; +static const symbol s_5_3[2] = { 'l', 'i' }; +static const symbol s_5_4[3] = { 'b', 'l', 'i' }; +static const symbol s_5_5[4] = { 'a', 'b', 'l', 'i' }; +static const symbol s_5_6[4] = { 'a', 'l', 'l', 'i' }; +static const symbol s_5_7[5] = { 'f', 'u', 'l', 'l', 'i' }; +static const symbol s_5_8[6] = { 'l', 'e', 's', 's', 'l', 'i' }; +static const symbol s_5_9[5] = { 'o', 'u', 's', 'l', 'i' }; +static const symbol s_5_10[5] = { 'e', 'n', 't', 'l', 'i' }; +static const symbol s_5_11[5] = { 'a', 'l', 'i', 't', 'i' }; +static const symbol s_5_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; +static const symbol s_5_13[5] = { 'i', 'v', 'i', 't', 
'i' }; +static const symbol s_5_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_5_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_5_16[5] = { 'a', 'l', 'i', 's', 'm' }; +static const symbol s_5_17[5] = { 'a', 't', 'i', 'o', 'n' }; +static const symbol s_5_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; +static const symbol s_5_19[4] = { 'i', 'z', 'e', 'r' }; +static const symbol s_5_20[4] = { 'a', 't', 'o', 'r' }; +static const symbol s_5_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; +static const symbol s_5_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; +static const symbol s_5_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; + +static const struct among a_5[24] = +{ +/* 0 */ { 4, s_5_0, -1, 3, 0}, +/* 1 */ { 4, s_5_1, -1, 2, 0}, +/* 2 */ { 3, s_5_2, -1, 13, 0}, +/* 3 */ { 2, s_5_3, -1, 16, 0}, +/* 4 */ { 3, s_5_4, 3, 12, 0}, +/* 5 */ { 4, s_5_5, 4, 4, 0}, +/* 6 */ { 4, s_5_6, 3, 8, 0}, +/* 7 */ { 5, s_5_7, 3, 14, 0}, +/* 8 */ { 6, s_5_8, 3, 15, 0}, +/* 9 */ { 5, s_5_9, 3, 10, 0}, +/* 10 */ { 5, s_5_10, 3, 5, 0}, +/* 11 */ { 5, s_5_11, -1, 8, 0}, +/* 12 */ { 6, s_5_12, -1, 12, 0}, +/* 13 */ { 5, s_5_13, -1, 11, 0}, +/* 14 */ { 6, s_5_14, -1, 1, 0}, +/* 15 */ { 7, s_5_15, 14, 7, 0}, +/* 16 */ { 5, s_5_16, -1, 8, 0}, +/* 17 */ { 5, s_5_17, -1, 7, 0}, +/* 18 */ { 7, s_5_18, 17, 6, 0}, +/* 19 */ { 4, s_5_19, -1, 6, 0}, +/* 20 */ { 4, s_5_20, -1, 7, 0}, +/* 21 */ { 7, s_5_21, -1, 11, 0}, +/* 22 */ { 7, s_5_22, -1, 9, 0}, +/* 23 */ { 7, s_5_23, -1, 10, 0} +}; + +static const symbol s_6_0[5] = { 'i', 'c', 'a', 't', 'e' }; +static const symbol s_6_1[5] = { 'a', 't', 'i', 'v', 'e' }; +static const symbol s_6_2[5] = { 'a', 'l', 'i', 'z', 'e' }; +static const symbol s_6_3[5] = { 'i', 'c', 'i', 't', 'i' }; +static const symbol s_6_4[4] = { 'i', 'c', 'a', 'l' }; +static const symbol s_6_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_6_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_6_7[3] = { 'f', 'u', 
'l' }; +static const symbol s_6_8[4] = { 'n', 'e', 's', 's' }; + +static const struct among a_6[9] = +{ +/* 0 */ { 5, s_6_0, -1, 4, 0}, +/* 1 */ { 5, s_6_1, -1, 6, 0}, +/* 2 */ { 5, s_6_2, -1, 3, 0}, +/* 3 */ { 5, s_6_3, -1, 4, 0}, +/* 4 */ { 4, s_6_4, -1, 4, 0}, +/* 5 */ { 6, s_6_5, -1, 1, 0}, +/* 6 */ { 7, s_6_6, 5, 2, 0}, +/* 7 */ { 3, s_6_7, -1, 5, 0}, +/* 8 */ { 4, s_6_8, -1, 5, 0} +}; + +static const symbol s_7_0[2] = { 'i', 'c' }; +static const symbol s_7_1[4] = { 'a', 'n', 'c', 'e' }; +static const symbol s_7_2[4] = { 'e', 'n', 'c', 'e' }; +static const symbol s_7_3[4] = { 'a', 'b', 'l', 'e' }; +static const symbol s_7_4[4] = { 'i', 'b', 'l', 'e' }; +static const symbol s_7_5[3] = { 'a', 't', 'e' }; +static const symbol s_7_6[3] = { 'i', 'v', 'e' }; +static const symbol s_7_7[3] = { 'i', 'z', 'e' }; +static const symbol s_7_8[3] = { 'i', 't', 'i' }; +static const symbol s_7_9[2] = { 'a', 'l' }; +static const symbol s_7_10[3] = { 'i', 's', 'm' }; +static const symbol s_7_11[3] = { 'i', 'o', 'n' }; +static const symbol s_7_12[2] = { 'e', 'r' }; +static const symbol s_7_13[3] = { 'o', 'u', 's' }; +static const symbol s_7_14[3] = { 'a', 'n', 't' }; +static const symbol s_7_15[3] = { 'e', 'n', 't' }; +static const symbol s_7_16[4] = { 'm', 'e', 'n', 't' }; +static const symbol s_7_17[5] = { 'e', 'm', 'e', 'n', 't' }; + +static const struct among a_7[18] = +{ +/* 0 */ { 2, s_7_0, -1, 1, 0}, +/* 1 */ { 4, s_7_1, -1, 1, 0}, +/* 2 */ { 4, s_7_2, -1, 1, 0}, +/* 3 */ { 4, s_7_3, -1, 1, 0}, +/* 4 */ { 4, s_7_4, -1, 1, 0}, +/* 5 */ { 3, s_7_5, -1, 1, 0}, +/* 6 */ { 3, s_7_6, -1, 1, 0}, +/* 7 */ { 3, s_7_7, -1, 1, 0}, +/* 8 */ { 3, s_7_8, -1, 1, 0}, +/* 9 */ { 2, s_7_9, -1, 1, 0}, +/* 10 */ { 3, s_7_10, -1, 1, 0}, +/* 11 */ { 3, s_7_11, -1, 2, 0}, +/* 12 */ { 2, s_7_12, -1, 1, 0}, +/* 13 */ { 3, s_7_13, -1, 1, 0}, +/* 14 */ { 3, s_7_14, -1, 1, 0}, +/* 15 */ { 3, s_7_15, -1, 1, 0}, +/* 16 */ { 4, s_7_16, 15, 1, 0}, +/* 17 */ { 5, s_7_17, 16, 1, 0} +}; + +static const 
symbol s_8_0[1] = { 'e' }; +static const symbol s_8_1[1] = { 'l' }; + +static const struct among a_8[2] = +{ +/* 0 */ { 1, s_8_0, -1, 1, 0}, +/* 1 */ { 1, s_8_1, -1, 2, 0} +}; + +static const symbol s_9_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' }; +static const symbol s_9_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' }; +static const symbol s_9_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' }; +static const symbol s_9_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' }; +static const symbol s_9_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' }; +static const symbol s_9_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' }; +static const symbol s_9_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' }; +static const symbol s_9_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' }; + +static const struct among a_9[8] = +{ +/* 0 */ { 7, s_9_0, -1, -1, 0}, +/* 1 */ { 7, s_9_1, -1, -1, 0}, +/* 2 */ { 6, s_9_2, -1, -1, 0}, +/* 3 */ { 7, s_9_3, -1, -1, 0}, +/* 4 */ { 6, s_9_4, -1, -1, 0}, +/* 5 */ { 7, s_9_5, -1, -1, 0}, +/* 6 */ { 7, s_9_6, -1, -1, 0}, +/* 7 */ { 6, s_9_7, -1, -1, 0} +}; + +static const symbol s_10_0[5] = { 'a', 'n', 'd', 'e', 's' }; +static const symbol s_10_1[5] = { 'a', 't', 'l', 'a', 's' }; +static const symbol s_10_2[4] = { 'b', 'i', 'a', 's' }; +static const symbol s_10_3[6] = { 'c', 'o', 's', 'm', 'o', 's' }; +static const symbol s_10_4[5] = { 'd', 'y', 'i', 'n', 'g' }; +static const symbol s_10_5[5] = { 'e', 'a', 'r', 'l', 'y' }; +static const symbol s_10_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' }; +static const symbol s_10_7[4] = { 'h', 'o', 'w', 'e' }; +static const symbol s_10_8[4] = { 'i', 'd', 'l', 'y' }; +static const symbol s_10_9[5] = { 'l', 'y', 'i', 'n', 'g' }; +static const symbol s_10_10[4] = { 'n', 'e', 'w', 's' }; +static const symbol s_10_11[4] = { 'o', 'n', 'l', 'y' }; +static const symbol s_10_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' }; +static const symbol s_10_13[5] = { 's', 'k', 'i', 'e', 's' }; +static const symbol s_10_14[4] = { 's', 'k', 'i', 's' }; +static const symbol s_10_15[3] = { 
's', 'k', 'y' }; +static const symbol s_10_16[5] = { 't', 'y', 'i', 'n', 'g' }; +static const symbol s_10_17[4] = { 'u', 'g', 'l', 'y' }; + +static const struct among a_10[18] = +{ +/* 0 */ { 5, s_10_0, -1, -1, 0}, +/* 1 */ { 5, s_10_1, -1, -1, 0}, +/* 2 */ { 4, s_10_2, -1, -1, 0}, +/* 3 */ { 6, s_10_3, -1, -1, 0}, +/* 4 */ { 5, s_10_4, -1, 3, 0}, +/* 5 */ { 5, s_10_5, -1, 9, 0}, +/* 6 */ { 6, s_10_6, -1, 7, 0}, +/* 7 */ { 4, s_10_7, -1, -1, 0}, +/* 8 */ { 4, s_10_8, -1, 6, 0}, +/* 9 */ { 5, s_10_9, -1, 4, 0}, +/* 10 */ { 4, s_10_10, -1, -1, 0}, +/* 11 */ { 4, s_10_11, -1, 10, 0}, +/* 12 */ { 6, s_10_12, -1, 11, 0}, +/* 13 */ { 5, s_10_13, -1, 2, 0}, +/* 14 */ { 4, s_10_14, -1, 1, 0}, +/* 15 */ { 3, s_10_15, -1, -1, 0}, +/* 16 */ { 5, s_10_16, -1, 5, 0}, +/* 17 */ { 4, s_10_17, -1, 8, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1 }; + +static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; + +static const unsigned char g_valid_LI[] = { 55, 141, 2 }; + +static const symbol s_0[] = { '\'' }; +static const symbol s_1[] = { 'y' }; +static const symbol s_2[] = { 'Y' }; +static const symbol s_3[] = { 'y' }; +static const symbol s_4[] = { 'Y' }; +static const symbol s_5[] = { 's', 's' }; +static const symbol s_6[] = { 'i' }; +static const symbol s_7[] = { 'i', 'e' }; +static const symbol s_8[] = { 'e', 'e' }; +static const symbol s_9[] = { 'e' }; +static const symbol s_10[] = { 'e' }; +static const symbol s_11[] = { 'y' }; +static const symbol s_12[] = { 'Y' }; +static const symbol s_13[] = { 'i' }; +static const symbol s_14[] = { 't', 'i', 'o', 'n' }; +static const symbol s_15[] = { 'e', 'n', 'c', 'e' }; +static const symbol s_16[] = { 'a', 'n', 'c', 'e' }; +static const symbol s_17[] = { 'a', 'b', 'l', 'e' }; +static const symbol s_18[] = { 'e', 'n', 't' }; +static const symbol s_19[] = { 'i', 'z', 'e' }; +static const symbol s_20[] = { 'a', 't', 'e' }; +static const symbol s_21[] = { 'a', 'l' }; +static const symbol s_22[] = { 'f', 'u', 'l' }; 
+static const symbol s_23[] = { 'o', 'u', 's' }; +static const symbol s_24[] = { 'i', 'v', 'e' }; +static const symbol s_25[] = { 'b', 'l', 'e' }; +static const symbol s_26[] = { 'l' }; +static const symbol s_27[] = { 'o', 'g' }; +static const symbol s_28[] = { 'f', 'u', 'l' }; +static const symbol s_29[] = { 'l', 'e', 's', 's' }; +static const symbol s_30[] = { 't', 'i', 'o', 'n' }; +static const symbol s_31[] = { 'a', 't', 'e' }; +static const symbol s_32[] = { 'a', 'l' }; +static const symbol s_33[] = { 'i', 'c' }; +static const symbol s_34[] = { 's' }; +static const symbol s_35[] = { 't' }; +static const symbol s_36[] = { 'l' }; +static const symbol s_37[] = { 's', 'k', 'i' }; +static const symbol s_38[] = { 's', 'k', 'y' }; +static const symbol s_39[] = { 'd', 'i', 'e' }; +static const symbol s_40[] = { 'l', 'i', 'e' }; +static const symbol s_41[] = { 't', 'i', 'e' }; +static const symbol s_42[] = { 'i', 'd', 'l' }; +static const symbol s_43[] = { 'g', 'e', 'n', 't', 'l' }; +static const symbol s_44[] = { 'u', 'g', 'l', 'i' }; +static const symbol s_45[] = { 'e', 'a', 'r', 'l', 'i' }; +static const symbol s_46[] = { 'o', 'n', 'l', 'i' }; +static const symbol s_47[] = { 's', 'i', 'n', 'g', 'l' }; +static const symbol s_48[] = { 'Y' }; +static const symbol s_49[] = { 'y' }; + +static int r_prelude(struct SN_env * z) { + z->B[0] = 0; /* unset Y_found, line 26 */ + { int c1 = z->c; /* do, line 27 */ + z->bra = z->c; /* [, line 27 */ + if (!(eq_s(z, 1, s_0))) goto lab0; + z->ket = z->c; /* ], line 27 */ + { int ret = slice_del(z); /* delete, line 27 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 28 */ + z->bra = z->c; /* [, line 28 */ + if (!(eq_s(z, 1, s_1))) goto lab1; + z->ket = z->c; /* ], line 28 */ + { int ret = slice_from_s(z, 1, s_2); /* <-, line 28 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set Y_found, line 28 */ + lab1: + z->c = c2; + } + { int c3 = z->c; /* do, line 29 */ + while(1) { /* repeat, line 
29 */ + int c4 = z->c; + while(1) { /* goto, line 29 */ + int c5 = z->c; + if (in_grouping(z, g_v, 97, 121, 0)) goto lab4; + z->bra = z->c; /* [, line 29 */ + if (!(eq_s(z, 1, s_3))) goto lab4; + z->ket = z->c; /* ], line 29 */ + z->c = c5; + break; + lab4: + z->c = c5; + if (z->c >= z->l) goto lab3; + z->c++; /* goto, line 29 */ + } + { int ret = slice_from_s(z, 1, s_4); /* <-, line 29 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set Y_found, line 29 */ + continue; + lab3: + z->c = c4; + break; + } + z->c = c3; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + { int c1 = z->c; /* do, line 35 */ + { int c2 = z->c; /* or, line 41 */ + if (z->c + 4 >= z->l || z->p[z->c + 4] >> 5 != 3 || !((2375680 >> (z->p[z->c + 4] & 0x1f)) & 1)) goto lab2; + if (!(find_among(z, a_0, 3))) goto lab2; /* among, line 36 */ + goto lab1; + lab2: + z->c = c2; + { /* gopast */ /* grouping v, line 41 */ + int ret = out_grouping(z, g_v, 97, 121, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + { /* gopast */ /* non v, line 41 */ + int ret = in_grouping(z, g_v, 97, 121, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + } + lab1: + z->I[0] = z->c; /* setmark p1, line 42 */ + { /* gopast */ /* grouping v, line 43 */ + int ret = out_grouping(z, g_v, 97, 121, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + { /* gopast */ /* non v, line 43 */ + int ret = in_grouping(z, g_v, 97, 121, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 43 */ + lab0: + z->c = c1; + } + return 1; +} + +static int r_shortv(struct SN_env * z) { + { int m1 = z->l - z->c; (void)m1; /* or, line 51 */ + if (out_grouping_b(z, g_v_WXY, 89, 121, 0)) goto lab1; + if (in_grouping_b(z, g_v, 97, 121, 0)) goto lab1; + if (out_grouping_b(z, g_v, 97, 121, 0)) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (out_grouping_b(z, g_v, 97, 121, 0)) return 0; + if (in_grouping_b(z, g_v, 97, 121, 0)) return 0; + if (z->c > z->lb) 
return 0; /* atlimit, line 52 */ + } +lab0: + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_Step_1a(struct SN_env * z) { + int among_var; + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 59 */ + z->ket = z->c; /* [, line 60 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 39 && z->p[z->c - 1] != 115)) { z->c = z->l - m_keep; goto lab0; } + among_var = find_among_b(z, a_1, 3); /* substring, line 60 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 60 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab0; } + case 1: + { int ret = slice_del(z); /* delete, line 62 */ + if (ret < 0) return ret; + } + break; + } + lab0: + ; + } + z->ket = z->c; /* [, line 65 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 115)) return 0; + among_var = find_among_b(z, a_2, 6); /* substring, line 65 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 65 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 2, s_5); /* <-, line 66 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m1 = z->l - z->c; (void)m1; /* or, line 68 */ + { int ret = z->c - 2; + if (z->lb > ret || ret > z->l) goto lab2; + z->c = ret; /* hop, line 68 */ + } + { int ret = slice_from_s(z, 1, s_6); /* <-, line 68 */ + if (ret < 0) return ret; + } + goto lab1; + lab2: + z->c = z->l - m1; + { int ret = slice_from_s(z, 2, s_7); /* <-, line 68 */ + if (ret < 0) return ret; + } + } + lab1: + break; + case 3: + if (z->c <= z->lb) return 0; + z->c--; /* next, line 69 */ + { /* gopast */ /* grouping v, line 69 */ + int ret = out_grouping_b(z, g_v, 97, 121, 1); + if (ret < 0) return 0; + z->c -= ret; + } + { int ret = slice_del(z); /* delete, line 69 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int 
r_Step_1b(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 75 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33554576 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_4, 6); /* substring, line 75 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 75 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 77 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 2, s_8); /* <-, line 77 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m_test = z->l - z->c; /* test, line 80 */ + { /* gopast */ /* grouping v, line 80 */ + int ret = out_grouping_b(z, g_v, 97, 121, 1); + if (ret < 0) return 0; + z->c -= ret; + } + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 80 */ + if (ret < 0) return ret; + } + { int m_test = z->l - z->c; /* test, line 81 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else + among_var = find_among_b(z, a_3, 13); /* substring, line 81 */ + if (!(among_var)) return 0; + z->c = z->l - m_test; + } + switch(among_var) { + case 0: return 0; + case 1: + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 1, s_9); /* <+, line 83 */ + z->c = c_keep; + if (ret < 0) return ret; + } + break; + case 2: + z->ket = z->c; /* [, line 86 */ + if (z->c <= z->lb) return 0; + z->c--; /* next, line 86 */ + z->bra = z->c; /* ], line 86 */ + { int ret = slice_del(z); /* delete, line 86 */ + if (ret < 0) return ret; + } + break; + case 3: + if (z->c != z->I[0]) return 0; /* atmark, line 87 */ + { int m_test = z->l - z->c; /* test, line 87 */ + { int ret = r_shortv(z); + if (ret == 0) return 0; /* call shortv, line 87 */ + if (ret < 0) return ret; + } + z->c = z->l - m_test; + } + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 1, s_10); /* <+, line 87 */ + z->c = c_keep; + if (ret < 0) return ret; + } + break; + } + 
break; + } + return 1; +} + +static int r_Step_1c(struct SN_env * z) { + z->ket = z->c; /* [, line 94 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 94 */ + if (!(eq_s_b(z, 1, s_11))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_12))) return 0; + } +lab0: + z->bra = z->c; /* ], line 94 */ + if (out_grouping_b(z, g_v, 97, 121, 0)) return 0; + { int m2 = z->l - z->c; (void)m2; /* not, line 95 */ + if (z->c > z->lb) goto lab2; /* atlimit, line 95 */ + return 0; + lab2: + z->c = z->l - m2; + } + { int ret = slice_from_s(z, 1, s_13); /* <-, line 96 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_Step_2(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 100 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_5, 24); /* substring, line 100 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 100 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 100 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 4, s_14); /* <-, line 101 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 4, s_15); /* <-, line 102 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 4, s_16); /* <-, line 103 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 4, s_17); /* <-, line 104 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 3, s_18); /* <-, line 105 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 3, s_19); /* <-, line 107 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_from_s(z, 3, s_20); /* <-, line 109 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_from_s(z, 2, s_21); /* <-, line 111 */ + if (ret < 0) return ret; + } + break; + case 9: + 
{ int ret = slice_from_s(z, 3, s_22); /* <-, line 112 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = slice_from_s(z, 3, s_23); /* <-, line 114 */ + if (ret < 0) return ret; + } + break; + case 11: + { int ret = slice_from_s(z, 3, s_24); /* <-, line 116 */ + if (ret < 0) return ret; + } + break; + case 12: + { int ret = slice_from_s(z, 3, s_25); /* <-, line 118 */ + if (ret < 0) return ret; + } + break; + case 13: + if (!(eq_s_b(z, 1, s_26))) return 0; + { int ret = slice_from_s(z, 2, s_27); /* <-, line 119 */ + if (ret < 0) return ret; + } + break; + case 14: + { int ret = slice_from_s(z, 3, s_28); /* <-, line 120 */ + if (ret < 0) return ret; + } + break; + case 15: + { int ret = slice_from_s(z, 4, s_29); /* <-, line 121 */ + if (ret < 0) return ret; + } + break; + case 16: + if (in_grouping_b(z, g_valid_LI, 99, 116, 0)) return 0; + { int ret = slice_del(z); /* delete, line 122 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_3(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 127 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_6, 9); /* substring, line 127 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 127 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 127 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 4, s_30); /* <-, line 128 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 3, s_31); /* <-, line 129 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 2, s_32); /* <-, line 130 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 2, s_33); /* <-, line 132 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_del(z); /* delete, line 134 */ + if (ret < 0) return ret; + } + break; + case 
6: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 136 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 136 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_4(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 141 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1864232 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_7, 18); /* substring, line 141 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 141 */ + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 141 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 144 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ + if (!(eq_s_b(z, 1, s_34))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_35))) return 0; + } + lab0: + { int ret = slice_del(z); /* delete, line 145 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_5(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 150 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) return 0; + among_var = find_among_b(z, a_8, 2); /* substring, line 150 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 150 */ + switch(among_var) { + case 0: return 0; + case 1: + { int m1 = z->l - z->c; (void)m1; /* or, line 151 */ + { int ret = r_R2(z); + if (ret == 0) goto lab1; /* call R2, line 151 */ + if (ret < 0) return ret; + } + goto lab0; + lab1: + z->c = z->l - m1; + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 151 */ + if (ret < 0) return ret; + } + { int m2 = z->l - z->c; (void)m2; /* not, line 151 */ + { int ret = r_shortv(z); + if (ret == 0) goto lab2; /* call shortv, line 151 */ + if (ret < 0) return ret; + } + return 0; + lab2: + z->c = z->l - m2; + } + } + lab0: 
+ { int ret = slice_del(z); /* delete, line 151 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 152 */ + if (ret < 0) return ret; + } + if (!(eq_s_b(z, 1, s_36))) return 0; + { int ret = slice_del(z); /* delete, line 152 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_exception2(struct SN_env * z) { + z->ket = z->c; /* [, line 158 */ + if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103)) return 0; + if (!(find_among_b(z, a_9, 8))) return 0; /* substring, line 158 */ + z->bra = z->c; /* ], line 158 */ + if (z->c > z->lb) return 0; /* atlimit, line 158 */ + return 1; +} + +static int r_exception1(struct SN_env * z) { + int among_var; + z->bra = z->c; /* [, line 170 */ + if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((42750482 >> (z->p[z->c + 2] & 0x1f)) & 1)) return 0; + among_var = find_among(z, a_10, 18); /* substring, line 170 */ + if (!(among_var)) return 0; + z->ket = z->c; /* ], line 170 */ + if (z->c < z->l) return 0; /* atlimit, line 170 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 3, s_37); /* <-, line 174 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 3, s_38); /* <-, line 175 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 3, s_39); /* <-, line 176 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 3, s_40); /* <-, line 177 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 3, s_41); /* <-, line 178 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 3, s_42); /* <-, line 182 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_from_s(z, 5, s_43); /* <-, line 183 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_from_s(z, 4, s_44); /* <-, line 184 */ + if (ret < 0) return ret; + } + 
break; + case 9: + { int ret = slice_from_s(z, 5, s_45); /* <-, line 185 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = slice_from_s(z, 4, s_46); /* <-, line 186 */ + if (ret < 0) return ret; + } + break; + case 11: + { int ret = slice_from_s(z, 5, s_47); /* <-, line 187 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + if (!(z->B[0])) return 0; /* Boolean test Y_found, line 203 */ + while(1) { /* repeat, line 203 */ + int c1 = z->c; + while(1) { /* goto, line 203 */ + int c2 = z->c; + z->bra = z->c; /* [, line 203 */ + if (!(eq_s(z, 1, s_48))) goto lab1; + z->ket = z->c; /* ], line 203 */ + z->c = c2; + break; + lab1: + z->c = c2; + if (z->c >= z->l) goto lab0; + z->c++; /* goto, line 203 */ + } + { int ret = slice_from_s(z, 1, s_49); /* <-, line 203 */ + if (ret < 0) return ret; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +extern int english_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* or, line 207 */ + { int ret = r_exception1(z); + if (ret == 0) goto lab1; /* call exception1, line 207 */ + if (ret < 0) return ret; + } + goto lab0; + lab1: + z->c = c1; + { int c2 = z->c; /* not, line 208 */ + { int ret = z->c + 3; + if (0 > ret || ret > z->l) goto lab3; + z->c = ret; /* hop, line 208 */ + } + goto lab2; + lab3: + z->c = c2; + } + goto lab0; + lab2: + z->c = c1; + { int c3 = z->c; /* do, line 209 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab4; /* call prelude, line 209 */ + if (ret < 0) return ret; + } + lab4: + z->c = c3; + } + { int c4 = z->c; /* do, line 210 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab5; /* call mark_regions, line 210 */ + if (ret < 0) return ret; + } + lab5: + z->c = c4; + } + z->lb = z->c; z->c = z->l; /* backwards, line 211 */ + + { int m5 = z->l - z->c; (void)m5; /* do, line 213 */ + { int ret = r_Step_1a(z); + if (ret == 0) goto lab6; /* call Step_1a, line 213 */ + if (ret < 0) return ret; + } + 
lab6: + z->c = z->l - m5; + } + { int m6 = z->l - z->c; (void)m6; /* or, line 215 */ + { int ret = r_exception2(z); + if (ret == 0) goto lab8; /* call exception2, line 215 */ + if (ret < 0) return ret; + } + goto lab7; + lab8: + z->c = z->l - m6; + { int m7 = z->l - z->c; (void)m7; /* do, line 217 */ + { int ret = r_Step_1b(z); + if (ret == 0) goto lab9; /* call Step_1b, line 217 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m7; + } + { int m8 = z->l - z->c; (void)m8; /* do, line 218 */ + { int ret = r_Step_1c(z); + if (ret == 0) goto lab10; /* call Step_1c, line 218 */ + if (ret < 0) return ret; + } + lab10: + z->c = z->l - m8; + } + { int m9 = z->l - z->c; (void)m9; /* do, line 220 */ + { int ret = r_Step_2(z); + if (ret == 0) goto lab11; /* call Step_2, line 220 */ + if (ret < 0) return ret; + } + lab11: + z->c = z->l - m9; + } + { int m10 = z->l - z->c; (void)m10; /* do, line 221 */ + { int ret = r_Step_3(z); + if (ret == 0) goto lab12; /* call Step_3, line 221 */ + if (ret < 0) return ret; + } + lab12: + z->c = z->l - m10; + } + { int m11 = z->l - z->c; (void)m11; /* do, line 222 */ + { int ret = r_Step_4(z); + if (ret == 0) goto lab13; /* call Step_4, line 222 */ + if (ret < 0) return ret; + } + lab13: + z->c = z->l - m11; + } + { int m12 = z->l - z->c; (void)m12; /* do, line 224 */ + { int ret = r_Step_5(z); + if (ret == 0) goto lab14; /* call Step_5, line 224 */ + if (ret < 0) return ret; + } + lab14: + z->c = z->l - m12; + } + } + lab7: + z->c = z->lb; + { int c13 = z->c; /* do, line 227 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab15; /* call postlude, line 227 */ + if (ret < 0) return ret; + } + lab15: + z->c = c13; + } + } +lab0: + return 1; +} + +extern struct SN_env * english_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 1); } + +extern void english_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/libstemmer_c/src_c/stem_ISO_8859_1_english.h b/libstemmer_c/src_c/stem_ISO_8859_1_english.h 
new file mode 100644 index 00000000..e685dcf7 --- /dev/null +++ b/libstemmer_c/src_c/stem_ISO_8859_1_english.h @@ -0,0 +1,16 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#ifdef __cplusplus +extern "C" { +#endif + +extern struct SN_env * english_ISO_8859_1_create_env(void); +extern void english_ISO_8859_1_close_env(struct SN_env * z); + +extern int english_ISO_8859_1_stem(struct SN_env * z); + +#ifdef __cplusplus +} +#endif + diff --git a/src/Distribution/Server/Features.hs b/src/Distribution/Server/Features.hs index f8a8e362..173c022b 100644 --- a/src/Distribution/Server/Features.hs +++ b/src/Distribution/Server/Features.hs @@ -230,6 +230,17 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do uploadFeature (candidatesCoreResource candidatesFeature) + tagsFeature <- mkTagsFeature + coreFeature + uploadFeature + usersFeature + + versionsFeature <- mkVersionsFeature + coreFeature + uploadFeature + tagsFeature + usersFeature + documentationCoreFeature <- mkDocumentationCoreFeature (coreResource coreFeature) (map packageId . 
allPackages <$> queryGetPackageIndex coreFeature) @@ -237,6 +248,7 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do tarIndexCacheFeature reportsCoreFeature usersFeature + versionsFeature documentationCandidatesFeature <- mkDocumentationCandidatesFeature (candidatesCoreResource candidatesFeature) @@ -245,6 +257,7 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do tarIndexCacheFeature reportsCandidatesFeature usersFeature + versionsFeature downloadFeature <- mkDownloadFeature coreFeature @@ -254,22 +267,11 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do coreFeature usersFeature - tagsFeature <- mkTagsFeature - coreFeature - uploadFeature - usersFeature - analyticsPixelsFeature <- mkAnalyticsPixelsFeature coreFeature usersFeature uploadFeature - versionsFeature <- mkVersionsFeature - coreFeature - uploadFeature - tagsFeature - usersFeature - {- [reverse index disabled] reverseFeature <- mkReverseFeature coreFeature diff --git a/src/Distribution/Server/Features/Documentation.hs b/src/Distribution/Server/Features/Documentation.hs index 98dcfbd8..ed64581a 100644 --- a/src/Distribution/Server/Features/Documentation.hs +++ b/src/Distribution/Server/Features/Documentation.hs @@ -1,5 +1,6 @@ {-# LANGUAGE RankNTypes, FlexibleContexts, NamedFieldPuns, RecordWildCards, PatternGuards #-} +{-# LANGUAGE LambdaCase #-} module Distribution.Server.Features.Documentation ( DocumentationFeature(..), DocumentationResource(..), @@ -41,6 +42,9 @@ import Data.Maybe import Data.Time.Clock (NominalDiffTime, diffUTCTime, getCurrentTime) import System.Directory (getModificationTime) import Control.Applicative +import Distribution.Server.Features.PreferredVersions +import Distribution.Server.Features.PreferredVersions.State (getVersionStatus) +import Distribution.Server.Packages.Types -- TODO: -- 1. Write an HTML view for organizing uploads -- 2. 
Have cabal generate a standard doc tarball, and serve that here @@ -51,6 +55,8 @@ data DocumentationFeature = DocumentationFeature { queryDocumentation :: forall m. MonadIO m => PackageIdentifier -> m (Maybe BlobId), queryDocumentationIndex :: forall m. MonadIO m => m (Map.Map PackageId BlobId), + latestPackageWithDocumentation :: forall m. MonadIO m => PreferredInfo -> [PkgInfo] -> m (Maybe PackageId), + uploadDocumentation :: DynamicPath -> ServerPartE Response, deleteDocumentation :: DynamicPath -> ServerPartE Response, @@ -82,6 +88,7 @@ initDocumentationFeature :: String -> TarIndexCacheFeature -> ReportsFeature -> UserFeature + -> VersionsFeature -> IO DocumentationFeature) initDocumentationFeature name env@ServerEnv{serverStateDir} = do @@ -91,9 +98,9 @@ initDocumentationFeature name -- Hooks documentationChangeHook <- newHook - return $ \core getPackages upload tarIndexCache reportsCore user -> do + return $ \core getPackages upload tarIndexCache reportsCore user version -> do let feature = documentationFeature name env - core getPackages upload tarIndexCache reportsCore user + core getPackages upload tarIndexCache reportsCore user version documentationState documentationChangeHook return feature @@ -139,6 +146,7 @@ documentationFeature :: String -> TarIndexCacheFeature -> ReportsFeature -> UserFeature + -> VersionsFeature -> StateComponent AcidState Documentation -> Hook PackageId () -> DocumentationFeature @@ -149,13 +157,14 @@ documentationFeature name , guardValidPackageId , corePackagePage , corePackagesPage - , lookupPackageId + , lookupPackageName } getPackages UploadFeature{..} TarIndexCacheFeature{cachedTarIndex} ReportsFeature{..} UserFeature{ guardAuthorised_ } + VersionsFeature{queryGetPreferredInfo} documentationState documentationChangeHook = DocumentationFeature{..} @@ -352,15 +361,30 @@ documentationFeature name runHook_ documentationChangeHook pkgid noContent (toResponse ()) + latestPackageWithDocumentation :: MonadIO m => PreferredInfo -> 
[PkgInfo] -> m (Maybe PackageId) + latestPackageWithDocumentation prefInfo ps = helper (reverse ps) + where + helper [] = helper2 (reverse ps) + helper (pkg:pkgs) = do + hasDoc <- queryHasDocumentation (pkgInfoId pkg) + let status = getVersionStatus prefInfo (packageVersion pkg) + if hasDoc && status == NormalVersion + then pure (Just (packageId pkg)) + else helper pkgs + + helper2 [] = pure Nothing + helper2 (pkg:pkgs) = do + hasDoc <- queryHasDocumentation (pkgInfoId pkg) + if hasDoc + then pure (Just (packageId pkg)) + else helper2 pkgs + withDocumentation :: Resource -> DynamicPath -> (PackageId -> BlobId -> TarIndex -> ServerPartE Response) -> ServerPartE Response withDocumentation self dpath func = do pkgid <- packageInPath dpath - -- lookupPackageId returns the latest version if no version is specified. - pkginfo <- lookupPackageId pkgid - -- Set up the canonical URL to point to the unversioned path let basedpath = [ if var == "package" @@ -375,17 +399,27 @@ documentationFeature name -- See https://support.google.com/webmasters/answer/139066?hl=en#6 setHeaderM "Link" canonicalHeader - case pkgVersion pkgid == nullVersion of - -- if no version is given we want to redirect to the latest version - True -> tempRedirect latestPkgPath (toResponse "") - where - latest = packageId pkginfo - dpath' = [ if var == "package" - then (var, display latest) - else e - | e@(var, _) <- dpath ] - latestPkgPath = (renderResource' self dpath') + -- Essentially errNotFound, but overloaded to specify a header. 
+ -- (Needed since errNotFound throws away result of setHeaderM) + let errNotFoundH title message = throwError + (ErrorResponse 404 + [("Link", canonicalHeader)] + title message) + case pkgVersion pkgid == nullVersion of + -- if no version is given we want to redirect to the latest version with docs + True -> do + pkgs <- lookupPackageName (pkgName pkgid) + prefInfo <- queryGetPreferredInfo (pkgName pkgid) + latestPackageWithDocumentation prefInfo pkgs >>= \case + Just latestWithDocs -> do + let dpath' = [ if var == "package" + then (var, display latestWithDocs) + else e + | e@(var, _) <- dpath ] + latestPkgPath = (renderResource' self dpath') + tempRedirect latestPkgPath (toResponse "") + Nothing -> errNotFoundH "Not Found" [MText "There is no documentation for this package."] False -> do mdocs <- queryState documentationState $ LookupDocumentation pkgid case mdocs of @@ -397,13 +431,6 @@ documentationFeature name , MLink canonicalLink canonicalLink , MText " for the latest version." ] - where - -- Essentially errNotFound, but overloaded to specify a header. 
- -- (Needed since errNotFound throws away result of setHeaderM) - errNotFoundH title message = throwError - (ErrorResponse 404 - [("Link", canonicalHeader)] - title message) Just blob -> do index <- liftIO $ cachedTarIndex blob func pkgid blob index @@ -439,6 +466,7 @@ checkDocTarball pkgid = docMetaPath = DocMeta.packageDocMetaTarPath pkgid + {------------------------------------------------------------------------------ Auxiliary ------------------------------------------------------------------------------} diff --git a/src/Distribution/Server/Features/Html.hs b/src/Distribution/Server/Features/Html.hs index a4448609..48dc07d3 100644 --- a/src/Distribution/Server/Features/Html.hs +++ b/src/Distribution/Server/Features/Html.hs @@ -606,6 +606,8 @@ mkHtmlCore ServerEnv{serverBaseURI, serverBlobStore} deprs <- queryGetDeprecatedFor pkgname mreadme <- makeReadme render hasDocs <- queryHasDocumentation documentationFeature realpkg + mDocPkgId <- if hasDocs then pure Nothing + else latestPackageWithDocumentation documentationFeature prefInfo pkgs rptStats <- queryLastReportStats reportsFeature realpkg candidates <- lookupCandidateName pkgname buildStatus <- renderBuildStatus @@ -670,7 +672,7 @@ mkHtmlCore ServerEnv{serverBaseURI, serverBlobStore} -- Items not related to IO (mostly pure functions) PagesNew.packagePageTemplate render mdocIndex mdocMeta mreadme - docURL distributions + docURL mDocPkgId distributions deprs utilities False @@ -1294,7 +1296,7 @@ mkHtmlCandidates utilities@HtmlUtilities{..} ] ++ PagesNew.packagePageTemplate render mdocIndex Nothing mreadme - docURL [] Nothing + docURL Nothing [] Nothing utilities True diff --git a/src/Distribution/Server/Features/Search/ExtractDescriptionTerms.hs b/src/Distribution/Server/Features/Search/ExtractDescriptionTerms.hs index d07ed63e..eab6563d 100644 --- a/src/Distribution/Server/Features/Search/ExtractDescriptionTerms.hs +++ b/src/Distribution/Server/Features/Search/ExtractDescriptionTerms.hs @@ -12,7 +12,7 @@ 
import Data.Set (Set) import qualified Data.Set as Set import Data.Char import qualified NLP.Tokenize as NLP -import qualified NLP.Snowball as NLP +import qualified Distribution.Server.Util.NLP.Snowball as NLP import qualified Data.Foldable as F import qualified Documentation.Haddock.Markup as Haddock @@ -26,7 +26,7 @@ extraStems ss x = x : mapMaybe (`T.stripSuffix` x) ss extractSynopsisTerms :: [Text] -> Set Text -> String -> [Text] extractSynopsisTerms ss stopWords = concatMap (extraStems ss) --note this adds extra possible stems, it doesn't delete any given one. - . NLP.stems NLP.English + . NLP.stems . filter (`Set.notMember` stopWords) . map (T.toCaseFold . T.pack) . concatMap splitTok @@ -54,7 +54,7 @@ splitTok tok = extractDescriptionTerms :: [Text] -> Set Text -> String -> [Text] extractDescriptionTerms ss stopWords = concatMap (extraStems ss) - . NLP.stems NLP.English + . NLP.stems . filter (`Set.notMember` stopWords) . map (T.toCaseFold . T.pack) . maybe diff --git a/src/Distribution/Server/Features/Search/PkgSearch.hs b/src/Distribution/Server/Features/Search/PkgSearch.hs index 9465986d..2f8078b8 100644 --- a/src/Distribution/Server/Features/Search/PkgSearch.hs +++ b/src/Distribution/Server/Features/Search/PkgSearch.hs @@ -14,7 +14,7 @@ import Data.Set (Set) import qualified Data.Set as Set import Data.Text (Text, unpack) import qualified Data.Text as T -import NLP.Snowball +import Distribution.Server.Util.NLP.Snowball import Distribution.Package import Distribution.PackageDescription @@ -62,7 +62,7 @@ pkgSearchConfig = normaliseQueryToken tok = let tokFold = T.toCaseFold tok -- we don't need to use extraStems here because the index is inflated by it already. 
- tokStem = stem English tokFold + tokStem = stem tokFold in \field -> case field of NameField -> tokFold SynopsisField -> tokStem diff --git a/src/Distribution/Server/Pages/Package.hs b/src/Distribution/Server/Pages/Package.hs index 6bc46205..5813355e 100644 --- a/src/Distribution/Server/Pages/Package.hs +++ b/src/Distribution/Server/Pages/Package.hs @@ -34,7 +34,7 @@ import Distribution.Utils.ShortText (fromShortText, ShortText) import Text.XHtml.Strict hiding (p, name, title, content) import qualified Text.XHtml.Strict -import Data.Maybe (fromMaybe, maybeToList, isJust, mapMaybe) +import Data.Maybe (fromMaybe, maybeToList, isJust, mapMaybe, catMaybes) import Data.List (intersperse, intercalate, partition) import Control.Arrow (second) import System.FilePath.Posix ((), (<.>)) @@ -151,8 +151,8 @@ renderPackageFlags render docURL = code = (thespan ! [theclass "code"] <<) whenNotNull xs a = if null xs then [] else a -moduleSection :: PackageRender -> Maybe TarIndex -> URL -> Bool -> [Html] -moduleSection render mdocIndex docURL quickNav = +moduleSection :: PackageRender -> Maybe TarIndex -> URL -> Maybe PackageId -> Bool -> [Html] +moduleSection render mdocIndex docURL mPkgId quickNav = maybeToList $ fmap msect (rendModules render mdocIndex) where msect ModSigIndex{ modIndex = m, sigIndex = s } = toHtml $ (if not (null s) @@ -164,16 +164,25 @@ moduleSection render mdocIndex docURL quickNav = [renderDocIndexLink] ++ [renderModuleForest docURL m ] else []) - renderDocIndexLink = case mdocIndex of - Just tindex -> - let docIndexURL | isJust (Tar.lookup tindex "doc-index-All.html") = docURL "doc-index-All.html" - | otherwise = docURL "doc-index.html" - in paragraph ! [thestyle "font-size: small"] - << ("[" +++ anchor ! [href docIndexURL] << "Index" +++ "]" +++ - (if quickNav - then " [" +++ anchor ! 
[identifier "quickjump-trigger", href "#"] << "Quick Jump" +++ "]" - else mempty)) - Nothing -> mempty + renderDocIndexLink = case concatLinks indexLinks of + Nothing -> mempty + Just links -> paragraph ! [thestyle "font-size: small"] << ("[" +++ links +++ "]") + where + indexLinks = catMaybes $ case mdocIndex of + Just tindex -> + let docIndexURL | isJust (Tar.lookup tindex "doc-index-All.html") = docURL "doc-index-All.html" + | otherwise = docURL "doc-index.html" + in [ Just $ anchor ! [href docIndexURL] << "Index" + , if quickNav + then Just $ anchor ! [identifier "quickjump-trigger", href "#"] << "Quick Jump" + else Nothing + ] + Nothing -> [] + ++ [fmap (\pkgId -> anchor ! [href (packageURL pkgId)] << "Last Documentation") mPkgId] + + concatLinks [] = Nothing + concatLinks [h] = Just h + concatLinks (h:hs) = (h +++) . ("] [" +++) <$> concatLinks hs tabulate :: [(String, Html)] -> Html tabulate items = table ! [theclass "properties"] << diff --git a/src/Distribution/Server/Pages/PackageFromTemplate.hs b/src/Distribution/Server/Pages/PackageFromTemplate.hs index 89dcb711..b922ab87 100644 --- a/src/Distribution/Server/Pages/PackageFromTemplate.hs +++ b/src/Distribution/Server/Pages/PackageFromTemplate.hs @@ -80,14 +80,14 @@ import Distribution.Server.Features.Html.HtmlUtilities -- votes it has. packagePageTemplate :: PackageRender -> Maybe TarIndex -> Maybe DocMeta -> Maybe BS.ByteString - -> URL -> [(DistroName, DistroPackageInfo)] + -> URL -> Maybe PackageId -> [(DistroName, DistroPackageInfo)] -> Maybe [PackageName] -> HtmlUtilities -> Bool -> [TemplateAttr] packagePageTemplate render mdocIndex mdocMeta mreadme - docURL distributions + docURL mPkgId distributions deprs utilities isCandidate = if isCandidate then @@ -97,7 +97,7 @@ packagePageTemplate render , "doc" $= docFieldsTemplate ] ++ -- Miscellaneous things that could still stand to be refactored a bit. 
- [ "moduleList" $= Old.moduleSection render mdocIndex docURL False + [ "moduleList" $= Old.moduleSection render mdocIndex docURL mPkgId False , "downloadSection" $= Old.downloadSection render ] else @@ -107,7 +107,7 @@ packagePageTemplate render , "doc" $= docFieldsTemplate ] ++ -- Miscellaneous things that could still stand to be refactored a bit. - [ "moduleList" $= Old.moduleSection render mdocIndex docURL hasQuickNav + [ "moduleList" $= Old.moduleSection render mdocIndex docURL mPkgId hasQuickNav , "executables" $= (commaList . map toHtml $ rendExecNames render) , "downloadSection" $= Old.downloadSection render , "stability" $= renderStability desc @@ -339,7 +339,6 @@ candidatesPageTemplate cands candidates candidatesCore= , toHtml $ ". " ++ fromShortText (synopsis desc) ] - -- #ToDo: Pick out several interesting versions to display, with a link to -- display all versions. renderVersion :: PackageId -> [(Version, VersionStatus)] -> Maybe String -> Html diff --git a/src/Distribution/Server/Util/NLP/LICENSE b/src/Distribution/Server/Util/NLP/LICENSE new file mode 100644 index 00000000..1d79b062 --- /dev/null +++ b/src/Distribution/Server/Util/NLP/LICENSE @@ -0,0 +1,24 @@ +Copyright (c) 2012, Dag Odenhall +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/src/Distribution/Server/Util/NLP/Snowball.hs b/src/Distribution/Server/Util/NLP/Snowball.hs new file mode 100644 index 00000000..5778c7c8 --- /dev/null +++ b/src/Distribution/Server/Util/NLP/Snowball.hs @@ -0,0 +1,81 @@ +module Distribution.Server.Util.NLP.Snowball where + +------------------------------------------------------------------------------- +import Control.Concurrent (MVar, newMVar, withMVar) +import Control.Monad (forM, when) +------------------------------------------------------------------------------- +import Data.ByteString.Char8 (packCStringLen, useAsCString) +import Data.Text (Text) +import qualified Data.Text as Text +import qualified Data.Text.Encoding as Text +------------------------------------------------------------------------------- +import Foreign (ForeignPtr, FunPtr, Ptr, newForeignPtr, + nullPtr, withForeignPtr) +import Foreign.C (CInt (..), CString) +------------------------------------------------------------------------------- +import System.IO.Unsafe (unsafePerformIO) +------------------------------------------------------------------------------- + +stem :: Text -> Text +stem word = let [a] = stems [word] in a + +stems :: [Text] -> [Text] +stems ws = + unsafePerformIO $ + do stemmer <- newStemmer + stemsWith stemmer ws + +------------------------------------------------------------------------------- + +-- | A thread and memory safe Snowball stemmer instance. 
+newtype Stemmer = Stemmer (MVar (ForeignPtr Struct))
+
+-- | Create a new reusable 'Stemmer' instance.
+--
+-- The underlying C environment is allocated with 'stemmer_new' and wrapped
+-- in a 'ForeignPtr' whose finalizer is 'stemmer_delete'. The 'ForeignPtr' is
+-- kept inside an 'MVar' so that only one thread uses it at a time.
+--
+-- Calls 'error' if the C allocator returns a null pointer.
+newStemmer :: IO Stemmer
+newStemmer = do
+    struct <- stemmer_new
+    when (struct == nullPtr) $
+      error "Distribution.Server.Util.NLP.Snowball.newStemmer: nullPtr"
+    structPtr <- newForeignPtr stemmer_delete struct
+    mvar <- newMVar structPtr
+    return $ Stemmer mvar
+
+-- | Use a 'Stemmer' to stem a word. This can be used more efficiently
+-- than 'stem' because you can keep a stemmer around and reuse it, but it
+-- requires 'IO' to ensure thread safety.
+stemWith :: Stemmer -> Text -> IO Text
+stemWith stemmer word = do
+    -- 'stemsWith' returns exactly one result per input word, so the
+    -- singleton pattern always matches here.
+    [a] <- stemsWith stemmer [word]
+    return a
+
+-- | Use a 'Stemmer' to stem multiple words in one go. This can be more
+-- efficient than @'mapM' 'stemWith'@ because the 'Stemmer' is only
+-- locked once.
+--
+-- Each word is handed to the C stemmer as its UTF-8 encoding, and the
+-- result is decoded as UTF-8 again. The output list has the same length
+-- and order as the input list.
+stemsWith :: Stemmer -> [Text] -> IO [Text]
+stemsWith (Stemmer mvar) ws =
+    withMVar mvar $ \structPtr ->
+      withForeignPtr structPtr $ \struct ->
+        forM ws $ \word -> do
+          let encoded = Text.encodeUtf8 word
+              -- The C API takes the input size in bytes (@sb_symbol@ is
+              -- @unsigned char@), not in characters. 'Text.length' of the
+              -- original word undercounts for any non-ASCII input, which
+              -- truncates the word and can cut a multi-byte sequence in
+              -- half. Decoding the bytes as Latin-1 maps every byte to
+              -- exactly one character, so its length is the byte count.
+              byteLen = Text.length (Text.decodeLatin1 encoded)
+          useAsCString encoded $ \word' -> do
+            ptr <- stemmer_stem struct word' (fromIntegral byteLen)
+            len <- stemmer_length struct
+            -- 'packCStringLen' copies the result into a fresh ByteString.
+            -- NOTE(review): presumably the returned pointer refers to
+            -- storage owned by the stemmer environment -- confirm against
+            -- libstemmer.c.
+            bytes <- packCStringLen (ptr, fromIntegral len)
+            return $ Text.decodeUtf8 bytes
+
+
+-------------------------------------------------------------------------------
+
+-- | Opaque tag for the C-side stemmer environment (@struct SN_env@ in
+-- @libstemmer.h@); only ever used behind a 'Ptr'.
+data Struct
+
+-- | Allocate a new English stemmer environment.
+foreign import ccall unsafe "libstemmer.h english_ISO_8859_1_stemmer_new"
+    stemmer_new :: IO (Ptr Struct)
+
+-- | Address of the C destructor, used as the 'ForeignPtr' finalizer.
+foreign import ccall unsafe "libstemmer.h &english_ISO_8859_1_stemmer_delete"
+    stemmer_delete :: FunPtr (Ptr Struct -> IO ())
+
+-- | Stem a word given as a pointer to its bytes and its size in bytes.
+foreign import ccall unsafe "libstemmer.h english_ISO_8859_1_stemmer_stem"
+    stemmer_stem :: Ptr Struct -> CString -> CInt -> IO CString
+
+-- | Length in bytes of the most recent stemming result.
+foreign import ccall unsafe "libstemmer.h english_ISO_8859_1_stemmer_length"
+    stemmer_length :: Ptr Struct -> IO CInt