Vendor snowball package (#1116)

* vendor snowball
This commit is contained in:
Alias Qli 2022-12-31 13:17:16 +08:00 committed by GitHub
parent a5bf92c522
commit 8095dc0c1b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 2069 additions and 61 deletions

View File

@ -63,6 +63,12 @@ extra-source-files:
tests/unpack-checks/LANGUAGE-GHC-9.2/Main.hs
tests/unpack-checks/LANGUAGE-GHC-9.2/Setup.hs
tests/unpack-checks/LANGUAGE-GHC-9.2/LANGUAGE-GHC.cabal
libstemmer_c/src_c/stem_ISO_8859_1_english.h
libstemmer_c/include/libstemmer.h
libstemmer_c/runtime/api.h
libstemmer_c/runtime/header.h
libstemmer_c/LICENSE
src/Distribution/Server/Util/NLP/LICENSE
source-repository head
type: git
@ -359,6 +365,7 @@ library lib-server
Distribution.Server.Features.StaticFiles
Distribution.Server.Features.ServerIntrospect
Distribution.Server.Features.Sitemap
Distribution.Server.Util.NLP.Snowball
if flag(debug)
cpp-options: -DDEBUG
@ -418,8 +425,12 @@ library lib-server
, xss-sanitize ^>= 0.3.6
if !flag(minimal)
build-depends: snowball ^>= 1.0
, tokenize ^>= 0.3
build-depends: tokenize ^>= 0.3
c-sources: libstemmer_c/src_c/stem_ISO_8859_1_english.c
libstemmer_c/runtime/api.c
libstemmer_c/runtime/utilities.c
libstemmer_c/libstemmer/libstemmer.c
if flag(cabal-parsers)
build-depends: cabal-parsers ^>= 0

24
libstemmer_c/LICENSE Normal file
View File

@ -0,0 +1,24 @@
Copyright (c) 2002, Richard Boulton
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,20 @@
/* Make header file work when included from C++ */
#ifdef __cplusplus
extern "C" {
#endif
typedef unsigned char sb_symbol;
struct SN_env * english_ISO_8859_1_stemmer_new();
void english_ISO_8859_1_stemmer_delete(struct SN_env * sn_env);
const sb_symbol * english_ISO_8859_1_stemmer_stem(struct SN_env * sn_env, const sb_symbol * word, int size);
int english_ISO_8859_1_stemmer_length(struct SN_env * sn_env);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,47 @@
#include <stdlib.h>
#include <string.h>
#include "../include/libstemmer.h"
#include "../runtime/api.h"
#include "../src_c/stem_ISO_8859_1_english.h"
extern struct SN_env *
english_ISO_8859_1_stemmer_new()
{
struct SN_env * sn_env = english_ISO_8859_1_create_env();
if (sn_env == NULL)
{
english_ISO_8859_1_stemmer_delete(sn_env);
return NULL;
}
return sn_env;
}
void
english_ISO_8859_1_stemmer_delete(struct SN_env * sn_env)
{
if (sn_env == 0) return;
english_ISO_8859_1_close_env(sn_env);
}
const sb_symbol *
english_ISO_8859_1_stemmer_stem(struct SN_env * sn_env, const sb_symbol * word, int size)
{
int ret;
if (SN_set_current(sn_env, size, (const symbol *)(word)))
{
sn_env->l = 0;
return NULL;
}
ret = english_ISO_8859_1_stem(sn_env);
if (ret < 0) return NULL;
sn_env->p[sn_env->l] = 0;
return (const sb_symbol *)(sn_env->p);
}
int
english_ISO_8859_1_stemmer_length(struct SN_env * sn_env)
{
return sn_env->l;
}

View File

@ -0,0 +1,66 @@
#include <stdlib.h> /* for calloc, free */
#include "header.h"
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
{
struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
if (z == NULL) return NULL;
z->p = create_s();
if (z->p == NULL) goto error;
if (S_size)
{
int i;
z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
if (z->S == NULL) goto error;
for (i = 0; i < S_size; i++)
{
z->S[i] = create_s();
if (z->S[i] == NULL) goto error;
}
}
if (I_size)
{
z->I = (int *) calloc(I_size, sizeof(int));
if (z->I == NULL) goto error;
}
if (B_size)
{
z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char));
if (z->B == NULL) goto error;
}
return z;
error:
SN_close_env(z, S_size);
return NULL;
}
extern void SN_close_env(struct SN_env * z, int S_size)
{
if (z == NULL) return;
if (S_size)
{
int i;
for (i = 0; i < S_size; i++)
{
lose_s(z->S[i]);
}
free(z->S);
}
free(z->I);
free(z->B);
if (z->p) lose_s(z->p);
free(z);
}
extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
{
int err = replace_s(z, 0, z->l, size, s, NULL);
z->c = 0;
return err;
}

View File

@ -0,0 +1,26 @@
typedef unsigned char symbol;
/* Or replace 'char' above with 'short' for 16 bit characters.
More precisely, replace 'char' with whatever type guarantees the
character width you need. Note however that sizeof(symbol) should divide
HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
there is an alignment problem. In the unlikely event of a problem here,
consult Martin Porter.
*/
struct SN_env {
symbol * p;
int c; int l; int lb; int bra; int ket;
symbol * * S;
int * I;
unsigned char * B;
};
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
extern void SN_close_env(struct SN_env * z, int S_size);
extern int SN_set_current(struct SN_env * z, int size, const symbol * s);

View File

@ -0,0 +1,58 @@
#include <limits.h>
#include "api.h"
#define MAXINT INT_MAX
#define MININT INT_MIN
#define HEAD 2*sizeof(int)
#define SIZE(p) ((int *)(p))[-1]
#define SET_SIZE(p, n) ((int *)(p))[-1] = n
#define CAPACITY(p) ((int *)(p))[-2]
struct among
{ int s_size; /* number of chars in string */
const symbol * s; /* search string */
int substring_i;/* index to longest matching substring */
int result; /* result of the lookup */
int (* function)(struct SN_env *);
};
extern symbol * create_s(void);
extern void lose_s(symbol * p);
extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int eq_s(struct SN_env * z, int s_size, const symbol * s);
extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s);
extern int eq_v(struct SN_env * z, const symbol * p);
extern int eq_v_b(struct SN_env * z, const symbol * p);
extern int find_among(struct SN_env * z, const struct among * v, int v_size);
extern int find_among_b(struct SN_env * z, const struct among * v, int v_size);
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s);
extern int slice_from_v(struct SN_env * z, const symbol * p);
extern int slice_del(struct SN_env * z);
extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s);
extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);
extern symbol * slice_to(struct SN_env * z, symbol * p);
extern symbol * assign_to(struct SN_env * z, symbol * p);
extern void debug(struct SN_env * z, int number, int line_count);

View File

@ -0,0 +1,478 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "header.h"
#define unless(C) if(!(C))
#define CREATE_SIZE 1
extern symbol * create_s(void) {
symbol * p;
void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
if (mem == NULL) return NULL;
p = (symbol *) (HEAD + (char *) mem);
CAPACITY(p) = CREATE_SIZE;
SET_SIZE(p, CREATE_SIZE);
return p;
}
extern void lose_s(symbol * p) {
if (p == NULL) return;
free((char *) p - HEAD);
}
/*
new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c
if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new
position, or 0 on failure.
-- used to implement hop and next in the utf8 case.
*/
extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
int b;
if (n >= 0) {
for (; n > 0; n--) {
if (c >= l) return -1;
b = p[c++];
if (b >= 0xC0) { /* 1100 0000 */
while (c < l) {
b = p[c];
if (b >= 0xC0 || b < 0x80) break;
/* break unless b is 10------ */
c++;
}
}
}
} else {
for (; n < 0; n++) {
if (c <= lb) return -1;
b = p[--c];
if (b >= 0x80) { /* 1000 0000 */
while (c > lb) {
b = p[c];
if (b >= 0xC0) break; /* 1100 0000 */
c--;
}
}
}
}
return c;
}
/* Code for character groupings: utf8 cases */
static int get_utf8(const symbol * p, int c, int l, int * slot) {
int b0, b1;
if (c >= l) return 0;
b0 = p[c++];
if (b0 < 0xC0 || c == l) { /* 1100 0000 */
* slot = b0; return 1;
}
b1 = p[c++];
if (b0 < 0xE0 || c == l) { /* 1110 0000 */
* slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2;
}
* slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[c] & 0x3F); return 3;
}
static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
int b0, b1;
if (c <= lb) return 0;
b0 = p[--c];
if (b0 < 0x80 || c == lb) { /* 1000 0000 */
* slot = b0; return 1;
}
b1 = p[--c];
if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */
* slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2;
}
* slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
}
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
do {
int ch;
int w = get_utf8(z->p, z->c, z->l, & ch);
unless (w) return -1;
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
return w;
z->c += w;
} while (repeat);
return 0;
}
extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
do {
int ch;
int w = get_b_utf8(z->p, z->c, z->lb, & ch);
unless (w) return -1;
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
return w;
z->c -= w;
} while (repeat);
return 0;
}
extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
do {
int ch;
int w = get_utf8(z->p, z->c, z->l, & ch);
unless (w) return -1;
unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
return w;
z->c += w;
} while (repeat);
return 0;
}
extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
do {
int ch;
int w = get_b_utf8(z->p, z->c, z->lb, & ch);
unless (w) return -1;
unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
return w;
z->c -= w;
} while (repeat);
return 0;
}
/* Code for character groupings: non-utf8 cases */
extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
do {
int ch;
if (z->c >= z->l) return -1;
ch = z->p[z->c];
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
return 1;
z->c++;
} while (repeat);
return 0;
}
extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
do {
int ch;
if (z->c <= z->lb) return -1;
ch = z->p[z->c - 1];
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
return 1;
z->c--;
} while (repeat);
return 0;
}
extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
do {
int ch;
if (z->c >= z->l) return -1;
ch = z->p[z->c];
unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
return 1;
z->c++;
} while (repeat);
return 0;
}
extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
do {
int ch;
if (z->c <= z->lb) return -1;
ch = z->p[z->c - 1];
unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
return 1;
z->c--;
} while (repeat);
return 0;
}
extern int eq_s(struct SN_env * z, int s_size, const symbol * s) {
if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
z->c += s_size; return 1;
}
extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) {
if (z->c - z->lb < s_size || memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
z->c -= s_size; return 1;
}
extern int eq_v(struct SN_env * z, const symbol * p) {
return eq_s(z, SIZE(p), p);
}
extern int eq_v_b(struct SN_env * z, const symbol * p) {
return eq_s_b(z, SIZE(p), p);
}
extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
int i = 0;
int j = v_size;
int c = z->c; int l = z->l;
symbol * q = z->p + c;
const struct among * w;
int common_i = 0;
int common_j = 0;
int first_key_inspected = 0;
while(1) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j; /* smaller */
w = v + k;
{
int i2; for (i2 = common; i2 < w->s_size; i2++) {
if (c + common == l) { diff = -1; break; }
diff = q[common] - w->s[i2];
if (diff != 0) break;
common++;
}
}
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
if (j - i <= 1) {
if (i > 0) break; /* v->s has been inspected */
if (j == i) break; /* only one item in v */
/* - but now we need to go round once more to get
v->s inspected. This looks messy, but is actually
the optimal approach. */
if (first_key_inspected) break;
first_key_inspected = 1;
}
}
while(1) {
w = v + i;
if (common_i >= w->s_size) {
z->c = c + w->s_size;
if (w->function == 0) return w->result;
{
int res = w->function(z);
z->c = c + w->s_size;
if (res) return w->result;
}
}
i = w->substring_i;
if (i < 0) return 0;
}
}
/* find_among_b is for backwards processing. Same comments apply */
extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
int i = 0;
int j = v_size;
int c = z->c; int lb = z->lb;
symbol * q = z->p + c - 1;
const struct among * w;
int common_i = 0;
int common_j = 0;
int first_key_inspected = 0;
while(1) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j;
w = v + k;
{
int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) {
if (c - common == lb) { diff = -1; break; }
diff = q[- common] - w->s[i2];
if (diff != 0) break;
common++;
}
}
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
if (j - i <= 1) {
if (i > 0) break;
if (j == i) break;
if (first_key_inspected) break;
first_key_inspected = 1;
}
}
while(1) {
w = v + i;
if (common_i >= w->s_size) {
z->c = c - w->s_size;
if (w->function == 0) return w->result;
{
int res = w->function(z);
z->c = c - w->s_size;
if (res) return w->result;
}
}
i = w->substring_i;
if (i < 0) return 0;
}
}
/* Increase the size of the buffer pointed to by p to at least n symbols.
* If insufficient memory, returns NULL and frees the old buffer.
*/
static symbol * increase_size(symbol * p, int n) {
symbol * q;
int new_size = n + 20;
void * mem = realloc((char *) p - HEAD,
HEAD + (new_size + 1) * sizeof(symbol));
if (mem == NULL) {
lose_s(p);
return NULL;
}
q = (symbol *) (HEAD + (char *)mem);
CAPACITY(q) = new_size;
return q;
}
/* to replace symbols between c_bra and c_ket in z->p by the
s_size symbols at s.
Returns 0 on success, -1 on error.
Also, frees z->p (and sets it to NULL) on error.
*/
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
{
int adjustment;
int len;
if (z->p == NULL) {
z->p = create_s();
if (z->p == NULL) return -1;
}
adjustment = s_size - (c_ket - c_bra);
len = SIZE(z->p);
if (adjustment != 0) {
if (adjustment + len > CAPACITY(z->p)) {
z->p = increase_size(z->p, adjustment + len);
if (z->p == NULL) return -1;
}
memmove(z->p + c_ket + adjustment,
z->p + c_ket,
(len - c_ket) * sizeof(symbol));
SET_SIZE(z->p, adjustment + len);
z->l += adjustment;
if (z->c >= c_ket)
z->c += adjustment;
else
if (z->c > c_bra)
z->c = c_bra;
}
unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
if (adjptr != NULL)
*adjptr = adjustment;
return 0;
}
static int slice_check(struct SN_env * z) {
if (z->bra < 0 ||
z->bra > z->ket ||
z->ket > z->l ||
z->p == NULL ||
z->l > SIZE(z->p)) /* this line could be removed */
{
#if 0
fprintf(stderr, "faulty slice operation:\n");
debug(z, -1, 0);
#endif
return -1;
}
return 0;
}
extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) {
if (slice_check(z)) return -1;
return replace_s(z, z->bra, z->ket, s_size, s, NULL);
}
extern int slice_from_v(struct SN_env * z, const symbol * p) {
return slice_from_s(z, SIZE(p), p);
}
extern int slice_del(struct SN_env * z) {
return slice_from_s(z, 0, 0);
}
extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) {
int adjustment;
if (replace_s(z, bra, ket, s_size, s, &adjustment))
return -1;
if (bra <= z->bra) z->bra += adjustment;
if (bra <= z->ket) z->ket += adjustment;
return 0;
}
extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
int adjustment;
if (replace_s(z, bra, ket, SIZE(p), p, &adjustment))
return -1;
if (bra <= z->bra) z->bra += adjustment;
if (bra <= z->ket) z->ket += adjustment;
return 0;
}
extern symbol * slice_to(struct SN_env * z, symbol * p) {
if (slice_check(z)) {
lose_s(p);
return NULL;
}
{
int len = z->ket - z->bra;
if (CAPACITY(p) < len) {
p = increase_size(p, len);
if (p == NULL)
return NULL;
}
memmove(p, z->p + z->bra, len * sizeof(symbol));
SET_SIZE(p, len);
}
return p;
}
extern symbol * assign_to(struct SN_env * z, symbol * p) {
int len = z->l;
if (CAPACITY(p) < len) {
p = increase_size(p, len);
if (p == NULL)
return NULL;
}
memmove(p, z->p, len * sizeof(symbol));
SET_SIZE(p, len);
return p;
}
#if 0
extern void debug(struct SN_env * z, int number, int line_count) {
int i;
int limit = SIZE(z->p);
/*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
for (i = 0; i <= limit; i++) {
if (z->lb == i) printf("{");
if (z->bra == i) printf("[");
if (z->c == i) printf("|");
if (z->ket == i) printf("]");
if (z->l == i) printf("}");
if (i < limit)
{ int ch = z->p[i];
if (ch == 0) ch = '#';
printf("%c", ch);
}
}
printf("'\n");
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,16 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * english_ISO_8859_1_create_env(void);
extern void english_ISO_8859_1_close_env(struct SN_env * z);
extern int english_ISO_8859_1_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif

View File

@ -230,6 +230,17 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do
uploadFeature
(candidatesCoreResource candidatesFeature)
tagsFeature <- mkTagsFeature
coreFeature
uploadFeature
usersFeature
versionsFeature <- mkVersionsFeature
coreFeature
uploadFeature
tagsFeature
usersFeature
documentationCoreFeature <- mkDocumentationCoreFeature
(coreResource coreFeature)
(map packageId . allPackages <$> queryGetPackageIndex coreFeature)
@ -237,6 +248,7 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do
tarIndexCacheFeature
reportsCoreFeature
usersFeature
versionsFeature
documentationCandidatesFeature <- mkDocumentationCandidatesFeature
(candidatesCoreResource candidatesFeature)
@ -245,6 +257,7 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do
tarIndexCacheFeature
reportsCandidatesFeature
usersFeature
versionsFeature
downloadFeature <- mkDownloadFeature
coreFeature
@ -254,22 +267,11 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do
coreFeature
usersFeature
tagsFeature <- mkTagsFeature
coreFeature
uploadFeature
usersFeature
analyticsPixelsFeature <- mkAnalyticsPixelsFeature
coreFeature
usersFeature
uploadFeature
versionsFeature <- mkVersionsFeature
coreFeature
uploadFeature
tagsFeature
usersFeature
{- [reverse index disabled]
reverseFeature <- mkReverseFeature
coreFeature

View File

@ -1,5 +1,6 @@
{-# LANGUAGE RankNTypes, FlexibleContexts,
NamedFieldPuns, RecordWildCards, PatternGuards #-}
{-# LANGUAGE LambdaCase #-}
module Distribution.Server.Features.Documentation (
DocumentationFeature(..),
DocumentationResource(..),
@ -41,6 +42,9 @@ import Data.Maybe
import Data.Time.Clock (NominalDiffTime, diffUTCTime, getCurrentTime)
import System.Directory (getModificationTime)
import Control.Applicative
import Distribution.Server.Features.PreferredVersions
import Distribution.Server.Features.PreferredVersions.State (getVersionStatus)
import Distribution.Server.Packages.Types
-- TODO:
-- 1. Write an HTML view for organizing uploads
-- 2. Have cabal generate a standard doc tarball, and serve that here
@ -51,6 +55,8 @@ data DocumentationFeature = DocumentationFeature {
queryDocumentation :: forall m. MonadIO m => PackageIdentifier -> m (Maybe BlobId),
queryDocumentationIndex :: forall m. MonadIO m => m (Map.Map PackageId BlobId),
latestPackageWithDocumentation :: forall m. MonadIO m => PreferredInfo -> [PkgInfo] -> m (Maybe PackageId),
uploadDocumentation :: DynamicPath -> ServerPartE Response,
deleteDocumentation :: DynamicPath -> ServerPartE Response,
@ -82,6 +88,7 @@ initDocumentationFeature :: String
-> TarIndexCacheFeature
-> ReportsFeature
-> UserFeature
-> VersionsFeature
-> IO DocumentationFeature)
initDocumentationFeature name
env@ServerEnv{serverStateDir} = do
@ -91,9 +98,9 @@ initDocumentationFeature name
-- Hooks
documentationChangeHook <- newHook
return $ \core getPackages upload tarIndexCache reportsCore user -> do
return $ \core getPackages upload tarIndexCache reportsCore user version -> do
let feature = documentationFeature name env
core getPackages upload tarIndexCache reportsCore user
core getPackages upload tarIndexCache reportsCore user version
documentationState
documentationChangeHook
return feature
@ -139,6 +146,7 @@ documentationFeature :: String
-> TarIndexCacheFeature
-> ReportsFeature
-> UserFeature
-> VersionsFeature
-> StateComponent AcidState Documentation
-> Hook PackageId ()
-> DocumentationFeature
@ -149,13 +157,14 @@ documentationFeature name
, guardValidPackageId
, corePackagePage
, corePackagesPage
, lookupPackageId
, lookupPackageName
}
getPackages
UploadFeature{..}
TarIndexCacheFeature{cachedTarIndex}
ReportsFeature{..}
UserFeature{ guardAuthorised_ }
VersionsFeature{queryGetPreferredInfo}
documentationState
documentationChangeHook
= DocumentationFeature{..}
@ -352,15 +361,30 @@ documentationFeature name
runHook_ documentationChangeHook pkgid
noContent (toResponse ())
latestPackageWithDocumentation :: MonadIO m => PreferredInfo -> [PkgInfo] -> m (Maybe PackageId)
latestPackageWithDocumentation prefInfo ps = helper (reverse ps)
where
helper [] = helper2 (reverse ps)
helper (pkg:pkgs) = do
hasDoc <- queryHasDocumentation (pkgInfoId pkg)
let status = getVersionStatus prefInfo (packageVersion pkg)
if hasDoc && status == NormalVersion
then pure (Just (packageId pkg))
else helper pkgs
helper2 [] = pure Nothing
helper2 (pkg:pkgs) = do
hasDoc <- queryHasDocumentation (pkgInfoId pkg)
if hasDoc
then pure (Just (packageId pkg))
else helper2 pkgs
withDocumentation :: Resource -> DynamicPath
-> (PackageId -> BlobId -> TarIndex -> ServerPartE Response)
-> ServerPartE Response
withDocumentation self dpath func = do
pkgid <- packageInPath dpath
-- lookupPackageId returns the latest version if no version is specified.
pkginfo <- lookupPackageId pkgid
-- Set up the canonical URL to point to the unversioned path
let basedpath =
[ if var == "package"
@ -375,17 +399,27 @@ documentationFeature name
-- See https://support.google.com/webmasters/answer/139066?hl=en#6
setHeaderM "Link" canonicalHeader
case pkgVersion pkgid == nullVersion of
-- if no version is given we want to redirect to the latest version
True -> tempRedirect latestPkgPath (toResponse "")
where
latest = packageId pkginfo
dpath' = [ if var == "package"
then (var, display latest)
else e
| e@(var, _) <- dpath ]
latestPkgPath = (renderResource' self dpath')
-- Essentially errNotFound, but overloaded to specify a header.
-- (Needed since errNotFound throws away result of setHeaderM)
let errNotFoundH title message = throwError
(ErrorResponse 404
[("Link", canonicalHeader)]
title message)
case pkgVersion pkgid == nullVersion of
-- if no version is given we want to redirect to the latest version with docs
True -> do
pkgs <- lookupPackageName (pkgName pkgid)
prefInfo <- queryGetPreferredInfo (pkgName pkgid)
latestPackageWithDocumentation prefInfo pkgs >>= \case
Just latestWithDocs -> do
let dpath' = [ if var == "package"
then (var, display latestWithDocs)
else e
| e@(var, _) <- dpath ]
latestPkgPath = (renderResource' self dpath')
tempRedirect latestPkgPath (toResponse "")
Nothing -> errNotFoundH "Not Found" [MText "There is no documentation for this package."]
False -> do
mdocs <- queryState documentationState $ LookupDocumentation pkgid
case mdocs of
@ -397,13 +431,6 @@ documentationFeature name
, MLink canonicalLink canonicalLink
, MText " for the latest version."
]
where
-- Essentially errNotFound, but overloaded to specify a header.
-- (Needed since errNotFound throws away result of setHeaderM)
errNotFoundH title message = throwError
(ErrorResponse 404
[("Link", canonicalHeader)]
title message)
Just blob -> do
index <- liftIO $ cachedTarIndex blob
func pkgid blob index
@ -439,6 +466,7 @@ checkDocTarball pkgid =
docMetaPath = DocMeta.packageDocMetaTarPath pkgid
{------------------------------------------------------------------------------
Auxiliary
------------------------------------------------------------------------------}

View File

@ -606,6 +606,8 @@ mkHtmlCore ServerEnv{serverBaseURI, serverBlobStore}
deprs <- queryGetDeprecatedFor pkgname
mreadme <- makeReadme render
hasDocs <- queryHasDocumentation documentationFeature realpkg
mDocPkgId <- if hasDocs then pure Nothing
else latestPackageWithDocumentation documentationFeature prefInfo pkgs
rptStats <- queryLastReportStats reportsFeature realpkg
candidates <- lookupCandidateName pkgname
buildStatus <- renderBuildStatus
@ -670,7 +672,7 @@ mkHtmlCore ServerEnv{serverBaseURI, serverBlobStore}
-- Items not related to IO (mostly pure functions)
PagesNew.packagePageTemplate render
mdocIndex mdocMeta mreadme
docURL distributions
docURL mDocPkgId distributions
deprs
utilities
False
@ -1294,7 +1296,7 @@ mkHtmlCandidates utilities@HtmlUtilities{..}
] ++
PagesNew.packagePageTemplate render
mdocIndex Nothing mreadme
docURL [] Nothing
docURL Nothing [] Nothing
utilities
True

View File

@ -12,7 +12,7 @@ import Data.Set (Set)
import qualified Data.Set as Set
import Data.Char
import qualified NLP.Tokenize as NLP
import qualified NLP.Snowball as NLP
import qualified Distribution.Server.Util.NLP.Snowball as NLP
import qualified Data.Foldable as F
import qualified Documentation.Haddock.Markup as Haddock
@ -26,7 +26,7 @@ extraStems ss x = x : mapMaybe (`T.stripSuffix` x) ss
extractSynopsisTerms :: [Text] -> Set Text -> String -> [Text]
extractSynopsisTerms ss stopWords =
concatMap (extraStems ss) --note this adds extra possible stems, it doesn't delete any given one.
. NLP.stems NLP.English
. NLP.stems
. filter (`Set.notMember` stopWords)
. map (T.toCaseFold . T.pack)
. concatMap splitTok
@ -54,7 +54,7 @@ splitTok tok =
extractDescriptionTerms :: [Text] -> Set Text -> String -> [Text]
extractDescriptionTerms ss stopWords =
concatMap (extraStems ss)
. NLP.stems NLP.English
. NLP.stems
. filter (`Set.notMember` stopWords)
. map (T.toCaseFold . T.pack)
. maybe

View File

@ -14,7 +14,7 @@ import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (Text, unpack)
import qualified Data.Text as T
import NLP.Snowball
import Distribution.Server.Util.NLP.Snowball
import Distribution.Package
import Distribution.PackageDescription
@ -62,7 +62,7 @@ pkgSearchConfig =
normaliseQueryToken tok =
let tokFold = T.toCaseFold tok
-- we don't need to use extraStems here because the index is inflated by it already.
tokStem = stem English tokFold
tokStem = stem tokFold
in \field -> case field of
NameField -> tokFold
SynopsisField -> tokStem

View File

@ -34,7 +34,7 @@ import Distribution.Utils.ShortText (fromShortText, ShortText)
import Text.XHtml.Strict hiding (p, name, title, content)
import qualified Text.XHtml.Strict
import Data.Maybe (fromMaybe, maybeToList, isJust, mapMaybe)
import Data.Maybe (fromMaybe, maybeToList, isJust, mapMaybe, catMaybes)
import Data.List (intersperse, intercalate, partition)
import Control.Arrow (second)
import System.FilePath.Posix ((</>), (<.>))
@ -151,8 +151,8 @@ renderPackageFlags render docURL =
code = (thespan ! [theclass "code"] <<)
whenNotNull xs a = if null xs then [] else a
moduleSection :: PackageRender -> Maybe TarIndex -> URL -> Bool -> [Html]
moduleSection render mdocIndex docURL quickNav =
moduleSection :: PackageRender -> Maybe TarIndex -> URL -> Maybe PackageId -> Bool -> [Html]
moduleSection render mdocIndex docURL mPkgId quickNav =
maybeToList $ fmap msect (rendModules render mdocIndex)
where msect ModSigIndex{ modIndex = m, sigIndex = s } = toHtml $
(if not (null s)
@ -164,16 +164,25 @@ moduleSection render mdocIndex docURL quickNav =
[renderDocIndexLink] ++
[renderModuleForest docURL m ]
else [])
renderDocIndexLink = case mdocIndex of
Just tindex ->
let docIndexURL | isJust (Tar.lookup tindex "doc-index-All.html") = docURL </> "doc-index-All.html"
| otherwise = docURL </> "doc-index.html"
in paragraph ! [thestyle "font-size: small"]
<< ("[" +++ anchor ! [href docIndexURL] << "Index" +++ "]" +++
(if quickNav
then " [" +++ anchor ! [identifier "quickjump-trigger", href "#"] << "Quick Jump" +++ "]"
else mempty))
Nothing -> mempty
renderDocIndexLink = case concatLinks indexLinks of
Nothing -> mempty
Just links -> paragraph ! [thestyle "font-size: small"] << ("[" +++ links +++ "]")
where
indexLinks = catMaybes $ case mdocIndex of
Just tindex ->
let docIndexURL | isJust (Tar.lookup tindex "doc-index-All.html") = docURL </> "doc-index-All.html"
| otherwise = docURL </> "doc-index.html"
in [ Just $ anchor ! [href docIndexURL] << "Index"
, if quickNav
then Just $ anchor ! [identifier "quickjump-trigger", href "#"] << "Quick Jump"
else Nothing
]
Nothing -> []
++ [fmap (\pkgId -> anchor ! [href (packageURL pkgId)] << "Last Documentation") mPkgId]
concatLinks [] = Nothing
concatLinks [h] = Just h
concatLinks (h:hs) = (h +++) . ("] [" +++) <$> concatLinks hs
tabulate :: [(String, Html)] -> Html
tabulate items = table ! [theclass "properties"] <<

View File

@ -80,14 +80,14 @@ import Distribution.Server.Features.Html.HtmlUtilities
-- votes it has.
packagePageTemplate :: PackageRender
-> Maybe TarIndex -> Maybe DocMeta -> Maybe BS.ByteString
-> URL -> [(DistroName, DistroPackageInfo)]
-> URL -> Maybe PackageId -> [(DistroName, DistroPackageInfo)]
-> Maybe [PackageName]
-> HtmlUtilities
-> Bool
-> [TemplateAttr]
packagePageTemplate render
mdocIndex mdocMeta mreadme
docURL distributions
docURL mPkgId distributions
deprs utilities isCandidate =
if isCandidate
then
@ -97,7 +97,7 @@ packagePageTemplate render
, "doc" $= docFieldsTemplate
] ++
-- Miscellaneous things that could still stand to be refactored a bit.
[ "moduleList" $= Old.moduleSection render mdocIndex docURL False
[ "moduleList" $= Old.moduleSection render mdocIndex docURL mPkgId False
, "downloadSection" $= Old.downloadSection render
]
else
@ -107,7 +107,7 @@ packagePageTemplate render
, "doc" $= docFieldsTemplate
] ++
-- Miscellaneous things that could still stand to be refactored a bit.
[ "moduleList" $= Old.moduleSection render mdocIndex docURL hasQuickNav
[ "moduleList" $= Old.moduleSection render mdocIndex docURL mPkgId hasQuickNav
, "executables" $= (commaList . map toHtml $ rendExecNames render)
, "downloadSection" $= Old.downloadSection render
, "stability" $= renderStability desc
@ -339,7 +339,6 @@ candidatesPageTemplate cands candidates candidatesCore=
, toHtml $ ". " ++ fromShortText (synopsis desc)
]
-- #ToDo: Pick out several interesting versions to display, with a link to
-- display all versions.
renderVersion :: PackageId -> [(Version, VersionStatus)] -> Maybe String -> Html

View File

@ -0,0 +1,24 @@
Copyright (c) 2012, Dag Odenhall
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,81 @@
module Distribution.Server.Util.NLP.Snowball where
-------------------------------------------------------------------------------
import Control.Concurrent (MVar, newMVar, withMVar)
import Control.Monad (forM, when)
-------------------------------------------------------------------------------
import Data.ByteString.Char8 (packCStringLen, useAsCString)
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
-------------------------------------------------------------------------------
import Foreign (ForeignPtr, FunPtr, Ptr, newForeignPtr,
nullPtr, withForeignPtr)
import Foreign.C (CInt (..), CString)
-------------------------------------------------------------------------------
import System.IO.Unsafe (unsafePerformIO)
-------------------------------------------------------------------------------
stem :: Text -> Text
stem word = let [a] = stems [word] in a
stems :: [Text] -> [Text]
stems ws =
unsafePerformIO $
do stemmer <- newStemmer
stemsWith stemmer ws
-------------------------------------------------------------------------------
-- | A thread and memory safe Snowball stemmer instance.
newtype Stemmer = Stemmer (MVar (ForeignPtr Struct))
-- | Create a new reusable 'Stemmer' instance.
newStemmer :: IO Stemmer
newStemmer = do
struct <- stemmer_new
when (struct == nullPtr) $
error "Text.Snowball.newStemmer: nullPtr"
structPtr <- newForeignPtr stemmer_delete struct
mvar <- newMVar (structPtr)
return $ Stemmer mvar
-- | Use a 'Stemmer' to stem a word. This can be used more efficiently
-- than 'stem' because you can keep a stemmer around and reuse it, but it
-- requires 'IO' to ensure thread safety.
stemWith :: Stemmer -> Text -> IO Text
stemWith stemmer word = do
[a] <- stemsWith stemmer [word]
return a
-- | Use a 'Stemmer' to stem multiple words in one go. This can be more
-- efficient than @'mapM' 'stemWith'@ because the 'Stemmer' is only
-- locked once.
stemsWith :: Stemmer -> [Text] -> IO [Text]
stemsWith (Stemmer mvar) ws =
withMVar mvar $ \(structPtr) ->
withForeignPtr structPtr $ \struct ->
forM ws $ \word ->
useAsCString (Text.encodeUtf8 word) $ \word' ->
do ptr <- stemmer_stem struct word' $
fromIntegral $ Text.length word
len <- stemmer_length struct
bytes <- packCStringLen (ptr,fromIntegral len)
return $ Text.decodeUtf8 bytes
-------------------------------------------------------------------------------
data Struct
foreign import ccall unsafe "libstemmer.h english_ISO_8859_1_stemmer_new"
stemmer_new :: IO (Ptr Struct)
foreign import ccall unsafe "libstemmer.h &english_ISO_8859_1_stemmer_delete"
stemmer_delete :: FunPtr (Ptr Struct -> IO ())
foreign import ccall unsafe "libstemmer.h english_ISO_8859_1_stemmer_stem"
stemmer_stem :: Ptr Struct -> CString -> CInt -> IO (CString)
foreign import ccall unsafe "libstemmer.h english_ISO_8859_1_stemmer_length"
stemmer_length :: Ptr Struct -> IO CInt