mirror of
https://github.com/haskell/hackage-server.git
synced 2024-11-24 14:52:16 +03:00
parent
a5bf92c522
commit
8095dc0c1b
@ -63,6 +63,12 @@ extra-source-files:
|
||||
tests/unpack-checks/LANGUAGE-GHC-9.2/Main.hs
|
||||
tests/unpack-checks/LANGUAGE-GHC-9.2/Setup.hs
|
||||
tests/unpack-checks/LANGUAGE-GHC-9.2/LANGUAGE-GHC.cabal
|
||||
libstemmer_c/src_c/stem_ISO_8859_1_english.h
|
||||
libstemmer_c/include/libstemmer.h
|
||||
libstemmer_c/runtime/api.h
|
||||
libstemmer_c/runtime/header.h
|
||||
libstemmer_c/LICENSE
|
||||
src/Distribution/Server/Util/NLP/LICENSE
|
||||
|
||||
source-repository head
|
||||
type: git
|
||||
@ -359,6 +365,7 @@ library lib-server
|
||||
Distribution.Server.Features.StaticFiles
|
||||
Distribution.Server.Features.ServerIntrospect
|
||||
Distribution.Server.Features.Sitemap
|
||||
Distribution.Server.Util.NLP.Snowball
|
||||
|
||||
if flag(debug)
|
||||
cpp-options: -DDEBUG
|
||||
@ -418,8 +425,12 @@ library lib-server
|
||||
, xss-sanitize ^>= 0.3.6
|
||||
|
||||
if !flag(minimal)
|
||||
build-depends: snowball ^>= 1.0
|
||||
, tokenize ^>= 0.3
|
||||
build-depends: tokenize ^>= 0.3
|
||||
|
||||
c-sources: libstemmer_c/src_c/stem_ISO_8859_1_english.c
|
||||
libstemmer_c/runtime/api.c
|
||||
libstemmer_c/runtime/utilities.c
|
||||
libstemmer_c/libstemmer/libstemmer.c
|
||||
|
||||
if flag(cabal-parsers)
|
||||
build-depends: cabal-parsers ^>= 0
|
||||
|
24
libstemmer_c/LICENSE
Normal file
24
libstemmer_c/LICENSE
Normal file
@ -0,0 +1,24 @@
|
||||
Copyright (c) 2002, Richard Boulton
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
20
libstemmer_c/include/libstemmer.h
Normal file
20
libstemmer_c/include/libstemmer.h
Normal file
@ -0,0 +1,20 @@
|
||||
|
||||
/* Make header file work when included from C++ */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef unsigned char sb_symbol;
|
||||
|
||||
struct SN_env * english_ISO_8859_1_stemmer_new();
|
||||
|
||||
void english_ISO_8859_1_stemmer_delete(struct SN_env * sn_env);
|
||||
|
||||
const sb_symbol * english_ISO_8859_1_stemmer_stem(struct SN_env * sn_env, const sb_symbol * word, int size);
|
||||
|
||||
int english_ISO_8859_1_stemmer_length(struct SN_env * sn_env);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
47
libstemmer_c/libstemmer/libstemmer.c
Normal file
47
libstemmer_c/libstemmer/libstemmer.c
Normal file
@ -0,0 +1,47 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "../include/libstemmer.h"
|
||||
#include "../runtime/api.h"
|
||||
#include "../src_c/stem_ISO_8859_1_english.h"
|
||||
|
||||
extern struct SN_env *
|
||||
english_ISO_8859_1_stemmer_new()
|
||||
{
|
||||
struct SN_env * sn_env = english_ISO_8859_1_create_env();
|
||||
if (sn_env == NULL)
|
||||
{
|
||||
english_ISO_8859_1_stemmer_delete(sn_env);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return sn_env;
|
||||
}
|
||||
|
||||
void
|
||||
english_ISO_8859_1_stemmer_delete(struct SN_env * sn_env)
|
||||
{
|
||||
if (sn_env == 0) return;
|
||||
english_ISO_8859_1_close_env(sn_env);
|
||||
}
|
||||
|
||||
const sb_symbol *
|
||||
english_ISO_8859_1_stemmer_stem(struct SN_env * sn_env, const sb_symbol * word, int size)
|
||||
{
|
||||
int ret;
|
||||
if (SN_set_current(sn_env, size, (const symbol *)(word)))
|
||||
{
|
||||
sn_env->l = 0;
|
||||
return NULL;
|
||||
}
|
||||
ret = english_ISO_8859_1_stem(sn_env);
|
||||
if (ret < 0) return NULL;
|
||||
sn_env->p[sn_env->l] = 0;
|
||||
return (const sb_symbol *)(sn_env->p);
|
||||
}
|
||||
|
||||
int
|
||||
english_ISO_8859_1_stemmer_length(struct SN_env * sn_env)
|
||||
{
|
||||
return sn_env->l;
|
||||
}
|
66
libstemmer_c/runtime/api.c
Normal file
66
libstemmer_c/runtime/api.c
Normal file
@ -0,0 +1,66 @@
|
||||
|
||||
#include <stdlib.h> /* for calloc, free */
|
||||
#include "header.h"
|
||||
|
||||
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
|
||||
{
|
||||
struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
|
||||
if (z == NULL) return NULL;
|
||||
z->p = create_s();
|
||||
if (z->p == NULL) goto error;
|
||||
if (S_size)
|
||||
{
|
||||
int i;
|
||||
z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
|
||||
if (z->S == NULL) goto error;
|
||||
|
||||
for (i = 0; i < S_size; i++)
|
||||
{
|
||||
z->S[i] = create_s();
|
||||
if (z->S[i] == NULL) goto error;
|
||||
}
|
||||
}
|
||||
|
||||
if (I_size)
|
||||
{
|
||||
z->I = (int *) calloc(I_size, sizeof(int));
|
||||
if (z->I == NULL) goto error;
|
||||
}
|
||||
|
||||
if (B_size)
|
||||
{
|
||||
z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char));
|
||||
if (z->B == NULL) goto error;
|
||||
}
|
||||
|
||||
return z;
|
||||
error:
|
||||
SN_close_env(z, S_size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
extern void SN_close_env(struct SN_env * z, int S_size)
|
||||
{
|
||||
if (z == NULL) return;
|
||||
if (S_size)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < S_size; i++)
|
||||
{
|
||||
lose_s(z->S[i]);
|
||||
}
|
||||
free(z->S);
|
||||
}
|
||||
free(z->I);
|
||||
free(z->B);
|
||||
if (z->p) lose_s(z->p);
|
||||
free(z);
|
||||
}
|
||||
|
||||
extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
|
||||
{
|
||||
int err = replace_s(z, 0, z->l, size, s, NULL);
|
||||
z->c = 0;
|
||||
return err;
|
||||
}
|
||||
|
26
libstemmer_c/runtime/api.h
Normal file
26
libstemmer_c/runtime/api.h
Normal file
@ -0,0 +1,26 @@
|
||||
|
||||
typedef unsigned char symbol;
|
||||
|
||||
/* Or replace 'char' above with 'short' for 16 bit characters.
|
||||
|
||||
More precisely, replace 'char' with whatever type guarantees the
|
||||
character width you need. Note however that sizeof(symbol) should divide
|
||||
HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
|
||||
there is an alignment problem. In the unlikely event of a problem here,
|
||||
consult Martin Porter.
|
||||
|
||||
*/
|
||||
|
||||
struct SN_env {
|
||||
symbol * p;
|
||||
int c; int l; int lb; int bra; int ket;
|
||||
symbol * * S;
|
||||
int * I;
|
||||
unsigned char * B;
|
||||
};
|
||||
|
||||
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
|
||||
extern void SN_close_env(struct SN_env * z, int S_size);
|
||||
|
||||
extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
|
||||
|
58
libstemmer_c/runtime/header.h
Normal file
58
libstemmer_c/runtime/header.h
Normal file
@ -0,0 +1,58 @@
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include "api.h"
|
||||
|
||||
#define MAXINT INT_MAX
|
||||
#define MININT INT_MIN
|
||||
|
||||
#define HEAD 2*sizeof(int)
|
||||
|
||||
#define SIZE(p) ((int *)(p))[-1]
|
||||
#define SET_SIZE(p, n) ((int *)(p))[-1] = n
|
||||
#define CAPACITY(p) ((int *)(p))[-2]
|
||||
|
||||
struct among
|
||||
{ int s_size; /* number of chars in string */
|
||||
const symbol * s; /* search string */
|
||||
int substring_i;/* index to longest matching substring */
|
||||
int result; /* result of the lookup */
|
||||
int (* function)(struct SN_env *);
|
||||
};
|
||||
|
||||
extern symbol * create_s(void);
|
||||
extern void lose_s(symbol * p);
|
||||
|
||||
extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
|
||||
|
||||
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
|
||||
extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
|
||||
extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
|
||||
extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
|
||||
|
||||
extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
|
||||
extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
|
||||
extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
|
||||
extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
|
||||
|
||||
extern int eq_s(struct SN_env * z, int s_size, const symbol * s);
|
||||
extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s);
|
||||
extern int eq_v(struct SN_env * z, const symbol * p);
|
||||
extern int eq_v_b(struct SN_env * z, const symbol * p);
|
||||
|
||||
extern int find_among(struct SN_env * z, const struct among * v, int v_size);
|
||||
extern int find_among_b(struct SN_env * z, const struct among * v, int v_size);
|
||||
|
||||
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
|
||||
extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s);
|
||||
extern int slice_from_v(struct SN_env * z, const symbol * p);
|
||||
extern int slice_del(struct SN_env * z);
|
||||
|
||||
extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s);
|
||||
extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);
|
||||
|
||||
extern symbol * slice_to(struct SN_env * z, symbol * p);
|
||||
extern symbol * assign_to(struct SN_env * z, symbol * p);
|
||||
|
||||
extern void debug(struct SN_env * z, int number, int line_count);
|
||||
|
478
libstemmer_c/runtime/utilities.c
Normal file
478
libstemmer_c/runtime/utilities.c
Normal file
@ -0,0 +1,478 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "header.h"
|
||||
|
||||
#define unless(C) if(!(C))
|
||||
|
||||
#define CREATE_SIZE 1
|
||||
|
||||
extern symbol * create_s(void) {
|
||||
symbol * p;
|
||||
void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
|
||||
if (mem == NULL) return NULL;
|
||||
p = (symbol *) (HEAD + (char *) mem);
|
||||
CAPACITY(p) = CREATE_SIZE;
|
||||
SET_SIZE(p, CREATE_SIZE);
|
||||
return p;
|
||||
}
|
||||
|
||||
extern void lose_s(symbol * p) {
|
||||
if (p == NULL) return;
|
||||
free((char *) p - HEAD);
|
||||
}
|
||||
|
||||
/*
|
||||
new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c
|
||||
if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new
|
||||
position, or 0 on failure.
|
||||
|
||||
-- used to implement hop and next in the utf8 case.
|
||||
*/
|
||||
|
||||
extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
|
||||
int b;
|
||||
if (n >= 0) {
|
||||
for (; n > 0; n--) {
|
||||
if (c >= l) return -1;
|
||||
b = p[c++];
|
||||
if (b >= 0xC0) { /* 1100 0000 */
|
||||
while (c < l) {
|
||||
b = p[c];
|
||||
if (b >= 0xC0 || b < 0x80) break;
|
||||
/* break unless b is 10------ */
|
||||
c++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (; n < 0; n++) {
|
||||
if (c <= lb) return -1;
|
||||
b = p[--c];
|
||||
if (b >= 0x80) { /* 1000 0000 */
|
||||
while (c > lb) {
|
||||
b = p[c];
|
||||
if (b >= 0xC0) break; /* 1100 0000 */
|
||||
c--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/* Code for character groupings: utf8 cases */
|
||||
|
||||
static int get_utf8(const symbol * p, int c, int l, int * slot) {
|
||||
int b0, b1;
|
||||
if (c >= l) return 0;
|
||||
b0 = p[c++];
|
||||
if (b0 < 0xC0 || c == l) { /* 1100 0000 */
|
||||
* slot = b0; return 1;
|
||||
}
|
||||
b1 = p[c++];
|
||||
if (b0 < 0xE0 || c == l) { /* 1110 0000 */
|
||||
* slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2;
|
||||
}
|
||||
* slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[c] & 0x3F); return 3;
|
||||
}
|
||||
|
||||
static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
|
||||
int b0, b1;
|
||||
if (c <= lb) return 0;
|
||||
b0 = p[--c];
|
||||
if (b0 < 0x80 || c == lb) { /* 1000 0000 */
|
||||
* slot = b0; return 1;
|
||||
}
|
||||
b1 = p[--c];
|
||||
if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */
|
||||
* slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2;
|
||||
}
|
||||
* slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
|
||||
}
|
||||
|
||||
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
||||
do {
|
||||
int ch;
|
||||
int w = get_utf8(z->p, z->c, z->l, & ch);
|
||||
unless (w) return -1;
|
||||
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return w;
|
||||
z->c += w;
|
||||
} while (repeat);
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
||||
do {
|
||||
int ch;
|
||||
int w = get_b_utf8(z->p, z->c, z->lb, & ch);
|
||||
unless (w) return -1;
|
||||
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return w;
|
||||
z->c -= w;
|
||||
} while (repeat);
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
||||
do {
|
||||
int ch;
|
||||
int w = get_utf8(z->p, z->c, z->l, & ch);
|
||||
unless (w) return -1;
|
||||
unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return w;
|
||||
z->c += w;
|
||||
} while (repeat);
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
||||
do {
|
||||
int ch;
|
||||
int w = get_b_utf8(z->p, z->c, z->lb, & ch);
|
||||
unless (w) return -1;
|
||||
unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return w;
|
||||
z->c -= w;
|
||||
} while (repeat);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Code for character groupings: non-utf8 cases */
|
||||
|
||||
extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
||||
do {
|
||||
int ch;
|
||||
if (z->c >= z->l) return -1;
|
||||
ch = z->p[z->c];
|
||||
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return 1;
|
||||
z->c++;
|
||||
} while (repeat);
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
||||
do {
|
||||
int ch;
|
||||
if (z->c <= z->lb) return -1;
|
||||
ch = z->p[z->c - 1];
|
||||
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return 1;
|
||||
z->c--;
|
||||
} while (repeat);
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
||||
do {
|
||||
int ch;
|
||||
if (z->c >= z->l) return -1;
|
||||
ch = z->p[z->c];
|
||||
unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return 1;
|
||||
z->c++;
|
||||
} while (repeat);
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
||||
do {
|
||||
int ch;
|
||||
if (z->c <= z->lb) return -1;
|
||||
ch = z->p[z->c - 1];
|
||||
unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return 1;
|
||||
z->c--;
|
||||
} while (repeat);
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int eq_s(struct SN_env * z, int s_size, const symbol * s) {
|
||||
if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
|
||||
z->c += s_size; return 1;
|
||||
}
|
||||
|
||||
extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) {
|
||||
if (z->c - z->lb < s_size || memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
|
||||
z->c -= s_size; return 1;
|
||||
}
|
||||
|
||||
extern int eq_v(struct SN_env * z, const symbol * p) {
|
||||
return eq_s(z, SIZE(p), p);
|
||||
}
|
||||
|
||||
extern int eq_v_b(struct SN_env * z, const symbol * p) {
|
||||
return eq_s_b(z, SIZE(p), p);
|
||||
}
|
||||
|
||||
extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
|
||||
|
||||
int i = 0;
|
||||
int j = v_size;
|
||||
|
||||
int c = z->c; int l = z->l;
|
||||
symbol * q = z->p + c;
|
||||
|
||||
const struct among * w;
|
||||
|
||||
int common_i = 0;
|
||||
int common_j = 0;
|
||||
|
||||
int first_key_inspected = 0;
|
||||
|
||||
while(1) {
|
||||
int k = i + ((j - i) >> 1);
|
||||
int diff = 0;
|
||||
int common = common_i < common_j ? common_i : common_j; /* smaller */
|
||||
w = v + k;
|
||||
{
|
||||
int i2; for (i2 = common; i2 < w->s_size; i2++) {
|
||||
if (c + common == l) { diff = -1; break; }
|
||||
diff = q[common] - w->s[i2];
|
||||
if (diff != 0) break;
|
||||
common++;
|
||||
}
|
||||
}
|
||||
if (diff < 0) { j = k; common_j = common; }
|
||||
else { i = k; common_i = common; }
|
||||
if (j - i <= 1) {
|
||||
if (i > 0) break; /* v->s has been inspected */
|
||||
if (j == i) break; /* only one item in v */
|
||||
|
||||
/* - but now we need to go round once more to get
|
||||
v->s inspected. This looks messy, but is actually
|
||||
the optimal approach. */
|
||||
|
||||
if (first_key_inspected) break;
|
||||
first_key_inspected = 1;
|
||||
}
|
||||
}
|
||||
while(1) {
|
||||
w = v + i;
|
||||
if (common_i >= w->s_size) {
|
||||
z->c = c + w->s_size;
|
||||
if (w->function == 0) return w->result;
|
||||
{
|
||||
int res = w->function(z);
|
||||
z->c = c + w->s_size;
|
||||
if (res) return w->result;
|
||||
}
|
||||
}
|
||||
i = w->substring_i;
|
||||
if (i < 0) return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* find_among_b is for backwards processing. Same comments apply */
|
||||
|
||||
extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
|
||||
|
||||
int i = 0;
|
||||
int j = v_size;
|
||||
|
||||
int c = z->c; int lb = z->lb;
|
||||
symbol * q = z->p + c - 1;
|
||||
|
||||
const struct among * w;
|
||||
|
||||
int common_i = 0;
|
||||
int common_j = 0;
|
||||
|
||||
int first_key_inspected = 0;
|
||||
|
||||
while(1) {
|
||||
int k = i + ((j - i) >> 1);
|
||||
int diff = 0;
|
||||
int common = common_i < common_j ? common_i : common_j;
|
||||
w = v + k;
|
||||
{
|
||||
int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) {
|
||||
if (c - common == lb) { diff = -1; break; }
|
||||
diff = q[- common] - w->s[i2];
|
||||
if (diff != 0) break;
|
||||
common++;
|
||||
}
|
||||
}
|
||||
if (diff < 0) { j = k; common_j = common; }
|
||||
else { i = k; common_i = common; }
|
||||
if (j - i <= 1) {
|
||||
if (i > 0) break;
|
||||
if (j == i) break;
|
||||
if (first_key_inspected) break;
|
||||
first_key_inspected = 1;
|
||||
}
|
||||
}
|
||||
while(1) {
|
||||
w = v + i;
|
||||
if (common_i >= w->s_size) {
|
||||
z->c = c - w->s_size;
|
||||
if (w->function == 0) return w->result;
|
||||
{
|
||||
int res = w->function(z);
|
||||
z->c = c - w->s_size;
|
||||
if (res) return w->result;
|
||||
}
|
||||
}
|
||||
i = w->substring_i;
|
||||
if (i < 0) return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Increase the size of the buffer pointed to by p to at least n symbols.
|
||||
* If insufficient memory, returns NULL and frees the old buffer.
|
||||
*/
|
||||
static symbol * increase_size(symbol * p, int n) {
|
||||
symbol * q;
|
||||
int new_size = n + 20;
|
||||
void * mem = realloc((char *) p - HEAD,
|
||||
HEAD + (new_size + 1) * sizeof(symbol));
|
||||
if (mem == NULL) {
|
||||
lose_s(p);
|
||||
return NULL;
|
||||
}
|
||||
q = (symbol *) (HEAD + (char *)mem);
|
||||
CAPACITY(q) = new_size;
|
||||
return q;
|
||||
}
|
||||
|
||||
/* to replace symbols between c_bra and c_ket in z->p by the
|
||||
s_size symbols at s.
|
||||
Returns 0 on success, -1 on error.
|
||||
Also, frees z->p (and sets it to NULL) on error.
|
||||
*/
|
||||
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
|
||||
{
|
||||
int adjustment;
|
||||
int len;
|
||||
if (z->p == NULL) {
|
||||
z->p = create_s();
|
||||
if (z->p == NULL) return -1;
|
||||
}
|
||||
adjustment = s_size - (c_ket - c_bra);
|
||||
len = SIZE(z->p);
|
||||
if (adjustment != 0) {
|
||||
if (adjustment + len > CAPACITY(z->p)) {
|
||||
z->p = increase_size(z->p, adjustment + len);
|
||||
if (z->p == NULL) return -1;
|
||||
}
|
||||
memmove(z->p + c_ket + adjustment,
|
||||
z->p + c_ket,
|
||||
(len - c_ket) * sizeof(symbol));
|
||||
SET_SIZE(z->p, adjustment + len);
|
||||
z->l += adjustment;
|
||||
if (z->c >= c_ket)
|
||||
z->c += adjustment;
|
||||
else
|
||||
if (z->c > c_bra)
|
||||
z->c = c_bra;
|
||||
}
|
||||
unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
|
||||
if (adjptr != NULL)
|
||||
*adjptr = adjustment;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int slice_check(struct SN_env * z) {
|
||||
|
||||
if (z->bra < 0 ||
|
||||
z->bra > z->ket ||
|
||||
z->ket > z->l ||
|
||||
z->p == NULL ||
|
||||
z->l > SIZE(z->p)) /* this line could be removed */
|
||||
{
|
||||
#if 0
|
||||
fprintf(stderr, "faulty slice operation:\n");
|
||||
debug(z, -1, 0);
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) {
|
||||
if (slice_check(z)) return -1;
|
||||
return replace_s(z, z->bra, z->ket, s_size, s, NULL);
|
||||
}
|
||||
|
||||
extern int slice_from_v(struct SN_env * z, const symbol * p) {
|
||||
return slice_from_s(z, SIZE(p), p);
|
||||
}
|
||||
|
||||
extern int slice_del(struct SN_env * z) {
|
||||
return slice_from_s(z, 0, 0);
|
||||
}
|
||||
|
||||
extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) {
|
||||
int adjustment;
|
||||
if (replace_s(z, bra, ket, s_size, s, &adjustment))
|
||||
return -1;
|
||||
if (bra <= z->bra) z->bra += adjustment;
|
||||
if (bra <= z->ket) z->ket += adjustment;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
|
||||
int adjustment;
|
||||
if (replace_s(z, bra, ket, SIZE(p), p, &adjustment))
|
||||
return -1;
|
||||
if (bra <= z->bra) z->bra += adjustment;
|
||||
if (bra <= z->ket) z->ket += adjustment;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern symbol * slice_to(struct SN_env * z, symbol * p) {
|
||||
if (slice_check(z)) {
|
||||
lose_s(p);
|
||||
return NULL;
|
||||
}
|
||||
{
|
||||
int len = z->ket - z->bra;
|
||||
if (CAPACITY(p) < len) {
|
||||
p = increase_size(p, len);
|
||||
if (p == NULL)
|
||||
return NULL;
|
||||
}
|
||||
memmove(p, z->p + z->bra, len * sizeof(symbol));
|
||||
SET_SIZE(p, len);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
extern symbol * assign_to(struct SN_env * z, symbol * p) {
|
||||
int len = z->l;
|
||||
if (CAPACITY(p) < len) {
|
||||
p = increase_size(p, len);
|
||||
if (p == NULL)
|
||||
return NULL;
|
||||
}
|
||||
memmove(p, z->p, len * sizeof(symbol));
|
||||
SET_SIZE(p, len);
|
||||
return p;
|
||||
}
|
||||
|
||||
#if 0
|
||||
extern void debug(struct SN_env * z, int number, int line_count) {
|
||||
int i;
|
||||
int limit = SIZE(z->p);
|
||||
/*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
|
||||
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
|
||||
for (i = 0; i <= limit; i++) {
|
||||
if (z->lb == i) printf("{");
|
||||
if (z->bra == i) printf("[");
|
||||
if (z->c == i) printf("|");
|
||||
if (z->ket == i) printf("]");
|
||||
if (z->l == i) printf("}");
|
||||
if (i < limit)
|
||||
{ int ch = z->p[i];
|
||||
if (ch == 0) ch = '#';
|
||||
printf("%c", ch);
|
||||
}
|
||||
}
|
||||
printf("'\n");
|
||||
}
|
||||
#endif
|
1117
libstemmer_c/src_c/stem_ISO_8859_1_english.c
Normal file
1117
libstemmer_c/src_c/stem_ISO_8859_1_english.c
Normal file
File diff suppressed because it is too large
Load Diff
16
libstemmer_c/src_c/stem_ISO_8859_1_english.h
Normal file
16
libstemmer_c/src_c/stem_ISO_8859_1_english.h
Normal file
@ -0,0 +1,16 @@
|
||||
|
||||
/* This file was generated automatically by the Snowball to ANSI C compiler */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern struct SN_env * english_ISO_8859_1_create_env(void);
|
||||
extern void english_ISO_8859_1_close_env(struct SN_env * z);
|
||||
|
||||
extern int english_ISO_8859_1_stem(struct SN_env * z);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -230,6 +230,17 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do
|
||||
uploadFeature
|
||||
(candidatesCoreResource candidatesFeature)
|
||||
|
||||
tagsFeature <- mkTagsFeature
|
||||
coreFeature
|
||||
uploadFeature
|
||||
usersFeature
|
||||
|
||||
versionsFeature <- mkVersionsFeature
|
||||
coreFeature
|
||||
uploadFeature
|
||||
tagsFeature
|
||||
usersFeature
|
||||
|
||||
documentationCoreFeature <- mkDocumentationCoreFeature
|
||||
(coreResource coreFeature)
|
||||
(map packageId . allPackages <$> queryGetPackageIndex coreFeature)
|
||||
@ -237,6 +248,7 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do
|
||||
tarIndexCacheFeature
|
||||
reportsCoreFeature
|
||||
usersFeature
|
||||
versionsFeature
|
||||
|
||||
documentationCandidatesFeature <- mkDocumentationCandidatesFeature
|
||||
(candidatesCoreResource candidatesFeature)
|
||||
@ -245,6 +257,7 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do
|
||||
tarIndexCacheFeature
|
||||
reportsCandidatesFeature
|
||||
usersFeature
|
||||
versionsFeature
|
||||
|
||||
downloadFeature <- mkDownloadFeature
|
||||
coreFeature
|
||||
@ -254,22 +267,11 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do
|
||||
coreFeature
|
||||
usersFeature
|
||||
|
||||
tagsFeature <- mkTagsFeature
|
||||
coreFeature
|
||||
uploadFeature
|
||||
usersFeature
|
||||
|
||||
analyticsPixelsFeature <- mkAnalyticsPixelsFeature
|
||||
coreFeature
|
||||
usersFeature
|
||||
uploadFeature
|
||||
|
||||
versionsFeature <- mkVersionsFeature
|
||||
coreFeature
|
||||
uploadFeature
|
||||
tagsFeature
|
||||
usersFeature
|
||||
|
||||
{- [reverse index disabled]
|
||||
reverseFeature <- mkReverseFeature
|
||||
coreFeature
|
||||
|
@ -1,5 +1,6 @@
|
||||
{-# LANGUAGE RankNTypes, FlexibleContexts,
|
||||
NamedFieldPuns, RecordWildCards, PatternGuards #-}
|
||||
{-# LANGUAGE LambdaCase #-}
|
||||
module Distribution.Server.Features.Documentation (
|
||||
DocumentationFeature(..),
|
||||
DocumentationResource(..),
|
||||
@ -41,6 +42,9 @@ import Data.Maybe
|
||||
import Data.Time.Clock (NominalDiffTime, diffUTCTime, getCurrentTime)
|
||||
import System.Directory (getModificationTime)
|
||||
import Control.Applicative
|
||||
import Distribution.Server.Features.PreferredVersions
|
||||
import Distribution.Server.Features.PreferredVersions.State (getVersionStatus)
|
||||
import Distribution.Server.Packages.Types
|
||||
-- TODO:
|
||||
-- 1. Write an HTML view for organizing uploads
|
||||
-- 2. Have cabal generate a standard doc tarball, and serve that here
|
||||
@ -51,6 +55,8 @@ data DocumentationFeature = DocumentationFeature {
|
||||
queryDocumentation :: forall m. MonadIO m => PackageIdentifier -> m (Maybe BlobId),
|
||||
queryDocumentationIndex :: forall m. MonadIO m => m (Map.Map PackageId BlobId),
|
||||
|
||||
latestPackageWithDocumentation :: forall m. MonadIO m => PreferredInfo -> [PkgInfo] -> m (Maybe PackageId),
|
||||
|
||||
uploadDocumentation :: DynamicPath -> ServerPartE Response,
|
||||
deleteDocumentation :: DynamicPath -> ServerPartE Response,
|
||||
|
||||
@ -82,6 +88,7 @@ initDocumentationFeature :: String
|
||||
-> TarIndexCacheFeature
|
||||
-> ReportsFeature
|
||||
-> UserFeature
|
||||
-> VersionsFeature
|
||||
-> IO DocumentationFeature)
|
||||
initDocumentationFeature name
|
||||
env@ServerEnv{serverStateDir} = do
|
||||
@ -91,9 +98,9 @@ initDocumentationFeature name
|
||||
-- Hooks
|
||||
documentationChangeHook <- newHook
|
||||
|
||||
return $ \core getPackages upload tarIndexCache reportsCore user -> do
|
||||
return $ \core getPackages upload tarIndexCache reportsCore user version -> do
|
||||
let feature = documentationFeature name env
|
||||
core getPackages upload tarIndexCache reportsCore user
|
||||
core getPackages upload tarIndexCache reportsCore user version
|
||||
documentationState
|
||||
documentationChangeHook
|
||||
return feature
|
||||
@ -139,6 +146,7 @@ documentationFeature :: String
|
||||
-> TarIndexCacheFeature
|
||||
-> ReportsFeature
|
||||
-> UserFeature
|
||||
-> VersionsFeature
|
||||
-> StateComponent AcidState Documentation
|
||||
-> Hook PackageId ()
|
||||
-> DocumentationFeature
|
||||
@ -149,13 +157,14 @@ documentationFeature name
|
||||
, guardValidPackageId
|
||||
, corePackagePage
|
||||
, corePackagesPage
|
||||
, lookupPackageId
|
||||
, lookupPackageName
|
||||
}
|
||||
getPackages
|
||||
UploadFeature{..}
|
||||
TarIndexCacheFeature{cachedTarIndex}
|
||||
ReportsFeature{..}
|
||||
UserFeature{ guardAuthorised_ }
|
||||
VersionsFeature{queryGetPreferredInfo}
|
||||
documentationState
|
||||
documentationChangeHook
|
||||
= DocumentationFeature{..}
|
||||
@ -352,15 +361,30 @@ documentationFeature name
|
||||
runHook_ documentationChangeHook pkgid
|
||||
noContent (toResponse ())
|
||||
|
||||
latestPackageWithDocumentation :: MonadIO m => PreferredInfo -> [PkgInfo] -> m (Maybe PackageId)
|
||||
latestPackageWithDocumentation prefInfo ps = helper (reverse ps)
|
||||
where
|
||||
helper [] = helper2 (reverse ps)
|
||||
helper (pkg:pkgs) = do
|
||||
hasDoc <- queryHasDocumentation (pkgInfoId pkg)
|
||||
let status = getVersionStatus prefInfo (packageVersion pkg)
|
||||
if hasDoc && status == NormalVersion
|
||||
then pure (Just (packageId pkg))
|
||||
else helper pkgs
|
||||
|
||||
helper2 [] = pure Nothing
|
||||
helper2 (pkg:pkgs) = do
|
||||
hasDoc <- queryHasDocumentation (pkgInfoId pkg)
|
||||
if hasDoc
|
||||
then pure (Just (packageId pkg))
|
||||
else helper2 pkgs
|
||||
|
||||
withDocumentation :: Resource -> DynamicPath
|
||||
-> (PackageId -> BlobId -> TarIndex -> ServerPartE Response)
|
||||
-> ServerPartE Response
|
||||
withDocumentation self dpath func = do
|
||||
pkgid <- packageInPath dpath
|
||||
|
||||
-- lookupPackageId returns the latest version if no version is specified.
|
||||
pkginfo <- lookupPackageId pkgid
|
||||
|
||||
-- Set up the canonical URL to point to the unversioned path
|
||||
let basedpath =
|
||||
[ if var == "package"
|
||||
@ -375,17 +399,27 @@ documentationFeature name
|
||||
-- See https://support.google.com/webmasters/answer/139066?hl=en#6
|
||||
setHeaderM "Link" canonicalHeader
|
||||
|
||||
case pkgVersion pkgid == nullVersion of
|
||||
-- if no version is given we want to redirect to the latest version
|
||||
True -> tempRedirect latestPkgPath (toResponse "")
|
||||
where
|
||||
latest = packageId pkginfo
|
||||
dpath' = [ if var == "package"
|
||||
then (var, display latest)
|
||||
else e
|
||||
| e@(var, _) <- dpath ]
|
||||
latestPkgPath = (renderResource' self dpath')
|
||||
-- Essentially errNotFound, but overloaded to specify a header.
|
||||
-- (Needed since errNotFound throws away result of setHeaderM)
|
||||
let errNotFoundH title message = throwError
|
||||
(ErrorResponse 404
|
||||
[("Link", canonicalHeader)]
|
||||
title message)
|
||||
|
||||
case pkgVersion pkgid == nullVersion of
|
||||
-- if no version is given we want to redirect to the latest version with docs
|
||||
True -> do
|
||||
pkgs <- lookupPackageName (pkgName pkgid)
|
||||
prefInfo <- queryGetPreferredInfo (pkgName pkgid)
|
||||
latestPackageWithDocumentation prefInfo pkgs >>= \case
|
||||
Just latestWithDocs -> do
|
||||
let dpath' = [ if var == "package"
|
||||
then (var, display latestWithDocs)
|
||||
else e
|
||||
| e@(var, _) <- dpath ]
|
||||
latestPkgPath = (renderResource' self dpath')
|
||||
tempRedirect latestPkgPath (toResponse "")
|
||||
Nothing -> errNotFoundH "Not Found" [MText "There is no documentation for this package."]
|
||||
False -> do
|
||||
mdocs <- queryState documentationState $ LookupDocumentation pkgid
|
||||
case mdocs of
|
||||
@ -397,13 +431,6 @@ documentationFeature name
|
||||
, MLink canonicalLink canonicalLink
|
||||
, MText " for the latest version."
|
||||
]
|
||||
where
|
||||
-- Essentially errNotFound, but overloaded to specify a header.
|
||||
-- (Needed since errNotFound throws away result of setHeaderM)
|
||||
errNotFoundH title message = throwError
|
||||
(ErrorResponse 404
|
||||
[("Link", canonicalHeader)]
|
||||
title message)
|
||||
Just blob -> do
|
||||
index <- liftIO $ cachedTarIndex blob
|
||||
func pkgid blob index
|
||||
@ -439,6 +466,7 @@ checkDocTarball pkgid =
|
||||
docMetaPath = DocMeta.packageDocMetaTarPath pkgid
|
||||
|
||||
|
||||
|
||||
{------------------------------------------------------------------------------
|
||||
Auxiliary
|
||||
------------------------------------------------------------------------------}
|
||||
|
@ -606,6 +606,8 @@ mkHtmlCore ServerEnv{serverBaseURI, serverBlobStore}
|
||||
deprs <- queryGetDeprecatedFor pkgname
|
||||
mreadme <- makeReadme render
|
||||
hasDocs <- queryHasDocumentation documentationFeature realpkg
|
||||
mDocPkgId <- if hasDocs then pure Nothing
|
||||
else latestPackageWithDocumentation documentationFeature prefInfo pkgs
|
||||
rptStats <- queryLastReportStats reportsFeature realpkg
|
||||
candidates <- lookupCandidateName pkgname
|
||||
buildStatus <- renderBuildStatus
|
||||
@ -670,7 +672,7 @@ mkHtmlCore ServerEnv{serverBaseURI, serverBlobStore}
|
||||
-- Items not related to IO (mostly pure functions)
|
||||
PagesNew.packagePageTemplate render
|
||||
mdocIndex mdocMeta mreadme
|
||||
docURL distributions
|
||||
docURL mDocPkgId distributions
|
||||
deprs
|
||||
utilities
|
||||
False
|
||||
@ -1294,7 +1296,7 @@ mkHtmlCandidates utilities@HtmlUtilities{..}
|
||||
] ++
|
||||
PagesNew.packagePageTemplate render
|
||||
mdocIndex Nothing mreadme
|
||||
docURL [] Nothing
|
||||
docURL Nothing [] Nothing
|
||||
utilities
|
||||
True
|
||||
|
||||
|
@ -12,7 +12,7 @@ import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
import Data.Char
|
||||
import qualified NLP.Tokenize as NLP
|
||||
import qualified NLP.Snowball as NLP
|
||||
import qualified Distribution.Server.Util.NLP.Snowball as NLP
|
||||
import qualified Data.Foldable as F
|
||||
|
||||
import qualified Documentation.Haddock.Markup as Haddock
|
||||
@ -26,7 +26,7 @@ extraStems ss x = x : mapMaybe (`T.stripSuffix` x) ss
|
||||
extractSynopsisTerms :: [Text] -> Set Text -> String -> [Text]
|
||||
extractSynopsisTerms ss stopWords =
|
||||
concatMap (extraStems ss) --note this adds extra possible stems, it doesn't delete any given one.
|
||||
. NLP.stems NLP.English
|
||||
. NLP.stems
|
||||
. filter (`Set.notMember` stopWords)
|
||||
. map (T.toCaseFold . T.pack)
|
||||
. concatMap splitTok
|
||||
@ -54,7 +54,7 @@ splitTok tok =
|
||||
extractDescriptionTerms :: [Text] -> Set Text -> String -> [Text]
|
||||
extractDescriptionTerms ss stopWords =
|
||||
concatMap (extraStems ss)
|
||||
. NLP.stems NLP.English
|
||||
. NLP.stems
|
||||
. filter (`Set.notMember` stopWords)
|
||||
. map (T.toCaseFold . T.pack)
|
||||
. maybe
|
||||
|
@ -14,7 +14,7 @@ import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
import Data.Text (Text, unpack)
|
||||
import qualified Data.Text as T
|
||||
import NLP.Snowball
|
||||
import Distribution.Server.Util.NLP.Snowball
|
||||
|
||||
import Distribution.Package
|
||||
import Distribution.PackageDescription
|
||||
@ -62,7 +62,7 @@ pkgSearchConfig =
|
||||
normaliseQueryToken tok =
|
||||
let tokFold = T.toCaseFold tok
|
||||
-- we don't need to use extraStems here because the index is inflated by it already.
|
||||
tokStem = stem English tokFold
|
||||
tokStem = stem tokFold
|
||||
in \field -> case field of
|
||||
NameField -> tokFold
|
||||
SynopsisField -> tokStem
|
||||
|
@ -34,7 +34,7 @@ import Distribution.Utils.ShortText (fromShortText, ShortText)
|
||||
import Text.XHtml.Strict hiding (p, name, title, content)
|
||||
import qualified Text.XHtml.Strict
|
||||
|
||||
import Data.Maybe (fromMaybe, maybeToList, isJust, mapMaybe)
|
||||
import Data.Maybe (fromMaybe, maybeToList, isJust, mapMaybe, catMaybes)
|
||||
import Data.List (intersperse, intercalate, partition)
|
||||
import Control.Arrow (second)
|
||||
import System.FilePath.Posix ((</>), (<.>))
|
||||
@ -151,8 +151,8 @@ renderPackageFlags render docURL =
|
||||
code = (thespan ! [theclass "code"] <<)
|
||||
whenNotNull xs a = if null xs then [] else a
|
||||
|
||||
moduleSection :: PackageRender -> Maybe TarIndex -> URL -> Bool -> [Html]
|
||||
moduleSection render mdocIndex docURL quickNav =
|
||||
moduleSection :: PackageRender -> Maybe TarIndex -> URL -> Maybe PackageId -> Bool -> [Html]
|
||||
moduleSection render mdocIndex docURL mPkgId quickNav =
|
||||
maybeToList $ fmap msect (rendModules render mdocIndex)
|
||||
where msect ModSigIndex{ modIndex = m, sigIndex = s } = toHtml $
|
||||
(if not (null s)
|
||||
@ -164,16 +164,25 @@ moduleSection render mdocIndex docURL quickNav =
|
||||
[renderDocIndexLink] ++
|
||||
[renderModuleForest docURL m ]
|
||||
else [])
|
||||
renderDocIndexLink = case mdocIndex of
|
||||
Just tindex ->
|
||||
let docIndexURL | isJust (Tar.lookup tindex "doc-index-All.html") = docURL </> "doc-index-All.html"
|
||||
| otherwise = docURL </> "doc-index.html"
|
||||
in paragraph ! [thestyle "font-size: small"]
|
||||
<< ("[" +++ anchor ! [href docIndexURL] << "Index" +++ "]" +++
|
||||
(if quickNav
|
||||
then " [" +++ anchor ! [identifier "quickjump-trigger", href "#"] << "Quick Jump" +++ "]"
|
||||
else mempty))
|
||||
Nothing -> mempty
|
||||
renderDocIndexLink = case concatLinks indexLinks of
|
||||
Nothing -> mempty
|
||||
Just links -> paragraph ! [thestyle "font-size: small"] << ("[" +++ links +++ "]")
|
||||
where
|
||||
indexLinks = catMaybes $ case mdocIndex of
|
||||
Just tindex ->
|
||||
let docIndexURL | isJust (Tar.lookup tindex "doc-index-All.html") = docURL </> "doc-index-All.html"
|
||||
| otherwise = docURL </> "doc-index.html"
|
||||
in [ Just $ anchor ! [href docIndexURL] << "Index"
|
||||
, if quickNav
|
||||
then Just $ anchor ! [identifier "quickjump-trigger", href "#"] << "Quick Jump"
|
||||
else Nothing
|
||||
]
|
||||
Nothing -> []
|
||||
++ [fmap (\pkgId -> anchor ! [href (packageURL pkgId)] << "Last Documentation") mPkgId]
|
||||
|
||||
concatLinks [] = Nothing
|
||||
concatLinks [h] = Just h
|
||||
concatLinks (h:hs) = (h +++) . ("] [" +++) <$> concatLinks hs
|
||||
|
||||
tabulate :: [(String, Html)] -> Html
|
||||
tabulate items = table ! [theclass "properties"] <<
|
||||
|
@ -80,14 +80,14 @@ import Distribution.Server.Features.Html.HtmlUtilities
|
||||
-- votes it has.
|
||||
packagePageTemplate :: PackageRender
|
||||
-> Maybe TarIndex -> Maybe DocMeta -> Maybe BS.ByteString
|
||||
-> URL -> [(DistroName, DistroPackageInfo)]
|
||||
-> URL -> Maybe PackageId -> [(DistroName, DistroPackageInfo)]
|
||||
-> Maybe [PackageName]
|
||||
-> HtmlUtilities
|
||||
-> Bool
|
||||
-> [TemplateAttr]
|
||||
packagePageTemplate render
|
||||
mdocIndex mdocMeta mreadme
|
||||
docURL distributions
|
||||
docURL mPkgId distributions
|
||||
deprs utilities isCandidate =
|
||||
if isCandidate
|
||||
then
|
||||
@ -97,7 +97,7 @@ packagePageTemplate render
|
||||
, "doc" $= docFieldsTemplate
|
||||
] ++
|
||||
-- Miscellaneous things that could still stand to be refactored a bit.
|
||||
[ "moduleList" $= Old.moduleSection render mdocIndex docURL False
|
||||
[ "moduleList" $= Old.moduleSection render mdocIndex docURL mPkgId False
|
||||
, "downloadSection" $= Old.downloadSection render
|
||||
]
|
||||
else
|
||||
@ -107,7 +107,7 @@ packagePageTemplate render
|
||||
, "doc" $= docFieldsTemplate
|
||||
] ++
|
||||
-- Miscellaneous things that could still stand to be refactored a bit.
|
||||
[ "moduleList" $= Old.moduleSection render mdocIndex docURL hasQuickNav
|
||||
[ "moduleList" $= Old.moduleSection render mdocIndex docURL mPkgId hasQuickNav
|
||||
, "executables" $= (commaList . map toHtml $ rendExecNames render)
|
||||
, "downloadSection" $= Old.downloadSection render
|
||||
, "stability" $= renderStability desc
|
||||
@ -339,7 +339,6 @@ candidatesPageTemplate cands candidates candidatesCore=
|
||||
, toHtml $ ". " ++ fromShortText (synopsis desc)
|
||||
]
|
||||
|
||||
|
||||
-- #ToDo: Pick out several interesting versions to display, with a link to
|
||||
-- display all versions.
|
||||
renderVersion :: PackageId -> [(Version, VersionStatus)] -> Maybe String -> Html
|
||||
|
24
src/Distribution/Server/Util/NLP/LICENSE
Normal file
24
src/Distribution/Server/Util/NLP/LICENSE
Normal file
@ -0,0 +1,24 @@
|
||||
Copyright (c) 2012, Dag Odenhall
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
81
src/Distribution/Server/Util/NLP/Snowball.hs
Normal file
81
src/Distribution/Server/Util/NLP/Snowball.hs
Normal file
@ -0,0 +1,81 @@
|
||||
module Distribution.Server.Util.NLP.Snowball where
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
import Control.Concurrent (MVar, newMVar, withMVar)
|
||||
import Control.Monad (forM, when)
|
||||
-------------------------------------------------------------------------------
|
||||
import Data.ByteString.Char8 (packCStringLen, useAsCString)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as Text
|
||||
import qualified Data.Text.Encoding as Text
|
||||
-------------------------------------------------------------------------------
|
||||
import Foreign (ForeignPtr, FunPtr, Ptr, newForeignPtr,
|
||||
nullPtr, withForeignPtr)
|
||||
import Foreign.C (CInt (..), CString)
|
||||
-------------------------------------------------------------------------------
|
||||
import System.IO.Unsafe (unsafePerformIO)
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
stem :: Text -> Text
|
||||
stem word = let [a] = stems [word] in a
|
||||
|
||||
stems :: [Text] -> [Text]
|
||||
stems ws =
|
||||
unsafePerformIO $
|
||||
do stemmer <- newStemmer
|
||||
stemsWith stemmer ws
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
-- | A thread and memory safe Snowball stemmer instance.
|
||||
newtype Stemmer = Stemmer (MVar (ForeignPtr Struct))
|
||||
|
||||
-- | Create a new reusable 'Stemmer' instance.
|
||||
newStemmer :: IO Stemmer
|
||||
newStemmer = do
|
||||
struct <- stemmer_new
|
||||
when (struct == nullPtr) $
|
||||
error "Text.Snowball.newStemmer: nullPtr"
|
||||
structPtr <- newForeignPtr stemmer_delete struct
|
||||
mvar <- newMVar (structPtr)
|
||||
return $ Stemmer mvar
|
||||
|
||||
-- | Use a 'Stemmer' to stem a word. This can be used more efficiently
|
||||
-- than 'stem' because you can keep a stemmer around and reuse it, but it
|
||||
-- requires 'IO' to ensure thread safety.
|
||||
stemWith :: Stemmer -> Text -> IO Text
|
||||
stemWith stemmer word = do
|
||||
[a] <- stemsWith stemmer [word]
|
||||
return a
|
||||
|
||||
-- | Use a 'Stemmer' to stem multiple words in one go. This can be more
|
||||
-- efficient than @'mapM' 'stemWith'@ because the 'Stemmer' is only
|
||||
-- locked once.
|
||||
stemsWith :: Stemmer -> [Text] -> IO [Text]
|
||||
stemsWith (Stemmer mvar) ws =
|
||||
withMVar mvar $ \(structPtr) ->
|
||||
withForeignPtr structPtr $ \struct ->
|
||||
forM ws $ \word ->
|
||||
useAsCString (Text.encodeUtf8 word) $ \word' ->
|
||||
do ptr <- stemmer_stem struct word' $
|
||||
fromIntegral $ Text.length word
|
||||
len <- stemmer_length struct
|
||||
bytes <- packCStringLen (ptr,fromIntegral len)
|
||||
return $ Text.decodeUtf8 bytes
|
||||
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
data Struct
|
||||
|
||||
foreign import ccall unsafe "libstemmer.h english_ISO_8859_1_stemmer_new"
|
||||
stemmer_new :: IO (Ptr Struct)
|
||||
|
||||
foreign import ccall unsafe "libstemmer.h &english_ISO_8859_1_stemmer_delete"
|
||||
stemmer_delete :: FunPtr (Ptr Struct -> IO ())
|
||||
|
||||
foreign import ccall unsafe "libstemmer.h english_ISO_8859_1_stemmer_stem"
|
||||
stemmer_stem :: Ptr Struct -> CString -> CInt -> IO (CString)
|
||||
|
||||
foreign import ccall unsafe "libstemmer.h english_ISO_8859_1_stemmer_length"
|
||||
stemmer_length :: Ptr Struct -> IO CInt
|
Loading…
Reference in New Issue
Block a user