replace '_' and ' ' when matching wikipedia article names

This commit is contained in:
Brian Quinion 2013-09-09 14:10:09 +01:00
parent 28a5ef899e
commit dab59d59de

View File

@ -20,5 +20,9 @@ $PSQL -c "alter table entity add column description_en text"
$PSQL -c "update entity set description_en = description from entity_description where entity.entity_id = entity_description.entity_id and language = 'en'"
cat totals.txt | $PSQL -c "COPY import_link_hit from STDIN WITH CSV DELIMITER ' '"
$PSQL -c "insert into link_hit select target||'wiki', catch_decode_url_part(value), sum(hits) from import_link_hit group by target||'wiki', catch_decode_url_part(value)"
$PSQL -c "truncate link_hit"
$PSQL -c "insert into link_hit select target||'wiki', replace(catch_decode_url_part(value), '_', ' '), sum(hits) from import_link_hit where replace(catch_decode_url_part(value), '_', ' ') is not null group by target||'wiki', replace(dcatch_decode_url_part(value), '_', ' ')"
$PSQL -c "truncate entity_link_hit"
$PSQL -c "insert into entity_link_hit select entity_id, target, value, coalesce(hits,0) from entity_link left outer join link_hit using (target, value)"
$PSQL -c "create table entity_hit as select entity_id,sum(hits) as hits from entity_link_hit group by entity_id"
$PSQL -c "create unique index idx_entity_hit on entity_hit using btree (entity_id)"